clang  7.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
29 public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32 
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
140 public:
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = llvm::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
// Scope guard around the line level used for a compound statement: the
// constructor remembers the current level and increments it when
// Style.BraceWrapping.IndentBraces is set; the destructor restores it.
//
// NOTE(review): the embedded listing numbers jump over 172, 174 and 177, so
// the class head, the first line of the constructor signature and a line
// directly before `Parser->addUnwrappedLine();` are not visible here. Confirm
// against the real file whether that call is conditional.
173 public:
175  const FormatStyle &Style, unsigned &LineLevel)
176  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
178  Parser->addUnwrappedLine();
179  if (Style.BraceWrapping.IndentBraces)
180  ++LineLevel;
181  }
// Undo any level change made by the constructor.
182  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
183 
184 private:
185  unsigned &LineLevel;
186  unsigned OldLineLevel;
187 };
188 
189 namespace {
190 
191 class IndexedTokenSource : public FormatTokenSource {
192 public:
193  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
194  : Tokens(Tokens), Position(-1) {}
195 
196  FormatToken *getNextToken() override {
197  ++Position;
198  return Tokens[Position];
199  }
200 
201  unsigned getPosition() override {
202  assert(Position >= 0);
203  return Position;
204  }
205 
206  FormatToken *setPosition(unsigned P) override {
207  Position = P;
208  return Tokens[Position];
209  }
210 
211  void reset() { Position = -1; }
212 
213 private:
215  int Position;
216 };
217 
218 } // end anonymous namespace
219 
// Constructor of UnwrappedLineParser (tail of the signature plus the member
// initialiser list). It starts with a fresh UnwrappedLine, no active token
// source (Tokens is null until one is installed), PPBranchLevel at -1 and
// include-guard detection rejected up front when Style.IndentPPDirectives is
// PPDIS_None.
//
// NOTE(review): the embedded listing numbers jump over 220 and 223, so the
// head of the signature and one parameter line are not visible here. The
// initialiser `AllTokens(Tokens)` presumably refers to that hidden parameter
// rather than to the member initialised by `Tokens(nullptr)` - confirm.
221  const AdditionalKeywords &Keywords,
222  unsigned FirstStartColumn,
224  UnwrappedLineConsumer &Callback)
225  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
226  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
227  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
228  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
229  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
230  ? IG_Rejected
231  : IG_Inited),
232  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
233 
234 void UnwrappedLineParser::reset() {
235  PPBranchLevel = -1;
236  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
237  ? IG_Rejected
238  : IG_Inited;
239  IncludeGuardToken = nullptr;
240  Line.reset(new UnwrappedLine);
241  CommentsBeforeNextToken.clear();
242  FormatTok = nullptr;
243  MustBreakBeforeNextToken = false;
244  PreprocessorDirectives.clear();
245  CurrentLines = &Lines;
246  DeclarationScopeStack.clear();
247  PPStack.clear();
248  Line->FirstStartColumn = FirstStartColumn;
249 }
250 
// Top-level driver: parses the complete token array into unwrapped lines and
// hands them to Callback. The whole parse is repeated for as long as
// PPLevelBranchIndex is non-empty; after each run the last branch index is
// advanced so that the next run takes a different preprocessor branch.
//
// NOTE(review): the embedded listing numbers jump over 251, so the function
// head is not visible here.
252  IndexedTokenSource TokenSource(AllTokens);
253  Line->FirstStartColumn = FirstStartColumn;
254  do {
255  LLVM_DEBUG(llvm::dbgs() << "----\n");
// Start every run from a clean parser state and the first token.
256  reset();
257  Tokens = &TokenSource;
258  TokenSource.reset();
259 
260  readToken();
261  parseFile();
262 
263  // If we found an include guard then all preprocessor directives (other than
264  // the guard) are over-indented by one.
265  if (IncludeGuard == IG_Found)
266  for (auto &Line : Lines)
267  if (Line.InPPDirective && Line.Level > 0)
268  --Line.Level;
269 
270  // Create line with eof token.
271  pushToken(FormatTok);
272  addUnwrappedLine();
273 
// Deliver the lines of this run to the consumer, then drop them.
274  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
275  E = Lines.end();
276  I != E; ++I) {
277  Callback.consumeUnwrappedLine(*I);
278  }
279  Callback.finishRun();
280  Lines.clear();
// Discard trailing levels whose branches have all been visited ...
281  while (!PPLevelBranchIndex.empty() &&
282  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
283  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
284  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
285  }
// ... and advance to the next branch of the innermost remaining level.
286  if (!PPLevelBranchIndex.empty()) {
287  ++PPLevelBranchIndex.back();
288  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
289  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
290  }
291  } while (!PPLevelBranchIndex.empty());
292 }
293 
294 void UnwrappedLineParser::parseFile() {
295  // The top-level context in a file always has declarations, except for pre-
296  // processor directives and JavaScript files.
297  bool MustBeDeclaration =
298  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
299  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
300  MustBeDeclaration);
301  if (Style.Language == FormatStyle::LK_TextProto)
302  parseBracedList();
303  else
304  parseLevel(/*HasOpeningBrace=*/false);
305  // Make sure to format the remaining tokens.
306  flushComments(true);
307  addUnwrappedLine();
308 }
309 
310 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
311  bool SwitchLabelEncountered = false;
312  do {
313  tok::TokenKind kind = FormatTok->Tok.getKind();
314  if (FormatTok->Type == TT_MacroBlockBegin) {
315  kind = tok::l_brace;
316  } else if (FormatTok->Type == TT_MacroBlockEnd) {
317  kind = tok::r_brace;
318  }
319 
320  switch (kind) {
321  case tok::comment:
322  nextToken();
323  addUnwrappedLine();
324  break;
325  case tok::l_brace:
326  // FIXME: Add parameter whether this can happen - if this happens, we must
327  // be in a non-declaration context.
328  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
329  continue;
330  parseBlock(/*MustBeDeclaration=*/false);
331  addUnwrappedLine();
332  break;
333  case tok::r_brace:
334  if (HasOpeningBrace)
335  return;
336  nextToken();
337  addUnwrappedLine();
338  break;
339  case tok::kw_default: {
340  unsigned StoredPosition = Tokens->getPosition();
341  FormatToken *Next = Tokens->getNextToken();
342  FormatTok = Tokens->setPosition(StoredPosition);
343  if (Next && Next->isNot(tok::colon)) {
344  // default not followed by ':' is not a case label; treat it like
345  // an identifier.
346  parseStructuralElement();
347  break;
348  }
349  // Else, if it is 'default:', fall through to the case handling.
350  LLVM_FALLTHROUGH;
351  }
352  case tok::kw_case:
353  if (Style.Language == FormatStyle::LK_JavaScript &&
354  Line->MustBeDeclaration) {
355  // A 'case: string' style field declaration.
356  parseStructuralElement();
357  break;
358  }
359  if (!SwitchLabelEncountered &&
360  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
361  ++Line->Level;
362  SwitchLabelEncountered = true;
363  parseStructuralElement();
364  break;
365  default:
366  parseStructuralElement();
367  break;
368  }
369  } while (!eof());
370 }
371 
// Looks ahead from the current '{' and assigns a BlockKind (BK_Block or
// BK_BracedInit) to it and to the braces nested inside it, based on the token
// that follows each closing brace and on tokens seen inside. The token source
// is rewound to the starting position before returning, so the scan does not
// consume any tokens.
//
// \p ExpectClassBody: when true, a ';' after the outermost closing brace does
// not by itself mark that brace pair as a braced list.
372 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
373  // We'll parse forward through the tokens until we hit
374  // a closing brace or eof - note that getNextToken() will
375  // parse macros, so this will magically work inside macro
376  // definitions, too.
377  unsigned StoredPosition = Tokens->getPosition();
378  FormatToken *Tok = FormatTok;
379  const FormatToken *PrevTok = Tok->Previous;
380  // Keep a stack of positions of lbrace tokens. We will
381  // update information about whether an lbrace starts a
382  // braced init list or a different block during the loop.
383  SmallVector<FormatToken *, 8> LBraceStack;
384  assert(Tok->Tok.is(tok::l_brace));
385  do {
386  // Get next non-comment token.
387  FormatToken *NextTok;
// NOTE(review): ReadTokens is incremented but never read in the visible code.
388  unsigned ReadTokens = 0;
389  do {
390  NextTok = Tokens->getNextToken();
391  ++ReadTokens;
392  } while (NextTok->is(tok::comment));
393 
394  switch (Tok->Tok.getKind()) {
395  case tok::l_brace:
396  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
397  if (PrevTok->isOneOf(tok::colon, tok::less))
398  // A ':' indicates this code is in a type, or a braced list
399  // following a label in an object literal ({a: {b: 1}}).
400  // A '<' could be an object used in a comparison, but that is nonsense
401  // code (can never return true), so more likely it is a generic type
402  // argument (`X<{a: string; b: number}>`).
403  // The code below could be confused by semicolons between the
404  // individual members in a type member list, which would normally
405  // trigger BK_Block. In both cases, this must be parsed as an inline
406  // braced init.
407  Tok->BlockKind = BK_BracedInit;
408  else if (PrevTok->is(tok::r_paren))
409  // `) { }` can only occur in function or method declarations in JS.
410  Tok->BlockKind = BK_Block;
411  } else {
412  Tok->BlockKind = BK_Unknown;
413  }
414  LBraceStack.push_back(Tok);
415  break;
416  case tok::r_brace:
417  if (LBraceStack.empty())
418  break;
// Only classify pairs whose opening brace has not been decided yet.
419  if (LBraceStack.back()->BlockKind == BK_Unknown) {
420  bool ProbablyBracedList = false;
421  if (Style.Language == FormatStyle::LK_Proto) {
422  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
423  } else {
424  // Using OriginalColumn to distinguish between ObjC methods and
425  // binary operators is a bit hacky.
426  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
427  NextTok->OriginalColumn == 0;
428 
429  // If there is a comma, semicolon or right paren after the closing
430  // brace, we assume this is a braced initializer list. Note that
431  // regardless how we mark inner braces here, we will overwrite the
432  // BlockKind later if we parse a braced list (where all blocks
433  // inside are by default braced lists), or when we explicitly detect
434  // blocks (for example while parsing lambdas).
435  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
436  // braced list in JS.
// NOTE(review): the embedded listing numbers jump over 438, so the start of
// the first operand (the opening of the parenthesised group that ends in
// `Keywords.kw_as))` below) is not visible here - confirm against the real
// file.
437  ProbablyBracedList =
439  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
440  Keywords.kw_as)) ||
441  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
442  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
443  tok::r_paren, tok::r_square, tok::l_brace,
444  tok::ellipsis) ||
445  (NextTok->is(tok::identifier) &&
446  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
447  (NextTok->is(tok::semi) &&
448  (!ExpectClassBody || LBraceStack.size() != 1)) ||
449  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
450  if (NextTok->is(tok::l_square)) {
451  // We can have an array subscript after a braced init
452  // list, but C++11 attributes are expected after blocks.
453  NextTok = Tokens->getNextToken();
454  ++ReadTokens;
455  ProbablyBracedList = NextTok->isNot(tok::l_square);
456  }
457  }
// Record the verdict on both the closing and the matching opening brace.
458  if (ProbablyBracedList) {
459  Tok->BlockKind = BK_BracedInit;
460  LBraceStack.back()->BlockKind = BK_BracedInit;
461  } else {
462  Tok->BlockKind = BK_Block;
463  LBraceStack.back()->BlockKind = BK_Block;
464  }
465  }
466  LBraceStack.pop_back();
467  break;
// Seeing any of these tokens marks a still-undecided innermost opening brace
// as the start of a block.
468  case tok::at:
469  case tok::semi:
470  case tok::kw_if:
471  case tok::kw_while:
472  case tok::kw_for:
473  case tok::kw_switch:
474  case tok::kw_try:
475  case tok::kw___try:
476  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
477  LBraceStack.back()->BlockKind = BK_Block;
478  break;
479  default:
480  break;
481  }
482  PrevTok = Tok;
483  Tok = NextTok;
484  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
485 
486  // Assume other blocks for all unclosed opening braces.
487  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
488  if (LBraceStack[i]->BlockKind == BK_Unknown)
489  LBraceStack[i]->BlockKind = BK_Block;
490  }
491 
// Rewind so the caller continues from the brace it started on.
492  FormatTok = Tokens->setPosition(StoredPosition);
493 }
494 
/// Folds the \c std::hash of \p v into \p seed, so that feeding a sequence of
/// values one after another yields a single combined hash.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  // Mix in a fixed constant and shifted copies of the running seed.
  const std::size_t Mixed =
      ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
500 
501 size_t UnwrappedLineParser::computePPHash() const {
502  size_t h = 0;
503  for (const auto &i : PPStack) {
504  hash_combine(h, size_t(i.Kind));
505  hash_combine(h, i.Line);
506  }
507  return h;
508 }
509 
// Parses a block that starts at the current '{' (or macro block begin token)
// up to and including its closing token, and links the opening and closing
// lines to each other when both lie in the same preprocessor context.
//
// \p MustBeDeclaration is the declaration flag installed for the block body.
// \p AddLevel: when true, the body is parsed one line level deeper.
// \p MunchSemi: when true, a ';' directly after the closing token is consumed
// as well.
510 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
511  bool MunchSemi) {
512  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
513  "'{' or macro block token expected");
514  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
515  FormatTok->BlockKind = BK_Block;
516 
// Snapshot of the preprocessor stack at the opening token; compared with the
// snapshot taken at the closing token further down.
517  size_t PPStartHash = computePPHash();
518 
519  unsigned InitialLevel = Line->Level;
520  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
521 
522  if (MacroBlock && FormatTok->is(tok::l_paren))
523  parseParens();
524 
525  size_t NbPreprocessorDirectives =
526  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
527  addUnwrappedLine();
// NOTE(review): the embedded listing numbers jump over 530, so the value
// chosen when CurrentLines is empty is not visible here. The later comparison
// against UnwrappedLine::kInvalidIndex suggests it is that constant - confirm.
528  size_t OpeningLineIndex =
529  CurrentLines->empty()
531  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
532 
533  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
534  MustBeDeclaration);
535  if (AddLevel)
536  ++Line->Level;
537  parseLevel(/*HasOpeningBrace=*/true);
538 
539  if (eof())
540  return;
541 
// The body ended on something other than the expected closing token: restore
// the level and give up on this block.
542  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
543  : !FormatTok->is(tok::r_brace)) {
544  Line->Level = InitialLevel;
545  FormatTok->BlockKind = BK_Block;
546  return;
547  }
548 
549  size_t PPEndHash = computePPHash();
550 
551  // Munch the closing brace.
552  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
553 
554  if (MacroBlock && FormatTok->is(tok::l_paren))
555  parseParens();
556 
557  if (MunchSemi && FormatTok->Tok.is(tok::semi))
558  nextToken();
559  Line->Level = InitialLevel;
560 
// Only link opening and closing lines when the preprocessor stack hashed the
// same at both ends of the block.
561  if (PPStartHash == PPEndHash) {
562  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
563  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
564  // Update the opening line to add the forward reference as well
565  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
566  CurrentLines->size() - 1;
567  }
568  }
569 }
570 
571 static bool isGoogScope(const UnwrappedLine &Line) {
572  // FIXME: Closure-library specific stuff should not be hard-coded but be
573  // configurable.
574  if (Line.Tokens.size() < 4)
575  return false;
576  auto I = Line.Tokens.begin();
577  if (I->Tok->TokenText != "goog")
578  return false;
579  ++I;
580  if (I->Tok->isNot(tok::period))
581  return false;
582  ++I;
583  if (I->Tok->TokenText != "scope")
584  return false;
585  ++I;
586  return I->Tok->is(tok::l_paren);
587 }
588 
589 static bool isIIFE(const UnwrappedLine &Line,
590  const AdditionalKeywords &Keywords) {
591  // Look for the start of an immediately invoked anonymous function.
592  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
593  // This is commonly done in JavaScript to create a new, anonymous scope.
594  // Example: (function() { ... })()
595  if (Line.Tokens.size() < 3)
596  return false;
597  auto I = Line.Tokens.begin();
598  if (I->Tok->isNot(tok::l_paren))
599  return false;
600  ++I;
601  if (I->Tok->isNot(Keywords.kw_function))
602  return false;
603  ++I;
604  return I->Tok->is(tok::l_paren);
605 }
606 
607 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
608  const FormatToken &InitialToken) {
609  if (InitialToken.is(tok::kw_namespace))
610  return Style.BraceWrapping.AfterNamespace;
611  if (InitialToken.is(tok::kw_class))
612  return Style.BraceWrapping.AfterClass;
613  if (InitialToken.is(tok::kw_union))
614  return Style.BraceWrapping.AfterUnion;
615  if (InitialToken.is(tok::kw_struct))
616  return Style.BraceWrapping.AfterStruct;
617  return false;
618 }
619 
620 void UnwrappedLineParser::parseChildBlock() {
621  FormatTok->BlockKind = BK_Block;
622  nextToken();
623  {
624  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
625  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
626  ScopedLineState LineState(*this);
627  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
628  /*MustBeDeclaration=*/false);
629  Line->Level += SkipIndent ? 0 : 1;
630  parseLevel(/*HasOpeningBrace=*/true);
631  flushComments(isOnNewLine(*FormatTok));
632  Line->Level -= SkipIndent ? 0 : 1;
633  }
634  nextToken();
635 }
636 
637 void UnwrappedLineParser::parsePPDirective() {
638  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
639  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
640  nextToken();
641 
642  if (!FormatTok->Tok.getIdentifierInfo()) {
643  parsePPUnknown();
644  return;
645  }
646 
647  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
648  case tok::pp_define:
649  parsePPDefine();
650  return;
651  case tok::pp_if:
652  parsePPIf(/*IfDef=*/false);
653  break;
654  case tok::pp_ifdef:
655  case tok::pp_ifndef:
656  parsePPIf(/*IfDef=*/true);
657  break;
658  case tok::pp_else:
659  parsePPElse();
660  break;
661  case tok::pp_elif:
662  parsePPElIf();
663  break;
664  case tok::pp_endif:
665  parsePPEndIf();
666  break;
667  default:
668  parsePPUnknown();
669  break;
670  }
671 }
672 
673 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
674  size_t Line = CurrentLines->size();
675  if (CurrentLines == &PreprocessorDirectives)
676  Line += Lines.size();
677 
678  if (Unreachable ||
679  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
680  PPStack.push_back({PP_Unreachable, Line});
681  else
682  PPStack.push_back({PP_Conditional, Line});
683 }
684 
685 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
686  ++PPBranchLevel;
687  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
688  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
689  PPLevelBranchIndex.push_back(0);
690  PPLevelBranchCount.push_back(0);
691  }
692  PPChainBranchIndex.push(0);
693  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
694  conditionalCompilationCondition(Unreachable || Skip);
695 }
696 
697 void UnwrappedLineParser::conditionalCompilationAlternative() {
698  if (!PPStack.empty())
699  PPStack.pop_back();
700  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
701  if (!PPChainBranchIndex.empty())
702  ++PPChainBranchIndex.top();
703  conditionalCompilationCondition(
704  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
705  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
706 }
707 
708 void UnwrappedLineParser::conditionalCompilationEnd() {
709  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
710  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
711  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
712  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
713  }
714  }
715  // Guard against #endif's without #if.
716  if (PPBranchLevel > -1)
717  --PPBranchLevel;
718  if (!PPChainBranchIndex.empty())
719  PPChainBranchIndex.pop();
720  if (!PPStack.empty())
721  PPStack.pop_back();
722 }
723 
724 void UnwrappedLineParser::parsePPIf(bool IfDef) {
725  bool IfNDef = FormatTok->is(tok::pp_ifndef);
726  nextToken();
727  bool Unreachable = false;
728  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
729  Unreachable = true;
730  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
731  Unreachable = true;
732  conditionalCompilationStart(Unreachable);
733  FormatToken *IfCondition = FormatTok;
734  // If there's a #ifndef on the first line, and the only lines before it are
735  // comments, it could be an include guard.
736  bool MaybeIncludeGuard = IfNDef;
737  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
738  for (auto &Line : Lines) {
739  if (!Line.Tokens.front().Tok->is(tok::comment)) {
740  MaybeIncludeGuard = false;
741  IncludeGuard = IG_Rejected;
742  break;
743  }
744  }
745  --PPBranchLevel;
746  parsePPUnknown();
747  ++PPBranchLevel;
748  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
749  IncludeGuard = IG_IfNdefed;
750  IncludeGuardToken = IfCondition;
751  }
752 }
753 
754 void UnwrappedLineParser::parsePPElse() {
755  // If a potential include guard has an #else, it's not an include guard.
756  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
757  IncludeGuard = IG_Rejected;
758  conditionalCompilationAlternative();
759  if (PPBranchLevel > -1)
760  --PPBranchLevel;
761  parsePPUnknown();
762  ++PPBranchLevel;
763 }
764 
765 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
766 
// Handles #endif: closes the current conditional level, parses the rest of
// the directive line, and promotes a pending include-guard candidate to
// IG_Found when the token at the current position is eof.
//
// NOTE(review): the embedded listing numbers jump over 776, so the final
// operand of the condition below (and its closing parenthesis) is not visible
// here.
767 void UnwrappedLineParser::parsePPEndIf() {
768  conditionalCompilationEnd();
769  parsePPUnknown();
770  // If the #endif of a potential include guard is the last thing in the file,
771  // then we found an include guard.
772  unsigned TokenPosition = Tokens->getPosition();
773  FormatToken *PeekNext = AllTokens[TokenPosition];
774  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
775  PeekNext->is(tok::eof) &&
777  IncludeGuard = IG_Found;
778 }
779 
// Handles #define: validates the macro name, advances the include-guard state
// machine, emits the "#define NAME(params)" part as its own line and then
// parses the macro body one level deeper via parseFile().
780 void UnwrappedLineParser::parsePPDefine() {
781  nextToken();
782 
// A #define that is not followed by an identifier rules out an include guard.
783  if (FormatTok->Tok.getKind() != tok::identifier) {
784  IncludeGuard = IG_Rejected;
785  IncludeGuardToken = nullptr;
786  parsePPUnknown();
787  return;
788  }
789 
// The macro name matches the preceding #ifndef: accept it as a guard
// candidate, unless an earlier line starts with something other than a
// comment or '#'.
790  if (IncludeGuard == IG_IfNdefed &&
791  IncludeGuardToken->TokenText == FormatTok->TokenText) {
792  IncludeGuard = IG_Defined;
793  IncludeGuardToken = nullptr;
794  for (auto &Line : Lines) {
795  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
796  IncludeGuard = IG_Rejected;
797  break;
798  }
799  }
800  }
801 
802  nextToken();
// A '(' with an empty whitespace range in front of it starts a parameter
// list.
803  if (FormatTok->Tok.getKind() == tok::l_paren &&
804  FormatTok->WhitespaceRange.getBegin() ==
805  FormatTok->WhitespaceRange.getEnd()) {
806  parseParens();
807  }
// NOTE(review): the embedded listing numbers jump over 808, so a line directly
// before the level adjustment below is not visible here - confirm against the
// real file whether the adjustment is conditional.
809  Line->Level += PPBranchLevel + 1;
810  addUnwrappedLine();
811  ++Line->Level;
812 
813  // Errors during a preprocessor directive can only affect the layout of the
814  // preprocessor directive, and thus we ignore them. An alternative approach
815  // would be to use the same approach we use on the file level (no
816  // re-indentation if there was a structural error) within the macro
817  // definition.
818  parseFile();
819 }
820 
// Consumes the remaining tokens of the current directive (until eof() reports
// true) and emits them as one unwrapped line.
//
// NOTE(review): the embedded listing numbers jump over 825, so a line directly
// before the level adjustment below is not visible here - confirm against the
// real file whether the adjustment is conditional.
821 void UnwrappedLineParser::parsePPUnknown() {
822  do {
823  nextToken();
824  } while (!eof());
826  Line->Level += PPBranchLevel + 1;
827  addUnwrappedLine();
828 }
829 
830 // Here we blacklist certain tokens that are not usually the first token in an
831 // unwrapped line. This is used in attempt to distinguish macro calls without
832 // trailing semicolons from other constructs split to several lines.
833 static bool tokenCanStartNewLine(const clang::Token &Tok) {
834  // Semicolon can be a null-statement, l_square can be a start of a macro or
835  // a C++11 attribute, but this doesn't seem to be common.
836  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
837  Tok.isNot(tok::l_square) &&
838  // Tokens that can only be used as binary operators and a part of
839  // overloaded operator names.
840  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
841  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
842  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
843  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
844  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
845  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
846  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
847  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
848  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
849  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
850  Tok.isNot(tok::lesslessequal) &&
851  // Colon is used in labels, base class lists, initializer lists,
852  // range-based for loops, ternary operator, but should never be the
853  // first token in an unwrapped line.
854  Tok.isNot(tok::colon) &&
855  // 'noexcept' is a trailing annotation.
856  Tok.isNot(tok::kw_noexcept);
857 }
858 
859 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
860  const FormatToken *FormatTok) {
861  // FIXME: This returns true for C/C++ keywords like 'struct'.
862  return FormatTok->is(tok::identifier) &&
863  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
864  !FormatTok->isOneOf(
865  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
866  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
867  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
868  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
869  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
870  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
871  Keywords.kw_from));
872 }
873 
874 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
875  const FormatToken *FormatTok) {
876  return FormatTok->Tok.isLiteral() ||
877  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
878  mustBeJSIdent(Keywords, FormatTok);
879 }
880 
881 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
882 // when encountered after a value (see mustBeJSIdentOrValue).
883 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
884  const FormatToken *FormatTok) {
885  return FormatTok->isOneOf(
886  tok::kw_return, Keywords.kw_yield,
887  // conditionals
888  tok::kw_if, tok::kw_else,
889  // loops
890  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
891  // switch/case
892  tok::kw_switch, tok::kw_case,
893  // exceptions
894  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
895  // declaration
896  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
897  Keywords.kw_async, Keywords.kw_function,
898  // import/export
899  Keywords.kw_import, tok::kw_export);
900 }
901 
902 // readTokenWithJavaScriptASI reads the next token and terminates the current
903 // line if JavaScript Automatic Semicolon Insertion must
904 // happen between the current token and the next token.
905 //
906 // This method is conservative - it cannot cover all edge cases of JavaScript,
907 // but only aims to correctly handle certain well known cases. It *must not*
908 // return true in speculative cases.
909 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
910  FormatToken *Previous = FormatTok;
911  readToken();
912  FormatToken *Next = FormatTok;
913 
914  bool IsOnSameLine =
915  CommentsBeforeNextToken.empty()
916  ? Next->NewlinesBefore == 0
917  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
918  if (IsOnSameLine)
919  return;
920 
921  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
922  bool PreviousStartsTemplateExpr =
923  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
924  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
925  // If the line contains an '@' sign, the previous token might be an
926  // annotation, which can precede another identifier/value.
927  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
928  [](UnwrappedLineNode &LineNode) {
929  return LineNode.Tok->is(tok::at);
930  }) != Line->Tokens.end();
931  if (HasAt)
932  return;
933  }
934  if (Next->is(tok::exclaim) && PreviousMustBeValue)
935  return addUnwrappedLine();
936  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
937  bool NextEndsTemplateExpr =
938  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
939  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
940  (PreviousMustBeValue ||
941  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
942  tok::minusminus)))
943  return addUnwrappedLine();
944  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
945  isJSDeclOrStmt(Keywords, Next))
946  return addUnwrappedLine();
947 }
948 
949 void UnwrappedLineParser::parseStructuralElement() {
950  assert(!FormatTok->is(tok::l_brace));
951  if (Style.Language == FormatStyle::LK_TableGen &&
952  FormatTok->is(tok::pp_include)) {
953  nextToken();
954  if (FormatTok->is(tok::string_literal))
955  nextToken();
956  addUnwrappedLine();
957  return;
958  }
959  switch (FormatTok->Tok.getKind()) {
960  case tok::kw_asm:
961  nextToken();
962  if (FormatTok->is(tok::l_brace)) {
963  FormatTok->Type = TT_InlineASMBrace;
964  nextToken();
965  while (FormatTok && FormatTok->isNot(tok::eof)) {
966  if (FormatTok->is(tok::r_brace)) {
967  FormatTok->Type = TT_InlineASMBrace;
968  nextToken();
969  addUnwrappedLine();
970  break;
971  }
972  FormatTok->Finalized = true;
973  nextToken();
974  }
975  }
976  break;
977  case tok::kw_namespace:
978  parseNamespace();
979  return;
980  case tok::kw_inline:
981  nextToken();
982  if (FormatTok->Tok.is(tok::kw_namespace)) {
983  parseNamespace();
984  return;
985  }
986  break;
987  case tok::kw_public:
988  case tok::kw_protected:
989  case tok::kw_private:
990  if (Style.Language == FormatStyle::LK_Java ||
992  nextToken();
993  else
994  parseAccessSpecifier();
995  return;
996  case tok::kw_if:
997  parseIfThenElse();
998  return;
999  case tok::kw_for:
1000  case tok::kw_while:
1001  parseForOrWhileLoop();
1002  return;
1003  case tok::kw_do:
1004  parseDoWhile();
1005  return;
1006  case tok::kw_switch:
1007  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1008  // 'switch: string' field declaration.
1009  break;
1010  parseSwitch();
1011  return;
1012  case tok::kw_default:
1013  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1014  // 'default: string' field declaration.
1015  break;
1016  nextToken();
1017  if (FormatTok->is(tok::colon)) {
1018  parseLabel();
1019  return;
1020  }
1021  // e.g. "default void f() {}" in a Java interface.
1022  break;
1023  case tok::kw_case:
1024  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1025  // 'case: string' field declaration.
1026  break;
1027  parseCaseLabel();
1028  return;
1029  case tok::kw_try:
1030  case tok::kw___try:
1031  parseTryCatch();
1032  return;
1033  case tok::kw_extern:
1034  nextToken();
1035  if (FormatTok->Tok.is(tok::string_literal)) {
1036  nextToken();
1037  if (FormatTok->Tok.is(tok::l_brace)) {
1038  if (Style.BraceWrapping.AfterExternBlock) {
1039  addUnwrappedLine();
1040  parseBlock(/*MustBeDeclaration=*/true);
1041  } else {
1042  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1043  }
1044  addUnwrappedLine();
1045  return;
1046  }
1047  }
1048  break;
1049  case tok::kw_export:
1050  if (Style.Language == FormatStyle::LK_JavaScript) {
1051  parseJavaScriptEs6ImportExport();
1052  return;
1053  }
1054  break;
1055  case tok::identifier:
1056  if (FormatTok->is(TT_ForEachMacro)) {
1057  parseForOrWhileLoop();
1058  return;
1059  }
1060  if (FormatTok->is(TT_MacroBlockBegin)) {
1061  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1062  /*MunchSemi=*/false);
1063  return;
1064  }
1065  if (FormatTok->is(Keywords.kw_import)) {
1066  if (Style.Language == FormatStyle::LK_JavaScript) {
1067  parseJavaScriptEs6ImportExport();
1068  return;
1069  }
1070  if (Style.Language == FormatStyle::LK_Proto) {
1071  nextToken();
1072  if (FormatTok->is(tok::kw_public))
1073  nextToken();
1074  if (!FormatTok->is(tok::string_literal))
1075  return;
1076  nextToken();
1077  if (FormatTok->is(tok::semi))
1078  nextToken();
1079  addUnwrappedLine();
1080  return;
1081  }
1082  }
1083  if (Style.isCpp() &&
1084  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1085  Keywords.kw_slots, Keywords.kw_qslots)) {
1086  nextToken();
1087  if (FormatTok->is(tok::colon)) {
1088  nextToken();
1089  addUnwrappedLine();
1090  return;
1091  }
1092  }
1093  // In all other cases, parse the declaration.
1094  break;
1095  default:
1096  break;
1097  }
1098  do {
1099  const FormatToken *Previous = FormatTok->Previous;
1100  switch (FormatTok->Tok.getKind()) {
1101  case tok::at:
1102  nextToken();
1103  if (FormatTok->Tok.is(tok::l_brace)) {
1104  nextToken();
1105  parseBracedList();
1106  break;
1107  }
1108  switch (FormatTok->Tok.getObjCKeywordID()) {
1109  case tok::objc_public:
1110  case tok::objc_protected:
1111  case tok::objc_package:
1112  case tok::objc_private:
1113  return parseAccessSpecifier();
1114  case tok::objc_interface:
1115  case tok::objc_implementation:
1116  return parseObjCInterfaceOrImplementation();
1117  case tok::objc_protocol:
1118  if (parseObjCProtocol())
1119  return;
1120  break;
1121  case tok::objc_end:
1122  return; // Handled by the caller.
1123  case tok::objc_optional:
1124  case tok::objc_required:
1125  nextToken();
1126  addUnwrappedLine();
1127  return;
1128  case tok::objc_autoreleasepool:
1129  nextToken();
1130  if (FormatTok->Tok.is(tok::l_brace)) {
1132  addUnwrappedLine();
1133  parseBlock(/*MustBeDeclaration=*/false);
1134  }
1135  addUnwrappedLine();
1136  return;
1137  case tok::objc_synchronized:
1138  nextToken();
1139  if (FormatTok->Tok.is(tok::l_paren))
1140  // Skip synchronization object
1141  parseParens();
1142  if (FormatTok->Tok.is(tok::l_brace)) {
1144  addUnwrappedLine();
1145  parseBlock(/*MustBeDeclaration=*/false);
1146  }
1147  addUnwrappedLine();
1148  return;
1149  case tok::objc_try:
1150  // This branch isn't strictly necessary (the kw_try case below would
1151  // do this too after the tok::at is parsed above). But be explicit.
1152  parseTryCatch();
1153  return;
1154  default:
1155  break;
1156  }
1157  break;
1158  case tok::kw_enum:
1159  // Ignore if this is part of "template <enum ...".
1160  if (Previous && Previous->is(tok::less)) {
1161  nextToken();
1162  break;
1163  }
1164 
1165  // parseEnum falls through and does not yet add an unwrapped line as an
1166  // enum definition can start a structural element.
1167  if (!parseEnum())
1168  break;
1169  // This only applies for C++.
1170  if (!Style.isCpp()) {
1171  addUnwrappedLine();
1172  return;
1173  }
1174  break;
1175  case tok::kw_typedef:
1176  nextToken();
1177  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1178  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1179  parseEnum();
1180  break;
1181  case tok::kw_struct:
1182  case tok::kw_union:
1183  case tok::kw_class:
1184  // parseRecord falls through and does not yet add an unwrapped line as a
1185  // record declaration or definition can start a structural element.
1186  parseRecord();
1187  // This does not apply for Java and JavaScript.
1188  if (Style.Language == FormatStyle::LK_Java ||
1190  if (FormatTok->is(tok::semi))
1191  nextToken();
1192  addUnwrappedLine();
1193  return;
1194  }
1195  break;
1196  case tok::period:
1197  nextToken();
1198  // In Java, classes have an implicit static member "class".
1199  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1200  FormatTok->is(tok::kw_class))
1201  nextToken();
1202  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1203  FormatTok->Tok.getIdentifierInfo())
1204  // JavaScript only has pseudo keywords, all keywords are allowed to
1205  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1206  nextToken();
1207  break;
1208  case tok::semi:
1209  nextToken();
1210  addUnwrappedLine();
1211  return;
1212  case tok::r_brace:
1213  addUnwrappedLine();
1214  return;
1215  case tok::l_paren:
1216  parseParens();
1217  break;
1218  case tok::kw_operator:
1219  nextToken();
1220  if (FormatTok->isBinaryOperator())
1221  nextToken();
1222  break;
1223  case tok::caret:
1224  nextToken();
1225  if (FormatTok->Tok.isAnyIdentifier() ||
1226  FormatTok->isSimpleTypeSpecifier())
1227  nextToken();
1228  if (FormatTok->is(tok::l_paren))
1229  parseParens();
1230  if (FormatTok->is(tok::l_brace))
1231  parseChildBlock();
1232  break;
1233  case tok::l_brace:
1234  if (!tryToParseBracedList()) {
1235  // A block outside of parentheses must be the last part of a
1236  // structural element.
1237  // FIXME: Figure out cases where this is not true, and add projections
1238  // for them (the one we know is missing are lambdas).
1239  if (Style.BraceWrapping.AfterFunction)
1240  addUnwrappedLine();
1241  FormatTok->Type = TT_FunctionLBrace;
1242  parseBlock(/*MustBeDeclaration=*/false);
1243  addUnwrappedLine();
1244  return;
1245  }
1246  // Otherwise this was a braced init list, and the structural
1247  // element continues.
1248  break;
1249  case tok::kw_try:
1250  // We arrive here when parsing function-try blocks.
1251  parseTryCatch();
1252  return;
1253  case tok::identifier: {
1254  if (FormatTok->is(TT_MacroBlockEnd)) {
1255  addUnwrappedLine();
1256  return;
1257  }
1258 
1259  // Function declarations (as opposed to function expressions) are parsed
1260  // on their own unwrapped line by continuing this loop. Function
1261  // expressions (functions that are not on their own line) must not create
1262  // a new unwrapped line, so they are special cased below.
1263  size_t TokenCount = Line->Tokens.size();
1264  if (Style.Language == FormatStyle::LK_JavaScript &&
1265  FormatTok->is(Keywords.kw_function) &&
1266  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1267  Keywords.kw_async)))) {
1268  tryToParseJSFunction();
1269  break;
1270  }
1271  if ((Style.Language == FormatStyle::LK_JavaScript ||
1272  Style.Language == FormatStyle::LK_Java) &&
1273  FormatTok->is(Keywords.kw_interface)) {
1274  if (Style.Language == FormatStyle::LK_JavaScript) {
1275  // In JavaScript/TypeScript, "interface" can be used as a standalone
1276  // identifier, e.g. in `var interface = 1;`. If "interface" is
1277  // followed by another identifier, it is very like to be an actual
1278  // interface declaration.
1279  unsigned StoredPosition = Tokens->getPosition();
1280  FormatToken *Next = Tokens->getNextToken();
1281  FormatTok = Tokens->setPosition(StoredPosition);
1282  if (Next && !mustBeJSIdent(Keywords, Next)) {
1283  nextToken();
1284  break;
1285  }
1286  }
1287  parseRecord();
1288  addUnwrappedLine();
1289  return;
1290  }
1291 
1292  // See if the following token should start a new unwrapped line.
1293  StringRef Text = FormatTok->TokenText;
1294  nextToken();
1295  if (Line->Tokens.size() == 1 &&
1296  // JS doesn't have macros, and within classes colons indicate fields,
1297  // not labels.
1299  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1300  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1301  parseLabel();
1302  return;
1303  }
1304  // Recognize function-like macro usages without trailing semicolon as
1305  // well as free-standing macros like Q_OBJECT.
1306  bool FunctionLike = FormatTok->is(tok::l_paren);
1307  if (FunctionLike)
1308  parseParens();
1309 
1310  bool FollowedByNewline =
1311  CommentsBeforeNextToken.empty()
1312  ? FormatTok->NewlinesBefore > 0
1313  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1314 
1315  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1316  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1317  addUnwrappedLine();
1318  return;
1319  }
1320  }
1321  break;
1322  }
1323  case tok::equal:
1324  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1325  // TT_JsFatArrow. The always start an expression or a child block if
1326  // followed by a curly.
1327  if (FormatTok->is(TT_JsFatArrow)) {
1328  nextToken();
1329  if (FormatTok->is(tok::l_brace))
1330  parseChildBlock();
1331  break;
1332  }
1333 
1334  nextToken();
1335  if (FormatTok->Tok.is(tok::l_brace)) {
1336  nextToken();
1337  parseBracedList();
1338  } else if (Style.Language == FormatStyle::LK_Proto &&
1339  FormatTok->Tok.is(tok::less)) {
1340  nextToken();
1341  parseBracedList(/*ContinueOnSemicolons=*/false,
1342  /*ClosingBraceKind=*/tok::greater);
1343  }
1344  break;
1345  case tok::l_square:
1346  parseSquare();
1347  break;
1348  case tok::kw_new:
1349  parseNew();
1350  break;
1351  default:
1352  nextToken();
1353  break;
1354  }
1355  } while (!eof());
1356 }
1357 
1358 bool UnwrappedLineParser::tryToParseLambda() {
1359  if (!Style.isCpp()) {
1360  nextToken();
1361  return false;
1362  }
1363  assert(FormatTok->is(tok::l_square));
1364  FormatToken &LSquare = *FormatTok;
1365  if (!tryToParseLambdaIntroducer())
1366  return false;
1367 
1368  while (FormatTok->isNot(tok::l_brace)) {
1369  if (FormatTok->isSimpleTypeSpecifier()) {
1370  nextToken();
1371  continue;
1372  }
1373  switch (FormatTok->Tok.getKind()) {
1374  case tok::l_brace:
1375  break;
1376  case tok::l_paren:
1377  parseParens();
1378  break;
1379  case tok::amp:
1380  case tok::star:
1381  case tok::kw_const:
1382  case tok::comma:
1383  case tok::less:
1384  case tok::greater:
1385  case tok::identifier:
1386  case tok::numeric_constant:
1387  case tok::coloncolon:
1388  case tok::kw_mutable:
1389  nextToken();
1390  break;
1391  case tok::arrow:
1392  FormatTok->Type = TT_LambdaArrow;
1393  nextToken();
1394  break;
1395  default:
1396  return true;
1397  }
1398  }
1399  LSquare.Type = TT_LambdaLSquare;
1400  parseChildBlock();
1401  return true;
1402 }
1403 
1404 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1405  const FormatToken *Previous = FormatTok->Previous;
1406  if (Previous &&
1407  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1408  tok::kw_delete, tok::l_square) ||
1409  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1410  Previous->isSimpleTypeSpecifier())) {
1411  nextToken();
1412  return false;
1413  }
1414  nextToken();
1415  if (FormatTok->is(tok::l_square)) {
1416  return false;
1417  }
1418  parseSquare(/*LambdaIntroducer=*/true);
1419  return true;
1420 }
1421 
1422 void UnwrappedLineParser::tryToParseJSFunction() {
1423  assert(FormatTok->is(Keywords.kw_function) ||
1424  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1425  if (FormatTok->is(Keywords.kw_async))
1426  nextToken();
1427  // Consume "function".
1428  nextToken();
1429 
1430  // Consume * (generator function). Treat it like C++'s overloaded operators.
1431  if (FormatTok->is(tok::star)) {
1432  FormatTok->Type = TT_OverloadedOperator;
1433  nextToken();
1434  }
1435 
1436  // Consume function name.
1437  if (FormatTok->is(tok::identifier))
1438  nextToken();
1439 
1440  if (FormatTok->isNot(tok::l_paren))
1441  return;
1442 
1443  // Parse formal parameter list.
1444  parseParens();
1445 
1446  if (FormatTok->is(tok::colon)) {
1447  // Parse a type definition.
1448  nextToken();
1449 
1450  // Eat the type declaration. For braced inline object types, balance braces,
1451  // otherwise just parse until finding an l_brace for the function body.
1452  if (FormatTok->is(tok::l_brace))
1453  tryToParseBracedList();
1454  else
1455  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1456  nextToken();
1457  }
1458 
1459  if (FormatTok->is(tok::semi))
1460  return;
1461 
1462  parseChildBlock();
1463 }
1464 
1465 bool UnwrappedLineParser::tryToParseBracedList() {
1466  if (FormatTok->BlockKind == BK_Unknown)
1467  calculateBraceTypes();
1468  assert(FormatTok->BlockKind != BK_Unknown);
1469  if (FormatTok->BlockKind == BK_Block)
1470  return false;
1471  nextToken();
1472  parseBracedList();
1473  return true;
1474 }
1475 
1476 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1477  tok::TokenKind ClosingBraceKind) {
1478  bool HasError = false;
1479 
1480  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1481  // replace this by using parseAssigmentExpression() inside.
1482  do {
1483  if (Style.Language == FormatStyle::LK_JavaScript) {
1484  if (FormatTok->is(Keywords.kw_function) ||
1485  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1486  tryToParseJSFunction();
1487  continue;
1488  }
1489  if (FormatTok->is(TT_JsFatArrow)) {
1490  nextToken();
1491  // Fat arrows can be followed by simple expressions or by child blocks
1492  // in curly braces.
1493  if (FormatTok->is(tok::l_brace)) {
1494  parseChildBlock();
1495  continue;
1496  }
1497  }
1498  if (FormatTok->is(tok::l_brace)) {
1499  // Could be a method inside of a braced list `{a() { return 1; }}`.
1500  if (tryToParseBracedList())
1501  continue;
1502  parseChildBlock();
1503  }
1504  }
1505  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1506  nextToken();
1507  return !HasError;
1508  }
1509  switch (FormatTok->Tok.getKind()) {
1510  case tok::caret:
1511  nextToken();
1512  if (FormatTok->is(tok::l_brace)) {
1513  parseChildBlock();
1514  }
1515  break;
1516  case tok::l_square:
1517  tryToParseLambda();
1518  break;
1519  case tok::l_paren:
1520  parseParens();
1521  // JavaScript can just have free standing methods and getters/setters in
1522  // object literals. Detect them by a "{" following ")".
1523  if (Style.Language == FormatStyle::LK_JavaScript) {
1524  if (FormatTok->is(tok::l_brace))
1525  parseChildBlock();
1526  break;
1527  }
1528  break;
1529  case tok::l_brace:
1530  // Assume there are no blocks inside a braced init list apart
1531  // from the ones we explicitly parse out (like lambdas).
1532  FormatTok->BlockKind = BK_BracedInit;
1533  nextToken();
1534  parseBracedList();
1535  break;
1536  case tok::less:
1537  if (Style.Language == FormatStyle::LK_Proto) {
1538  nextToken();
1539  parseBracedList(/*ContinueOnSemicolons=*/false,
1540  /*ClosingBraceKind=*/tok::greater);
1541  } else {
1542  nextToken();
1543  }
1544  break;
1545  case tok::semi:
1546  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1547  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1548  // used for error recovery if we have otherwise determined that this is
1549  // a braced list.
1550  if (Style.Language == FormatStyle::LK_JavaScript) {
1551  nextToken();
1552  break;
1553  }
1554  HasError = true;
1555  if (!ContinueOnSemicolons)
1556  return !HasError;
1557  nextToken();
1558  break;
1559  case tok::comma:
1560  nextToken();
1561  break;
1562  default:
1563  nextToken();
1564  break;
1565  }
1566  } while (!eof());
1567  return false;
1568 }
1569 
1570 void UnwrappedLineParser::parseParens() {
1571  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1572  nextToken();
1573  do {
1574  switch (FormatTok->Tok.getKind()) {
1575  case tok::l_paren:
1576  parseParens();
1577  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1578  parseChildBlock();
1579  break;
1580  case tok::r_paren:
1581  nextToken();
1582  return;
1583  case tok::r_brace:
1584  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1585  return;
1586  case tok::l_square:
1587  tryToParseLambda();
1588  break;
1589  case tok::l_brace:
1590  if (!tryToParseBracedList())
1591  parseChildBlock();
1592  break;
1593  case tok::at:
1594  nextToken();
1595  if (FormatTok->Tok.is(tok::l_brace)) {
1596  nextToken();
1597  parseBracedList();
1598  }
1599  break;
1600  case tok::kw_class:
1601  if (Style.Language == FormatStyle::LK_JavaScript)
1602  parseRecord(/*ParseAsExpr=*/true);
1603  else
1604  nextToken();
1605  break;
1606  case tok::identifier:
1607  if (Style.Language == FormatStyle::LK_JavaScript &&
1608  (FormatTok->is(Keywords.kw_function) ||
1609  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1610  tryToParseJSFunction();
1611  else
1612  nextToken();
1613  break;
1614  default:
1615  nextToken();
1616  break;
1617  }
1618  } while (!eof());
1619 }
1620 
1621 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1622  if (!LambdaIntroducer) {
1623  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1624  if (tryToParseLambda())
1625  return;
1626  }
1627  do {
1628  switch (FormatTok->Tok.getKind()) {
1629  case tok::l_paren:
1630  parseParens();
1631  break;
1632  case tok::r_square:
1633  nextToken();
1634  return;
1635  case tok::r_brace:
1636  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1637  return;
1638  case tok::l_square:
1639  parseSquare();
1640  break;
1641  case tok::l_brace: {
1642  if (!tryToParseBracedList())
1643  parseChildBlock();
1644  break;
1645  }
1646  case tok::at:
1647  nextToken();
1648  if (FormatTok->Tok.is(tok::l_brace)) {
1649  nextToken();
1650  parseBracedList();
1651  }
1652  break;
1653  default:
1654  nextToken();
1655  break;
1656  }
1657  } while (!eof());
1658 }
1659 
1660 void UnwrappedLineParser::parseIfThenElse() {
1661  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1662  nextToken();
1663  if (FormatTok->Tok.is(tok::kw_constexpr))
1664  nextToken();
1665  if (FormatTok->Tok.is(tok::l_paren))
1666  parseParens();
1667  bool NeedsUnwrappedLine = false;
1668  if (FormatTok->Tok.is(tok::l_brace)) {
1669  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1670  parseBlock(/*MustBeDeclaration=*/false);
1671  if (Style.BraceWrapping.BeforeElse)
1672  addUnwrappedLine();
1673  else
1674  NeedsUnwrappedLine = true;
1675  } else {
1676  addUnwrappedLine();
1677  ++Line->Level;
1678  parseStructuralElement();
1679  --Line->Level;
1680  }
1681  if (FormatTok->Tok.is(tok::kw_else)) {
1682  nextToken();
1683  if (FormatTok->Tok.is(tok::l_brace)) {
1684  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1685  parseBlock(/*MustBeDeclaration=*/false);
1686  addUnwrappedLine();
1687  } else if (FormatTok->Tok.is(tok::kw_if)) {
1688  parseIfThenElse();
1689  } else {
1690  addUnwrappedLine();
1691  ++Line->Level;
1692  parseStructuralElement();
1693  if (FormatTok->is(tok::eof))
1694  addUnwrappedLine();
1695  --Line->Level;
1696  }
1697  } else if (NeedsUnwrappedLine) {
1698  addUnwrappedLine();
1699  }
1700 }
1701 
1702 void UnwrappedLineParser::parseTryCatch() {
1703  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1704  nextToken();
1705  bool NeedsUnwrappedLine = false;
1706  if (FormatTok->is(tok::colon)) {
1707  // We are in a function try block, what comes is an initializer list.
1708  nextToken();
1709  while (FormatTok->is(tok::identifier)) {
1710  nextToken();
1711  if (FormatTok->is(tok::l_paren))
1712  parseParens();
1713  if (FormatTok->is(tok::comma))
1714  nextToken();
1715  }
1716  }
1717  // Parse try with resource.
1718  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1719  parseParens();
1720  }
1721  if (FormatTok->is(tok::l_brace)) {
1722  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1723  parseBlock(/*MustBeDeclaration=*/false);
1724  if (Style.BraceWrapping.BeforeCatch) {
1725  addUnwrappedLine();
1726  } else {
1727  NeedsUnwrappedLine = true;
1728  }
1729  } else if (!FormatTok->is(tok::kw_catch)) {
1730  // The C++ standard requires a compound-statement after a try.
1731  // If there's none, we try to assume there's a structuralElement
1732  // and try to continue.
1733  addUnwrappedLine();
1734  ++Line->Level;
1735  parseStructuralElement();
1736  --Line->Level;
1737  }
1738  while (1) {
1739  if (FormatTok->is(tok::at))
1740  nextToken();
1741  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1742  tok::kw___finally) ||
1743  ((Style.Language == FormatStyle::LK_Java ||
1745  FormatTok->is(Keywords.kw_finally)) ||
1746  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1747  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1748  break;
1749  nextToken();
1750  while (FormatTok->isNot(tok::l_brace)) {
1751  if (FormatTok->is(tok::l_paren)) {
1752  parseParens();
1753  continue;
1754  }
1755  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1756  return;
1757  nextToken();
1758  }
1759  NeedsUnwrappedLine = false;
1760  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1761  parseBlock(/*MustBeDeclaration=*/false);
1762  if (Style.BraceWrapping.BeforeCatch)
1763  addUnwrappedLine();
1764  else
1765  NeedsUnwrappedLine = true;
1766  }
1767  if (NeedsUnwrappedLine)
1768  addUnwrappedLine();
1769 }
1770 
1771 void UnwrappedLineParser::parseNamespace() {
1772  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1773 
1774  const FormatToken &InitialToken = *FormatTok;
1775  nextToken();
1776  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1777  nextToken();
1778  if (FormatTok->Tok.is(tok::l_brace)) {
1779  if (ShouldBreakBeforeBrace(Style, InitialToken))
1780  addUnwrappedLine();
1781 
1782  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1784  DeclarationScopeStack.size() > 1);
1785  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1786  // Munch the semicolon after a namespace. This is more common than one would
1787  // think. Puttin the semicolon into its own line is very ugly.
1788  if (FormatTok->Tok.is(tok::semi))
1789  nextToken();
1790  addUnwrappedLine();
1791  }
1792  // FIXME: Add error handling.
1793 }
1794 
1795 void UnwrappedLineParser::parseNew() {
1796  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1797  nextToken();
1798  if (Style.Language != FormatStyle::LK_Java)
1799  return;
1800 
1801  // In Java, we can parse everything up to the parens, which aren't optional.
1802  do {
1803  // There should not be a ;, { or } before the new's open paren.
1804  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1805  return;
1806 
1807  // Consume the parens.
1808  if (FormatTok->is(tok::l_paren)) {
1809  parseParens();
1810 
1811  // If there is a class body of an anonymous class, consume that as child.
1812  if (FormatTok->is(tok::l_brace))
1813  parseChildBlock();
1814  return;
1815  }
1816  nextToken();
1817  } while (!eof());
1818 }
1819 
1820 void UnwrappedLineParser::parseForOrWhileLoop() {
1821  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1822  "'for', 'while' or foreach macro expected");
1823  nextToken();
1824  // JS' for await ( ...
1825  if (Style.Language == FormatStyle::LK_JavaScript &&
1826  FormatTok->is(Keywords.kw_await))
1827  nextToken();
1828  if (FormatTok->Tok.is(tok::l_paren))
1829  parseParens();
1830  if (FormatTok->Tok.is(tok::l_brace)) {
1831  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1832  parseBlock(/*MustBeDeclaration=*/false);
1833  addUnwrappedLine();
1834  } else {
1835  addUnwrappedLine();
1836  ++Line->Level;
1837  parseStructuralElement();
1838  --Line->Level;
1839  }
1840 }
1841 
1842 void UnwrappedLineParser::parseDoWhile() {
1843  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1844  nextToken();
1845  if (FormatTok->Tok.is(tok::l_brace)) {
1846  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1847  parseBlock(/*MustBeDeclaration=*/false);
1848  if (Style.BraceWrapping.IndentBraces)
1849  addUnwrappedLine();
1850  } else {
1851  addUnwrappedLine();
1852  ++Line->Level;
1853  parseStructuralElement();
1854  --Line->Level;
1855  }
1856 
1857  // FIXME: Add error handling.
1858  if (!FormatTok->Tok.is(tok::kw_while)) {
1859  addUnwrappedLine();
1860  return;
1861  }
1862 
1863  nextToken();
1864  parseStructuralElement();
1865 }
1866 
1867 void UnwrappedLineParser::parseLabel() {
1868  nextToken();
1869  unsigned OldLineLevel = Line->Level;
1870  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1871  --Line->Level;
1872  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1873  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1874  parseBlock(/*MustBeDeclaration=*/false);
1875  if (FormatTok->Tok.is(tok::kw_break)) {
1877  addUnwrappedLine();
1878  parseStructuralElement();
1879  }
1880  addUnwrappedLine();
1881  } else {
1882  if (FormatTok->is(tok::semi))
1883  nextToken();
1884  addUnwrappedLine();
1885  }
1886  Line->Level = OldLineLevel;
1887  if (FormatTok->isNot(tok::l_brace)) {
1888  parseStructuralElement();
1889  addUnwrappedLine();
1890  }
1891 }
1892 
1893 void UnwrappedLineParser::parseCaseLabel() {
1894  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1895  // FIXME: fix handling of complex expressions here.
1896  do {
1897  nextToken();
1898  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1899  parseLabel();
1900 }
1901 
1902 void UnwrappedLineParser::parseSwitch() {
1903  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1904  nextToken();
1905  if (FormatTok->Tok.is(tok::l_paren))
1906  parseParens();
1907  if (FormatTok->Tok.is(tok::l_brace)) {
1908  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1909  parseBlock(/*MustBeDeclaration=*/false);
1910  addUnwrappedLine();
1911  } else {
1912  addUnwrappedLine();
1913  ++Line->Level;
1914  parseStructuralElement();
1915  --Line->Level;
1916  }
1917 }
1918 
1919 void UnwrappedLineParser::parseAccessSpecifier() {
1920  nextToken();
1921  // Understand Qt's slots.
1922  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1923  nextToken();
1924  // Otherwise, we don't know what it is, and we'd better keep the next token.
1925  if (FormatTok->Tok.is(tok::colon))
1926  nextToken();
1927  addUnwrappedLine();
1928 }
1929 
1930 bool UnwrappedLineParser::parseEnum() {
1931  // Won't be 'enum' for NS_ENUMs.
1932  if (FormatTok->Tok.is(tok::kw_enum))
1933  nextToken();
1934 
1935  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1936  // declarations. An "enum" keyword followed by a colon would be a syntax
1937  // error and thus assume it is just an identifier.
1938  if (Style.Language == FormatStyle::LK_JavaScript &&
1939  FormatTok->isOneOf(tok::colon, tok::question))
1940  return false;
1941 
1942  // Eat up enum class ...
1943  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1944  nextToken();
1945 
1946  while (FormatTok->Tok.getIdentifierInfo() ||
1947  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1948  tok::greater, tok::comma, tok::question)) {
1949  nextToken();
1950  // We can have macros or attributes in between 'enum' and the enum name.
1951  if (FormatTok->is(tok::l_paren))
1952  parseParens();
1953  if (FormatTok->is(tok::identifier)) {
1954  nextToken();
1955  // If there are two identifiers in a row, this is likely an elaborate
1956  // return type. In Java, this can be "implements", etc.
1957  if (Style.isCpp() && FormatTok->is(tok::identifier))
1958  return false;
1959  }
1960  }
1961 
1962  // Just a declaration or something is wrong.
1963  if (FormatTok->isNot(tok::l_brace))
1964  return true;
1965  FormatTok->BlockKind = BK_Block;
1966 
1967  if (Style.Language == FormatStyle::LK_Java) {
1968  // Java enums are different.
1969  parseJavaEnumBody();
1970  return true;
1971  }
1972  if (Style.Language == FormatStyle::LK_Proto) {
1973  parseBlock(/*MustBeDeclaration=*/true);
1974  return true;
1975  }
1976 
1977  // Parse enum body.
1978  nextToken();
1979  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1980  if (HasError) {
1981  if (FormatTok->is(tok::semi))
1982  nextToken();
1983  addUnwrappedLine();
1984  }
1985  return true;
1986 
1987  // There is no addUnwrappedLine() here so that we fall through to parsing a
1988  // structural element afterwards. Thus, in "enum A {} n, m;",
1989  // "} n, m;" will end up in one unwrapped line.
1990 }
1991 
1992 void UnwrappedLineParser::parseJavaEnumBody() {
1993  // Determine whether the enum is simple, i.e. does not have a semicolon or
1994  // constants with class bodies. Simple enums can be formatted like braced
1995  // lists, contracted to a single line, etc.
1996  unsigned StoredPosition = Tokens->getPosition();
1997  bool IsSimple = true;
1998  FormatToken *Tok = Tokens->getNextToken();
1999  while (Tok) {
2000  if (Tok->is(tok::r_brace))
2001  break;
2002  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2003  IsSimple = false;
2004  break;
2005  }
2006  // FIXME: This will also mark enums with braces in the arguments to enum
2007  // constants as "not simple". This is probably fine in practice, though.
2008  Tok = Tokens->getNextToken();
2009  }
2010  FormatTok = Tokens->setPosition(StoredPosition);
2011 
2012  if (IsSimple) {
2013  nextToken();
2014  parseBracedList();
2015  addUnwrappedLine();
2016  return;
2017  }
2018 
2019  // Parse the body of a more complex enum.
2020  // First add a line for everything up to the "{".
2021  nextToken();
2022  addUnwrappedLine();
2023  ++Line->Level;
2024 
2025  // Parse the enum constants.
2026  while (FormatTok) {
2027  if (FormatTok->is(tok::l_brace)) {
2028  // Parse the constant's class body.
2029  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2030  /*MunchSemi=*/false);
2031  } else if (FormatTok->is(tok::l_paren)) {
2032  parseParens();
2033  } else if (FormatTok->is(tok::comma)) {
2034  nextToken();
2035  addUnwrappedLine();
2036  } else if (FormatTok->is(tok::semi)) {
2037  nextToken();
2038  addUnwrappedLine();
2039  break;
2040  } else if (FormatTok->is(tok::r_brace)) {
2041  addUnwrappedLine();
2042  break;
2043  } else {
2044  nextToken();
2045  }
2046  }
2047 
2048  // Parse the class body after the enum's ";" if any.
2049  parseLevel(/*HasOpeningBrace=*/true);
2050  nextToken();
2051  --Line->Level;
2052  addUnwrappedLine();
2053 }
2054 
2055 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2056  const FormatToken &InitialToken = *FormatTok;
2057  nextToken();
2058 
2059  // The actual identifier can be a nested name specifier, and in macros
2060  // it is often token-pasted.
2061  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2062  tok::kw___attribute, tok::kw___declspec,
2063  tok::kw_alignas) ||
2064  ((Style.Language == FormatStyle::LK_Java ||
2066  FormatTok->isOneOf(tok::period, tok::comma))) {
2067  if (Style.Language == FormatStyle::LK_JavaScript &&
2068  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2069  // JavaScript/TypeScript supports inline object types in
2070  // extends/implements positions:
2071  // class Foo implements {bar: number} { }
2072  nextToken();
2073  if (FormatTok->is(tok::l_brace)) {
2074  tryToParseBracedList();
2075  continue;
2076  }
2077  }
2078  bool IsNonMacroIdentifier =
2079  FormatTok->is(tok::identifier) &&
2080  FormatTok->TokenText != FormatTok->TokenText.upper();
2081  nextToken();
2082  // We can have macros or attributes in between 'class' and the class name.
2083  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2084  parseParens();
2085  }
2086 
2087  // Note that parsing away template declarations here leads to incorrectly
2088  // accepting function declarations as record declarations.
2089  // In general, we cannot solve this problem. Consider:
2090  // class A<int> B() {}
2091  // which can be a function definition or a class definition when B() is a
2092  // macro. If we find enough real-world cases where this is a problem, we
2093  // can parse for the 'template' keyword in the beginning of the statement,
2094  // and thus rule out the record production in case there is no template
2095  // (this would still leave us with an ambiguity between template function
2096  // and class declarations).
2097  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2098  while (!eof()) {
2099  if (FormatTok->is(tok::l_brace)) {
2100  calculateBraceTypes(/*ExpectClassBody=*/true);
2101  if (!tryToParseBracedList())
2102  break;
2103  }
2104  if (FormatTok->Tok.is(tok::semi))
2105  return;
2106  nextToken();
2107  }
2108  }
2109  if (FormatTok->Tok.is(tok::l_brace)) {
2110  if (ParseAsExpr) {
2111  parseChildBlock();
2112  } else {
2113  if (ShouldBreakBeforeBrace(Style, InitialToken))
2114  addUnwrappedLine();
2115 
2116  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2117  /*MunchSemi=*/false);
2118  }
2119  }
2120  // There is no addUnwrappedLine() here so that we fall through to parsing a
2121  // structural element afterwards. Thus, in "class A {} n, m;",
2122  // "} n, m;" will end up in one unwrapped line.
2123 }
2124 
2125 void UnwrappedLineParser::parseObjCMethod() {
2126  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2127  "'(' or identifier expected.");
2128  do {
2129  if (FormatTok->Tok.is(tok::semi)) {
2130  nextToken();
2131  addUnwrappedLine();
2132  return;
2133  } else if (FormatTok->Tok.is(tok::l_brace)) {
2134  parseBlock(/*MustBeDeclaration=*/false);
2135  addUnwrappedLine();
2136  return;
2137  } else {
2138  nextToken();
2139  }
2140  } while (!eof());
2141 }
2142 
2143 void UnwrappedLineParser::parseObjCProtocolList() {
2144  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2145  do {
2146  nextToken();
2147  // Early exit in case someone forgot a close angle.
2148  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2149  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2150  return;
2151  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2152  nextToken(); // Skip '>'.
2153 }
2154 
2155 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2156  do {
2157  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2158  nextToken();
2159  addUnwrappedLine();
2160  break;
2161  }
2162  if (FormatTok->is(tok::l_brace)) {
2163  parseBlock(/*MustBeDeclaration=*/false);
2164  // In ObjC interfaces, nothing should be following the "}".
2165  addUnwrappedLine();
2166  } else if (FormatTok->is(tok::r_brace)) {
2167  // Ignore stray "}". parseStructuralElement doesn't consume them.
2168  nextToken();
2169  addUnwrappedLine();
2170  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2171  nextToken();
2172  parseObjCMethod();
2173  } else {
2174  parseStructuralElement();
2175  }
2176  } while (!eof());
2177 }
2178 
2179 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2180  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2181  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2182  nextToken();
2183  nextToken(); // interface name
2184 
2185  // @interface can be followed by a lightweight generic
2186  // specialization list, then either a base class or a category.
2187  if (FormatTok->Tok.is(tok::less)) {
2188  // Unlike protocol lists, generic parameterizations support
2189  // nested angles:
2190  //
2191  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2192  // NSObject <NSCopying, NSSecureCoding>
2193  //
2194  // so we need to count how many open angles we have left.
2195  unsigned NumOpenAngles = 1;
2196  do {
2197  nextToken();
2198  // Early exit in case someone forgot a close angle.
2199  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2200  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2201  break;
2202  if (FormatTok->Tok.is(tok::less))
2203  ++NumOpenAngles;
2204  else if (FormatTok->Tok.is(tok::greater)) {
2205  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2206  --NumOpenAngles;
2207  }
2208  } while (!eof() && NumOpenAngles != 0);
2209  nextToken(); // Skip '>'.
2210  }
2211  if (FormatTok->Tok.is(tok::colon)) {
2212  nextToken();
2213  nextToken(); // base class name
2214  } else if (FormatTok->Tok.is(tok::l_paren))
2215  // Skip category, if present.
2216  parseParens();
2217 
2218  if (FormatTok->Tok.is(tok::less))
2219  parseObjCProtocolList();
2220 
2221  if (FormatTok->Tok.is(tok::l_brace)) {
2223  addUnwrappedLine();
2224  parseBlock(/*MustBeDeclaration=*/true);
2225  }
2226 
2227  // With instance variables, this puts '}' on its own line. Without instance
2228  // variables, this ends the @interface line.
2229  addUnwrappedLine();
2230 
2231  parseObjCUntilAtEnd();
2232 }
2233 
2234 // Returns true for the declaration/definition form of @protocol,
2235 // false for the expression form.
2236 bool UnwrappedLineParser::parseObjCProtocol() {
2237  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2238  nextToken();
2239 
2240  if (FormatTok->is(tok::l_paren))
2241  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2242  return false;
2243 
2244  // The definition/declaration form,
2245  // @protocol Foo
2246  // - (int)someMethod;
2247  // @end
2248 
2249  nextToken(); // protocol name
2250 
2251  if (FormatTok->Tok.is(tok::less))
2252  parseObjCProtocolList();
2253 
2254  // Check for protocol declaration.
2255  if (FormatTok->Tok.is(tok::semi)) {
2256  nextToken();
2257  addUnwrappedLine();
2258  return true;
2259  }
2260 
2261  addUnwrappedLine();
2262  parseObjCUntilAtEnd();
2263  return true;
2264 }
2265 
2266 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2267  bool IsImport = FormatTok->is(Keywords.kw_import);
2268  assert(IsImport || FormatTok->is(tok::kw_export));
2269  nextToken();
2270 
2271  // Consume the "default" in "export default class/function".
2272  if (FormatTok->is(tok::kw_default))
2273  nextToken();
2274 
2275  // Consume "async function", "function" and "default function", so that these
2276  // get parsed as free-standing JS functions, i.e. do not require a trailing
2277  // semicolon.
2278  if (FormatTok->is(Keywords.kw_async))
2279  nextToken();
2280  if (FormatTok->is(Keywords.kw_function)) {
2281  nextToken();
2282  return;
2283  }
2284 
2285  // For imports, `export *`, `export {...}`, consume the rest of the line up
2286  // to the terminating `;`. For everything else, just return and continue
2287  // parsing the structural element, i.e. the declaration or expression for
2288  // `export default`.
2289  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2290  !FormatTok->isStringLiteral())
2291  return;
2292 
2293  while (!eof()) {
2294  if (FormatTok->is(tok::semi))
2295  return;
2296  if (Line->Tokens.empty()) {
2297  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2298  // import statement should terminate.
2299  return;
2300  }
2301  if (FormatTok->is(tok::l_brace)) {
2302  FormatTok->BlockKind = BK_Block;
2303  nextToken();
2304  parseBracedList();
2305  } else {
2306  nextToken();
2307  }
2308  }
2309 }
2310 
2311 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2312  StringRef Prefix = "") {
2313  llvm::dbgs() << Prefix << "Line(" << Line.Level
2314  << ", FSC=" << Line.FirstStartColumn << ")"
2315  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2316  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2317  E = Line.Tokens.end();
2318  I != E; ++I) {
2319  llvm::dbgs() << I->Tok->Tok.getName() << "["
2320  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2321  << "] ";
2322  }
2323  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2324  E = Line.Tokens.end();
2325  I != E; ++I) {
2326  const UnwrappedLineNode &Node = *I;
2328  I = Node.Children.begin(),
2329  E = Node.Children.end();
2330  I != E; ++I) {
2331  printDebugInfo(*I, "\nChild: ");
2332  }
2333  }
2334  llvm::dbgs() << "\n";
2335 }
2336 
2337 void UnwrappedLineParser::addUnwrappedLine() {
2338  if (Line->Tokens.empty())
2339  return;
2340  LLVM_DEBUG({
2341  if (CurrentLines == &Lines)
2342  printDebugInfo(*Line);
2343  });
2344  CurrentLines->push_back(std::move(*Line));
2345  Line->Tokens.clear();
2346  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2347  Line->FirstStartColumn = 0;
2348  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2349  CurrentLines->append(
2350  std::make_move_iterator(PreprocessorDirectives.begin()),
2351  std::make_move_iterator(PreprocessorDirectives.end()));
2352  PreprocessorDirectives.clear();
2353  }
2354  // Disconnect the current token from the last token on the previous line.
2355  FormatTok->Previous = nullptr;
2356 }
2357 
2358 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2359 
2360 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2361  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2362  FormatTok.NewlinesBefore > 0;
2363 }
2364 
2365 // Checks if \p FormatTok is a line comment that continues the line comment
2366 // section on \p Line.
2367 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2368  const UnwrappedLine &Line,
2369  llvm::Regex &CommentPragmasRegex) {
2370  if (Line.Tokens.empty())
2371  return false;
2372 
2373  StringRef IndentContent = FormatTok.TokenText;
2374  if (FormatTok.TokenText.startswith("//") ||
2375  FormatTok.TokenText.startswith("/*"))
2376  IndentContent = FormatTok.TokenText.substr(2);
2377  if (CommentPragmasRegex.match(IndentContent))
2378  return false;
2379 
2380  // If Line starts with a line comment, then FormatTok continues the comment
2381  // section if its original column is greater or equal to the original start
2382  // column of the line.
2383  //
2384  // Define the min column token of a line as follows: if a line ends in '{' or
2385  // contains a '{' followed by a line comment, then the min column token is
2386  // that '{'. Otherwise, the min column token of the line is the first token of
2387  // the line.
2388  //
2389  // If Line starts with a token other than a line comment, then FormatTok
2390  // continues the comment section if its original column is greater than the
2391  // original start column of the min column token of the line.
2392  //
2393  // For example, the second line comment continues the first in these cases:
2394  //
2395  // // first line
2396  // // second line
2397  //
2398  // and:
2399  //
2400  // // first line
2401  // // second line
2402  //
2403  // and:
2404  //
2405  // int i; // first line
2406  // // second line
2407  //
2408  // and:
2409  //
2410  // do { // first line
2411  // // second line
2412  // int i;
2413  // } while (true);
2414  //
2415  // and:
2416  //
2417  // enum {
2418  // a, // first line
2419  // // second line
2420  // b
2421  // };
2422  //
2423  // The second line comment doesn't continue the first in these cases:
2424  //
2425  // // first line
2426  // // second line
2427  //
2428  // and:
2429  //
2430  // int i; // first line
2431  // // second line
2432  //
2433  // and:
2434  //
2435  // do { // first line
2436  // // second line
2437  // int i;
2438  // } while (true);
2439  //
2440  // and:
2441  //
2442  // enum {
2443  // a, // first line
2444  // // second line
2445  // };
2446  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2447 
2448  // Scan for '{//'. If found, use the column of '{' as a min column for line
2449  // comment section continuation.
2450  const FormatToken *PreviousToken = nullptr;
2451  for (const UnwrappedLineNode &Node : Line.Tokens) {
2452  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2453  isLineComment(*Node.Tok)) {
2454  MinColumnToken = PreviousToken;
2455  break;
2456  }
2457  PreviousToken = Node.Tok;
2458 
2459  // Grab the last newline preceding a token in this unwrapped line.
2460  if (Node.Tok->NewlinesBefore > 0) {
2461  MinColumnToken = Node.Tok;
2462  }
2463  }
2464  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2465  MinColumnToken = PreviousToken;
2466  }
2467 
2468  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2469  MinColumnToken);
2470 }
2471 
2472 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2473  bool JustComments = Line->Tokens.empty();
2475  I = CommentsBeforeNextToken.begin(),
2476  E = CommentsBeforeNextToken.end();
2477  I != E; ++I) {
2478  // Line comments that belong to the same line comment section are put on the
2479  // same line since later we might want to reflow content between them.
2480  // Additional fine-grained breaking of line comment sections is controlled
2481  // by the class BreakableLineCommentSection in case it is desirable to keep
2482  // several line comment sections in the same unwrapped line.
2483  //
2484  // FIXME: Consider putting separate line comment sections as children to the
2485  // unwrapped line instead.
2487  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2488  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2489  addUnwrappedLine();
2490  pushToken(*I);
2491  }
2492  if (NewlineBeforeNext && JustComments)
2493  addUnwrappedLine();
2494  CommentsBeforeNextToken.clear();
2495 }
2496 
2497 void UnwrappedLineParser::nextToken(int LevelDifference) {
2498  if (eof())
2499  return;
2500  flushComments(isOnNewLine(*FormatTok));
2501  pushToken(FormatTok);
2502  FormatToken *Previous = FormatTok;
2503  if (Style.Language != FormatStyle::LK_JavaScript)
2504  readToken(LevelDifference);
2505  else
2506  readTokenWithJavaScriptASI();
2507  FormatTok->Previous = Previous;
2508 }
2509 
2510 void UnwrappedLineParser::distributeComments(
2511  const SmallVectorImpl<FormatToken *> &Comments,
2512  const FormatToken *NextTok) {
2513  // Whether or not a line comment token continues a line is controlled by
2514  // the method continuesLineCommentSection, with the following caveat:
2515  //
2516  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2517  // that each comment line from the trail is aligned with the next token, if
2518  // the next token exists. If a trail exists, the beginning of the maximal
2519  // trail is marked as a start of a new comment section.
2520  //
2521  // For example in this code:
2522  //
2523  // int a; // line about a
2524  // // line 1 about b
2525  // // line 2 about b
2526  // int b;
2527  //
2528  // the two lines about b form a maximal trail, so there are two sections, the
2529  // first one consisting of the single comment "// line about a" and the
2530  // second one consisting of the next two comments.
2531  if (Comments.empty())
2532  return;
2533  bool ShouldPushCommentsInCurrentLine = true;
2534  bool HasTrailAlignedWithNextToken = false;
2535  unsigned StartOfTrailAlignedWithNextToken = 0;
2536  if (NextTok) {
2537  // We are skipping the first element intentionally.
2538  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2539  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2540  HasTrailAlignedWithNextToken = true;
2541  StartOfTrailAlignedWithNextToken = i;
2542  }
2543  }
2544  }
2545  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2546  FormatToken *FormatTok = Comments[i];
2547  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2548  FormatTok->ContinuesLineCommentSection = false;
2549  } else {
2550  FormatTok->ContinuesLineCommentSection =
2551  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2552  }
2553  if (!FormatTok->ContinuesLineCommentSection &&
2554  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2555  ShouldPushCommentsInCurrentLine = false;
2556  }
2557  if (ShouldPushCommentsInCurrentLine) {
2558  pushToken(FormatTok);
2559  } else {
2560  CommentsBeforeNextToken.push_back(FormatTok);
2561  }
2562  }
2563 }
2564 
2565 void UnwrappedLineParser::readToken(int LevelDifference) {
2567  do {
2568  FormatTok = Tokens->getNextToken();
2569  assert(FormatTok);
2570  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2571  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2572  distributeComments(Comments, FormatTok);
2573  Comments.clear();
2574  // If there is an unfinished unwrapped line, we flush the preprocessor
2575  // directives only after that unwrapped line was finished later.
2576  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2577  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2578  assert((LevelDifference >= 0 ||
2579  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2580  "LevelDifference makes Line->Level negative");
2581  Line->Level += LevelDifference;
2582  // Comments stored before the preprocessor directive need to be output
2583  // before the preprocessor directive, at the same level as the
2584  // preprocessor directive, as we consider them to apply to the directive.
2585  flushComments(isOnNewLine(*FormatTok));
2586  parsePPDirective();
2587  }
2588  while (FormatTok->Type == TT_ConflictStart ||
2589  FormatTok->Type == TT_ConflictEnd ||
2590  FormatTok->Type == TT_ConflictAlternative) {
2591  if (FormatTok->Type == TT_ConflictStart) {
2592  conditionalCompilationStart(/*Unreachable=*/false);
2593  } else if (FormatTok->Type == TT_ConflictAlternative) {
2594  conditionalCompilationAlternative();
2595  } else if (FormatTok->Type == TT_ConflictEnd) {
2596  conditionalCompilationEnd();
2597  }
2598  FormatTok = Tokens->getNextToken();
2599  FormatTok->MustBreakBefore = true;
2600  }
2601 
2602  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2603  !Line->InPPDirective) {
2604  continue;
2605  }
2606 
2607  if (!FormatTok->Tok.is(tok::comment)) {
2608  distributeComments(Comments, FormatTok);
2609  Comments.clear();
2610  return;
2611  }
2612 
2613  Comments.push_back(FormatTok);
2614  } while (!eof());
2615 
2616  distributeComments(Comments, nullptr);
2617  Comments.clear();
2618 }
2619 
2620 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2621  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2622  if (MustBreakBeforeNextToken) {
2623  Line->Tokens.back().Tok->MustBreakBefore = true;
2624  MustBreakBeforeNextToken = false;
2625  }
2626 }
2627 
2628 } // end namespace format
2629 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:728
Indent in all namespaces.
Definition: Format.h:1278
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c.h:60
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:127
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1191
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1070
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:218
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:56
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:742
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:159
Does not indent any directives.
Definition: Format.h:1082
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:402
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:1095
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:133
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:370
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:700
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:774
Should be used for Java.
Definition: Format.h:1184
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:287
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:320
static void hash_combine(std::size_t &seed, const T &v)
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1282
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:493
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:313
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1186
ContinuationIndenter * Indenter
const AnnotatedLine * Line
const FunctionProtoType * T
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:823
bool AfterFunction
Wrap function definitions.
Definition: Format.h:680
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
SourceLocation getEnd() const
do v
Definition: arm_acle.h:78
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:304
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:649
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:140
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:171
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:48
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1268
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1201
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:302
Should be used for TableGen code.
Definition: Format.h:1193
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:714
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:325
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:652
Indents directives after the hash.
Definition: Format.h:1091
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:61
Represents a complete lambda introducer.
Definition: DeclSpec.h:2542
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:165
bool AfterClass
Wrap class definitions.
Definition: Format.h:634
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1196
StringRef Text
Definition: Format.cpp:1599
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:281
bool isStringLiteral() const
Definition: FormatToken.h:336
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:696
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:137
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:177
const FormatStyle & Style