clang  7.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
29 public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32 
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
140 public:
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = llvm::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
173 public:
175  const FormatStyle &Style, unsigned &LineLevel)
176  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
178  Parser->addUnwrappedLine();
179  if (Style.BraceWrapping.IndentBraces)
180  ++LineLevel;
181  }
182  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
183 
184 private:
185  unsigned &LineLevel;
186  unsigned OldLineLevel;
187 };
188 
189 namespace {
190 
191 class IndexedTokenSource : public FormatTokenSource {
192 public:
193  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
194  : Tokens(Tokens), Position(-1) {}
195 
196  FormatToken *getNextToken() override {
197  ++Position;
198  return Tokens[Position];
199  }
200 
201  unsigned getPosition() override {
202  assert(Position >= 0);
203  return Position;
204  }
205 
206  FormatToken *setPosition(unsigned P) override {
207  Position = P;
208  return Tokens[Position];
209  }
210 
211  void reset() { Position = -1; }
212 
213 private:
215  int Position;
216 };
217 
218 } // end anonymous namespace
219 
221  const AdditionalKeywords &Keywords,
222  unsigned FirstStartColumn,
224  UnwrappedLineConsumer &Callback)
225  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
226  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
227  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
228  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
229  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
230  ? IG_Rejected
231  : IG_Inited),
232  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
233 
234 void UnwrappedLineParser::reset() {
235  PPBranchLevel = -1;
236  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
237  ? IG_Rejected
238  : IG_Inited;
239  IncludeGuardToken = nullptr;
240  Line.reset(new UnwrappedLine);
241  CommentsBeforeNextToken.clear();
242  FormatTok = nullptr;
243  MustBreakBeforeNextToken = false;
244  PreprocessorDirectives.clear();
245  CurrentLines = &Lines;
246  DeclarationScopeStack.clear();
247  PPStack.clear();
248  Line->FirstStartColumn = FirstStartColumn;
249 }
250 
252  IndexedTokenSource TokenSource(AllTokens);
253  Line->FirstStartColumn = FirstStartColumn;
254  do {
255  LLVM_DEBUG(llvm::dbgs() << "----\n");
256  reset();
257  Tokens = &TokenSource;
258  TokenSource.reset();
259 
260  readToken();
261  parseFile();
262 
263  // If we found an include guard then all preprocessor directives (other than
264  // the guard) are over-indented by one.
265  if (IncludeGuard == IG_Found)
266  for (auto &Line : Lines)
267  if (Line.InPPDirective && Line.Level > 0)
268  --Line.Level;
269 
270  // Create line with eof token.
271  pushToken(FormatTok);
272  addUnwrappedLine();
273 
274  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
275  E = Lines.end();
276  I != E; ++I) {
277  Callback.consumeUnwrappedLine(*I);
278  }
279  Callback.finishRun();
280  Lines.clear();
281  while (!PPLevelBranchIndex.empty() &&
282  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
283  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
284  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
285  }
286  if (!PPLevelBranchIndex.empty()) {
287  ++PPLevelBranchIndex.back();
288  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
289  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
290  }
291  } while (!PPLevelBranchIndex.empty());
292 }
293 
294 void UnwrappedLineParser::parseFile() {
295  // The top-level context in a file always has declarations, except for pre-
296  // processor directives and JavaScript files.
297  bool MustBeDeclaration =
298  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
299  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
300  MustBeDeclaration);
301  if (Style.Language == FormatStyle::LK_TextProto)
302  parseBracedList();
303  else
304  parseLevel(/*HasOpeningBrace=*/false);
305  // Make sure to format the remaining tokens.
306  //
307  // LK_TextProto is special since its top-level is parsed as the body of a
308  // braced list, which does not necessarily have natural line separators such
309  // as a semicolon. Comments after the last entry that have been determined to
310  // not belong to that line, as in:
311  // key: value
312  // // endfile comment
313  // do not have a chance to be put on a line of their own until this point.
314  // Here we add this newline before end-of-file comments.
315  if (Style.Language == FormatStyle::LK_TextProto &&
316  !CommentsBeforeNextToken.empty())
317  addUnwrappedLine();
318  flushComments(true);
319  addUnwrappedLine();
320 }
321 
322 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
323  bool SwitchLabelEncountered = false;
324  do {
325  tok::TokenKind kind = FormatTok->Tok.getKind();
326  if (FormatTok->Type == TT_MacroBlockBegin) {
327  kind = tok::l_brace;
328  } else if (FormatTok->Type == TT_MacroBlockEnd) {
329  kind = tok::r_brace;
330  }
331 
332  switch (kind) {
333  case tok::comment:
334  nextToken();
335  addUnwrappedLine();
336  break;
337  case tok::l_brace:
338  // FIXME: Add parameter whether this can happen - if this happens, we must
339  // be in a non-declaration context.
340  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
341  continue;
342  parseBlock(/*MustBeDeclaration=*/false);
343  addUnwrappedLine();
344  break;
345  case tok::r_brace:
346  if (HasOpeningBrace)
347  return;
348  nextToken();
349  addUnwrappedLine();
350  break;
351  case tok::kw_default: {
352  unsigned StoredPosition = Tokens->getPosition();
353  FormatToken *Next = Tokens->getNextToken();
354  FormatTok = Tokens->setPosition(StoredPosition);
355  if (Next && Next->isNot(tok::colon)) {
356  // default not followed by ':' is not a case label; treat it like
357  // an identifier.
358  parseStructuralElement();
359  break;
360  }
361  // Else, if it is 'default:', fall through to the case handling.
362  LLVM_FALLTHROUGH;
363  }
364  case tok::kw_case:
365  if (Style.Language == FormatStyle::LK_JavaScript &&
366  Line->MustBeDeclaration) {
367  // A 'case: string' style field declaration.
368  parseStructuralElement();
369  break;
370  }
371  if (!SwitchLabelEncountered &&
372  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
373  ++Line->Level;
374  SwitchLabelEncountered = true;
375  parseStructuralElement();
376  break;
377  default:
378  parseStructuralElement();
379  break;
380  }
381  } while (!eof());
382 }
383 
384 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
385  // We'll parse forward through the tokens until we hit
386  // a closing brace or eof - note that getNextToken() will
387  // parse macros, so this will magically work inside macro
388  // definitions, too.
389  unsigned StoredPosition = Tokens->getPosition();
390  FormatToken *Tok = FormatTok;
391  const FormatToken *PrevTok = Tok->Previous;
392  // Keep a stack of positions of lbrace tokens. We will
393  // update information about whether an lbrace starts a
394  // braced init list or a different block during the loop.
395  SmallVector<FormatToken *, 8> LBraceStack;
396  assert(Tok->Tok.is(tok::l_brace));
397  do {
398  // Get next non-comment token.
399  FormatToken *NextTok;
400  unsigned ReadTokens = 0;
401  do {
402  NextTok = Tokens->getNextToken();
403  ++ReadTokens;
404  } while (NextTok->is(tok::comment));
405 
406  switch (Tok->Tok.getKind()) {
407  case tok::l_brace:
408  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
409  if (PrevTok->isOneOf(tok::colon, tok::less))
410  // A ':' indicates this code is in a type, or a braced list
411  // following a label in an object literal ({a: {b: 1}}).
412  // A '<' could be an object used in a comparison, but that is nonsense
413  // code (can never return true), so more likely it is a generic type
414  // argument (`X<{a: string; b: number}>`).
415  // The code below could be confused by semicolons between the
416  // individual members in a type member list, which would normally
417  // trigger BK_Block. In both cases, this must be parsed as an inline
418  // braced init.
419  Tok->BlockKind = BK_BracedInit;
420  else if (PrevTok->is(tok::r_paren))
421  // `) { }` can only occur in function or method declarations in JS.
422  Tok->BlockKind = BK_Block;
423  } else {
424  Tok->BlockKind = BK_Unknown;
425  }
426  LBraceStack.push_back(Tok);
427  break;
428  case tok::r_brace:
429  if (LBraceStack.empty())
430  break;
431  if (LBraceStack.back()->BlockKind == BK_Unknown) {
432  bool ProbablyBracedList = false;
433  if (Style.Language == FormatStyle::LK_Proto) {
434  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
435  } else {
436  // Using OriginalColumn to distinguish between ObjC methods and
437  // binary operators is a bit hacky.
438  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
439  NextTok->OriginalColumn == 0;
440 
441  // If there is a comma, semicolon or right paren after the closing
442  // brace, we assume this is a braced initializer list. Note that
443  // regardless how we mark inner braces here, we will overwrite the
444  // BlockKind later if we parse a braced list (where all blocks
445  // inside are by default braced lists), or when we explicitly detect
446  // blocks (for example while parsing lambdas).
447  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
448  // braced list in JS.
449  ProbablyBracedList =
451  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
452  Keywords.kw_as)) ||
453  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
454  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
455  tok::r_paren, tok::r_square, tok::l_brace,
456  tok::ellipsis) ||
457  (NextTok->is(tok::identifier) &&
458  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
459  (NextTok->is(tok::semi) &&
460  (!ExpectClassBody || LBraceStack.size() != 1)) ||
461  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
462  if (NextTok->is(tok::l_square)) {
463  // We can have an array subscript after a braced init
464  // list, but C++11 attributes are expected after blocks.
465  NextTok = Tokens->getNextToken();
466  ++ReadTokens;
467  ProbablyBracedList = NextTok->isNot(tok::l_square);
468  }
469  }
470  if (ProbablyBracedList) {
471  Tok->BlockKind = BK_BracedInit;
472  LBraceStack.back()->BlockKind = BK_BracedInit;
473  } else {
474  Tok->BlockKind = BK_Block;
475  LBraceStack.back()->BlockKind = BK_Block;
476  }
477  }
478  LBraceStack.pop_back();
479  break;
480  case tok::at:
481  case tok::semi:
482  case tok::kw_if:
483  case tok::kw_while:
484  case tok::kw_for:
485  case tok::kw_switch:
486  case tok::kw_try:
487  case tok::kw___try:
488  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
489  LBraceStack.back()->BlockKind = BK_Block;
490  break;
491  default:
492  break;
493  }
494  PrevTok = Tok;
495  Tok = NextTok;
496  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
497 
498  // Assume other blocks for all unclosed opening braces.
499  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
500  if (LBraceStack[i]->BlockKind == BK_Unknown)
501  LBraceStack[i]->BlockKind = BK_Block;
502  }
503 
504  FormatTok = Tokens->setPosition(StoredPosition);
505 }
506 
/// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
512 
513 size_t UnwrappedLineParser::computePPHash() const {
514  size_t h = 0;
515  for (const auto &i : PPStack) {
516  hash_combine(h, size_t(i.Kind));
517  hash_combine(h, i.Line);
518  }
519  return h;
520 }
521 
522 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
523  bool MunchSemi) {
524  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
525  "'{' or macro block token expected");
526  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
527  FormatTok->BlockKind = BK_Block;
528 
529  size_t PPStartHash = computePPHash();
530 
531  unsigned InitialLevel = Line->Level;
532  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
533 
534  if (MacroBlock && FormatTok->is(tok::l_paren))
535  parseParens();
536 
537  size_t NbPreprocessorDirectives =
538  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
539  addUnwrappedLine();
540  size_t OpeningLineIndex =
541  CurrentLines->empty()
543  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
544 
545  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
546  MustBeDeclaration);
547  if (AddLevel)
548  ++Line->Level;
549  parseLevel(/*HasOpeningBrace=*/true);
550 
551  if (eof())
552  return;
553 
554  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
555  : !FormatTok->is(tok::r_brace)) {
556  Line->Level = InitialLevel;
557  FormatTok->BlockKind = BK_Block;
558  return;
559  }
560 
561  size_t PPEndHash = computePPHash();
562 
563  // Munch the closing brace.
564  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
565 
566  if (MacroBlock && FormatTok->is(tok::l_paren))
567  parseParens();
568 
569  if (MunchSemi && FormatTok->Tok.is(tok::semi))
570  nextToken();
571  Line->Level = InitialLevel;
572 
573  if (PPStartHash == PPEndHash) {
574  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
575  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
576  // Update the opening line to add the forward reference as well
577  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
578  CurrentLines->size() - 1;
579  }
580  }
581 }
582 
583 static bool isGoogScope(const UnwrappedLine &Line) {
584  // FIXME: Closure-library specific stuff should not be hard-coded but be
585  // configurable.
586  if (Line.Tokens.size() < 4)
587  return false;
588  auto I = Line.Tokens.begin();
589  if (I->Tok->TokenText != "goog")
590  return false;
591  ++I;
592  if (I->Tok->isNot(tok::period))
593  return false;
594  ++I;
595  if (I->Tok->TokenText != "scope")
596  return false;
597  ++I;
598  return I->Tok->is(tok::l_paren);
599 }
600 
601 static bool isIIFE(const UnwrappedLine &Line,
602  const AdditionalKeywords &Keywords) {
603  // Look for the start of an immediately invoked anonymous function.
604  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
605  // This is commonly done in JavaScript to create a new, anonymous scope.
606  // Example: (function() { ... })()
607  if (Line.Tokens.size() < 3)
608  return false;
609  auto I = Line.Tokens.begin();
610  if (I->Tok->isNot(tok::l_paren))
611  return false;
612  ++I;
613  if (I->Tok->isNot(Keywords.kw_function))
614  return false;
615  ++I;
616  return I->Tok->is(tok::l_paren);
617 }
618 
619 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
620  const FormatToken &InitialToken) {
621  if (InitialToken.is(tok::kw_namespace))
622  return Style.BraceWrapping.AfterNamespace;
623  if (InitialToken.is(tok::kw_class))
624  return Style.BraceWrapping.AfterClass;
625  if (InitialToken.is(tok::kw_union))
626  return Style.BraceWrapping.AfterUnion;
627  if (InitialToken.is(tok::kw_struct))
628  return Style.BraceWrapping.AfterStruct;
629  return false;
630 }
631 
632 void UnwrappedLineParser::parseChildBlock() {
633  FormatTok->BlockKind = BK_Block;
634  nextToken();
635  {
636  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
637  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
638  ScopedLineState LineState(*this);
639  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
640  /*MustBeDeclaration=*/false);
641  Line->Level += SkipIndent ? 0 : 1;
642  parseLevel(/*HasOpeningBrace=*/true);
643  flushComments(isOnNewLine(*FormatTok));
644  Line->Level -= SkipIndent ? 0 : 1;
645  }
646  nextToken();
647 }
648 
649 void UnwrappedLineParser::parsePPDirective() {
650  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
651  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
652  nextToken();
653 
654  if (!FormatTok->Tok.getIdentifierInfo()) {
655  parsePPUnknown();
656  return;
657  }
658 
659  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
660  case tok::pp_define:
661  parsePPDefine();
662  return;
663  case tok::pp_if:
664  parsePPIf(/*IfDef=*/false);
665  break;
666  case tok::pp_ifdef:
667  case tok::pp_ifndef:
668  parsePPIf(/*IfDef=*/true);
669  break;
670  case tok::pp_else:
671  parsePPElse();
672  break;
673  case tok::pp_elif:
674  parsePPElIf();
675  break;
676  case tok::pp_endif:
677  parsePPEndIf();
678  break;
679  default:
680  parsePPUnknown();
681  break;
682  }
683 }
684 
685 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
686  size_t Line = CurrentLines->size();
687  if (CurrentLines == &PreprocessorDirectives)
688  Line += Lines.size();
689 
690  if (Unreachable ||
691  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
692  PPStack.push_back({PP_Unreachable, Line});
693  else
694  PPStack.push_back({PP_Conditional, Line});
695 }
696 
697 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
698  ++PPBranchLevel;
699  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
700  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
701  PPLevelBranchIndex.push_back(0);
702  PPLevelBranchCount.push_back(0);
703  }
704  PPChainBranchIndex.push(0);
705  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
706  conditionalCompilationCondition(Unreachable || Skip);
707 }
708 
709 void UnwrappedLineParser::conditionalCompilationAlternative() {
710  if (!PPStack.empty())
711  PPStack.pop_back();
712  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
713  if (!PPChainBranchIndex.empty())
714  ++PPChainBranchIndex.top();
715  conditionalCompilationCondition(
716  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
717  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
718 }
719 
720 void UnwrappedLineParser::conditionalCompilationEnd() {
721  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
722  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
723  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
724  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
725  }
726  }
727  // Guard against #endif's without #if.
728  if (PPBranchLevel > -1)
729  --PPBranchLevel;
730  if (!PPChainBranchIndex.empty())
731  PPChainBranchIndex.pop();
732  if (!PPStack.empty())
733  PPStack.pop_back();
734 }
735 
736 void UnwrappedLineParser::parsePPIf(bool IfDef) {
737  bool IfNDef = FormatTok->is(tok::pp_ifndef);
738  nextToken();
739  bool Unreachable = false;
740  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
741  Unreachable = true;
742  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
743  Unreachable = true;
744  conditionalCompilationStart(Unreachable);
745  FormatToken *IfCondition = FormatTok;
746  // If there's a #ifndef on the first line, and the only lines before it are
747  // comments, it could be an include guard.
748  bool MaybeIncludeGuard = IfNDef;
749  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
750  for (auto &Line : Lines) {
751  if (!Line.Tokens.front().Tok->is(tok::comment)) {
752  MaybeIncludeGuard = false;
753  IncludeGuard = IG_Rejected;
754  break;
755  }
756  }
757  --PPBranchLevel;
758  parsePPUnknown();
759  ++PPBranchLevel;
760  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
761  IncludeGuard = IG_IfNdefed;
762  IncludeGuardToken = IfCondition;
763  }
764 }
765 
766 void UnwrappedLineParser::parsePPElse() {
767  // If a potential include guard has an #else, it's not an include guard.
768  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
769  IncludeGuard = IG_Rejected;
770  conditionalCompilationAlternative();
771  if (PPBranchLevel > -1)
772  --PPBranchLevel;
773  parsePPUnknown();
774  ++PPBranchLevel;
775 }
776 
777 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
778 
// Parses an #endif directive: closes the current conditional compilation
// level, consumes the rest of the directive line, and then checks whether
// this #endif completes an include guard.
779 void UnwrappedLineParser::parsePPEndIf() {
780  conditionalCompilationEnd();
781  parsePPUnknown();
782  // If the #endif of a potential include guard is the last thing in the file,
783  // then we found an include guard.
// Peek at the token at the source's current position without consuming it.
784  unsigned TokenPosition = Tokens->getPosition();
785  FormatToken *PeekNext = AllTokens[TokenPosition];
786  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
787  PeekNext->is(tok::eof) &&
// NOTE(review): the embedded listing numbers jump from 787 to 789 and the
// condition above ends in a dangling '&&' - the final operand and the
// closing parenthesis appear to be missing; restore from upstream.
789  IncludeGuard = IG_Found;
790 }
791 
// Parses a #define directive: the macro name, an optional parameter list,
// and then the macro body, which is parsed like a file via parseFile().
// Also advances include-guard detection when the defined name matches the
// name remembered from a preceding #ifndef.
792 void UnwrappedLineParser::parsePPDefine() {
793  nextToken();
794 
// A #define that is not followed by an identifier cannot belong to an
// include guard; treat the rest of the line as an unknown directive.
795  if (FormatTok->Tok.getKind() != tok::identifier) {
796  IncludeGuard = IG_Rejected;
797  IncludeGuardToken = nullptr;
798  parsePPUnknown();
799  return;
800  }
801 
// The name matches the remembered #ifndef condition: tentatively accept the
// guard, but reject it if any line collected so far starts with something
// other than a comment or a '#'.
802  if (IncludeGuard == IG_IfNdefed &&
803  IncludeGuardToken->TokenText == FormatTok->TokenText) {
804  IncludeGuard = IG_Defined;
805  IncludeGuardToken = nullptr;
806  for (auto &Line : Lines) {
807  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
808  IncludeGuard = IG_Rejected;
809  break;
810  }
811  }
812  }
813 
814  nextToken();
// Only a '(' with no whitespace before it is parsed as a parameter list.
815  if (FormatTok->Tok.getKind() == tok::l_paren &&
816  FormatTok->WhitespaceRange.getBegin() ==
817  FormatTok->WhitespaceRange.getEnd()) {
818  parseParens();
819  }
// NOTE(review): the embedded listing numbers jump from 819 to 821 - a source
// line appears to be missing here, so the level adjustment below may be
// conditional in the real file; confirm against upstream.
821  Line->Level += PPBranchLevel + 1;
822  addUnwrappedLine();
823  ++Line->Level;
824 
825  // Errors during a preprocessor directive can only affect the layout of the
826  // preprocessor directive, and thus we ignore them. An alternative approach
827  // would be to use the same approach we use on the file level (no
828  // re-indentation if there was a structural error) within the macro
829  // definition.
830  parseFile();
831 }
832 
// Consumes tokens until eof() reports the end of the current token source,
// adjusts the line level by the preprocessor branch level and emits the
// collected tokens as one unwrapped line.
833 void UnwrappedLineParser::parsePPUnknown() {
834  do {
835  nextToken();
836  } while (!eof());
// NOTE(review): the embedded listing numbers jump from 836 to 838 - a source
// line appears to be missing here, so the level adjustment below may be
// conditional in the real file; confirm against upstream.
838  Line->Level += PPBranchLevel + 1;
839  addUnwrappedLine();
840 }
841 
842 // Here we blacklist certain tokens that are not usually the first token in an
843 // unwrapped line. This is used in attempt to distinguish macro calls without
844 // trailing semicolons from other constructs split to several lines.
845 static bool tokenCanStartNewLine(const clang::Token &Tok) {
846  // Semicolon can be a null-statement, l_square can be a start of a macro or
847  // a C++11 attribute, but this doesn't seem to be common.
848  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
849  Tok.isNot(tok::l_square) &&
850  // Tokens that can only be used as binary operators and a part of
851  // overloaded operator names.
852  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
853  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
854  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
855  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
856  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
857  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
858  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
859  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
860  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
861  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
862  Tok.isNot(tok::lesslessequal) &&
863  // Colon is used in labels, base class lists, initializer lists,
864  // range-based for loops, ternary operator, but should never be the
865  // first token in an unwrapped line.
866  Tok.isNot(tok::colon) &&
867  // 'noexcept' is a trailing annotation.
868  Tok.isNot(tok::kw_noexcept);
869 }
870 
871 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
872  const FormatToken *FormatTok) {
873  // FIXME: This returns true for C/C++ keywords like 'struct'.
874  return FormatTok->is(tok::identifier) &&
875  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
876  !FormatTok->isOneOf(
877  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
878  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
879  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
880  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
881  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
882  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
883  Keywords.kw_from));
884 }
885 
886 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
887  const FormatToken *FormatTok) {
888  return FormatTok->Tok.isLiteral() ||
889  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
890  mustBeJSIdent(Keywords, FormatTok);
891 }
892 
893 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
894 // when encountered after a value (see mustBeJSIdentOrValue).
895 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
896  const FormatToken *FormatTok) {
897  return FormatTok->isOneOf(
898  tok::kw_return, Keywords.kw_yield,
899  // conditionals
900  tok::kw_if, tok::kw_else,
901  // loops
902  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
903  // switch/case
904  tok::kw_switch, tok::kw_case,
905  // exceptions
906  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
907  // declaration
908  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
909  Keywords.kw_async, Keywords.kw_function,
910  // import/export
911  Keywords.kw_import, tok::kw_export);
912 }
913 
914 // readTokenWithJavaScriptASI reads the next token and terminates the current
915 // line if JavaScript Automatic Semicolon Insertion must
916 // happen between the current token and the next token.
917 //
918 // This method is conservative - it cannot cover all edge cases of JavaScript,
919 // but only aims to correctly handle certain well known cases. It *must not*
920 // return true in speculative cases.
921 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
922  FormatToken *Previous = FormatTok;
923  readToken();
924  FormatToken *Next = FormatTok;
925 
926  bool IsOnSameLine =
927  CommentsBeforeNextToken.empty()
928  ? Next->NewlinesBefore == 0
929  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
930  if (IsOnSameLine)
931  return;
932 
933  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
934  bool PreviousStartsTemplateExpr =
935  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
936  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
937  // If the line contains an '@' sign, the previous token might be an
938  // annotation, which can precede another identifier/value.
939  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
940  [](UnwrappedLineNode &LineNode) {
941  return LineNode.Tok->is(tok::at);
942  }) != Line->Tokens.end();
943  if (HasAt)
944  return;
945  }
946  if (Next->is(tok::exclaim) && PreviousMustBeValue)
947  return addUnwrappedLine();
948  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
949  bool NextEndsTemplateExpr =
950  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
951  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
952  (PreviousMustBeValue ||
953  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
954  tok::minusminus)))
955  return addUnwrappedLine();
956  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
957  isJSDeclOrStmt(Keywords, Next))
958  return addUnwrappedLine();
959 }
960 
961 void UnwrappedLineParser::parseStructuralElement() {
962  assert(!FormatTok->is(tok::l_brace));
963  if (Style.Language == FormatStyle::LK_TableGen &&
964  FormatTok->is(tok::pp_include)) {
965  nextToken();
966  if (FormatTok->is(tok::string_literal))
967  nextToken();
968  addUnwrappedLine();
969  return;
970  }
971  switch (FormatTok->Tok.getKind()) {
972  case tok::kw_asm:
973  nextToken();
974  if (FormatTok->is(tok::l_brace)) {
975  FormatTok->Type = TT_InlineASMBrace;
976  nextToken();
977  while (FormatTok && FormatTok->isNot(tok::eof)) {
978  if (FormatTok->is(tok::r_brace)) {
979  FormatTok->Type = TT_InlineASMBrace;
980  nextToken();
981  addUnwrappedLine();
982  break;
983  }
984  FormatTok->Finalized = true;
985  nextToken();
986  }
987  }
988  break;
989  case tok::kw_namespace:
990  parseNamespace();
991  return;
992  case tok::kw_inline:
993  nextToken();
994  if (FormatTok->Tok.is(tok::kw_namespace)) {
995  parseNamespace();
996  return;
997  }
998  break;
999  case tok::kw_public:
1000  case tok::kw_protected:
1001  case tok::kw_private:
1002  if (Style.Language == FormatStyle::LK_Java ||
1004  nextToken();
1005  else
1006  parseAccessSpecifier();
1007  return;
1008  case tok::kw_if:
1009  parseIfThenElse();
1010  return;
1011  case tok::kw_for:
1012  case tok::kw_while:
1013  parseForOrWhileLoop();
1014  return;
1015  case tok::kw_do:
1016  parseDoWhile();
1017  return;
1018  case tok::kw_switch:
1019  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1020  // 'switch: string' field declaration.
1021  break;
1022  parseSwitch();
1023  return;
1024  case tok::kw_default:
1025  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1026  // 'default: string' field declaration.
1027  break;
1028  nextToken();
1029  if (FormatTok->is(tok::colon)) {
1030  parseLabel();
1031  return;
1032  }
1033  // e.g. "default void f() {}" in a Java interface.
1034  break;
1035  case tok::kw_case:
1036  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1037  // 'case: string' field declaration.
1038  break;
1039  parseCaseLabel();
1040  return;
1041  case tok::kw_try:
1042  case tok::kw___try:
1043  parseTryCatch();
1044  return;
1045  case tok::kw_extern:
1046  nextToken();
1047  if (FormatTok->Tok.is(tok::string_literal)) {
1048  nextToken();
1049  if (FormatTok->Tok.is(tok::l_brace)) {
1050  if (Style.BraceWrapping.AfterExternBlock) {
1051  addUnwrappedLine();
1052  parseBlock(/*MustBeDeclaration=*/true);
1053  } else {
1054  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1055  }
1056  addUnwrappedLine();
1057  return;
1058  }
1059  }
1060  break;
1061  case tok::kw_export:
1062  if (Style.Language == FormatStyle::LK_JavaScript) {
1063  parseJavaScriptEs6ImportExport();
1064  return;
1065  }
1066  break;
1067  case tok::identifier:
1068  if (FormatTok->is(TT_ForEachMacro)) {
1069  parseForOrWhileLoop();
1070  return;
1071  }
1072  if (FormatTok->is(TT_MacroBlockBegin)) {
1073  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1074  /*MunchSemi=*/false);
1075  return;
1076  }
1077  if (FormatTok->is(Keywords.kw_import)) {
1078  if (Style.Language == FormatStyle::LK_JavaScript) {
1079  parseJavaScriptEs6ImportExport();
1080  return;
1081  }
1082  if (Style.Language == FormatStyle::LK_Proto) {
1083  nextToken();
1084  if (FormatTok->is(tok::kw_public))
1085  nextToken();
1086  if (!FormatTok->is(tok::string_literal))
1087  return;
1088  nextToken();
1089  if (FormatTok->is(tok::semi))
1090  nextToken();
1091  addUnwrappedLine();
1092  return;
1093  }
1094  }
1095  if (Style.isCpp() &&
1096  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1097  Keywords.kw_slots, Keywords.kw_qslots)) {
1098  nextToken();
1099  if (FormatTok->is(tok::colon)) {
1100  nextToken();
1101  addUnwrappedLine();
1102  return;
1103  }
1104  }
1105  // In all other cases, parse the declaration.
1106  break;
1107  default:
1108  break;
1109  }
1110  do {
1111  const FormatToken *Previous = FormatTok->Previous;
1112  switch (FormatTok->Tok.getKind()) {
1113  case tok::at:
1114  nextToken();
1115  if (FormatTok->Tok.is(tok::l_brace)) {
1116  nextToken();
1117  parseBracedList();
1118  break;
1119  }
1120  switch (FormatTok->Tok.getObjCKeywordID()) {
1121  case tok::objc_public:
1122  case tok::objc_protected:
1123  case tok::objc_package:
1124  case tok::objc_private:
1125  return parseAccessSpecifier();
1126  case tok::objc_interface:
1127  case tok::objc_implementation:
1128  return parseObjCInterfaceOrImplementation();
1129  case tok::objc_protocol:
1130  if (parseObjCProtocol())
1131  return;
1132  break;
1133  case tok::objc_end:
1134  return; // Handled by the caller.
1135  case tok::objc_optional:
1136  case tok::objc_required:
1137  nextToken();
1138  addUnwrappedLine();
1139  return;
1140  case tok::objc_autoreleasepool:
1141  nextToken();
1142  if (FormatTok->Tok.is(tok::l_brace)) {
1144  addUnwrappedLine();
1145  parseBlock(/*MustBeDeclaration=*/false);
1146  }
1147  addUnwrappedLine();
1148  return;
1149  case tok::objc_synchronized:
1150  nextToken();
1151  if (FormatTok->Tok.is(tok::l_paren))
1152  // Skip synchronization object
1153  parseParens();
1154  if (FormatTok->Tok.is(tok::l_brace)) {
1156  addUnwrappedLine();
1157  parseBlock(/*MustBeDeclaration=*/false);
1158  }
1159  addUnwrappedLine();
1160  return;
1161  case tok::objc_try:
1162  // This branch isn't strictly necessary (the kw_try case below would
1163  // do this too after the tok::at is parsed above). But be explicit.
1164  parseTryCatch();
1165  return;
1166  default:
1167  break;
1168  }
1169  break;
1170  case tok::kw_enum:
1171  // Ignore if this is part of "template <enum ...".
1172  if (Previous && Previous->is(tok::less)) {
1173  nextToken();
1174  break;
1175  }
1176 
1177  // parseEnum falls through and does not yet add an unwrapped line as an
1178  // enum definition can start a structural element.
1179  if (!parseEnum())
1180  break;
1181  // This only applies for C++.
1182  if (!Style.isCpp()) {
1183  addUnwrappedLine();
1184  return;
1185  }
1186  break;
1187  case tok::kw_typedef:
1188  nextToken();
1189  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1190  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1191  parseEnum();
1192  break;
1193  case tok::kw_struct:
1194  case tok::kw_union:
1195  case tok::kw_class:
1196  // parseRecord falls through and does not yet add an unwrapped line as a
1197  // record declaration or definition can start a structural element.
1198  parseRecord();
1199  // This does not apply for Java and JavaScript.
1200  if (Style.Language == FormatStyle::LK_Java ||
1202  if (FormatTok->is(tok::semi))
1203  nextToken();
1204  addUnwrappedLine();
1205  return;
1206  }
1207  break;
1208  case tok::period:
1209  nextToken();
1210  // In Java, classes have an implicit static member "class".
1211  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1212  FormatTok->is(tok::kw_class))
1213  nextToken();
1214  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1215  FormatTok->Tok.getIdentifierInfo())
1216  // JavaScript only has pseudo keywords, all keywords are allowed to
1217  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1218  nextToken();
1219  break;
1220  case tok::semi:
1221  nextToken();
1222  addUnwrappedLine();
1223  return;
1224  case tok::r_brace:
1225  addUnwrappedLine();
1226  return;
1227  case tok::l_paren:
1228  parseParens();
1229  break;
1230  case tok::kw_operator:
1231  nextToken();
1232  if (FormatTok->isBinaryOperator())
1233  nextToken();
1234  break;
1235  case tok::caret:
1236  nextToken();
1237  if (FormatTok->Tok.isAnyIdentifier() ||
1238  FormatTok->isSimpleTypeSpecifier())
1239  nextToken();
1240  if (FormatTok->is(tok::l_paren))
1241  parseParens();
1242  if (FormatTok->is(tok::l_brace))
1243  parseChildBlock();
1244  break;
1245  case tok::l_brace:
1246  if (!tryToParseBracedList()) {
1247  // A block outside of parentheses must be the last part of a
1248  // structural element.
1249  // FIXME: Figure out cases where this is not true, and add projections
1250  // for them (the one we know is missing are lambdas).
1251  if (Style.BraceWrapping.AfterFunction)
1252  addUnwrappedLine();
1253  FormatTok->Type = TT_FunctionLBrace;
1254  parseBlock(/*MustBeDeclaration=*/false);
1255  addUnwrappedLine();
1256  return;
1257  }
1258  // Otherwise this was a braced init list, and the structural
1259  // element continues.
1260  break;
1261  case tok::kw_try:
1262  // We arrive here when parsing function-try blocks.
1263  parseTryCatch();
1264  return;
1265  case tok::identifier: {
1266  if (FormatTok->is(TT_MacroBlockEnd)) {
1267  addUnwrappedLine();
1268  return;
1269  }
1270 
1271  // Function declarations (as opposed to function expressions) are parsed
1272  // on their own unwrapped line by continuing this loop. Function
1273  // expressions (functions that are not on their own line) must not create
1274  // a new unwrapped line, so they are special cased below.
1275  size_t TokenCount = Line->Tokens.size();
1276  if (Style.Language == FormatStyle::LK_JavaScript &&
1277  FormatTok->is(Keywords.kw_function) &&
1278  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1279  Keywords.kw_async)))) {
1280  tryToParseJSFunction();
1281  break;
1282  }
1283  if ((Style.Language == FormatStyle::LK_JavaScript ||
1284  Style.Language == FormatStyle::LK_Java) &&
1285  FormatTok->is(Keywords.kw_interface)) {
1286  if (Style.Language == FormatStyle::LK_JavaScript) {
1287  // In JavaScript/TypeScript, "interface" can be used as a standalone
1288  // identifier, e.g. in `var interface = 1;`. If "interface" is
1289  // followed by another identifier, it is very like to be an actual
1290  // interface declaration.
1291  unsigned StoredPosition = Tokens->getPosition();
1292  FormatToken *Next = Tokens->getNextToken();
1293  FormatTok = Tokens->setPosition(StoredPosition);
1294  if (Next && !mustBeJSIdent(Keywords, Next)) {
1295  nextToken();
1296  break;
1297  }
1298  }
1299  parseRecord();
1300  addUnwrappedLine();
1301  return;
1302  }
1303 
1304  // See if the following token should start a new unwrapped line.
1305  StringRef Text = FormatTok->TokenText;
1306  nextToken();
1307  if (Line->Tokens.size() == 1 &&
1308  // JS doesn't have macros, and within classes colons indicate fields,
1309  // not labels.
1311  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1312  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1313  parseLabel();
1314  return;
1315  }
1316  // Recognize function-like macro usages without trailing semicolon as
1317  // well as free-standing macros like Q_OBJECT.
1318  bool FunctionLike = FormatTok->is(tok::l_paren);
1319  if (FunctionLike)
1320  parseParens();
1321 
1322  bool FollowedByNewline =
1323  CommentsBeforeNextToken.empty()
1324  ? FormatTok->NewlinesBefore > 0
1325  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1326 
1327  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1328  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1329  addUnwrappedLine();
1330  return;
1331  }
1332  }
1333  break;
1334  }
1335  case tok::equal:
1336  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1337  // TT_JsFatArrow. The always start an expression or a child block if
1338  // followed by a curly.
1339  if (FormatTok->is(TT_JsFatArrow)) {
1340  nextToken();
1341  if (FormatTok->is(tok::l_brace))
1342  parseChildBlock();
1343  break;
1344  }
1345 
1346  nextToken();
1347  if (FormatTok->Tok.is(tok::l_brace)) {
1348  nextToken();
1349  parseBracedList();
1350  } else if (Style.Language == FormatStyle::LK_Proto &&
1351  FormatTok->Tok.is(tok::less)) {
1352  nextToken();
1353  parseBracedList(/*ContinueOnSemicolons=*/false,
1354  /*ClosingBraceKind=*/tok::greater);
1355  }
1356  break;
1357  case tok::l_square:
1358  parseSquare();
1359  break;
1360  case tok::kw_new:
1361  parseNew();
1362  break;
1363  default:
1364  nextToken();
1365  break;
1366  }
1367  } while (!eof());
1368 }
1369 
1370 bool UnwrappedLineParser::tryToParseLambda() {
1371  if (!Style.isCpp()) {
1372  nextToken();
1373  return false;
1374  }
1375  assert(FormatTok->is(tok::l_square));
1376  FormatToken &LSquare = *FormatTok;
1377  if (!tryToParseLambdaIntroducer())
1378  return false;
1379 
1380  while (FormatTok->isNot(tok::l_brace)) {
1381  if (FormatTok->isSimpleTypeSpecifier()) {
1382  nextToken();
1383  continue;
1384  }
1385  switch (FormatTok->Tok.getKind()) {
1386  case tok::l_brace:
1387  break;
1388  case tok::l_paren:
1389  parseParens();
1390  break;
1391  case tok::amp:
1392  case tok::star:
1393  case tok::kw_const:
1394  case tok::comma:
1395  case tok::less:
1396  case tok::greater:
1397  case tok::identifier:
1398  case tok::numeric_constant:
1399  case tok::coloncolon:
1400  case tok::kw_mutable:
1401  nextToken();
1402  break;
1403  case tok::arrow:
1404  FormatTok->Type = TT_LambdaArrow;
1405  nextToken();
1406  break;
1407  default:
1408  return true;
1409  }
1410  }
1411  LSquare.Type = TT_LambdaLSquare;
1412  parseChildBlock();
1413  return true;
1414 }
1415 
1416 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1417  const FormatToken *Previous = FormatTok->Previous;
1418  if (Previous &&
1419  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1420  tok::kw_delete, tok::l_square) ||
1421  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1422  Previous->isSimpleTypeSpecifier())) {
1423  nextToken();
1424  return false;
1425  }
1426  nextToken();
1427  if (FormatTok->is(tok::l_square)) {
1428  return false;
1429  }
1430  parseSquare(/*LambdaIntroducer=*/true);
1431  return true;
1432 }
1433 
1434 void UnwrappedLineParser::tryToParseJSFunction() {
1435  assert(FormatTok->is(Keywords.kw_function) ||
1436  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1437  if (FormatTok->is(Keywords.kw_async))
1438  nextToken();
1439  // Consume "function".
1440  nextToken();
1441 
1442  // Consume * (generator function). Treat it like C++'s overloaded operators.
1443  if (FormatTok->is(tok::star)) {
1444  FormatTok->Type = TT_OverloadedOperator;
1445  nextToken();
1446  }
1447 
1448  // Consume function name.
1449  if (FormatTok->is(tok::identifier))
1450  nextToken();
1451 
1452  if (FormatTok->isNot(tok::l_paren))
1453  return;
1454 
1455  // Parse formal parameter list.
1456  parseParens();
1457 
1458  if (FormatTok->is(tok::colon)) {
1459  // Parse a type definition.
1460  nextToken();
1461 
1462  // Eat the type declaration. For braced inline object types, balance braces,
1463  // otherwise just parse until finding an l_brace for the function body.
1464  if (FormatTok->is(tok::l_brace))
1465  tryToParseBracedList();
1466  else
1467  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1468  nextToken();
1469  }
1470 
1471  if (FormatTok->is(tok::semi))
1472  return;
1473 
1474  parseChildBlock();
1475 }
1476 
1477 bool UnwrappedLineParser::tryToParseBracedList() {
1478  if (FormatTok->BlockKind == BK_Unknown)
1479  calculateBraceTypes();
1480  assert(FormatTok->BlockKind != BK_Unknown);
1481  if (FormatTok->BlockKind == BK_Block)
1482  return false;
1483  nextToken();
1484  parseBracedList();
1485  return true;
1486 }
1487 
1488 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1489  tok::TokenKind ClosingBraceKind) {
1490  bool HasError = false;
1491 
1492  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1493  // replace this by using parseAssigmentExpression() inside.
1494  do {
1495  if (Style.Language == FormatStyle::LK_JavaScript) {
1496  if (FormatTok->is(Keywords.kw_function) ||
1497  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1498  tryToParseJSFunction();
1499  continue;
1500  }
1501  if (FormatTok->is(TT_JsFatArrow)) {
1502  nextToken();
1503  // Fat arrows can be followed by simple expressions or by child blocks
1504  // in curly braces.
1505  if (FormatTok->is(tok::l_brace)) {
1506  parseChildBlock();
1507  continue;
1508  }
1509  }
1510  if (FormatTok->is(tok::l_brace)) {
1511  // Could be a method inside of a braced list `{a() { return 1; }}`.
1512  if (tryToParseBracedList())
1513  continue;
1514  parseChildBlock();
1515  }
1516  }
1517  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1518  nextToken();
1519  return !HasError;
1520  }
1521  switch (FormatTok->Tok.getKind()) {
1522  case tok::caret:
1523  nextToken();
1524  if (FormatTok->is(tok::l_brace)) {
1525  parseChildBlock();
1526  }
1527  break;
1528  case tok::l_square:
1529  tryToParseLambda();
1530  break;
1531  case tok::l_paren:
1532  parseParens();
1533  // JavaScript can just have free standing methods and getters/setters in
1534  // object literals. Detect them by a "{" following ")".
1535  if (Style.Language == FormatStyle::LK_JavaScript) {
1536  if (FormatTok->is(tok::l_brace))
1537  parseChildBlock();
1538  break;
1539  }
1540  break;
1541  case tok::l_brace:
1542  // Assume there are no blocks inside a braced init list apart
1543  // from the ones we explicitly parse out (like lambdas).
1544  FormatTok->BlockKind = BK_BracedInit;
1545  nextToken();
1546  parseBracedList();
1547  break;
1548  case tok::less:
1549  if (Style.Language == FormatStyle::LK_Proto) {
1550  nextToken();
1551  parseBracedList(/*ContinueOnSemicolons=*/false,
1552  /*ClosingBraceKind=*/tok::greater);
1553  } else {
1554  nextToken();
1555  }
1556  break;
1557  case tok::semi:
1558  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1559  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1560  // used for error recovery if we have otherwise determined that this is
1561  // a braced list.
1562  if (Style.Language == FormatStyle::LK_JavaScript) {
1563  nextToken();
1564  break;
1565  }
1566  HasError = true;
1567  if (!ContinueOnSemicolons)
1568  return !HasError;
1569  nextToken();
1570  break;
1571  case tok::comma:
1572  nextToken();
1573  break;
1574  default:
1575  nextToken();
1576  break;
1577  }
1578  } while (!eof());
1579  return false;
1580 }
1581 
1582 void UnwrappedLineParser::parseParens() {
1583  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1584  nextToken();
1585  do {
1586  switch (FormatTok->Tok.getKind()) {
1587  case tok::l_paren:
1588  parseParens();
1589  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1590  parseChildBlock();
1591  break;
1592  case tok::r_paren:
1593  nextToken();
1594  return;
1595  case tok::r_brace:
1596  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1597  return;
1598  case tok::l_square:
1599  tryToParseLambda();
1600  break;
1601  case tok::l_brace:
1602  if (!tryToParseBracedList())
1603  parseChildBlock();
1604  break;
1605  case tok::at:
1606  nextToken();
1607  if (FormatTok->Tok.is(tok::l_brace)) {
1608  nextToken();
1609  parseBracedList();
1610  }
1611  break;
1612  case tok::kw_class:
1613  if (Style.Language == FormatStyle::LK_JavaScript)
1614  parseRecord(/*ParseAsExpr=*/true);
1615  else
1616  nextToken();
1617  break;
1618  case tok::identifier:
1619  if (Style.Language == FormatStyle::LK_JavaScript &&
1620  (FormatTok->is(Keywords.kw_function) ||
1621  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1622  tryToParseJSFunction();
1623  else
1624  nextToken();
1625  break;
1626  default:
1627  nextToken();
1628  break;
1629  }
1630  } while (!eof());
1631 }
1632 
1633 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1634  if (!LambdaIntroducer) {
1635  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1636  if (tryToParseLambda())
1637  return;
1638  }
1639  do {
1640  switch (FormatTok->Tok.getKind()) {
1641  case tok::l_paren:
1642  parseParens();
1643  break;
1644  case tok::r_square:
1645  nextToken();
1646  return;
1647  case tok::r_brace:
1648  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1649  return;
1650  case tok::l_square:
1651  parseSquare();
1652  break;
1653  case tok::l_brace: {
1654  if (!tryToParseBracedList())
1655  parseChildBlock();
1656  break;
1657  }
1658  case tok::at:
1659  nextToken();
1660  if (FormatTok->Tok.is(tok::l_brace)) {
1661  nextToken();
1662  parseBracedList();
1663  }
1664  break;
1665  default:
1666  nextToken();
1667  break;
1668  }
1669  } while (!eof());
1670 }
1671 
1672 void UnwrappedLineParser::parseIfThenElse() {
1673  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1674  nextToken();
1675  if (FormatTok->Tok.is(tok::kw_constexpr))
1676  nextToken();
1677  if (FormatTok->Tok.is(tok::l_paren))
1678  parseParens();
1679  bool NeedsUnwrappedLine = false;
1680  if (FormatTok->Tok.is(tok::l_brace)) {
1681  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1682  parseBlock(/*MustBeDeclaration=*/false);
1683  if (Style.BraceWrapping.BeforeElse)
1684  addUnwrappedLine();
1685  else
1686  NeedsUnwrappedLine = true;
1687  } else {
1688  addUnwrappedLine();
1689  ++Line->Level;
1690  parseStructuralElement();
1691  --Line->Level;
1692  }
1693  if (FormatTok->Tok.is(tok::kw_else)) {
1694  nextToken();
1695  if (FormatTok->Tok.is(tok::l_brace)) {
1696  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1697  parseBlock(/*MustBeDeclaration=*/false);
1698  addUnwrappedLine();
1699  } else if (FormatTok->Tok.is(tok::kw_if)) {
1700  parseIfThenElse();
1701  } else {
1702  addUnwrappedLine();
1703  ++Line->Level;
1704  parseStructuralElement();
1705  if (FormatTok->is(tok::eof))
1706  addUnwrappedLine();
1707  --Line->Level;
1708  }
1709  } else if (NeedsUnwrappedLine) {
1710  addUnwrappedLine();
1711  }
1712 }
1713 
1714 void UnwrappedLineParser::parseTryCatch() {
1715  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1716  nextToken();
1717  bool NeedsUnwrappedLine = false;
1718  if (FormatTok->is(tok::colon)) {
1719  // We are in a function try block, what comes is an initializer list.
1720  nextToken();
1721  while (FormatTok->is(tok::identifier)) {
1722  nextToken();
1723  if (FormatTok->is(tok::l_paren))
1724  parseParens();
1725  if (FormatTok->is(tok::comma))
1726  nextToken();
1727  }
1728  }
1729  // Parse try with resource.
1730  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1731  parseParens();
1732  }
1733  if (FormatTok->is(tok::l_brace)) {
1734  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1735  parseBlock(/*MustBeDeclaration=*/false);
1736  if (Style.BraceWrapping.BeforeCatch) {
1737  addUnwrappedLine();
1738  } else {
1739  NeedsUnwrappedLine = true;
1740  }
1741  } else if (!FormatTok->is(tok::kw_catch)) {
1742  // The C++ standard requires a compound-statement after a try.
1743  // If there's none, we try to assume there's a structuralElement
1744  // and try to continue.
1745  addUnwrappedLine();
1746  ++Line->Level;
1747  parseStructuralElement();
1748  --Line->Level;
1749  }
1750  while (1) {
1751  if (FormatTok->is(tok::at))
1752  nextToken();
1753  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1754  tok::kw___finally) ||
1755  ((Style.Language == FormatStyle::LK_Java ||
1757  FormatTok->is(Keywords.kw_finally)) ||
1758  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1759  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1760  break;
1761  nextToken();
1762  while (FormatTok->isNot(tok::l_brace)) {
1763  if (FormatTok->is(tok::l_paren)) {
1764  parseParens();
1765  continue;
1766  }
1767  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1768  return;
1769  nextToken();
1770  }
1771  NeedsUnwrappedLine = false;
1772  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1773  parseBlock(/*MustBeDeclaration=*/false);
1774  if (Style.BraceWrapping.BeforeCatch)
1775  addUnwrappedLine();
1776  else
1777  NeedsUnwrappedLine = true;
1778  }
1779  if (NeedsUnwrappedLine)
1780  addUnwrappedLine();
1781 }
1782 
1783 void UnwrappedLineParser::parseNamespace() {
1784  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1785 
1786  const FormatToken &InitialToken = *FormatTok;
1787  nextToken();
1788  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1789  nextToken();
1790  if (FormatTok->Tok.is(tok::l_brace)) {
1791  if (ShouldBreakBeforeBrace(Style, InitialToken))
1792  addUnwrappedLine();
1793 
1794  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1796  DeclarationScopeStack.size() > 1);
1797  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1798  // Munch the semicolon after a namespace. This is more common than one would
1799  // think. Puttin the semicolon into its own line is very ugly.
1800  if (FormatTok->Tok.is(tok::semi))
1801  nextToken();
1802  addUnwrappedLine();
1803  }
1804  // FIXME: Add error handling.
1805 }
1806 
1807 void UnwrappedLineParser::parseNew() {
1808  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1809  nextToken();
1810  if (Style.Language != FormatStyle::LK_Java)
1811  return;
1812 
1813  // In Java, we can parse everything up to the parens, which aren't optional.
1814  do {
1815  // There should not be a ;, { or } before the new's open paren.
1816  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1817  return;
1818 
1819  // Consume the parens.
1820  if (FormatTok->is(tok::l_paren)) {
1821  parseParens();
1822 
1823  // If there is a class body of an anonymous class, consume that as child.
1824  if (FormatTok->is(tok::l_brace))
1825  parseChildBlock();
1826  return;
1827  }
1828  nextToken();
1829  } while (!eof());
1830 }
1831 
1832 void UnwrappedLineParser::parseForOrWhileLoop() {
1833  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1834  "'for', 'while' or foreach macro expected");
1835  nextToken();
1836  // JS' for await ( ...
1837  if (Style.Language == FormatStyle::LK_JavaScript &&
1838  FormatTok->is(Keywords.kw_await))
1839  nextToken();
1840  if (FormatTok->Tok.is(tok::l_paren))
1841  parseParens();
1842  if (FormatTok->Tok.is(tok::l_brace)) {
1843  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1844  parseBlock(/*MustBeDeclaration=*/false);
1845  addUnwrappedLine();
1846  } else {
1847  addUnwrappedLine();
1848  ++Line->Level;
1849  parseStructuralElement();
1850  --Line->Level;
1851  }
1852 }
1853 
1854 void UnwrappedLineParser::parseDoWhile() {
1855  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1856  nextToken();
1857  if (FormatTok->Tok.is(tok::l_brace)) {
1858  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1859  parseBlock(/*MustBeDeclaration=*/false);
1860  if (Style.BraceWrapping.IndentBraces)
1861  addUnwrappedLine();
1862  } else {
1863  addUnwrappedLine();
1864  ++Line->Level;
1865  parseStructuralElement();
1866  --Line->Level;
1867  }
1868 
1869  // FIXME: Add error handling.
1870  if (!FormatTok->Tok.is(tok::kw_while)) {
1871  addUnwrappedLine();
1872  return;
1873  }
1874 
1875  nextToken();
1876  parseStructuralElement();
1877 }
1878 
1879 void UnwrappedLineParser::parseLabel() {
1880  nextToken();
1881  unsigned OldLineLevel = Line->Level;
1882  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1883  --Line->Level;
1884  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1885  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1886  parseBlock(/*MustBeDeclaration=*/false);
1887  if (FormatTok->Tok.is(tok::kw_break)) {
1889  addUnwrappedLine();
1890  parseStructuralElement();
1891  }
1892  addUnwrappedLine();
1893  } else {
1894  if (FormatTok->is(tok::semi))
1895  nextToken();
1896  addUnwrappedLine();
1897  }
1898  Line->Level = OldLineLevel;
1899  if (FormatTok->isNot(tok::l_brace)) {
1900  parseStructuralElement();
1901  addUnwrappedLine();
1902  }
1903 }
1904 
1905 void UnwrappedLineParser::parseCaseLabel() {
1906  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1907  // FIXME: fix handling of complex expressions here.
1908  do {
1909  nextToken();
1910  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1911  parseLabel();
1912 }
1913 
1914 void UnwrappedLineParser::parseSwitch() {
1915  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1916  nextToken();
1917  if (FormatTok->Tok.is(tok::l_paren))
1918  parseParens();
1919  if (FormatTok->Tok.is(tok::l_brace)) {
1920  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1921  parseBlock(/*MustBeDeclaration=*/false);
1922  addUnwrappedLine();
1923  } else {
1924  addUnwrappedLine();
1925  ++Line->Level;
1926  parseStructuralElement();
1927  --Line->Level;
1928  }
1929 }
1930 
1931 void UnwrappedLineParser::parseAccessSpecifier() {
1932  nextToken();
1933  // Understand Qt's slots.
1934  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1935  nextToken();
1936  // Otherwise, we don't know what it is, and we'd better keep the next token.
1937  if (FormatTok->Tok.is(tok::colon))
1938  nextToken();
1939  addUnwrappedLine();
1940 }
1941 
1942 bool UnwrappedLineParser::parseEnum() {
1943  // Won't be 'enum' for NS_ENUMs.
1944  if (FormatTok->Tok.is(tok::kw_enum))
1945  nextToken();
1946 
1947  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1948  // declarations. An "enum" keyword followed by a colon would be a syntax
1949  // error and thus assume it is just an identifier.
1950  if (Style.Language == FormatStyle::LK_JavaScript &&
1951  FormatTok->isOneOf(tok::colon, tok::question))
1952  return false;
1953 
1954  // Eat up enum class ...
1955  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1956  nextToken();
1957 
1958  while (FormatTok->Tok.getIdentifierInfo() ||
1959  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1960  tok::greater, tok::comma, tok::question)) {
1961  nextToken();
1962  // We can have macros or attributes in between 'enum' and the enum name.
1963  if (FormatTok->is(tok::l_paren))
1964  parseParens();
1965  if (FormatTok->is(tok::identifier)) {
1966  nextToken();
1967  // If there are two identifiers in a row, this is likely an elaborate
1968  // return type. In Java, this can be "implements", etc.
1969  if (Style.isCpp() && FormatTok->is(tok::identifier))
1970  return false;
1971  }
1972  }
1973 
1974  // Just a declaration or something is wrong.
1975  if (FormatTok->isNot(tok::l_brace))
1976  return true;
1977  FormatTok->BlockKind = BK_Block;
1978 
1979  if (Style.Language == FormatStyle::LK_Java) {
1980  // Java enums are different.
1981  parseJavaEnumBody();
1982  return true;
1983  }
1984  if (Style.Language == FormatStyle::LK_Proto) {
1985  parseBlock(/*MustBeDeclaration=*/true);
1986  return true;
1987  }
1988 
1989  // Parse enum body.
1990  nextToken();
1991  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1992  if (HasError) {
1993  if (FormatTok->is(tok::semi))
1994  nextToken();
1995  addUnwrappedLine();
1996  }
1997  return true;
1998 
1999  // There is no addUnwrappedLine() here so that we fall through to parsing a
2000  // structural element afterwards. Thus, in "enum A {} n, m;",
2001  // "} n, m;" will end up in one unwrapped line.
2002 }
2003 
2004 void UnwrappedLineParser::parseJavaEnumBody() {
2005  // Determine whether the enum is simple, i.e. does not have a semicolon or
2006  // constants with class bodies. Simple enums can be formatted like braced
2007  // lists, contracted to a single line, etc.
2008  unsigned StoredPosition = Tokens->getPosition();
2009  bool IsSimple = true;
2010  FormatToken *Tok = Tokens->getNextToken();
2011  while (Tok) {
2012  if (Tok->is(tok::r_brace))
2013  break;
2014  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2015  IsSimple = false;
2016  break;
2017  }
2018  // FIXME: This will also mark enums with braces in the arguments to enum
2019  // constants as "not simple". This is probably fine in practice, though.
2020  Tok = Tokens->getNextToken();
2021  }
2022  FormatTok = Tokens->setPosition(StoredPosition);
2023 
2024  if (IsSimple) {
2025  nextToken();
2026  parseBracedList();
2027  addUnwrappedLine();
2028  return;
2029  }
2030 
2031  // Parse the body of a more complex enum.
2032  // First add a line for everything up to the "{".
2033  nextToken();
2034  addUnwrappedLine();
2035  ++Line->Level;
2036 
2037  // Parse the enum constants.
2038  while (FormatTok) {
2039  if (FormatTok->is(tok::l_brace)) {
2040  // Parse the constant's class body.
2041  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2042  /*MunchSemi=*/false);
2043  } else if (FormatTok->is(tok::l_paren)) {
2044  parseParens();
2045  } else if (FormatTok->is(tok::comma)) {
2046  nextToken();
2047  addUnwrappedLine();
2048  } else if (FormatTok->is(tok::semi)) {
2049  nextToken();
2050  addUnwrappedLine();
2051  break;
2052  } else if (FormatTok->is(tok::r_brace)) {
2053  addUnwrappedLine();
2054  break;
2055  } else {
2056  nextToken();
2057  }
2058  }
2059 
2060  // Parse the class body after the enum's ";" if any.
2061  parseLevel(/*HasOpeningBrace=*/true);
2062  nextToken();
2063  --Line->Level;
2064  addUnwrappedLine();
2065 }
2066 
2067 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2068  const FormatToken &InitialToken = *FormatTok;
2069  nextToken();
2070 
2071  // The actual identifier can be a nested name specifier, and in macros
2072  // it is often token-pasted.
2073  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2074  tok::kw___attribute, tok::kw___declspec,
2075  tok::kw_alignas) ||
2076  ((Style.Language == FormatStyle::LK_Java ||
2078  FormatTok->isOneOf(tok::period, tok::comma))) {
2079  if (Style.Language == FormatStyle::LK_JavaScript &&
2080  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2081  // JavaScript/TypeScript supports inline object types in
2082  // extends/implements positions:
2083  // class Foo implements {bar: number} { }
2084  nextToken();
2085  if (FormatTok->is(tok::l_brace)) {
2086  tryToParseBracedList();
2087  continue;
2088  }
2089  }
2090  bool IsNonMacroIdentifier =
2091  FormatTok->is(tok::identifier) &&
2092  FormatTok->TokenText != FormatTok->TokenText.upper();
2093  nextToken();
2094  // We can have macros or attributes in between 'class' and the class name.
2095  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2096  parseParens();
2097  }
2098 
2099  // Note that parsing away template declarations here leads to incorrectly
2100  // accepting function declarations as record declarations.
2101  // In general, we cannot solve this problem. Consider:
2102  // class A<int> B() {}
2103  // which can be a function definition or a class definition when B() is a
2104  // macro. If we find enough real-world cases where this is a problem, we
2105  // can parse for the 'template' keyword in the beginning of the statement,
2106  // and thus rule out the record production in case there is no template
2107  // (this would still leave us with an ambiguity between template function
2108  // and class declarations).
2109  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2110  while (!eof()) {
2111  if (FormatTok->is(tok::l_brace)) {
2112  calculateBraceTypes(/*ExpectClassBody=*/true);
2113  if (!tryToParseBracedList())
2114  break;
2115  }
2116  if (FormatTok->Tok.is(tok::semi))
2117  return;
2118  nextToken();
2119  }
2120  }
2121  if (FormatTok->Tok.is(tok::l_brace)) {
2122  if (ParseAsExpr) {
2123  parseChildBlock();
2124  } else {
2125  if (ShouldBreakBeforeBrace(Style, InitialToken))
2126  addUnwrappedLine();
2127 
2128  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2129  /*MunchSemi=*/false);
2130  }
2131  }
2132  // There is no addUnwrappedLine() here so that we fall through to parsing a
2133  // structural element afterwards. Thus, in "class A {} n, m;",
2134  // "} n, m;" will end up in one unwrapped line.
2135 }
2136 
2137 void UnwrappedLineParser::parseObjCMethod() {
2138  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2139  "'(' or identifier expected.");
2140  do {
2141  if (FormatTok->Tok.is(tok::semi)) {
2142  nextToken();
2143  addUnwrappedLine();
2144  return;
2145  } else if (FormatTok->Tok.is(tok::l_brace)) {
2146  parseBlock(/*MustBeDeclaration=*/false);
2147  addUnwrappedLine();
2148  return;
2149  } else {
2150  nextToken();
2151  }
2152  } while (!eof());
2153 }
2154 
2155 void UnwrappedLineParser::parseObjCProtocolList() {
2156  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2157  do {
2158  nextToken();
2159  // Early exit in case someone forgot a close angle.
2160  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2161  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2162  return;
2163  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2164  nextToken(); // Skip '>'.
2165 }
2166 
2167 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2168  do {
2169  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2170  nextToken();
2171  addUnwrappedLine();
2172  break;
2173  }
2174  if (FormatTok->is(tok::l_brace)) {
2175  parseBlock(/*MustBeDeclaration=*/false);
2176  // In ObjC interfaces, nothing should be following the "}".
2177  addUnwrappedLine();
2178  } else if (FormatTok->is(tok::r_brace)) {
2179  // Ignore stray "}". parseStructuralElement doesn't consume them.
2180  nextToken();
2181  addUnwrappedLine();
2182  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2183  nextToken();
2184  parseObjCMethod();
2185  } else {
2186  parseStructuralElement();
2187  }
2188  } while (!eof());
2189 }
2190 
2191 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2192  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2193  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2194  nextToken();
2195  nextToken(); // interface name
2196 
2197  // @interface can be followed by a lightweight generic
2198  // specialization list, then either a base class or a category.
2199  if (FormatTok->Tok.is(tok::less)) {
2200  // Unlike protocol lists, generic parameterizations support
2201  // nested angles:
2202  //
2203  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2204  // NSObject <NSCopying, NSSecureCoding>
2205  //
2206  // so we need to count how many open angles we have left.
2207  unsigned NumOpenAngles = 1;
2208  do {
2209  nextToken();
2210  // Early exit in case someone forgot a close angle.
2211  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2212  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2213  break;
2214  if (FormatTok->Tok.is(tok::less))
2215  ++NumOpenAngles;
2216  else if (FormatTok->Tok.is(tok::greater)) {
2217  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2218  --NumOpenAngles;
2219  }
2220  } while (!eof() && NumOpenAngles != 0);
2221  nextToken(); // Skip '>'.
2222  }
2223  if (FormatTok->Tok.is(tok::colon)) {
2224  nextToken();
2225  nextToken(); // base class name
2226  } else if (FormatTok->Tok.is(tok::l_paren))
2227  // Skip category, if present.
2228  parseParens();
2229 
2230  if (FormatTok->Tok.is(tok::less))
2231  parseObjCProtocolList();
2232 
2233  if (FormatTok->Tok.is(tok::l_brace)) {
2235  addUnwrappedLine();
2236  parseBlock(/*MustBeDeclaration=*/true);
2237  }
2238 
2239  // With instance variables, this puts '}' on its own line. Without instance
2240  // variables, this ends the @interface line.
2241  addUnwrappedLine();
2242 
2243  parseObjCUntilAtEnd();
2244 }
2245 
2246 // Returns true for the declaration/definition form of @protocol,
2247 // false for the expression form.
2248 bool UnwrappedLineParser::parseObjCProtocol() {
2249  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2250  nextToken();
2251 
2252  if (FormatTok->is(tok::l_paren))
2253  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2254  return false;
2255 
2256  // The definition/declaration form,
2257  // @protocol Foo
2258  // - (int)someMethod;
2259  // @end
2260 
2261  nextToken(); // protocol name
2262 
2263  if (FormatTok->Tok.is(tok::less))
2264  parseObjCProtocolList();
2265 
2266  // Check for protocol declaration.
2267  if (FormatTok->Tok.is(tok::semi)) {
2268  nextToken();
2269  addUnwrappedLine();
2270  return true;
2271  }
2272 
2273  addUnwrappedLine();
2274  parseObjCUntilAtEnd();
2275  return true;
2276 }
2277 
2278 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2279  bool IsImport = FormatTok->is(Keywords.kw_import);
2280  assert(IsImport || FormatTok->is(tok::kw_export));
2281  nextToken();
2282 
2283  // Consume the "default" in "export default class/function".
2284  if (FormatTok->is(tok::kw_default))
2285  nextToken();
2286 
2287  // Consume "async function", "function" and "default function", so that these
2288  // get parsed as free-standing JS functions, i.e. do not require a trailing
2289  // semicolon.
2290  if (FormatTok->is(Keywords.kw_async))
2291  nextToken();
2292  if (FormatTok->is(Keywords.kw_function)) {
2293  nextToken();
2294  return;
2295  }
2296 
2297  // For imports, `export *`, `export {...}`, consume the rest of the line up
2298  // to the terminating `;`. For everything else, just return and continue
2299  // parsing the structural element, i.e. the declaration or expression for
2300  // `export default`.
2301  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2302  !FormatTok->isStringLiteral())
2303  return;
2304 
2305  while (!eof()) {
2306  if (FormatTok->is(tok::semi))
2307  return;
2308  if (Line->Tokens.empty()) {
2309  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2310  // import statement should terminate.
2311  return;
2312  }
2313  if (FormatTok->is(tok::l_brace)) {
2314  FormatTok->BlockKind = BK_Block;
2315  nextToken();
2316  parseBracedList();
2317  } else {
2318  nextToken();
2319  }
2320  }
2321 }
2322 
2323 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2324  StringRef Prefix = "") {
2325  llvm::dbgs() << Prefix << "Line(" << Line.Level
2326  << ", FSC=" << Line.FirstStartColumn << ")"
2327  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2328  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2329  E = Line.Tokens.end();
2330  I != E; ++I) {
2331  llvm::dbgs() << I->Tok->Tok.getName() << "["
2332  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2333  << "] ";
2334  }
2335  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2336  E = Line.Tokens.end();
2337  I != E; ++I) {
2338  const UnwrappedLineNode &Node = *I;
2340  I = Node.Children.begin(),
2341  E = Node.Children.end();
2342  I != E; ++I) {
2343  printDebugInfo(*I, "\nChild: ");
2344  }
2345  }
2346  llvm::dbgs() << "\n";
2347 }
2348 
2349 void UnwrappedLineParser::addUnwrappedLine() {
2350  if (Line->Tokens.empty())
2351  return;
2352  LLVM_DEBUG({
2353  if (CurrentLines == &Lines)
2354  printDebugInfo(*Line);
2355  });
2356  CurrentLines->push_back(std::move(*Line));
2357  Line->Tokens.clear();
2358  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2359  Line->FirstStartColumn = 0;
2360  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2361  CurrentLines->append(
2362  std::make_move_iterator(PreprocessorDirectives.begin()),
2363  std::make_move_iterator(PreprocessorDirectives.end()));
2364  PreprocessorDirectives.clear();
2365  }
2366  // Disconnect the current token from the last token on the previous line.
2367  FormatTok->Previous = nullptr;
2368 }
2369 
2370 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2371 
2372 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2373  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2374  FormatTok.NewlinesBefore > 0;
2375 }
2376 
2377 // Checks if \p FormatTok is a line comment that continues the line comment
2378 // section on \p Line.
2379 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2380  const UnwrappedLine &Line,
2381  llvm::Regex &CommentPragmasRegex) {
2382  if (Line.Tokens.empty())
2383  return false;
2384 
2385  StringRef IndentContent = FormatTok.TokenText;
2386  if (FormatTok.TokenText.startswith("//") ||
2387  FormatTok.TokenText.startswith("/*"))
2388  IndentContent = FormatTok.TokenText.substr(2);
2389  if (CommentPragmasRegex.match(IndentContent))
2390  return false;
2391 
2392  // If Line starts with a line comment, then FormatTok continues the comment
2393  // section if its original column is greater or equal to the original start
2394  // column of the line.
2395  //
2396  // Define the min column token of a line as follows: if a line ends in '{' or
2397  // contains a '{' followed by a line comment, then the min column token is
2398  // that '{'. Otherwise, the min column token of the line is the first token of
2399  // the line.
2400  //
2401  // If Line starts with a token other than a line comment, then FormatTok
2402  // continues the comment section if its original column is greater than the
2403  // original start column of the min column token of the line.
2404  //
2405  // For example, the second line comment continues the first in these cases:
2406  //
2407  // // first line
2408  // // second line
2409  //
2410  // and:
2411  //
2412  // // first line
2413  // // second line
2414  //
2415  // and:
2416  //
2417  // int i; // first line
2418  // // second line
2419  //
2420  // and:
2421  //
2422  // do { // first line
2423  // // second line
2424  // int i;
2425  // } while (true);
2426  //
2427  // and:
2428  //
2429  // enum {
2430  // a, // first line
2431  // // second line
2432  // b
2433  // };
2434  //
2435  // The second line comment doesn't continue the first in these cases:
2436  //
2437  // // first line
2438  // // second line
2439  //
2440  // and:
2441  //
2442  // int i; // first line
2443  // // second line
2444  //
2445  // and:
2446  //
2447  // do { // first line
2448  // // second line
2449  // int i;
2450  // } while (true);
2451  //
2452  // and:
2453  //
2454  // enum {
2455  // a, // first line
2456  // // second line
2457  // };
2458  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2459 
2460  // Scan for '{//'. If found, use the column of '{' as a min column for line
2461  // comment section continuation.
2462  const FormatToken *PreviousToken = nullptr;
2463  for (const UnwrappedLineNode &Node : Line.Tokens) {
2464  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2465  isLineComment(*Node.Tok)) {
2466  MinColumnToken = PreviousToken;
2467  break;
2468  }
2469  PreviousToken = Node.Tok;
2470 
2471  // Grab the last newline preceding a token in this unwrapped line.
2472  if (Node.Tok->NewlinesBefore > 0) {
2473  MinColumnToken = Node.Tok;
2474  }
2475  }
2476  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2477  MinColumnToken = PreviousToken;
2478  }
2479 
2480  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2481  MinColumnToken);
2482 }
2483 
2484 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2485  bool JustComments = Line->Tokens.empty();
2487  I = CommentsBeforeNextToken.begin(),
2488  E = CommentsBeforeNextToken.end();
2489  I != E; ++I) {
2490  // Line comments that belong to the same line comment section are put on the
2491  // same line since later we might want to reflow content between them.
2492  // Additional fine-grained breaking of line comment sections is controlled
2493  // by the class BreakableLineCommentSection in case it is desirable to keep
2494  // several line comment sections in the same unwrapped line.
2495  //
2496  // FIXME: Consider putting separate line comment sections as children to the
2497  // unwrapped line instead.
2499  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2500  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2501  addUnwrappedLine();
2502  pushToken(*I);
2503  }
2504  if (NewlineBeforeNext && JustComments)
2505  addUnwrappedLine();
2506  CommentsBeforeNextToken.clear();
2507 }
2508 
2509 void UnwrappedLineParser::nextToken(int LevelDifference) {
2510  if (eof())
2511  return;
2512  flushComments(isOnNewLine(*FormatTok));
2513  pushToken(FormatTok);
2514  FormatToken *Previous = FormatTok;
2515  if (Style.Language != FormatStyle::LK_JavaScript)
2516  readToken(LevelDifference);
2517  else
2518  readTokenWithJavaScriptASI();
2519  FormatTok->Previous = Previous;
2520 }
2521 
2522 void UnwrappedLineParser::distributeComments(
2523  const SmallVectorImpl<FormatToken *> &Comments,
2524  const FormatToken *NextTok) {
2525  // Whether or not a line comment token continues a line is controlled by
2526  // the method continuesLineCommentSection, with the following caveat:
2527  //
2528  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2529  // that each comment line from the trail is aligned with the next token, if
2530  // the next token exists. If a trail exists, the beginning of the maximal
2531  // trail is marked as a start of a new comment section.
2532  //
2533  // For example in this code:
2534  //
2535  // int a; // line about a
2536  // // line 1 about b
2537  // // line 2 about b
2538  // int b;
2539  //
2540  // the two lines about b form a maximal trail, so there are two sections, the
2541  // first one consisting of the single comment "// line about a" and the
2542  // second one consisting of the next two comments.
2543  if (Comments.empty())
2544  return;
2545  bool ShouldPushCommentsInCurrentLine = true;
2546  bool HasTrailAlignedWithNextToken = false;
2547  unsigned StartOfTrailAlignedWithNextToken = 0;
2548  if (NextTok) {
2549  // We are skipping the first element intentionally.
2550  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2551  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2552  HasTrailAlignedWithNextToken = true;
2553  StartOfTrailAlignedWithNextToken = i;
2554  }
2555  }
2556  }
2557  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2558  FormatToken *FormatTok = Comments[i];
2559  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2560  FormatTok->ContinuesLineCommentSection = false;
2561  } else {
2562  FormatTok->ContinuesLineCommentSection =
2563  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2564  }
2565  if (!FormatTok->ContinuesLineCommentSection &&
2566  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2567  ShouldPushCommentsInCurrentLine = false;
2568  }
2569  if (ShouldPushCommentsInCurrentLine) {
2570  pushToken(FormatTok);
2571  } else {
2572  CommentsBeforeNextToken.push_back(FormatTok);
2573  }
2574  }
2575 }
2576 
2577 void UnwrappedLineParser::readToken(int LevelDifference) {
2579  do {
2580  FormatTok = Tokens->getNextToken();
2581  assert(FormatTok);
2582  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2583  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2584  distributeComments(Comments, FormatTok);
2585  Comments.clear();
2586  // If there is an unfinished unwrapped line, we flush the preprocessor
2587  // directives only after that unwrapped line was finished later.
2588  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2589  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2590  assert((LevelDifference >= 0 ||
2591  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2592  "LevelDifference makes Line->Level negative");
2593  Line->Level += LevelDifference;
2594  // Comments stored before the preprocessor directive need to be output
2595  // before the preprocessor directive, at the same level as the
2596  // preprocessor directive, as we consider them to apply to the directive.
2597  flushComments(isOnNewLine(*FormatTok));
2598  parsePPDirective();
2599  }
2600  while (FormatTok->Type == TT_ConflictStart ||
2601  FormatTok->Type == TT_ConflictEnd ||
2602  FormatTok->Type == TT_ConflictAlternative) {
2603  if (FormatTok->Type == TT_ConflictStart) {
2604  conditionalCompilationStart(/*Unreachable=*/false);
2605  } else if (FormatTok->Type == TT_ConflictAlternative) {
2606  conditionalCompilationAlternative();
2607  } else if (FormatTok->Type == TT_ConflictEnd) {
2608  conditionalCompilationEnd();
2609  }
2610  FormatTok = Tokens->getNextToken();
2611  FormatTok->MustBreakBefore = true;
2612  }
2613 
2614  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2615  !Line->InPPDirective) {
2616  continue;
2617  }
2618 
2619  if (!FormatTok->Tok.is(tok::comment)) {
2620  distributeComments(Comments, FormatTok);
2621  Comments.clear();
2622  return;
2623  }
2624 
2625  Comments.push_back(FormatTok);
2626  } while (!eof());
2627 
2628  distributeComments(Comments, nullptr);
2629  Comments.clear();
2630 }
2631 
2632 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2633  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2634  if (MustBreakBeforeNextToken) {
2635  Line->Tokens.back().Tok->MustBreakBefore = true;
2636  MustBreakBeforeNextToken = false;
2637  }
2638 }
2639 
2640 } // end namespace format
2641 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:728
Indent in all namespaces.
Definition: Format.h:1278
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c.h:60
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:127
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1191
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1070
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:218
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:742
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:159
Does not indent any directives.
Definition: Format.h:1082
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:408
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:1095
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:133
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:376
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:700
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:774
Should be used for Java.
Definition: Format.h:1184
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:293
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:326
static void hash_combine(std::size_t &seed, const T &v)
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1282
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:499
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:319
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1186
ContinuationIndenter * Indenter
const AnnotatedLine * Line
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:823
bool AfterFunction
Wrap function definitions.
Definition: Format.h:680
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
SourceLocation getEnd() const
do v
Definition: arm_acle.h:78
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:310
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang&#39;s l...
Definition: FormatToken.h:655
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:140
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:171
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:48
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1268
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1201
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:308
Should be used for TableGen code.
Definition: Format.h:1193
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:714
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:331
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:652
Indents directives after the hash.
Definition: Format.h:1091
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:61
Represents a complete lambda introducer.
Definition: DeclSpec.h:2533
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:165
bool AfterClass
Wrap class definitions.
Definition: Format.h:634
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1196
StringRef Text
Definition: Format.cpp:1605
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:287
bool isStringLiteral() const
Definition: FormatToken.h:342
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:696
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:137
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:177
const FormatStyle & Style