clang  7.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
29 public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32 
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  TokenSource = this;
87  Line.Level = 0;
88  Line.InPPDirective = true;
89  }
90 
91  ~ScopedMacroState() override {
92  TokenSource = PreviousTokenSource;
93  ResetToken = Token;
94  Line.InPPDirective = false;
95  Line.Level = PreviousLineLevel;
96  }
97 
98  FormatToken *getNextToken() override {
99  // The \c UnwrappedLineParser guards against this by never calling
100  // \c getNextToken() after it has encountered the first eof token.
101  assert(!eof());
102  PreviousToken = Token;
103  Token = PreviousTokenSource->getNextToken();
104  if (eof())
105  return getFakeEOF();
106  return Token;
107  }
108 
109  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
110 
111  FormatToken *setPosition(unsigned Position) override {
112  PreviousToken = nullptr;
113  Token = PreviousTokenSource->setPosition(Position);
114  return Token;
115  }
116 
117 private:
118  bool eof() {
119  return Token && Token->HasUnescapedNewline &&
120  !continuesLineComment(*Token, PreviousToken,
121  /*MinColumnToken=*/PreviousToken);
122  }
123 
124  FormatToken *getFakeEOF() {
125  static bool EOFInitialized = false;
126  static FormatToken FormatTok;
127  if (!EOFInitialized) {
128  FormatTok.Tok.startToken();
129  FormatTok.Tok.setKind(tok::eof);
130  EOFInitialized = true;
131  }
132  return &FormatTok;
133  }
134 
136  FormatTokenSource *&TokenSource;
137  FormatToken *&ResetToken;
138  unsigned PreviousLineLevel;
139  FormatTokenSource *PreviousTokenSource;
140 
142  FormatToken *PreviousToken;
143 };
144 
145 } // end anonymous namespace
146 
148 public:
150  bool SwitchToPreprocessorLines = false)
151  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
152  if (SwitchToPreprocessorLines)
153  Parser.CurrentLines = &Parser.PreprocessorDirectives;
154  else if (!Parser.Line->Tokens.empty())
155  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
156  PreBlockLine = std::move(Parser.Line);
157  Parser.Line = llvm::make_unique<UnwrappedLine>();
158  Parser.Line->Level = PreBlockLine->Level;
159  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
160  }
161 
163  if (!Parser.Line->Tokens.empty()) {
164  Parser.addUnwrappedLine();
165  }
166  assert(Parser.Line->Tokens.empty());
167  Parser.Line = std::move(PreBlockLine);
168  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
169  Parser.MustBreakBeforeNextToken = true;
170  Parser.CurrentLines = OriginalLines;
171  }
172 
173 private:
175 
176  std::unique_ptr<UnwrappedLine> PreBlockLine;
177  SmallVectorImpl<UnwrappedLine> *OriginalLines;
178 };
179 
181 public:
183  const FormatStyle &Style, unsigned &LineLevel)
184  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
186  Parser->addUnwrappedLine();
187  if (Style.BraceWrapping.IndentBraces)
188  ++LineLevel;
189  }
190  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
191 
192 private:
193  unsigned &LineLevel;
194  unsigned OldLineLevel;
195 };
196 
197 namespace {
198 
199 class IndexedTokenSource : public FormatTokenSource {
200 public:
201  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
202  : Tokens(Tokens), Position(-1) {}
203 
204  FormatToken *getNextToken() override {
205  ++Position;
206  return Tokens[Position];
207  }
208 
209  unsigned getPosition() override {
210  assert(Position >= 0);
211  return Position;
212  }
213 
214  FormatToken *setPosition(unsigned P) override {
215  Position = P;
216  return Tokens[Position];
217  }
218 
219  void reset() { Position = -1; }
220 
221 private:
223  int Position;
224 };
225 
226 } // end anonymous namespace
227 
229  const AdditionalKeywords &Keywords,
230  unsigned FirstStartColumn,
232  UnwrappedLineConsumer &Callback)
233  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
234  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
235  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
236  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
237  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
238  ? IG_Rejected
239  : IG_Inited),
240  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
241 
242 void UnwrappedLineParser::reset() {
243  PPBranchLevel = -1;
244  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
245  ? IG_Rejected
246  : IG_Inited;
247  IncludeGuardToken = nullptr;
248  Line.reset(new UnwrappedLine);
249  CommentsBeforeNextToken.clear();
250  FormatTok = nullptr;
251  MustBreakBeforeNextToken = false;
252  PreprocessorDirectives.clear();
253  CurrentLines = &Lines;
254  DeclarationScopeStack.clear();
255  PPStack.clear();
256  Line->FirstStartColumn = FirstStartColumn;
257 }
258 
260  IndexedTokenSource TokenSource(AllTokens);
261  Line->FirstStartColumn = FirstStartColumn;
262  do {
263  DEBUG(llvm::dbgs() << "----\n");
264  reset();
265  Tokens = &TokenSource;
266  TokenSource.reset();
267 
268  readToken();
269  parseFile();
270 
271  // If we found an include guard then all preprocessor directives (other than
272  // the guard) are over-indented by one.
273  if (IncludeGuard == IG_Found)
274  for (auto &Line : Lines)
275  if (Line.InPPDirective && Line.Level > 0)
276  --Line.Level;
277 
278  // Create line with eof token.
279  pushToken(FormatTok);
280  addUnwrappedLine();
281 
282  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
283  E = Lines.end();
284  I != E; ++I) {
285  Callback.consumeUnwrappedLine(*I);
286  }
287  Callback.finishRun();
288  Lines.clear();
289  while (!PPLevelBranchIndex.empty() &&
290  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
291  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
292  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
293  }
294  if (!PPLevelBranchIndex.empty()) {
295  ++PPLevelBranchIndex.back();
296  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
297  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
298  }
299  } while (!PPLevelBranchIndex.empty());
300 }
301 
302 void UnwrappedLineParser::parseFile() {
303  // The top-level context in a file always has declarations, except for pre-
304  // processor directives and JavaScript files.
305  bool MustBeDeclaration =
306  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
307  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
308  MustBeDeclaration);
309  if (Style.Language == FormatStyle::LK_TextProto)
310  parseBracedList();
311  else
312  parseLevel(/*HasOpeningBrace=*/false);
313  // Make sure to format the remaining tokens.
314  flushComments(true);
315  addUnwrappedLine();
316 }
317 
318 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
319  bool SwitchLabelEncountered = false;
320  do {
321  tok::TokenKind kind = FormatTok->Tok.getKind();
322  if (FormatTok->Type == TT_MacroBlockBegin) {
323  kind = tok::l_brace;
324  } else if (FormatTok->Type == TT_MacroBlockEnd) {
325  kind = tok::r_brace;
326  }
327 
328  switch (kind) {
329  case tok::comment:
330  nextToken();
331  addUnwrappedLine();
332  break;
333  case tok::l_brace:
334  // FIXME: Add parameter whether this can happen - if this happens, we must
335  // be in a non-declaration context.
336  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
337  continue;
338  parseBlock(/*MustBeDeclaration=*/false);
339  addUnwrappedLine();
340  break;
341  case tok::r_brace:
342  if (HasOpeningBrace)
343  return;
344  nextToken();
345  addUnwrappedLine();
346  break;
347  case tok::kw_default: {
348  unsigned StoredPosition = Tokens->getPosition();
349  FormatToken *Next = Tokens->getNextToken();
350  FormatTok = Tokens->setPosition(StoredPosition);
351  if (Next && Next->isNot(tok::colon)) {
352  // default not followed by ':' is not a case label; treat it like
353  // an identifier.
354  parseStructuralElement();
355  break;
356  }
357  // Else, if it is 'default:', fall through to the case handling.
358  LLVM_FALLTHROUGH;
359  }
360  case tok::kw_case:
361  if (Style.Language == FormatStyle::LK_JavaScript &&
362  Line->MustBeDeclaration) {
363  // A 'case: string' style field declaration.
364  parseStructuralElement();
365  break;
366  }
367  if (!SwitchLabelEncountered &&
368  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
369  ++Line->Level;
370  SwitchLabelEncountered = true;
371  parseStructuralElement();
372  break;
373  default:
374  parseStructuralElement();
375  break;
376  }
377  } while (!eof());
378 }
379 
380 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
381  // We'll parse forward through the tokens until we hit
382  // a closing brace or eof - note that getNextToken() will
383  // parse macros, so this will magically work inside macro
384  // definitions, too.
385  unsigned StoredPosition = Tokens->getPosition();
386  FormatToken *Tok = FormatTok;
387  const FormatToken *PrevTok = Tok->Previous;
388  // Keep a stack of positions of lbrace tokens. We will
389  // update information about whether an lbrace starts a
390  // braced init list or a different block during the loop.
391  SmallVector<FormatToken *, 8> LBraceStack;
392  assert(Tok->Tok.is(tok::l_brace));
393  do {
394  // Get next non-comment token.
395  FormatToken *NextTok;
396  unsigned ReadTokens = 0;
397  do {
398  NextTok = Tokens->getNextToken();
399  ++ReadTokens;
400  } while (NextTok->is(tok::comment));
401 
402  switch (Tok->Tok.getKind()) {
403  case tok::l_brace:
404  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
405  if (PrevTok->isOneOf(tok::colon, tok::less))
406  // A ':' indicates this code is in a type, or a braced list
407  // following a label in an object literal ({a: {b: 1}}).
408  // A '<' could be an object used in a comparison, but that is nonsense
409  // code (can never return true), so more likely it is a generic type
410  // argument (`X<{a: string; b: number}>`).
411  // The code below could be confused by semicolons between the
412  // individual members in a type member list, which would normally
413  // trigger BK_Block. In both cases, this must be parsed as an inline
414  // braced init.
415  Tok->BlockKind = BK_BracedInit;
416  else if (PrevTok->is(tok::r_paren))
417  // `) { }` can only occur in function or method declarations in JS.
418  Tok->BlockKind = BK_Block;
419  } else {
420  Tok->BlockKind = BK_Unknown;
421  }
422  LBraceStack.push_back(Tok);
423  break;
424  case tok::r_brace:
425  if (LBraceStack.empty())
426  break;
427  if (LBraceStack.back()->BlockKind == BK_Unknown) {
428  bool ProbablyBracedList = false;
429  if (Style.Language == FormatStyle::LK_Proto) {
430  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
431  } else {
432  // Using OriginalColumn to distinguish between ObjC methods and
433  // binary operators is a bit hacky.
434  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
435  NextTok->OriginalColumn == 0;
436 
437  // If there is a comma, semicolon or right paren after the closing
438  // brace, we assume this is a braced initializer list. Note that
439  // regardless how we mark inner braces here, we will overwrite the
440  // BlockKind later if we parse a braced list (where all blocks
441  // inside are by default braced lists), or when we explicitly detect
442  // blocks (for example while parsing lambdas).
443  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
444  // braced list in JS.
445  ProbablyBracedList =
447  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
448  Keywords.kw_as)) ||
449  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
450  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
451  tok::r_paren, tok::r_square, tok::l_brace,
452  tok::ellipsis) ||
453  (NextTok->is(tok::identifier) &&
454  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
455  (NextTok->is(tok::semi) &&
456  (!ExpectClassBody || LBraceStack.size() != 1)) ||
457  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
458  if (NextTok->is(tok::l_square)) {
459  // We can have an array subscript after a braced init
460  // list, but C++11 attributes are expected after blocks.
461  NextTok = Tokens->getNextToken();
462  ++ReadTokens;
463  ProbablyBracedList = NextTok->isNot(tok::l_square);
464  }
465  }
466  if (ProbablyBracedList) {
467  Tok->BlockKind = BK_BracedInit;
468  LBraceStack.back()->BlockKind = BK_BracedInit;
469  } else {
470  Tok->BlockKind = BK_Block;
471  LBraceStack.back()->BlockKind = BK_Block;
472  }
473  }
474  LBraceStack.pop_back();
475  break;
476  case tok::at:
477  case tok::semi:
478  case tok::kw_if:
479  case tok::kw_while:
480  case tok::kw_for:
481  case tok::kw_switch:
482  case tok::kw_try:
483  case tok::kw___try:
484  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
485  LBraceStack.back()->BlockKind = BK_Block;
486  break;
487  default:
488  break;
489  }
490  PrevTok = Tok;
491  Tok = NextTok;
492  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
493 
494  // Assume other blocks for all unclosed opening braces.
495  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
496  if (LBraceStack[i]->BlockKind == BK_Unknown)
497  LBraceStack[i]->BlockKind = BK_Block;
498  }
499 
500  FormatTok = Tokens->setPosition(StoredPosition);
501 }
502 
/// Folds the \c std::hash of \p v into \p seed by XOR-ing \p seed with the
/// hash plus the constant 0x9e3779b9 and two shifted copies of the old seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
508 
509 size_t UnwrappedLineParser::computePPHash() const {
510  size_t h = 0;
511  for (const auto &i : PPStack) {
512  hash_combine(h, size_t(i.Kind));
513  hash_combine(h, i.Line);
514  }
515  return h;
516 }
517 
518 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
519  bool MunchSemi) {
520  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
521  "'{' or macro block token expected");
522  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
523  FormatTok->BlockKind = BK_Block;
524 
525  size_t PPStartHash = computePPHash();
526 
527  unsigned InitialLevel = Line->Level;
528  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
529 
530  if (MacroBlock && FormatTok->is(tok::l_paren))
531  parseParens();
532 
533  size_t NbPreprocessorDirectives =
534  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
535  addUnwrappedLine();
536  size_t OpeningLineIndex =
537  CurrentLines->empty()
539  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
540 
541  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
542  MustBeDeclaration);
543  if (AddLevel)
544  ++Line->Level;
545  parseLevel(/*HasOpeningBrace=*/true);
546 
547  if (eof())
548  return;
549 
550  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
551  : !FormatTok->is(tok::r_brace)) {
552  Line->Level = InitialLevel;
553  FormatTok->BlockKind = BK_Block;
554  return;
555  }
556 
557  size_t PPEndHash = computePPHash();
558 
559  // Munch the closing brace.
560  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
561 
562  if (MacroBlock && FormatTok->is(tok::l_paren))
563  parseParens();
564 
565  if (MunchSemi && FormatTok->Tok.is(tok::semi))
566  nextToken();
567  Line->Level = InitialLevel;
568 
569  if (PPStartHash == PPEndHash) {
570  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
571  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
572  // Update the opening line to add the forward reference as well
573  (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
574  CurrentLines->size() - 1;
575  }
576  }
577 }
578 
579 static bool isGoogScope(const UnwrappedLine &Line) {
580  // FIXME: Closure-library specific stuff should not be hard-coded but be
581  // configurable.
582  if (Line.Tokens.size() < 4)
583  return false;
584  auto I = Line.Tokens.begin();
585  if (I->Tok->TokenText != "goog")
586  return false;
587  ++I;
588  if (I->Tok->isNot(tok::period))
589  return false;
590  ++I;
591  if (I->Tok->TokenText != "scope")
592  return false;
593  ++I;
594  return I->Tok->is(tok::l_paren);
595 }
596 
597 static bool isIIFE(const UnwrappedLine &Line,
598  const AdditionalKeywords &Keywords) {
599  // Look for the start of an immediately invoked anonymous function.
600  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
601  // This is commonly done in JavaScript to create a new, anonymous scope.
602  // Example: (function() { ... })()
603  if (Line.Tokens.size() < 3)
604  return false;
605  auto I = Line.Tokens.begin();
606  if (I->Tok->isNot(tok::l_paren))
607  return false;
608  ++I;
609  if (I->Tok->isNot(Keywords.kw_function))
610  return false;
611  ++I;
612  return I->Tok->is(tok::l_paren);
613 }
614 
615 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
616  const FormatToken &InitialToken) {
617  if (InitialToken.is(tok::kw_namespace))
618  return Style.BraceWrapping.AfterNamespace;
619  if (InitialToken.is(tok::kw_class))
620  return Style.BraceWrapping.AfterClass;
621  if (InitialToken.is(tok::kw_union))
622  return Style.BraceWrapping.AfterUnion;
623  if (InitialToken.is(tok::kw_struct))
624  return Style.BraceWrapping.AfterStruct;
625  return false;
626 }
627 
628 void UnwrappedLineParser::parseChildBlock() {
629  FormatTok->BlockKind = BK_Block;
630  nextToken();
631  {
632  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
633  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
634  ScopedLineState LineState(*this);
635  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
636  /*MustBeDeclaration=*/false);
637  Line->Level += SkipIndent ? 0 : 1;
638  parseLevel(/*HasOpeningBrace=*/true);
639  flushComments(isOnNewLine(*FormatTok));
640  Line->Level -= SkipIndent ? 0 : 1;
641  }
642  nextToken();
643 }
644 
645 void UnwrappedLineParser::parsePPDirective() {
646  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
647  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
648  nextToken();
649 
650  if (!FormatTok->Tok.getIdentifierInfo()) {
651  parsePPUnknown();
652  return;
653  }
654 
655  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
656  case tok::pp_define:
657  parsePPDefine();
658  return;
659  case tok::pp_if:
660  parsePPIf(/*IfDef=*/false);
661  break;
662  case tok::pp_ifdef:
663  case tok::pp_ifndef:
664  parsePPIf(/*IfDef=*/true);
665  break;
666  case tok::pp_else:
667  parsePPElse();
668  break;
669  case tok::pp_elif:
670  parsePPElIf();
671  break;
672  case tok::pp_endif:
673  parsePPEndIf();
674  break;
675  default:
676  parsePPUnknown();
677  break;
678  }
679 }
680 
681 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
682  size_t Line = CurrentLines->size();
683  if (CurrentLines == &PreprocessorDirectives)
684  Line += Lines.size();
685 
686  if (Unreachable ||
687  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
688  PPStack.push_back({PP_Unreachable, Line});
689  else
690  PPStack.push_back({PP_Conditional, Line});
691 }
692 
693 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
694  ++PPBranchLevel;
695  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
696  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
697  PPLevelBranchIndex.push_back(0);
698  PPLevelBranchCount.push_back(0);
699  }
700  PPChainBranchIndex.push(0);
701  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
702  conditionalCompilationCondition(Unreachable || Skip);
703 }
704 
705 void UnwrappedLineParser::conditionalCompilationAlternative() {
706  if (!PPStack.empty())
707  PPStack.pop_back();
708  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
709  if (!PPChainBranchIndex.empty())
710  ++PPChainBranchIndex.top();
711  conditionalCompilationCondition(
712  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
713  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
714 }
715 
716 void UnwrappedLineParser::conditionalCompilationEnd() {
717  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
718  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
719  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
720  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
721  }
722  }
723  // Guard against #endif's without #if.
724  if (PPBranchLevel > -1)
725  --PPBranchLevel;
726  if (!PPChainBranchIndex.empty())
727  PPChainBranchIndex.pop();
728  if (!PPStack.empty())
729  PPStack.pop_back();
730 }
731 
732 void UnwrappedLineParser::parsePPIf(bool IfDef) {
733  bool IfNDef = FormatTok->is(tok::pp_ifndef);
734  nextToken();
735  bool Unreachable = false;
736  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
737  Unreachable = true;
738  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
739  Unreachable = true;
740  conditionalCompilationStart(Unreachable);
741  FormatToken *IfCondition = FormatTok;
742  // If there's a #ifndef on the first line, and the only lines before it are
743  // comments, it could be an include guard.
744  bool MaybeIncludeGuard = IfNDef;
745  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
746  for (auto &Line : Lines) {
747  if (!Line.Tokens.front().Tok->is(tok::comment)) {
748  MaybeIncludeGuard = false;
749  IncludeGuard = IG_Rejected;
750  break;
751  }
752  }
753  --PPBranchLevel;
754  parsePPUnknown();
755  ++PPBranchLevel;
756  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
757  IncludeGuard = IG_IfNdefed;
758  IncludeGuardToken = IfCondition;
759  }
760 }
761 
762 void UnwrappedLineParser::parsePPElse() {
763  // If a potential include guard has an #else, it's not an include guard.
764  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
765  IncludeGuard = IG_Rejected;
766  conditionalCompilationAlternative();
767  if (PPBranchLevel > -1)
768  --PPBranchLevel;
769  parsePPUnknown();
770  ++PPBranchLevel;
771 }
772 
773 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
774 
775 void UnwrappedLineParser::parsePPEndIf() {
776  conditionalCompilationEnd();
777  parsePPUnknown();
778  // If the #endif of a potential include guard is the last thing in the file,
779  // then we found an include guard.
780  unsigned TokenPosition = Tokens->getPosition();
781  FormatToken *PeekNext = AllTokens[TokenPosition];
782  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
783  PeekNext->is(tok::eof) &&
785  IncludeGuard = IG_Found;
786 }
787 
788 void UnwrappedLineParser::parsePPDefine() {
789  nextToken();
790 
791  if (FormatTok->Tok.getKind() != tok::identifier) {
792  IncludeGuard = IG_Rejected;
793  IncludeGuardToken = nullptr;
794  parsePPUnknown();
795  return;
796  }
797 
798  if (IncludeGuard == IG_IfNdefed &&
799  IncludeGuardToken->TokenText == FormatTok->TokenText) {
800  IncludeGuard = IG_Defined;
801  IncludeGuardToken = nullptr;
802  for (auto &Line : Lines) {
803  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
804  IncludeGuard = IG_Rejected;
805  break;
806  }
807  }
808  }
809 
810  nextToken();
811  if (FormatTok->Tok.getKind() == tok::l_paren &&
812  FormatTok->WhitespaceRange.getBegin() ==
813  FormatTok->WhitespaceRange.getEnd()) {
814  parseParens();
815  }
817  Line->Level += PPBranchLevel + 1;
818  addUnwrappedLine();
819  ++Line->Level;
820 
821  // Errors during a preprocessor directive can only affect the layout of the
822  // preprocessor directive, and thus we ignore them. An alternative approach
823  // would be to use the same approach we use on the file level (no
824  // re-indentation if there was a structural error) within the macro
825  // definition.
826  parseFile();
827 }
828 
829 void UnwrappedLineParser::parsePPUnknown() {
830  do {
831  nextToken();
832  } while (!eof());
834  Line->Level += PPBranchLevel + 1;
835  addUnwrappedLine();
836 }
837 
838 // Here we blacklist certain tokens that are not usually the first token in an
839 // unwrapped line. This is used in attempt to distinguish macro calls without
840 // trailing semicolons from other constructs split to several lines.
841 static bool tokenCanStartNewLine(const clang::Token &Tok) {
842  // Semicolon can be a null-statement, l_square can be a start of a macro or
843  // a C++11 attribute, but this doesn't seem to be common.
844  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
845  Tok.isNot(tok::l_square) &&
846  // Tokens that can only be used as binary operators and a part of
847  // overloaded operator names.
848  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
849  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
850  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
851  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
852  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
853  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
854  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
855  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
856  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
857  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
858  Tok.isNot(tok::lesslessequal) &&
859  // Colon is used in labels, base class lists, initializer lists,
860  // range-based for loops, ternary operator, but should never be the
861  // first token in an unwrapped line.
862  Tok.isNot(tok::colon) &&
863  // 'noexcept' is a trailing annotation.
864  Tok.isNot(tok::kw_noexcept);
865 }
866 
867 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
868  const FormatToken *FormatTok) {
869  // FIXME: This returns true for C/C++ keywords like 'struct'.
870  return FormatTok->is(tok::identifier) &&
871  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
872  !FormatTok->isOneOf(
873  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
874  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
875  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
876  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
877  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
878  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
879  Keywords.kw_from));
880 }
881 
882 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
883  const FormatToken *FormatTok) {
884  return FormatTok->Tok.isLiteral() ||
885  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
886  mustBeJSIdent(Keywords, FormatTok);
887 }
888 
889 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
890 // when encountered after a value (see mustBeJSIdentOrValue).
891 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
892  const FormatToken *FormatTok) {
893  return FormatTok->isOneOf(
894  tok::kw_return, Keywords.kw_yield,
895  // conditionals
896  tok::kw_if, tok::kw_else,
897  // loops
898  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
899  // switch/case
900  tok::kw_switch, tok::kw_case,
901  // exceptions
902  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
903  // declaration
904  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
905  Keywords.kw_async, Keywords.kw_function,
906  // import/export
907  Keywords.kw_import, tok::kw_export);
908 }
909 
910 // readTokenWithJavaScriptASI reads the next token and terminates the current
911 // line if JavaScript Automatic Semicolon Insertion must
912 // happen between the current token and the next token.
913 //
914 // This method is conservative - it cannot cover all edge cases of JavaScript,
915 // but only aims to correctly handle certain well known cases. It *must not*
916 // return true in speculative cases.
917 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
918  FormatToken *Previous = FormatTok;
919  readToken();
920  FormatToken *Next = FormatTok;
921 
922  bool IsOnSameLine =
923  CommentsBeforeNextToken.empty()
924  ? Next->NewlinesBefore == 0
925  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
926  if (IsOnSameLine)
927  return;
928 
929  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
930  bool PreviousStartsTemplateExpr =
931  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
932  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
933  // If the line contains an '@' sign, the previous token might be an
934  // annotation, which can precede another identifier/value.
935  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
936  [](UnwrappedLineNode &LineNode) {
937  return LineNode.Tok->is(tok::at);
938  }) != Line->Tokens.end();
939  if (HasAt)
940  return;
941  }
942  if (Next->is(tok::exclaim) && PreviousMustBeValue)
943  return addUnwrappedLine();
944  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
945  bool NextEndsTemplateExpr =
946  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
947  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
948  (PreviousMustBeValue ||
949  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
950  tok::minusminus)))
951  return addUnwrappedLine();
952  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
953  isJSDeclOrStmt(Keywords, Next))
954  return addUnwrappedLine();
955 }
956 
957 void UnwrappedLineParser::parseStructuralElement() {
958  assert(!FormatTok->is(tok::l_brace));
959  if (Style.Language == FormatStyle::LK_TableGen &&
960  FormatTok->is(tok::pp_include)) {
961  nextToken();
962  if (FormatTok->is(tok::string_literal))
963  nextToken();
964  addUnwrappedLine();
965  return;
966  }
967  switch (FormatTok->Tok.getKind()) {
968  case tok::kw_asm:
969  nextToken();
970  if (FormatTok->is(tok::l_brace)) {
971  FormatTok->Type = TT_InlineASMBrace;
972  nextToken();
973  while (FormatTok && FormatTok->isNot(tok::eof)) {
974  if (FormatTok->is(tok::r_brace)) {
975  FormatTok->Type = TT_InlineASMBrace;
976  nextToken();
977  addUnwrappedLine();
978  break;
979  }
980  FormatTok->Finalized = true;
981  nextToken();
982  }
983  }
984  break;
985  case tok::kw_namespace:
986  parseNamespace();
987  return;
988  case tok::kw_inline:
989  nextToken();
990  if (FormatTok->Tok.is(tok::kw_namespace)) {
991  parseNamespace();
992  return;
993  }
994  break;
995  case tok::kw_public:
996  case tok::kw_protected:
997  case tok::kw_private:
998  if (Style.Language == FormatStyle::LK_Java ||
1000  nextToken();
1001  else
1002  parseAccessSpecifier();
1003  return;
1004  case tok::kw_if:
1005  parseIfThenElse();
1006  return;
1007  case tok::kw_for:
1008  case tok::kw_while:
1009  parseForOrWhileLoop();
1010  return;
1011  case tok::kw_do:
1012  parseDoWhile();
1013  return;
1014  case tok::kw_switch:
1015  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1016  // 'switch: string' field declaration.
1017  break;
1018  parseSwitch();
1019  return;
1020  case tok::kw_default:
1021  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1022  // 'default: string' field declaration.
1023  break;
1024  nextToken();
1025  if (FormatTok->is(tok::colon)) {
1026  parseLabel();
1027  return;
1028  }
1029  // e.g. "default void f() {}" in a Java interface.
1030  break;
1031  case tok::kw_case:
1032  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1033  // 'case: string' field declaration.
1034  break;
1035  parseCaseLabel();
1036  return;
1037  case tok::kw_try:
1038  case tok::kw___try:
1039  parseTryCatch();
1040  return;
1041  case tok::kw_extern:
1042  nextToken();
1043  if (FormatTok->Tok.is(tok::string_literal)) {
1044  nextToken();
1045  if (FormatTok->Tok.is(tok::l_brace)) {
1046  if (Style.BraceWrapping.AfterExternBlock) {
1047  addUnwrappedLine();
1048  parseBlock(/*MustBeDeclaration=*/true);
1049  } else {
1050  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1051  }
1052  addUnwrappedLine();
1053  return;
1054  }
1055  }
1056  break;
1057  case tok::kw_export:
1058  if (Style.Language == FormatStyle::LK_JavaScript) {
1059  parseJavaScriptEs6ImportExport();
1060  return;
1061  }
1062  break;
1063  case tok::identifier:
1064  if (FormatTok->is(TT_ForEachMacro)) {
1065  parseForOrWhileLoop();
1066  return;
1067  }
1068  if (FormatTok->is(TT_MacroBlockBegin)) {
1069  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1070  /*MunchSemi=*/false);
1071  return;
1072  }
1073  if (FormatTok->is(Keywords.kw_import)) {
1074  if (Style.Language == FormatStyle::LK_JavaScript) {
1075  parseJavaScriptEs6ImportExport();
1076  return;
1077  }
1078  if (Style.Language == FormatStyle::LK_Proto) {
1079  nextToken();
1080  if (FormatTok->is(tok::kw_public))
1081  nextToken();
1082  if (!FormatTok->is(tok::string_literal))
1083  return;
1084  nextToken();
1085  if (FormatTok->is(tok::semi))
1086  nextToken();
1087  addUnwrappedLine();
1088  return;
1089  }
1090  }
1091  if (Style.isCpp() &&
1092  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1093  Keywords.kw_slots, Keywords.kw_qslots)) {
1094  nextToken();
1095  if (FormatTok->is(tok::colon)) {
1096  nextToken();
1097  addUnwrappedLine();
1098  return;
1099  }
1100  }
1101  // In all other cases, parse the declaration.
1102  break;
1103  default:
1104  break;
1105  }
1106  do {
1107  const FormatToken *Previous = FormatTok->Previous;
1108  switch (FormatTok->Tok.getKind()) {
1109  case tok::at:
1110  nextToken();
1111  if (FormatTok->Tok.is(tok::l_brace)) {
1112  nextToken();
1113  parseBracedList();
1114  break;
1115  }
1116  switch (FormatTok->Tok.getObjCKeywordID()) {
1117  case tok::objc_public:
1118  case tok::objc_protected:
1119  case tok::objc_package:
1120  case tok::objc_private:
1121  return parseAccessSpecifier();
1122  case tok::objc_interface:
1123  case tok::objc_implementation:
1124  return parseObjCInterfaceOrImplementation();
1125  case tok::objc_protocol:
1126  if (parseObjCProtocol())
1127  return;
1128  break;
1129  case tok::objc_end:
1130  return; // Handled by the caller.
1131  case tok::objc_optional:
1132  case tok::objc_required:
1133  nextToken();
1134  addUnwrappedLine();
1135  return;
1136  case tok::objc_autoreleasepool:
1137  nextToken();
1138  if (FormatTok->Tok.is(tok::l_brace)) {
1140  addUnwrappedLine();
1141  parseBlock(/*MustBeDeclaration=*/false);
1142  }
1143  addUnwrappedLine();
1144  return;
1145  case tok::objc_synchronized:
1146  nextToken();
1147  if (FormatTok->Tok.is(tok::l_paren))
1148  // Skip synchronization object
1149  parseParens();
1150  if (FormatTok->Tok.is(tok::l_brace)) {
1152  addUnwrappedLine();
1153  parseBlock(/*MustBeDeclaration=*/false);
1154  }
1155  addUnwrappedLine();
1156  return;
1157  case tok::objc_try:
1158  // This branch isn't strictly necessary (the kw_try case below would
1159  // do this too after the tok::at is parsed above). But be explicit.
1160  parseTryCatch();
1161  return;
1162  default:
1163  break;
1164  }
1165  break;
1166  case tok::kw_enum:
1167  // Ignore if this is part of "template <enum ...".
1168  if (Previous && Previous->is(tok::less)) {
1169  nextToken();
1170  break;
1171  }
1172 
1173  // parseEnum falls through and does not yet add an unwrapped line as an
1174  // enum definition can start a structural element.
1175  if (!parseEnum())
1176  break;
1177  // This only applies for C++.
1178  if (!Style.isCpp()) {
1179  addUnwrappedLine();
1180  return;
1181  }
1182  break;
1183  case tok::kw_typedef:
1184  nextToken();
1185  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1186  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1187  parseEnum();
1188  break;
1189  case tok::kw_struct:
1190  case tok::kw_union:
1191  case tok::kw_class:
1192  // parseRecord falls through and does not yet add an unwrapped line as a
1193  // record declaration or definition can start a structural element.
1194  parseRecord();
1195  // This does not apply for Java and JavaScript.
1196  if (Style.Language == FormatStyle::LK_Java ||
1198  if (FormatTok->is(tok::semi))
1199  nextToken();
1200  addUnwrappedLine();
1201  return;
1202  }
1203  break;
1204  case tok::period:
1205  nextToken();
1206  // In Java, classes have an implicit static member "class".
1207  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1208  FormatTok->is(tok::kw_class))
1209  nextToken();
1210  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1211  FormatTok->Tok.getIdentifierInfo())
1212  // JavaScript only has pseudo keywords, all keywords are allowed to
1213  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1214  nextToken();
1215  break;
1216  case tok::semi:
1217  nextToken();
1218  addUnwrappedLine();
1219  return;
1220  case tok::r_brace:
1221  addUnwrappedLine();
1222  return;
1223  case tok::l_paren:
1224  parseParens();
1225  break;
1226  case tok::kw_operator:
1227  nextToken();
1228  if (FormatTok->isBinaryOperator())
1229  nextToken();
1230  break;
1231  case tok::caret:
1232  nextToken();
1233  if (FormatTok->Tok.isAnyIdentifier() ||
1234  FormatTok->isSimpleTypeSpecifier())
1235  nextToken();
1236  if (FormatTok->is(tok::l_paren))
1237  parseParens();
1238  if (FormatTok->is(tok::l_brace))
1239  parseChildBlock();
1240  break;
1241  case tok::l_brace:
1242  if (!tryToParseBracedList()) {
1243  // A block outside of parentheses must be the last part of a
1244  // structural element.
1245  // FIXME: Figure out cases where this is not true, and add projections
1246  // for them (the one we know is missing are lambdas).
1247  if (Style.BraceWrapping.AfterFunction)
1248  addUnwrappedLine();
1249  FormatTok->Type = TT_FunctionLBrace;
1250  parseBlock(/*MustBeDeclaration=*/false);
1251  addUnwrappedLine();
1252  return;
1253  }
1254  // Otherwise this was a braced init list, and the structural
1255  // element continues.
1256  break;
1257  case tok::kw_try:
1258  // We arrive here when parsing function-try blocks.
1259  parseTryCatch();
1260  return;
1261  case tok::identifier: {
1262  if (FormatTok->is(TT_MacroBlockEnd)) {
1263  addUnwrappedLine();
1264  return;
1265  }
1266 
1267  // Function declarations (as opposed to function expressions) are parsed
1268  // on their own unwrapped line by continuing this loop. Function
1269  // expressions (functions that are not on their own line) must not create
1270  // a new unwrapped line, so they are special cased below.
1271  size_t TokenCount = Line->Tokens.size();
1272  if (Style.Language == FormatStyle::LK_JavaScript &&
1273  FormatTok->is(Keywords.kw_function) &&
1274  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1275  Keywords.kw_async)))) {
1276  tryToParseJSFunction();
1277  break;
1278  }
1279  if ((Style.Language == FormatStyle::LK_JavaScript ||
1280  Style.Language == FormatStyle::LK_Java) &&
1281  FormatTok->is(Keywords.kw_interface)) {
1282  if (Style.Language == FormatStyle::LK_JavaScript) {
1283  // In JavaScript/TypeScript, "interface" can be used as a standalone
1284  // identifier, e.g. in `var interface = 1;`. If "interface" is
1285  // followed by another identifier, it is very like to be an actual
1286  // interface declaration.
1287  unsigned StoredPosition = Tokens->getPosition();
1288  FormatToken *Next = Tokens->getNextToken();
1289  FormatTok = Tokens->setPosition(StoredPosition);
1290  if (Next && !mustBeJSIdent(Keywords, Next)) {
1291  nextToken();
1292  break;
1293  }
1294  }
1295  parseRecord();
1296  addUnwrappedLine();
1297  return;
1298  }
1299 
1300  // See if the following token should start a new unwrapped line.
1301  StringRef Text = FormatTok->TokenText;
1302  nextToken();
1303  if (Line->Tokens.size() == 1 &&
1304  // JS doesn't have macros, and within classes colons indicate fields,
1305  // not labels.
1307  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1308  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1309  parseLabel();
1310  return;
1311  }
1312  // Recognize function-like macro usages without trailing semicolon as
1313  // well as free-standing macros like Q_OBJECT.
1314  bool FunctionLike = FormatTok->is(tok::l_paren);
1315  if (FunctionLike)
1316  parseParens();
1317 
1318  bool FollowedByNewline =
1319  CommentsBeforeNextToken.empty()
1320  ? FormatTok->NewlinesBefore > 0
1321  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1322 
1323  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1324  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1325  addUnwrappedLine();
1326  return;
1327  }
1328  }
1329  break;
1330  }
1331  case tok::equal:
1332  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1333  // TT_JsFatArrow. The always start an expression or a child block if
1334  // followed by a curly.
1335  if (FormatTok->is(TT_JsFatArrow)) {
1336  nextToken();
1337  if (FormatTok->is(tok::l_brace))
1338  parseChildBlock();
1339  break;
1340  }
1341 
1342  nextToken();
1343  if (FormatTok->Tok.is(tok::l_brace)) {
1344  nextToken();
1345  parseBracedList();
1346  } else if (Style.Language == FormatStyle::LK_Proto &&
1347  FormatTok->Tok.is(tok::less)) {
1348  nextToken();
1349  parseBracedList(/*ContinueOnSemicolons=*/false,
1350  /*ClosingBraceKind=*/tok::greater);
1351  }
1352  break;
1353  case tok::l_square:
1354  parseSquare();
1355  break;
1356  case tok::kw_new:
1357  parseNew();
1358  break;
1359  default:
1360  nextToken();
1361  break;
1362  }
1363  } while (!eof());
1364 }
1365 
1366 bool UnwrappedLineParser::tryToParseLambda() {
1367  if (!Style.isCpp()) {
1368  nextToken();
1369  return false;
1370  }
1371  assert(FormatTok->is(tok::l_square));
1372  FormatToken &LSquare = *FormatTok;
1373  if (!tryToParseLambdaIntroducer())
1374  return false;
1375 
1376  while (FormatTok->isNot(tok::l_brace)) {
1377  if (FormatTok->isSimpleTypeSpecifier()) {
1378  nextToken();
1379  continue;
1380  }
1381  switch (FormatTok->Tok.getKind()) {
1382  case tok::l_brace:
1383  break;
1384  case tok::l_paren:
1385  parseParens();
1386  break;
1387  case tok::amp:
1388  case tok::star:
1389  case tok::kw_const:
1390  case tok::comma:
1391  case tok::less:
1392  case tok::greater:
1393  case tok::identifier:
1394  case tok::numeric_constant:
1395  case tok::coloncolon:
1396  case tok::kw_mutable:
1397  nextToken();
1398  break;
1399  case tok::arrow:
1400  FormatTok->Type = TT_LambdaArrow;
1401  nextToken();
1402  break;
1403  default:
1404  return true;
1405  }
1406  }
1407  LSquare.Type = TT_LambdaLSquare;
1408  parseChildBlock();
1409  return true;
1410 }
1411 
1412 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1413  const FormatToken *Previous = FormatTok->Previous;
1414  if (Previous &&
1415  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1416  tok::kw_delete, tok::l_square) ||
1417  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1418  Previous->isSimpleTypeSpecifier())) {
1419  nextToken();
1420  return false;
1421  }
1422  nextToken();
1423  if (FormatTok->is(tok::l_square)) {
1424  return false;
1425  }
1426  parseSquare(/*LambdaIntroducer=*/true);
1427  return true;
1428 }
1429 
1430 void UnwrappedLineParser::tryToParseJSFunction() {
1431  assert(FormatTok->is(Keywords.kw_function) ||
1432  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1433  if (FormatTok->is(Keywords.kw_async))
1434  nextToken();
1435  // Consume "function".
1436  nextToken();
1437 
1438  // Consume * (generator function). Treat it like C++'s overloaded operators.
1439  if (FormatTok->is(tok::star)) {
1440  FormatTok->Type = TT_OverloadedOperator;
1441  nextToken();
1442  }
1443 
1444  // Consume function name.
1445  if (FormatTok->is(tok::identifier))
1446  nextToken();
1447 
1448  if (FormatTok->isNot(tok::l_paren))
1449  return;
1450 
1451  // Parse formal parameter list.
1452  parseParens();
1453 
1454  if (FormatTok->is(tok::colon)) {
1455  // Parse a type definition.
1456  nextToken();
1457 
1458  // Eat the type declaration. For braced inline object types, balance braces,
1459  // otherwise just parse until finding an l_brace for the function body.
1460  if (FormatTok->is(tok::l_brace))
1461  tryToParseBracedList();
1462  else
1463  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1464  nextToken();
1465  }
1466 
1467  if (FormatTok->is(tok::semi))
1468  return;
1469 
1470  parseChildBlock();
1471 }
1472 
1473 bool UnwrappedLineParser::tryToParseBracedList() {
1474  if (FormatTok->BlockKind == BK_Unknown)
1475  calculateBraceTypes();
1476  assert(FormatTok->BlockKind != BK_Unknown);
1477  if (FormatTok->BlockKind == BK_Block)
1478  return false;
1479  nextToken();
1480  parseBracedList();
1481  return true;
1482 }
1483 
1484 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1485  tok::TokenKind ClosingBraceKind) {
1486  bool HasError = false;
1487 
1488  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1489  // replace this by using parseAssigmentExpression() inside.
1490  do {
1491  if (Style.Language == FormatStyle::LK_JavaScript) {
1492  if (FormatTok->is(Keywords.kw_function) ||
1493  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1494  tryToParseJSFunction();
1495  continue;
1496  }
1497  if (FormatTok->is(TT_JsFatArrow)) {
1498  nextToken();
1499  // Fat arrows can be followed by simple expressions or by child blocks
1500  // in curly braces.
1501  if (FormatTok->is(tok::l_brace)) {
1502  parseChildBlock();
1503  continue;
1504  }
1505  }
1506  if (FormatTok->is(tok::l_brace)) {
1507  // Could be a method inside of a braced list `{a() { return 1; }}`.
1508  if (tryToParseBracedList())
1509  continue;
1510  parseChildBlock();
1511  }
1512  }
1513  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1514  nextToken();
1515  return !HasError;
1516  }
1517  switch (FormatTok->Tok.getKind()) {
1518  case tok::caret:
1519  nextToken();
1520  if (FormatTok->is(tok::l_brace)) {
1521  parseChildBlock();
1522  }
1523  break;
1524  case tok::l_square:
1525  tryToParseLambda();
1526  break;
1527  case tok::l_paren:
1528  parseParens();
1529  // JavaScript can just have free standing methods and getters/setters in
1530  // object literals. Detect them by a "{" following ")".
1531  if (Style.Language == FormatStyle::LK_JavaScript) {
1532  if (FormatTok->is(tok::l_brace))
1533  parseChildBlock();
1534  break;
1535  }
1536  break;
1537  case tok::l_brace:
1538  // Assume there are no blocks inside a braced init list apart
1539  // from the ones we explicitly parse out (like lambdas).
1540  FormatTok->BlockKind = BK_BracedInit;
1541  nextToken();
1542  parseBracedList();
1543  break;
1544  case tok::less:
1545  if (Style.Language == FormatStyle::LK_Proto) {
1546  nextToken();
1547  parseBracedList(/*ContinueOnSemicolons=*/false,
1548  /*ClosingBraceKind=*/tok::greater);
1549  } else {
1550  nextToken();
1551  }
1552  break;
1553  case tok::semi:
1554  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1555  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1556  // used for error recovery if we have otherwise determined that this is
1557  // a braced list.
1558  if (Style.Language == FormatStyle::LK_JavaScript) {
1559  nextToken();
1560  break;
1561  }
1562  HasError = true;
1563  if (!ContinueOnSemicolons)
1564  return !HasError;
1565  nextToken();
1566  break;
1567  case tok::comma:
1568  nextToken();
1569  break;
1570  default:
1571  nextToken();
1572  break;
1573  }
1574  } while (!eof());
1575  return false;
1576 }
1577 
1578 void UnwrappedLineParser::parseParens() {
1579  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1580  nextToken();
1581  do {
1582  switch (FormatTok->Tok.getKind()) {
1583  case tok::l_paren:
1584  parseParens();
1585  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1586  parseChildBlock();
1587  break;
1588  case tok::r_paren:
1589  nextToken();
1590  return;
1591  case tok::r_brace:
1592  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1593  return;
1594  case tok::l_square:
1595  tryToParseLambda();
1596  break;
1597  case tok::l_brace:
1598  if (!tryToParseBracedList())
1599  parseChildBlock();
1600  break;
1601  case tok::at:
1602  nextToken();
1603  if (FormatTok->Tok.is(tok::l_brace)) {
1604  nextToken();
1605  parseBracedList();
1606  }
1607  break;
1608  case tok::kw_class:
1609  if (Style.Language == FormatStyle::LK_JavaScript)
1610  parseRecord(/*ParseAsExpr=*/true);
1611  else
1612  nextToken();
1613  break;
1614  case tok::identifier:
1615  if (Style.Language == FormatStyle::LK_JavaScript &&
1616  (FormatTok->is(Keywords.kw_function) ||
1617  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1618  tryToParseJSFunction();
1619  else
1620  nextToken();
1621  break;
1622  default:
1623  nextToken();
1624  break;
1625  }
1626  } while (!eof());
1627 }
1628 
1629 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1630  if (!LambdaIntroducer) {
1631  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1632  if (tryToParseLambda())
1633  return;
1634  }
1635  do {
1636  switch (FormatTok->Tok.getKind()) {
1637  case tok::l_paren:
1638  parseParens();
1639  break;
1640  case tok::r_square:
1641  nextToken();
1642  return;
1643  case tok::r_brace:
1644  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1645  return;
1646  case tok::l_square:
1647  parseSquare();
1648  break;
1649  case tok::l_brace: {
1650  if (!tryToParseBracedList())
1651  parseChildBlock();
1652  break;
1653  }
1654  case tok::at:
1655  nextToken();
1656  if (FormatTok->Tok.is(tok::l_brace)) {
1657  nextToken();
1658  parseBracedList();
1659  }
1660  break;
1661  default:
1662  nextToken();
1663  break;
1664  }
1665  } while (!eof());
1666 }
1667 
1668 void UnwrappedLineParser::parseIfThenElse() {
1669  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1670  nextToken();
1671  if (FormatTok->Tok.is(tok::kw_constexpr))
1672  nextToken();
1673  if (FormatTok->Tok.is(tok::l_paren))
1674  parseParens();
1675  bool NeedsUnwrappedLine = false;
1676  if (FormatTok->Tok.is(tok::l_brace)) {
1677  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1678  parseBlock(/*MustBeDeclaration=*/false);
1679  if (Style.BraceWrapping.BeforeElse)
1680  addUnwrappedLine();
1681  else
1682  NeedsUnwrappedLine = true;
1683  } else {
1684  addUnwrappedLine();
1685  ++Line->Level;
1686  parseStructuralElement();
1687  --Line->Level;
1688  }
1689  if (FormatTok->Tok.is(tok::kw_else)) {
1690  nextToken();
1691  if (FormatTok->Tok.is(tok::l_brace)) {
1692  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1693  parseBlock(/*MustBeDeclaration=*/false);
1694  addUnwrappedLine();
1695  } else if (FormatTok->Tok.is(tok::kw_if)) {
1696  parseIfThenElse();
1697  } else {
1698  addUnwrappedLine();
1699  ++Line->Level;
1700  parseStructuralElement();
1701  if (FormatTok->is(tok::eof))
1702  addUnwrappedLine();
1703  --Line->Level;
1704  }
1705  } else if (NeedsUnwrappedLine) {
1706  addUnwrappedLine();
1707  }
1708 }
1709 
1710 void UnwrappedLineParser::parseTryCatch() {
1711  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1712  nextToken();
1713  bool NeedsUnwrappedLine = false;
1714  if (FormatTok->is(tok::colon)) {
1715  // We are in a function try block, what comes is an initializer list.
1716  nextToken();
1717  while (FormatTok->is(tok::identifier)) {
1718  nextToken();
1719  if (FormatTok->is(tok::l_paren))
1720  parseParens();
1721  if (FormatTok->is(tok::comma))
1722  nextToken();
1723  }
1724  }
1725  // Parse try with resource.
1726  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1727  parseParens();
1728  }
1729  if (FormatTok->is(tok::l_brace)) {
1730  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1731  parseBlock(/*MustBeDeclaration=*/false);
1732  if (Style.BraceWrapping.BeforeCatch) {
1733  addUnwrappedLine();
1734  } else {
1735  NeedsUnwrappedLine = true;
1736  }
1737  } else if (!FormatTok->is(tok::kw_catch)) {
1738  // The C++ standard requires a compound-statement after a try.
1739  // If there's none, we try to assume there's a structuralElement
1740  // and try to continue.
1741  addUnwrappedLine();
1742  ++Line->Level;
1743  parseStructuralElement();
1744  --Line->Level;
1745  }
1746  while (1) {
1747  if (FormatTok->is(tok::at))
1748  nextToken();
1749  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1750  tok::kw___finally) ||
1751  ((Style.Language == FormatStyle::LK_Java ||
1753  FormatTok->is(Keywords.kw_finally)) ||
1754  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1755  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1756  break;
1757  nextToken();
1758  while (FormatTok->isNot(tok::l_brace)) {
1759  if (FormatTok->is(tok::l_paren)) {
1760  parseParens();
1761  continue;
1762  }
1763  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1764  return;
1765  nextToken();
1766  }
1767  NeedsUnwrappedLine = false;
1768  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1769  parseBlock(/*MustBeDeclaration=*/false);
1770  if (Style.BraceWrapping.BeforeCatch)
1771  addUnwrappedLine();
1772  else
1773  NeedsUnwrappedLine = true;
1774  }
1775  if (NeedsUnwrappedLine)
1776  addUnwrappedLine();
1777 }
1778 
1779 void UnwrappedLineParser::parseNamespace() {
1780  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1781 
1782  const FormatToken &InitialToken = *FormatTok;
1783  nextToken();
1784  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1785  nextToken();
1786  if (FormatTok->Tok.is(tok::l_brace)) {
1787  if (ShouldBreakBeforeBrace(Style, InitialToken))
1788  addUnwrappedLine();
1789 
1790  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1792  DeclarationScopeStack.size() > 1);
1793  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1794  // Munch the semicolon after a namespace. This is more common than one would
1795  // think. Puttin the semicolon into its own line is very ugly.
1796  if (FormatTok->Tok.is(tok::semi))
1797  nextToken();
1798  addUnwrappedLine();
1799  }
1800  // FIXME: Add error handling.
1801 }
1802 
1803 void UnwrappedLineParser::parseNew() {
1804  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1805  nextToken();
1806  if (Style.Language != FormatStyle::LK_Java)
1807  return;
1808 
1809  // In Java, we can parse everything up to the parens, which aren't optional.
1810  do {
1811  // There should not be a ;, { or } before the new's open paren.
1812  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1813  return;
1814 
1815  // Consume the parens.
1816  if (FormatTok->is(tok::l_paren)) {
1817  parseParens();
1818 
1819  // If there is a class body of an anonymous class, consume that as child.
1820  if (FormatTok->is(tok::l_brace))
1821  parseChildBlock();
1822  return;
1823  }
1824  nextToken();
1825  } while (!eof());
1826 }
1827 
1828 void UnwrappedLineParser::parseForOrWhileLoop() {
1829  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1830  "'for', 'while' or foreach macro expected");
1831  nextToken();
1832  // JS' for await ( ...
1833  if (Style.Language == FormatStyle::LK_JavaScript &&
1834  FormatTok->is(Keywords.kw_await))
1835  nextToken();
1836  if (FormatTok->Tok.is(tok::l_paren))
1837  parseParens();
1838  if (FormatTok->Tok.is(tok::l_brace)) {
1839  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1840  parseBlock(/*MustBeDeclaration=*/false);
1841  addUnwrappedLine();
1842  } else {
1843  addUnwrappedLine();
1844  ++Line->Level;
1845  parseStructuralElement();
1846  --Line->Level;
1847  }
1848 }
1849 
1850 void UnwrappedLineParser::parseDoWhile() {
1851  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1852  nextToken();
1853  if (FormatTok->Tok.is(tok::l_brace)) {
1854  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1855  parseBlock(/*MustBeDeclaration=*/false);
1856  if (Style.BraceWrapping.IndentBraces)
1857  addUnwrappedLine();
1858  } else {
1859  addUnwrappedLine();
1860  ++Line->Level;
1861  parseStructuralElement();
1862  --Line->Level;
1863  }
1864 
1865  // FIXME: Add error handling.
1866  if (!FormatTok->Tok.is(tok::kw_while)) {
1867  addUnwrappedLine();
1868  return;
1869  }
1870 
1871  nextToken();
1872  parseStructuralElement();
1873 }
1874 
1875 void UnwrappedLineParser::parseLabel() {
1876  nextToken();
1877  unsigned OldLineLevel = Line->Level;
1878  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1879  --Line->Level;
1880  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1881  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1882  parseBlock(/*MustBeDeclaration=*/false);
1883  if (FormatTok->Tok.is(tok::kw_break)) {
1885  addUnwrappedLine();
1886  parseStructuralElement();
1887  }
1888  addUnwrappedLine();
1889  } else {
1890  if (FormatTok->is(tok::semi))
1891  nextToken();
1892  addUnwrappedLine();
1893  }
1894  Line->Level = OldLineLevel;
1895  if (FormatTok->isNot(tok::l_brace)) {
1896  parseStructuralElement();
1897  addUnwrappedLine();
1898  }
1899 }
1900 
1901 void UnwrappedLineParser::parseCaseLabel() {
1902  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1903  // FIXME: fix handling of complex expressions here.
1904  do {
1905  nextToken();
1906  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1907  parseLabel();
1908 }
1909 
1910 void UnwrappedLineParser::parseSwitch() {
1911  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1912  nextToken();
1913  if (FormatTok->Tok.is(tok::l_paren))
1914  parseParens();
1915  if (FormatTok->Tok.is(tok::l_brace)) {
1916  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1917  parseBlock(/*MustBeDeclaration=*/false);
1918  addUnwrappedLine();
1919  } else {
1920  addUnwrappedLine();
1921  ++Line->Level;
1922  parseStructuralElement();
1923  --Line->Level;
1924  }
1925 }
1926 
1927 void UnwrappedLineParser::parseAccessSpecifier() {
1928  nextToken();
1929  // Understand Qt's slots.
1930  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1931  nextToken();
1932  // Otherwise, we don't know what it is, and we'd better keep the next token.
1933  if (FormatTok->Tok.is(tok::colon))
1934  nextToken();
1935  addUnwrappedLine();
1936 }
1937 
1938 bool UnwrappedLineParser::parseEnum() {
1939  // Won't be 'enum' for NS_ENUMs.
1940  if (FormatTok->Tok.is(tok::kw_enum))
1941  nextToken();
1942 
1943  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1944  // declarations. An "enum" keyword followed by a colon would be a syntax
1945  // error and thus assume it is just an identifier.
1946  if (Style.Language == FormatStyle::LK_JavaScript &&
1947  FormatTok->isOneOf(tok::colon, tok::question))
1948  return false;
1949 
1950  // Eat up enum class ...
1951  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1952  nextToken();
1953 
1954  while (FormatTok->Tok.getIdentifierInfo() ||
1955  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1956  tok::greater, tok::comma, tok::question)) {
1957  nextToken();
1958  // We can have macros or attributes in between 'enum' and the enum name.
1959  if (FormatTok->is(tok::l_paren))
1960  parseParens();
1961  if (FormatTok->is(tok::identifier)) {
1962  nextToken();
1963  // If there are two identifiers in a row, this is likely an elaborate
1964  // return type. In Java, this can be "implements", etc.
1965  if (Style.isCpp() && FormatTok->is(tok::identifier))
1966  return false;
1967  }
1968  }
1969 
1970  // Just a declaration or something is wrong.
1971  if (FormatTok->isNot(tok::l_brace))
1972  return true;
1973  FormatTok->BlockKind = BK_Block;
1974 
1975  if (Style.Language == FormatStyle::LK_Java) {
1976  // Java enums are different.
1977  parseJavaEnumBody();
1978  return true;
1979  }
1980  if (Style.Language == FormatStyle::LK_Proto) {
1981  parseBlock(/*MustBeDeclaration=*/true);
1982  return true;
1983  }
1984 
1985  // Parse enum body.
1986  nextToken();
1987  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1988  if (HasError) {
1989  if (FormatTok->is(tok::semi))
1990  nextToken();
1991  addUnwrappedLine();
1992  }
1993  return true;
1994 
1995  // There is no addUnwrappedLine() here so that we fall through to parsing a
1996  // structural element afterwards. Thus, in "enum A {} n, m;",
1997  // "} n, m;" will end up in one unwrapped line.
1998 }
1999 
2000 void UnwrappedLineParser::parseJavaEnumBody() {
2001  // Determine whether the enum is simple, i.e. does not have a semicolon or
2002  // constants with class bodies. Simple enums can be formatted like braced
2003  // lists, contracted to a single line, etc.
2004  unsigned StoredPosition = Tokens->getPosition();
2005  bool IsSimple = true;
2006  FormatToken *Tok = Tokens->getNextToken();
2007  while (Tok) {
2008  if (Tok->is(tok::r_brace))
2009  break;
2010  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2011  IsSimple = false;
2012  break;
2013  }
2014  // FIXME: This will also mark enums with braces in the arguments to enum
2015  // constants as "not simple". This is probably fine in practice, though.
2016  Tok = Tokens->getNextToken();
2017  }
2018  FormatTok = Tokens->setPosition(StoredPosition);
2019 
2020  if (IsSimple) {
2021  nextToken();
2022  parseBracedList();
2023  addUnwrappedLine();
2024  return;
2025  }
2026 
2027  // Parse the body of a more complex enum.
2028  // First add a line for everything up to the "{".
2029  nextToken();
2030  addUnwrappedLine();
2031  ++Line->Level;
2032 
2033  // Parse the enum constants.
2034  while (FormatTok) {
2035  if (FormatTok->is(tok::l_brace)) {
2036  // Parse the constant's class body.
2037  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2038  /*MunchSemi=*/false);
2039  } else if (FormatTok->is(tok::l_paren)) {
2040  parseParens();
2041  } else if (FormatTok->is(tok::comma)) {
2042  nextToken();
2043  addUnwrappedLine();
2044  } else if (FormatTok->is(tok::semi)) {
2045  nextToken();
2046  addUnwrappedLine();
2047  break;
2048  } else if (FormatTok->is(tok::r_brace)) {
2049  addUnwrappedLine();
2050  break;
2051  } else {
2052  nextToken();
2053  }
2054  }
2055 
2056  // Parse the class body after the enum's ";" if any.
2057  parseLevel(/*HasOpeningBrace=*/true);
2058  nextToken();
2059  --Line->Level;
2060  addUnwrappedLine();
2061 }
2062 
2063 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2064  const FormatToken &InitialToken = *FormatTok;
2065  nextToken();
2066 
2067  // The actual identifier can be a nested name specifier, and in macros
2068  // it is often token-pasted.
2069  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2070  tok::kw___attribute, tok::kw___declspec,
2071  tok::kw_alignas) ||
2072  ((Style.Language == FormatStyle::LK_Java ||
2074  FormatTok->isOneOf(tok::period, tok::comma))) {
2075  if (Style.Language == FormatStyle::LK_JavaScript &&
2076  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2077  // JavaScript/TypeScript supports inline object types in
2078  // extends/implements positions:
2079  // class Foo implements {bar: number} { }
2080  nextToken();
2081  if (FormatTok->is(tok::l_brace)) {
2082  tryToParseBracedList();
2083  continue;
2084  }
2085  }
2086  bool IsNonMacroIdentifier =
2087  FormatTok->is(tok::identifier) &&
2088  FormatTok->TokenText != FormatTok->TokenText.upper();
2089  nextToken();
2090  // We can have macros or attributes in between 'class' and the class name.
2091  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2092  parseParens();
2093  }
2094 
2095  // Note that parsing away template declarations here leads to incorrectly
2096  // accepting function declarations as record declarations.
2097  // In general, we cannot solve this problem. Consider:
2098  // class A<int> B() {}
2099  // which can be a function definition or a class definition when B() is a
2100  // macro. If we find enough real-world cases where this is a problem, we
2101  // can parse for the 'template' keyword in the beginning of the statement,
2102  // and thus rule out the record production in case there is no template
2103  // (this would still leave us with an ambiguity between template function
2104  // and class declarations).
2105  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2106  while (!eof()) {
2107  if (FormatTok->is(tok::l_brace)) {
2108  calculateBraceTypes(/*ExpectClassBody=*/true);
2109  if (!tryToParseBracedList())
2110  break;
2111  }
2112  if (FormatTok->Tok.is(tok::semi))
2113  return;
2114  nextToken();
2115  }
2116  }
2117  if (FormatTok->Tok.is(tok::l_brace)) {
2118  if (ParseAsExpr) {
2119  parseChildBlock();
2120  } else {
2121  if (ShouldBreakBeforeBrace(Style, InitialToken))
2122  addUnwrappedLine();
2123 
2124  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2125  /*MunchSemi=*/false);
2126  }
2127  }
2128  // There is no addUnwrappedLine() here so that we fall through to parsing a
2129  // structural element afterwards. Thus, in "class A {} n, m;",
2130  // "} n, m;" will end up in one unwrapped line.
2131 }
2132 
2133 void UnwrappedLineParser::parseObjCProtocolList() {
2134  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2135  do {
2136  nextToken();
2137  // Early exit in case someone forgot a close angle.
2138  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2139  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2140  return;
2141  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2142  nextToken(); // Skip '>'.
2143 }
2144 
2145 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2146  do {
2147  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2148  nextToken();
2149  addUnwrappedLine();
2150  break;
2151  }
2152  if (FormatTok->is(tok::l_brace)) {
2153  parseBlock(/*MustBeDeclaration=*/false);
2154  // In ObjC interfaces, nothing should be following the "}".
2155  addUnwrappedLine();
2156  } else if (FormatTok->is(tok::r_brace)) {
2157  // Ignore stray "}". parseStructuralElement doesn't consume them.
2158  nextToken();
2159  addUnwrappedLine();
2160  } else {
2161  parseStructuralElement();
2162  }
2163  } while (!eof());
2164 }
2165 
2166 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2167  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2168  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2169  nextToken();
2170  nextToken(); // interface name
2171 
2172  // @interface can be followed by a lightweight generic
2173  // specialization list, then either a base class or a category.
2174  if (FormatTok->Tok.is(tok::less)) {
2175  // Unlike protocol lists, generic parameterizations support
2176  // nested angles:
2177  //
2178  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2179  // NSObject <NSCopying, NSSecureCoding>
2180  //
2181  // so we need to count how many open angles we have left.
2182  unsigned NumOpenAngles = 1;
2183  do {
2184  nextToken();
2185  // Early exit in case someone forgot a close angle.
2186  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2187  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2188  break;
2189  if (FormatTok->Tok.is(tok::less))
2190  ++NumOpenAngles;
2191  else if (FormatTok->Tok.is(tok::greater)) {
2192  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2193  --NumOpenAngles;
2194  }
2195  } while (!eof() && NumOpenAngles != 0);
2196  nextToken(); // Skip '>'.
2197  }
2198  if (FormatTok->Tok.is(tok::colon)) {
2199  nextToken();
2200  nextToken(); // base class name
2201  } else if (FormatTok->Tok.is(tok::l_paren))
2202  // Skip category, if present.
2203  parseParens();
2204 
2205  if (FormatTok->Tok.is(tok::less))
2206  parseObjCProtocolList();
2207 
2208  if (FormatTok->Tok.is(tok::l_brace)) {
2210  addUnwrappedLine();
2211  parseBlock(/*MustBeDeclaration=*/true);
2212  }
2213 
2214  // With instance variables, this puts '}' on its own line. Without instance
2215  // variables, this ends the @interface line.
2216  addUnwrappedLine();
2217 
2218  parseObjCUntilAtEnd();
2219 }
2220 
2221 // Returns true for the declaration/definition form of @protocol,
2222 // false for the expression form.
2223 bool UnwrappedLineParser::parseObjCProtocol() {
2224  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2225  nextToken();
2226 
2227  if (FormatTok->is(tok::l_paren))
2228  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2229  return false;
2230 
2231  // The definition/declaration form,
2232  // @protocol Foo
2233  // - (int)someMethod;
2234  // @end
2235 
2236  nextToken(); // protocol name
2237 
2238  if (FormatTok->Tok.is(tok::less))
2239  parseObjCProtocolList();
2240 
2241  // Check for protocol declaration.
2242  if (FormatTok->Tok.is(tok::semi)) {
2243  nextToken();
2244  addUnwrappedLine();
2245  return true;
2246  }
2247 
2248  addUnwrappedLine();
2249  parseObjCUntilAtEnd();
2250  return true;
2251 }
2252 
2253 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2254  bool IsImport = FormatTok->is(Keywords.kw_import);
2255  assert(IsImport || FormatTok->is(tok::kw_export));
2256  nextToken();
2257 
2258  // Consume the "default" in "export default class/function".
2259  if (FormatTok->is(tok::kw_default))
2260  nextToken();
2261 
2262  // Consume "async function", "function" and "default function", so that these
2263  // get parsed as free-standing JS functions, i.e. do not require a trailing
2264  // semicolon.
2265  if (FormatTok->is(Keywords.kw_async))
2266  nextToken();
2267  if (FormatTok->is(Keywords.kw_function)) {
2268  nextToken();
2269  return;
2270  }
2271 
2272  // For imports, `export *`, `export {...}`, consume the rest of the line up
2273  // to the terminating `;`. For everything else, just return and continue
2274  // parsing the structural element, i.e. the declaration or expression for
2275  // `export default`.
2276  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2277  !FormatTok->isStringLiteral())
2278  return;
2279 
2280  while (!eof()) {
2281  if (FormatTok->is(tok::semi))
2282  return;
2283  if (Line->Tokens.empty()) {
2284  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2285  // import statement should terminate.
2286  return;
2287  }
2288  if (FormatTok->is(tok::l_brace)) {
2289  FormatTok->BlockKind = BK_Block;
2290  nextToken();
2291  parseBracedList();
2292  } else {
2293  nextToken();
2294  }
2295  }
2296 }
2297 
2298 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2299  StringRef Prefix = "") {
2300  llvm::dbgs() << Prefix << "Line(" << Line.Level
2301  << ", FSC=" << Line.FirstStartColumn << ")"
2302  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2303  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2304  E = Line.Tokens.end();
2305  I != E; ++I) {
2306  llvm::dbgs() << I->Tok->Tok.getName() << "["
2307  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2308  << "] ";
2309  }
2310  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2311  E = Line.Tokens.end();
2312  I != E; ++I) {
2313  const UnwrappedLineNode &Node = *I;
2315  I = Node.Children.begin(),
2316  E = Node.Children.end();
2317  I != E; ++I) {
2318  printDebugInfo(*I, "\nChild: ");
2319  }
2320  }
2321  llvm::dbgs() << "\n";
2322 }
2323 
2324 void UnwrappedLineParser::addUnwrappedLine() {
2325  if (Line->Tokens.empty())
2326  return;
2327  DEBUG({
2328  if (CurrentLines == &Lines)
2329  printDebugInfo(*Line);
2330  });
2331  CurrentLines->push_back(std::move(*Line));
2332  Line->Tokens.clear();
2333  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2334  Line->FirstStartColumn = 0;
2335  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2336  CurrentLines->append(
2337  std::make_move_iterator(PreprocessorDirectives.begin()),
2338  std::make_move_iterator(PreprocessorDirectives.end()));
2339  PreprocessorDirectives.clear();
2340  }
2341  // Disconnect the current token from the last token on the previous line.
2342  FormatTok->Previous = nullptr;
2343 }
2344 
2345 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2346 
2347 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2348  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2349  FormatTok.NewlinesBefore > 0;
2350 }
2351 
2352 // Checks if \p FormatTok is a line comment that continues the line comment
2353 // section on \p Line.
2354 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2355  const UnwrappedLine &Line,
2356  llvm::Regex &CommentPragmasRegex) {
2357  if (Line.Tokens.empty())
2358  return false;
2359 
2360  StringRef IndentContent = FormatTok.TokenText;
2361  if (FormatTok.TokenText.startswith("//") ||
2362  FormatTok.TokenText.startswith("/*"))
2363  IndentContent = FormatTok.TokenText.substr(2);
2364  if (CommentPragmasRegex.match(IndentContent))
2365  return false;
2366 
2367  // If Line starts with a line comment, then FormatTok continues the comment
2368  // section if its original column is greater or equal to the original start
2369  // column of the line.
2370  //
2371  // Define the min column token of a line as follows: if a line ends in '{' or
2372  // contains a '{' followed by a line comment, then the min column token is
2373  // that '{'. Otherwise, the min column token of the line is the first token of
2374  // the line.
2375  //
2376  // If Line starts with a token other than a line comment, then FormatTok
2377  // continues the comment section if its original column is greater than the
2378  // original start column of the min column token of the line.
2379  //
2380  // For example, the second line comment continues the first in these cases:
2381  //
2382  // // first line
2383  // // second line
2384  //
2385  // and:
2386  //
2387  // // first line
2388  // // second line
2389  //
2390  // and:
2391  //
2392  // int i; // first line
2393  // // second line
2394  //
2395  // and:
2396  //
2397  // do { // first line
2398  // // second line
2399  // int i;
2400  // } while (true);
2401  //
2402  // and:
2403  //
2404  // enum {
2405  // a, // first line
2406  // // second line
2407  // b
2408  // };
2409  //
2410  // The second line comment doesn't continue the first in these cases:
2411  //
2412  // // first line
2413  // // second line
2414  //
2415  // and:
2416  //
2417  // int i; // first line
2418  // // second line
2419  //
2420  // and:
2421  //
2422  // do { // first line
2423  // // second line
2424  // int i;
2425  // } while (true);
2426  //
2427  // and:
2428  //
2429  // enum {
2430  // a, // first line
2431  // // second line
2432  // };
2433  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2434 
2435  // Scan for '{//'. If found, use the column of '{' as a min column for line
2436  // comment section continuation.
2437  const FormatToken *PreviousToken = nullptr;
2438  for (const UnwrappedLineNode &Node : Line.Tokens) {
2439  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2440  isLineComment(*Node.Tok)) {
2441  MinColumnToken = PreviousToken;
2442  break;
2443  }
2444  PreviousToken = Node.Tok;
2445 
2446  // Grab the last newline preceding a token in this unwrapped line.
2447  if (Node.Tok->NewlinesBefore > 0) {
2448  MinColumnToken = Node.Tok;
2449  }
2450  }
2451  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2452  MinColumnToken = PreviousToken;
2453  }
2454 
2455  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2456  MinColumnToken);
2457 }
2458 
2459 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2460  bool JustComments = Line->Tokens.empty();
2462  I = CommentsBeforeNextToken.begin(),
2463  E = CommentsBeforeNextToken.end();
2464  I != E; ++I) {
2465  // Line comments that belong to the same line comment section are put on the
2466  // same line since later we might want to reflow content between them.
2467  // Additional fine-grained breaking of line comment sections is controlled
2468  // by the class BreakableLineCommentSection in case it is desirable to keep
2469  // several line comment sections in the same unwrapped line.
2470  //
2471  // FIXME: Consider putting separate line comment sections as children to the
2472  // unwrapped line instead.
2474  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2475  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2476  addUnwrappedLine();
2477  pushToken(*I);
2478  }
2479  if (NewlineBeforeNext && JustComments)
2480  addUnwrappedLine();
2481  CommentsBeforeNextToken.clear();
2482 }
2483 
2484 void UnwrappedLineParser::nextToken(int LevelDifference) {
2485  if (eof())
2486  return;
2487  flushComments(isOnNewLine(*FormatTok));
2488  pushToken(FormatTok);
2489  FormatToken *Previous = FormatTok;
2490  if (Style.Language != FormatStyle::LK_JavaScript)
2491  readToken(LevelDifference);
2492  else
2493  readTokenWithJavaScriptASI();
2494  FormatTok->Previous = Previous;
2495 }
2496 
2497 void UnwrappedLineParser::distributeComments(
2498  const SmallVectorImpl<FormatToken *> &Comments,
2499  const FormatToken *NextTok) {
2500  // Whether or not a line comment token continues a line is controlled by
2501  // the method continuesLineCommentSection, with the following caveat:
2502  //
2503  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2504  // that each comment line from the trail is aligned with the next token, if
2505  // the next token exists. If a trail exists, the beginning of the maximal
2506  // trail is marked as a start of a new comment section.
2507  //
2508  // For example in this code:
2509  //
2510  // int a; // line about a
2511  // // line 1 about b
2512  // // line 2 about b
2513  // int b;
2514  //
2515  // the two lines about b form a maximal trail, so there are two sections, the
2516  // first one consisting of the single comment "// line about a" and the
2517  // second one consisting of the next two comments.
2518  if (Comments.empty())
2519  return;
2520  bool ShouldPushCommentsInCurrentLine = true;
2521  bool HasTrailAlignedWithNextToken = false;
2522  unsigned StartOfTrailAlignedWithNextToken = 0;
2523  if (NextTok) {
2524  // We are skipping the first element intentionally.
2525  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2526  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2527  HasTrailAlignedWithNextToken = true;
2528  StartOfTrailAlignedWithNextToken = i;
2529  }
2530  }
2531  }
2532  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2533  FormatToken *FormatTok = Comments[i];
2534  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2535  FormatTok->ContinuesLineCommentSection = false;
2536  } else {
2537  FormatTok->ContinuesLineCommentSection =
2538  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2539  }
2540  if (!FormatTok->ContinuesLineCommentSection &&
2541  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2542  ShouldPushCommentsInCurrentLine = false;
2543  }
2544  if (ShouldPushCommentsInCurrentLine) {
2545  pushToken(FormatTok);
2546  } else {
2547  CommentsBeforeNextToken.push_back(FormatTok);
2548  }
2549  }
2550 }
2551 
2552 void UnwrappedLineParser::readToken(int LevelDifference) {
2554  do {
2555  FormatTok = Tokens->getNextToken();
2556  assert(FormatTok);
2557  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2558  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2559  distributeComments(Comments, FormatTok);
2560  Comments.clear();
2561  // If there is an unfinished unwrapped line, we flush the preprocessor
2562  // directives only after that unwrapped line was finished later.
2563  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2564  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2565  assert((LevelDifference >= 0 ||
2566  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2567  "LevelDifference makes Line->Level negative");
2568  Line->Level += LevelDifference;
2569  // Comments stored before the preprocessor directive need to be output
2570  // before the preprocessor directive, at the same level as the
2571  // preprocessor directive, as we consider them to apply to the directive.
2572  flushComments(isOnNewLine(*FormatTok));
2573  parsePPDirective();
2574  }
2575  while (FormatTok->Type == TT_ConflictStart ||
2576  FormatTok->Type == TT_ConflictEnd ||
2577  FormatTok->Type == TT_ConflictAlternative) {
2578  if (FormatTok->Type == TT_ConflictStart) {
2579  conditionalCompilationStart(/*Unreachable=*/false);
2580  } else if (FormatTok->Type == TT_ConflictAlternative) {
2581  conditionalCompilationAlternative();
2582  } else if (FormatTok->Type == TT_ConflictEnd) {
2583  conditionalCompilationEnd();
2584  }
2585  FormatTok = Tokens->getNextToken();
2586  FormatTok->MustBreakBefore = true;
2587  }
2588 
2589  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2590  !Line->InPPDirective) {
2591  continue;
2592  }
2593 
2594  if (!FormatTok->Tok.is(tok::comment)) {
2595  distributeComments(Comments, FormatTok);
2596  Comments.clear();
2597  return;
2598  }
2599 
2600  Comments.push_back(FormatTok);
2601  } while (!eof());
2602 
2603  distributeComments(Comments, nullptr);
2604  Comments.clear();
2605 }
2606 
2607 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2608  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2609  if (MustBreakBeforeNextToken) {
2610  Line->Tokens.back().Tok->MustBreakBefore = true;
2611  MustBreakBeforeNextToken = false;
2612  }
2613 }
2614 
2615 } // end namespace format
2616 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:696
Indent in all namespaces.
Definition: Format.h:1309
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c.h:60
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:127
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1222
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1101
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:218
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:710
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:159
Does not indent any directives.
Definition: Format.h:1113
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:402
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:1126
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:133
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:370
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:668
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:742
Should be used for Java.
Definition: Format.h:1215
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:287
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:91
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:320
static void hash_combine(std::size_t &seed, const T &v)
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1313
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:493
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:313
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1217
ContinuationIndenter * Indenter
const AnnotatedLine * Line
const FunctionProtoType * T
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:791
bool AfterFunction
Wrap function definitions.
Definition: Format.h:648
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
SourceLocation getEnd() const
do v
Definition: arm_acle.h:78
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:304
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:649
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:140
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:171
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1299
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1232
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:302
Should be used for TableGen code.
Definition: Format.h:1224
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:682
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:325
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:620
Indents directives after the hash.
Definition: Format.h:1122
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:61
Represents a complete lambda introducer.
Definition: DeclSpec.h:2533
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:165
bool AfterClass
Wrap class definitions.
Definition: Format.h:602
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1227
StringRef Text
Definition: Format.cpp:1553
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:281
bool isStringLiteral() const
Definition: FormatToken.h:336
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:664
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:137
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:177
void startToken()
Reset all flags to cleared.
Definition: Token.h:169
const FormatStyle & Style