clang  8.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
29 public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32 
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42  bool MustBeDeclaration)
43  : Line(Line), Stack(Stack) {
44  Line.MustBeDeclaration = MustBeDeclaration;
45  Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48  Stack.pop_back();
49  if (!Stack.empty())
50  Line.MustBeDeclaration = Stack.back();
51  else
52  Line.MustBeDeclaration = true;
53  }
54 
55 private:
57  std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68  const FormatToken *Previous,
69  const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71  return false;
72  unsigned MinContinueColumn =
73  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75  isLineComment(*Previous) &&
76  FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82  FormatToken *&ResetToken)
83  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85  Token(nullptr), PreviousToken(nullptr) {
86  FakeEOF.Tok.startToken();
87  FakeEOF.Tok.setKind(tok::eof);
88  TokenSource = this;
89  Line.Level = 0;
90  Line.InPPDirective = true;
91  }
92 
93  ~ScopedMacroState() override {
94  TokenSource = PreviousTokenSource;
95  ResetToken = Token;
96  Line.InPPDirective = false;
97  Line.Level = PreviousLineLevel;
98  }
99 
100  FormatToken *getNextToken() override {
101  // The \c UnwrappedLineParser guards against this by never calling
102  // \c getNextToken() after it has encountered the first eof token.
103  assert(!eof());
104  PreviousToken = Token;
105  Token = PreviousTokenSource->getNextToken();
106  if (eof())
107  return &FakeEOF;
108  return Token;
109  }
110 
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113  FormatToken *setPosition(unsigned Position) override {
114  PreviousToken = nullptr;
115  Token = PreviousTokenSource->setPosition(Position);
116  return Token;
117  }
118 
119 private:
120  bool eof() {
121  return Token && Token->HasUnescapedNewline &&
122  !continuesLineComment(*Token, PreviousToken,
123  /*MinColumnToken=*/PreviousToken);
124  }
125 
126  FormatToken FakeEOF;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132 
134  FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
140 public:
142  bool SwitchToPreprocessorLines = false)
143  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144  if (SwitchToPreprocessorLines)
145  Parser.CurrentLines = &Parser.PreprocessorDirectives;
146  else if (!Parser.Line->Tokens.empty())
147  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148  PreBlockLine = std::move(Parser.Line);
149  Parser.Line = llvm::make_unique<UnwrappedLine>();
150  Parser.Line->Level = PreBlockLine->Level;
151  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153 
155  if (!Parser.Line->Tokens.empty()) {
156  Parser.addUnwrappedLine();
157  }
158  assert(Parser.Line->Tokens.empty());
159  Parser.Line = std::move(PreBlockLine);
160  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161  Parser.MustBreakBeforeNextToken = true;
162  Parser.CurrentLines = OriginalLines;
163  }
164 
165 private:
167 
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
// Scoped helper around a compound statement: the constructor remembers the
// current LineLevel and increments it when Style.BraceWrapping.IndentBraces is
// set; the destructor restores the remembered level.
//
// NOTE(review): the embedded line numbering skips 172, 174 and 177, so this
// listing appears to have lost the class header, the first line of the
// constructor signature (which presumably declares the `Parser` pointer used
// below) and a statement directly ahead of the addUnwrappedLine() call --
// confirm against the original file before relying on this block.
173 public:
175  const FormatStyle &Style, unsigned &LineLevel)
176  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
178  Parser->addUnwrappedLine();
179  if (Style.BraceWrapping.IndentBraces)
180  ++LineLevel;
181  }
// Undo any level change made by the constructor.
182  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
183 
184 private:
// Reference to the level being adjusted, and its value on entry.
185  unsigned &LineLevel;
186  unsigned OldLineLevel;
187 };
188 
189 namespace {
190 
191 class IndexedTokenSource : public FormatTokenSource {
192 public:
193  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
194  : Tokens(Tokens), Position(-1) {}
195 
196  FormatToken *getNextToken() override {
197  ++Position;
198  return Tokens[Position];
199  }
200 
201  unsigned getPosition() override {
202  assert(Position >= 0);
203  return Position;
204  }
205 
206  FormatToken *setPosition(unsigned P) override {
207  Position = P;
208  return Tokens[Position];
209  }
210 
211  void reset() { Position = -1; }
212 
213 private:
215  int Position;
216 };
217 
218 } // end anonymous namespace
219 
// Constructor: starts with a fresh UnwrappedLine, collects lines into `Lines`,
// has no active token source yet (the `Tokens` member is null), and begins
// outside of any preprocessor branch (PPBranchLevel == -1). Include guard
// detection starts as IG_Rejected when Style.IndentPPDirectives is PPDIS_None
// and as IG_Inited otherwise.
//
// NOTE(review): the embedded line numbering skips 220 and 223, so the opening
// line of this definition (presumably naming the constructor and declaring the
// `Style` parameter) and one further parameter (presumably the token array
// that initializes AllTokens) appear to be missing from this listing --
// confirm against the original file.
221  const AdditionalKeywords &Keywords,
222  unsigned FirstStartColumn,
224  UnwrappedLineConsumer &Callback)
225  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
226  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
227  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
228  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
229  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
230  ? IG_Rejected
231  : IG_Inited),
232  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
233 
234 void UnwrappedLineParser::reset() {
235  PPBranchLevel = -1;
236  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
237  ? IG_Rejected
238  : IG_Inited;
239  IncludeGuardToken = nullptr;
240  Line.reset(new UnwrappedLine);
241  CommentsBeforeNextToken.clear();
242  FormatTok = nullptr;
243  MustBreakBeforeNextToken = false;
244  PreprocessorDirectives.clear();
245  CurrentLines = &Lines;
246  DeclarationScopeStack.clear();
247  PPStack.clear();
248  Line->FirstStartColumn = FirstStartColumn;
249 }
250 
// Top-level driver: parses the whole token array, possibly several times, and
// feeds the resulting lines to Callback. Each iteration of the loop resets the
// parser, parses the file, hands every line to Callback.consumeUnwrappedLine()
// and ends the run with Callback.finishRun(). The loop repeats as long as
// PPLevelBranchIndex is non-empty after the bookkeeping at the bottom.
//
// NOTE(review): the embedded line numbering skips 251, so the line that opens
// this definition (presumably `void UnwrappedLineParser::parse() {`) appears
// to be missing from this listing -- confirm against the original file.
252  IndexedTokenSource TokenSource(AllTokens);
253  Line->FirstStartColumn = FirstStartColumn;
254  do {
255  LLVM_DEBUG(llvm::dbgs() << "----\n");
256  reset();
257  Tokens = &TokenSource;
258  TokenSource.reset();
259 
260  readToken();
261  parseFile();
262 
263  // If we found an include guard then all preprocessor directives (other than
264  // the guard) are over-indented by one.
265  if (IncludeGuard == IG_Found)
266  for (auto &Line : Lines)
267  if (Line.InPPDirective && Line.Level > 0)
268  --Line.Level;
269 
270  // Create line with eof token.
271  pushToken(FormatTok);
272  addUnwrappedLine();
273 
274  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
275  E = Lines.end();
276  I != E; ++I) {
277  Callback.consumeUnwrappedLine(*I);
278  }
279  Callback.finishRun();
280  Lines.clear();
// Drop trailing levels whose branch index has reached the last counted branch,
// then advance the innermost remaining level to its next branch.
281  while (!PPLevelBranchIndex.empty() &&
282  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
283  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
284  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
285  }
286  if (!PPLevelBranchIndex.empty()) {
287  ++PPLevelBranchIndex.back();
288  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
289  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
290  }
291  } while (!PPLevelBranchIndex.empty());
292 }
293 
294 void UnwrappedLineParser::parseFile() {
295  // The top-level context in a file always has declarations, except for pre-
296  // processor directives and JavaScript files.
297  bool MustBeDeclaration =
298  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
299  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
300  MustBeDeclaration);
301  if (Style.Language == FormatStyle::LK_TextProto)
302  parseBracedList();
303  else
304  parseLevel(/*HasOpeningBrace=*/false);
305  // Make sure to format the remaining tokens.
306  //
307  // LK_TextProto is special since its top-level is parsed as the body of a
308  // braced list, which does not necessarily have natural line separators such
309  // as a semicolon. Comments after the last entry that have been determined to
310  // not belong to that line, as in:
311  // key: value
312  // // endfile comment
313  // do not have a chance to be put on a line of their own until this point.
314  // Here we add this newline before end-of-file comments.
315  if (Style.Language == FormatStyle::LK_TextProto &&
316  !CommentsBeforeNextToken.empty())
317  addUnwrappedLine();
318  flushComments(true);
319  addUnwrappedLine();
320 }
321 
322 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
323  bool SwitchLabelEncountered = false;
324  do {
325  tok::TokenKind kind = FormatTok->Tok.getKind();
326  if (FormatTok->Type == TT_MacroBlockBegin) {
327  kind = tok::l_brace;
328  } else if (FormatTok->Type == TT_MacroBlockEnd) {
329  kind = tok::r_brace;
330  }
331 
332  switch (kind) {
333  case tok::comment:
334  nextToken();
335  addUnwrappedLine();
336  break;
337  case tok::l_brace:
338  // FIXME: Add parameter whether this can happen - if this happens, we must
339  // be in a non-declaration context.
340  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
341  continue;
342  parseBlock(/*MustBeDeclaration=*/false);
343  addUnwrappedLine();
344  break;
345  case tok::r_brace:
346  if (HasOpeningBrace)
347  return;
348  nextToken();
349  addUnwrappedLine();
350  break;
351  case tok::kw_default: {
352  unsigned StoredPosition = Tokens->getPosition();
353  FormatToken *Next;
354  do {
355  Next = Tokens->getNextToken();
356  } while (Next && Next->is(tok::comment));
357  FormatTok = Tokens->setPosition(StoredPosition);
358  if (Next && Next->isNot(tok::colon)) {
359  // default not followed by ':' is not a case label; treat it like
360  // an identifier.
361  parseStructuralElement();
362  break;
363  }
364  // Else, if it is 'default:', fall through to the case handling.
365  LLVM_FALLTHROUGH;
366  }
367  case tok::kw_case:
368  if (Style.Language == FormatStyle::LK_JavaScript &&
369  Line->MustBeDeclaration) {
370  // A 'case: string' style field declaration.
371  parseStructuralElement();
372  break;
373  }
374  if (!SwitchLabelEncountered &&
375  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
376  ++Line->Level;
377  SwitchLabelEncountered = true;
378  parseStructuralElement();
379  break;
380  default:
381  parseStructuralElement();
382  break;
383  }
384  } while (!eof());
385 }
386 
// Looks ahead from the current '{' token and assigns a BlockKind (BK_Block or
// BK_BracedInit) to the brace tokens it walks over, based on the tokens that
// surround each brace pair. The scan stops at eof or once the brace that
// started it has been closed; any brace still unclassified at that point is
// treated as a block. The token source is rewound to the starting position
// before returning, so the lookahead does not consume any tokens.
//
// \p ExpectClassBody changes how a ';' after the outermost closing brace is
// interpreted (see the semicolon condition below).
387 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
388  // We'll parse forward through the tokens until we hit
389  // a closing brace or eof - note that getNextToken() will
390  // parse macros, so this will magically work inside macro
391  // definitions, too.
392  unsigned StoredPosition = Tokens->getPosition();
393  FormatToken *Tok = FormatTok;
394  const FormatToken *PrevTok = Tok->Previous;
395  // Keep a stack of positions of lbrace tokens. We will
396  // update information about whether an lbrace starts a
397  // braced init list or a different block during the loop.
398  SmallVector<FormatToken *, 8> LBraceStack;
399  assert(Tok->Tok.is(tok::l_brace));
400  do {
401  // Get next non-comment token.
402  FormatToken *NextTok;
403  unsigned ReadTokens = 0;
404  do {
405  NextTok = Tokens->getNextToken();
406  ++ReadTokens;
407  } while (NextTok->is(tok::comment));
408 
409  switch (Tok->Tok.getKind()) {
410  case tok::l_brace:
411  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
412  if (PrevTok->isOneOf(tok::colon, tok::less))
413  // A ':' indicates this code is in a type, or a braced list
414  // following a label in an object literal ({a: {b: 1}}).
415  // A '<' could be an object used in a comparison, but that is nonsense
416  // code (can never return true), so more likely it is a generic type
417  // argument (`X<{a: string; b: number}>`).
418  // The code below could be confused by semicolons between the
419  // individual members in a type member list, which would normally
420  // trigger BK_Block. In both cases, this must be parsed as an inline
421  // braced init.
422  Tok->BlockKind = BK_BracedInit;
423  else if (PrevTok->is(tok::r_paren))
424  // `) { }` can only occur in function or method declarations in JS.
425  Tok->BlockKind = BK_Block;
426  } else {
427  Tok->BlockKind = BK_Unknown;
428  }
429  LBraceStack.push_back(Tok);
430  break;
431  case tok::r_brace:
432  if (LBraceStack.empty())
433  break;
434  if (LBraceStack.back()->BlockKind == BK_Unknown) {
435  bool ProbablyBracedList = false;
436  if (Style.Language == FormatStyle::LK_Proto) {
437  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
438  } else {
439  // Using OriginalColumn to distinguish between ObjC methods and
440  // binary operators is a bit hacky.
441  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
442  NextTok->OriginalColumn == 0;
443 
444  // If there is a comma, semicolon or right paren after the closing
445  // brace, we assume this is a braced initializer list. Note that
446  // regardless how we mark inner braces here, we will overwrite the
447  // BlockKind later if we parse a braced list (where all blocks
448  // inside are by default braced lists), or when we explicitly detect
449  // blocks (for example while parsing lambdas).
450  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
451  // braced list in JS.
452  ProbablyBracedList =
// NOTE(review): the embedded line numbering skips 453, so the first operand of
// this condition appears to be missing from this listing; the unmatched `))`
// two lines below suggests it opened a parenthesised `... &&` guard for the
// kw_of/kw_in/kw_as check -- confirm against the original file.
454  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
455  Keywords.kw_as)) ||
456  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
457  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
458  tok::r_paren, tok::r_square, tok::l_brace,
459  tok::ellipsis) ||
460  (NextTok->is(tok::identifier) &&
461  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
462  (NextTok->is(tok::semi) &&
463  (!ExpectClassBody || LBraceStack.size() != 1)) ||
464  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
465  if (NextTok->is(tok::l_square)) {
466  // We can have an array subscript after a braced init
467  // list, but C++11 attributes are expected after blocks.
468  NextTok = Tokens->getNextToken();
469  ++ReadTokens;
470  ProbablyBracedList = NextTok->isNot(tok::l_square);
471  }
472  }
// Record the verdict on both the closing brace and its matching opening brace.
473  if (ProbablyBracedList) {
474  Tok->BlockKind = BK_BracedInit;
475  LBraceStack.back()->BlockKind = BK_BracedInit;
476  } else {
477  Tok->BlockKind = BK_Block;
478  LBraceStack.back()->BlockKind = BK_Block;
479  }
480  }
481  LBraceStack.pop_back();
482  break;
// Any of the following tokens directly inside a still-unclassified brace marks
// that brace as a block.
483  case tok::at:
484  case tok::semi:
485  case tok::kw_if:
486  case tok::kw_while:
487  case tok::kw_for:
488  case tok::kw_switch:
489  case tok::kw_try:
490  case tok::kw___try:
491  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
492  LBraceStack.back()->BlockKind = BK_Block;
493  break;
494  default:
495  break;
496  }
497  PrevTok = Tok;
498  Tok = NextTok;
499  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
500 
501  // Assume other blocks for all unclosed opening braces.
502  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
503  if (LBraceStack[i]->BlockKind == BK_Unknown)
504  LBraceStack[i]->BlockKind = BK_Block;
505  }
506 
// Rewind so that the caller continues from the original '{'.
507  FormatTok = Tokens->setPosition(StoredPosition);
508 }
509 
// Folds the std::hash of \p v into the running hash value \p seed.
//
// The value's hash is offset by the constant 0x9e3779b9 and by two shifted
// copies of the current seed before being xor-ed in, so the result depends on
// the order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
515 
516 size_t UnwrappedLineParser::computePPHash() const {
517  size_t h = 0;
518  for (const auto &i : PPStack) {
519  hash_combine(h, size_t(i.Kind));
520  hash_combine(h, i.Line);
521  }
522  return h;
523 }
524 
525 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
526  bool MunchSemi) {
527  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
528  "'{' or macro block token expected");
529  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
530  FormatTok->BlockKind = BK_Block;
531 
532  size_t PPStartHash = computePPHash();
533 
534  unsigned InitialLevel = Line->Level;
535  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
536 
537  if (MacroBlock && FormatTok->is(tok::l_paren))
538  parseParens();
539 
540  size_t NbPreprocessorDirectives =
541  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
542  addUnwrappedLine();
543  size_t OpeningLineIndex =
544  CurrentLines->empty()
546  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
547 
548  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
549  MustBeDeclaration);
550  if (AddLevel)
551  ++Line->Level;
552  parseLevel(/*HasOpeningBrace=*/true);
553 
554  if (eof())
555  return;
556 
557  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
558  : !FormatTok->is(tok::r_brace)) {
559  Line->Level = InitialLevel;
560  FormatTok->BlockKind = BK_Block;
561  return;
562  }
563 
564  size_t PPEndHash = computePPHash();
565 
566  // Munch the closing brace.
567  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
568 
569  if (MacroBlock && FormatTok->is(tok::l_paren))
570  parseParens();
571 
572  if (MunchSemi && FormatTok->Tok.is(tok::semi))
573  nextToken();
574  Line->Level = InitialLevel;
575 
576  if (PPStartHash == PPEndHash) {
577  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
578  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
579  // Update the opening line to add the forward reference as well
580  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
581  CurrentLines->size() - 1;
582  }
583  }
584 }
585 
586 static bool isGoogScope(const UnwrappedLine &Line) {
587  // FIXME: Closure-library specific stuff should not be hard-coded but be
588  // configurable.
589  if (Line.Tokens.size() < 4)
590  return false;
591  auto I = Line.Tokens.begin();
592  if (I->Tok->TokenText != "goog")
593  return false;
594  ++I;
595  if (I->Tok->isNot(tok::period))
596  return false;
597  ++I;
598  if (I->Tok->TokenText != "scope")
599  return false;
600  ++I;
601  return I->Tok->is(tok::l_paren);
602 }
603 
604 static bool isIIFE(const UnwrappedLine &Line,
605  const AdditionalKeywords &Keywords) {
606  // Look for the start of an immediately invoked anonymous function.
607  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
608  // This is commonly done in JavaScript to create a new, anonymous scope.
609  // Example: (function() { ... })()
610  if (Line.Tokens.size() < 3)
611  return false;
612  auto I = Line.Tokens.begin();
613  if (I->Tok->isNot(tok::l_paren))
614  return false;
615  ++I;
616  if (I->Tok->isNot(Keywords.kw_function))
617  return false;
618  ++I;
619  return I->Tok->is(tok::l_paren);
620 }
621 
622 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
623  const FormatToken &InitialToken) {
624  if (InitialToken.is(tok::kw_namespace))
625  return Style.BraceWrapping.AfterNamespace;
626  if (InitialToken.is(tok::kw_class))
627  return Style.BraceWrapping.AfterClass;
628  if (InitialToken.is(tok::kw_union))
629  return Style.BraceWrapping.AfterUnion;
630  if (InitialToken.is(tok::kw_struct))
631  return Style.BraceWrapping.AfterStruct;
632  return false;
633 }
634 
635 void UnwrappedLineParser::parseChildBlock() {
636  FormatTok->BlockKind = BK_Block;
637  nextToken();
638  {
639  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
640  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
641  ScopedLineState LineState(*this);
642  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
643  /*MustBeDeclaration=*/false);
644  Line->Level += SkipIndent ? 0 : 1;
645  parseLevel(/*HasOpeningBrace=*/true);
646  flushComments(isOnNewLine(*FormatTok));
647  Line->Level -= SkipIndent ? 0 : 1;
648  }
649  nextToken();
650 }
651 
652 void UnwrappedLineParser::parsePPDirective() {
653  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
654  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
655  nextToken();
656 
657  if (!FormatTok->Tok.getIdentifierInfo()) {
658  parsePPUnknown();
659  return;
660  }
661 
662  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
663  case tok::pp_define:
664  parsePPDefine();
665  return;
666  case tok::pp_if:
667  parsePPIf(/*IfDef=*/false);
668  break;
669  case tok::pp_ifdef:
670  case tok::pp_ifndef:
671  parsePPIf(/*IfDef=*/true);
672  break;
673  case tok::pp_else:
674  parsePPElse();
675  break;
676  case tok::pp_elif:
677  parsePPElIf();
678  break;
679  case tok::pp_endif:
680  parsePPEndIf();
681  break;
682  default:
683  parsePPUnknown();
684  break;
685  }
686 }
687 
688 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
689  size_t Line = CurrentLines->size();
690  if (CurrentLines == &PreprocessorDirectives)
691  Line += Lines.size();
692 
693  if (Unreachable ||
694  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
695  PPStack.push_back({PP_Unreachable, Line});
696  else
697  PPStack.push_back({PP_Conditional, Line});
698 }
699 
700 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
701  ++PPBranchLevel;
702  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
703  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
704  PPLevelBranchIndex.push_back(0);
705  PPLevelBranchCount.push_back(0);
706  }
707  PPChainBranchIndex.push(0);
708  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
709  conditionalCompilationCondition(Unreachable || Skip);
710 }
711 
712 void UnwrappedLineParser::conditionalCompilationAlternative() {
713  if (!PPStack.empty())
714  PPStack.pop_back();
715  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
716  if (!PPChainBranchIndex.empty())
717  ++PPChainBranchIndex.top();
718  conditionalCompilationCondition(
719  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
720  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
721 }
722 
723 void UnwrappedLineParser::conditionalCompilationEnd() {
724  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
725  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
726  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
727  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
728  }
729  }
730  // Guard against #endif's without #if.
731  if (PPBranchLevel > -1)
732  --PPBranchLevel;
733  if (!PPChainBranchIndex.empty())
734  PPChainBranchIndex.pop();
735  if (!PPStack.empty())
736  PPStack.pop_back();
737 }
738 
739 void UnwrappedLineParser::parsePPIf(bool IfDef) {
740  bool IfNDef = FormatTok->is(tok::pp_ifndef);
741  nextToken();
742  bool Unreachable = false;
743  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
744  Unreachable = true;
745  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
746  Unreachable = true;
747  conditionalCompilationStart(Unreachable);
748  FormatToken *IfCondition = FormatTok;
749  // If there's a #ifndef on the first line, and the only lines before it are
750  // comments, it could be an include guard.
751  bool MaybeIncludeGuard = IfNDef;
752  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
753  for (auto &Line : Lines) {
754  if (!Line.Tokens.front().Tok->is(tok::comment)) {
755  MaybeIncludeGuard = false;
756  IncludeGuard = IG_Rejected;
757  break;
758  }
759  }
760  --PPBranchLevel;
761  parsePPUnknown();
762  ++PPBranchLevel;
763  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
764  IncludeGuard = IG_IfNdefed;
765  IncludeGuardToken = IfCondition;
766  }
767 }
768 
769 void UnwrappedLineParser::parsePPElse() {
770  // If a potential include guard has an #else, it's not an include guard.
771  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
772  IncludeGuard = IG_Rejected;
773  conditionalCompilationAlternative();
774  if (PPBranchLevel > -1)
775  --PPBranchLevel;
776  parsePPUnknown();
777  ++PPBranchLevel;
778 }
779 
780 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
781 
// Parses an #endif: closes the current conditional compilation level, parses
// the rest of the directive, and then checks whether this #endif completes an
// include guard.
782 void UnwrappedLineParser::parsePPEndIf() {
783  conditionalCompilationEnd();
784  parsePPUnknown();
785  // If the #endif of a potential include guard is the last thing in the file,
786  // then we found an include guard.
// Peek at the token at the current position without consuming it.
787  unsigned TokenPosition = Tokens->getPosition();
788  FormatToken *PeekNext = AllTokens[TokenPosition];
789  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
790  PeekNext->is(tok::eof) &&
// NOTE(review): the embedded line numbering skips 791, so the final operand of
// this condition (and its closing parenthesis) appears to be missing from this
// listing -- confirm against the original file.
792  IncludeGuard = IG_Found;
793 }
794 
// Parses a #define: the macro name, an optional parameter list, and then the
// macro body, which is parsed with parseFile(). Also advances include guard
// detection when the defined name matches the name of a preceding #ifndef.
795 void UnwrappedLineParser::parsePPDefine() {
796  nextToken();
797 
// A #define that is not followed by an identifier cannot be an include guard;
// parse the rest as an unknown directive.
798  if (FormatTok->Tok.getKind() != tok::identifier) {
799  IncludeGuard = IG_Rejected;
800  IncludeGuardToken = nullptr;
801  parsePPUnknown();
802  return;
803  }
804 
// The guard candidate is rejected again if any line collected so far starts
// with something other than a comment or a '#'.
805  if (IncludeGuard == IG_IfNdefed &&
806  IncludeGuardToken->TokenText == FormatTok->TokenText) {
807  IncludeGuard = IG_Defined;
808  IncludeGuardToken = nullptr;
809  for (auto &Line : Lines) {
810  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
811  IncludeGuard = IG_Rejected;
812  break;
813  }
814  }
815  }
816 
817  nextToken();
// An empty whitespace range means the '(' directly follows the macro name, in
// which case it is parsed as a parenthesised list here.
818  if (FormatTok->Tok.getKind() == tok::l_paren &&
819  FormatTok->WhitespaceRange.getBegin() ==
820  FormatTok->WhitespaceRange.getEnd()) {
821  parseParens();
822  }
// NOTE(review): the embedded line numbering skips 823, so a line ahead of the
// level adjustment below (possibly a condition guarding it) appears to be
// missing from this listing -- confirm against the original file.
824  Line->Level += PPBranchLevel + 1;
825  addUnwrappedLine();
826  ++Line->Level;
827 
828  // Errors during a preprocessor directive can only affect the layout of the
829  // preprocessor directive, and thus we ignore them. An alternative approach
830  // would be to use the same approach we use on the file level (no
831  // re-indentation if there was a structural error) within the macro
832  // definition.
833  parseFile();
834 }
835 
// Consumes all remaining tokens of the current preprocessor directive (until
// eof() reports the end) and emits them as a single line.
836 void UnwrappedLineParser::parsePPUnknown() {
837  do {
838  nextToken();
839  } while (!eof());
// NOTE(review): the embedded line numbering skips 840, so a line ahead of the
// level adjustment below (possibly a condition guarding it) appears to be
// missing from this listing -- confirm against the original file.
841  Line->Level += PPBranchLevel + 1;
842  addUnwrappedLine();
843 }
844 
845 // Here we blacklist certain tokens that are not usually the first token in an
846 // unwrapped line. This is used in attempt to distinguish macro calls without
847 // trailing semicolons from other constructs split to several lines.
848 static bool tokenCanStartNewLine(const clang::Token &Tok) {
849  // Semicolon can be a null-statement, l_square can be a start of a macro or
850  // a C++11 attribute, but this doesn't seem to be common.
851  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
852  Tok.isNot(tok::l_square) &&
853  // Tokens that can only be used as binary operators and a part of
854  // overloaded operator names.
855  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
856  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
857  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
858  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
859  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
860  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
861  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
862  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
863  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
864  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
865  Tok.isNot(tok::lesslessequal) &&
866  // Colon is used in labels, base class lists, initializer lists,
867  // range-based for loops, ternary operator, but should never be the
868  // first token in an unwrapped line.
869  Tok.isNot(tok::colon) &&
870  // 'noexcept' is a trailing annotation.
871  Tok.isNot(tok::kw_noexcept);
872 }
873 
874 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
875  const FormatToken *FormatTok) {
876  // FIXME: This returns true for C/C++ keywords like 'struct'.
877  return FormatTok->is(tok::identifier) &&
878  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
879  !FormatTok->isOneOf(
880  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
881  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
882  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
883  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
884  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
885  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
886  Keywords.kw_from));
887 }
888 
889 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
890  const FormatToken *FormatTok) {
891  return FormatTok->Tok.isLiteral() ||
892  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
893  mustBeJSIdent(Keywords, FormatTok);
894 }
895 
896 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
897 // when encountered after a value (see mustBeJSIdentOrValue).
898 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
899  const FormatToken *FormatTok) {
900  return FormatTok->isOneOf(
901  tok::kw_return, Keywords.kw_yield,
902  // conditionals
903  tok::kw_if, tok::kw_else,
904  // loops
905  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
906  // switch/case
907  tok::kw_switch, tok::kw_case,
908  // exceptions
909  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
910  // declaration
911  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
912  Keywords.kw_async, Keywords.kw_function,
913  // import/export
914  Keywords.kw_import, tok::kw_export);
915 }
916 
917 // readTokenWithJavaScriptASI reads the next token and terminates the current
918 // line if JavaScript Automatic Semicolon Insertion must
919 // happen between the current token and the next token.
920 //
921 // This method is conservative - it cannot cover all edge cases of JavaScript,
922 // but only aims to correctly handle certain well known cases. It *must not*
923 // return true in speculative cases.
924 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
925  FormatToken *Previous = FormatTok;
926  readToken();
927  FormatToken *Next = FormatTok;
928 
929  bool IsOnSameLine =
930  CommentsBeforeNextToken.empty()
931  ? Next->NewlinesBefore == 0
932  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
933  if (IsOnSameLine)
934  return;
935 
936  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
937  bool PreviousStartsTemplateExpr =
938  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
939  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
940  // If the line contains an '@' sign, the previous token might be an
941  // annotation, which can precede another identifier/value.
942  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
943  [](UnwrappedLineNode &LineNode) {
944  return LineNode.Tok->is(tok::at);
945  }) != Line->Tokens.end();
946  if (HasAt)
947  return;
948  }
949  if (Next->is(tok::exclaim) && PreviousMustBeValue)
950  return addUnwrappedLine();
951  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
952  bool NextEndsTemplateExpr =
953  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
954  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
955  (PreviousMustBeValue ||
956  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
957  tok::minusminus)))
958  return addUnwrappedLine();
959  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
960  isJSDeclOrStmt(Keywords, Next))
961  return addUnwrappedLine();
962 }
963 
964 void UnwrappedLineParser::parseStructuralElement() {
965  assert(!FormatTok->is(tok::l_brace));
966  if (Style.Language == FormatStyle::LK_TableGen &&
967  FormatTok->is(tok::pp_include)) {
968  nextToken();
969  if (FormatTok->is(tok::string_literal))
970  nextToken();
971  addUnwrappedLine();
972  return;
973  }
974  switch (FormatTok->Tok.getKind()) {
975  case tok::kw_asm:
976  nextToken();
977  if (FormatTok->is(tok::l_brace)) {
978  FormatTok->Type = TT_InlineASMBrace;
979  nextToken();
980  while (FormatTok && FormatTok->isNot(tok::eof)) {
981  if (FormatTok->is(tok::r_brace)) {
982  FormatTok->Type = TT_InlineASMBrace;
983  nextToken();
984  addUnwrappedLine();
985  break;
986  }
987  FormatTok->Finalized = true;
988  nextToken();
989  }
990  }
991  break;
992  case tok::kw_namespace:
993  parseNamespace();
994  return;
995  case tok::kw_public:
996  case tok::kw_protected:
997  case tok::kw_private:
998  if (Style.Language == FormatStyle::LK_Java ||
1000  nextToken();
1001  else
1002  parseAccessSpecifier();
1003  return;
1004  case tok::kw_if:
1005  parseIfThenElse();
1006  return;
1007  case tok::kw_for:
1008  case tok::kw_while:
1009  parseForOrWhileLoop();
1010  return;
1011  case tok::kw_do:
1012  parseDoWhile();
1013  return;
1014  case tok::kw_switch:
1015  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1016  // 'switch: string' field declaration.
1017  break;
1018  parseSwitch();
1019  return;
1020  case tok::kw_default:
1021  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1022  // 'default: string' field declaration.
1023  break;
1024  nextToken();
1025  if (FormatTok->is(tok::colon)) {
1026  parseLabel();
1027  return;
1028  }
1029  // e.g. "default void f() {}" in a Java interface.
1030  break;
1031  case tok::kw_case:
1032  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1033  // 'case: string' field declaration.
1034  break;
1035  parseCaseLabel();
1036  return;
1037  case tok::kw_try:
1038  case tok::kw___try:
1039  parseTryCatch();
1040  return;
1041  case tok::kw_extern:
1042  nextToken();
1043  if (FormatTok->Tok.is(tok::string_literal)) {
1044  nextToken();
1045  if (FormatTok->Tok.is(tok::l_brace)) {
1046  if (Style.BraceWrapping.AfterExternBlock) {
1047  addUnwrappedLine();
1048  parseBlock(/*MustBeDeclaration=*/true);
1049  } else {
1050  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1051  }
1052  addUnwrappedLine();
1053  return;
1054  }
1055  }
1056  break;
1057  case tok::kw_export:
1058  if (Style.Language == FormatStyle::LK_JavaScript) {
1059  parseJavaScriptEs6ImportExport();
1060  return;
1061  }
1062  if (!Style.isCpp())
1063  break;
1064  // Handle C++ "(inline|export) namespace".
1065  LLVM_FALLTHROUGH;
1066  case tok::kw_inline:
1067  nextToken();
1068  if (FormatTok->Tok.is(tok::kw_namespace)) {
1069  parseNamespace();
1070  return;
1071  }
1072  break;
1073  case tok::identifier:
1074  if (FormatTok->is(TT_ForEachMacro)) {
1075  parseForOrWhileLoop();
1076  return;
1077  }
1078  if (FormatTok->is(TT_MacroBlockBegin)) {
1079  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1080  /*MunchSemi=*/false);
1081  return;
1082  }
1083  if (FormatTok->is(Keywords.kw_import)) {
1084  if (Style.Language == FormatStyle::LK_JavaScript) {
1085  parseJavaScriptEs6ImportExport();
1086  return;
1087  }
1088  if (Style.Language == FormatStyle::LK_Proto) {
1089  nextToken();
1090  if (FormatTok->is(tok::kw_public))
1091  nextToken();
1092  if (!FormatTok->is(tok::string_literal))
1093  return;
1094  nextToken();
1095  if (FormatTok->is(tok::semi))
1096  nextToken();
1097  addUnwrappedLine();
1098  return;
1099  }
1100  }
1101  if (Style.isCpp() &&
1102  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1103  Keywords.kw_slots, Keywords.kw_qslots)) {
1104  nextToken();
1105  if (FormatTok->is(tok::colon)) {
1106  nextToken();
1107  addUnwrappedLine();
1108  return;
1109  }
1110  }
1111  // In all other cases, parse the declaration.
1112  break;
1113  default:
1114  break;
1115  }
1116  do {
1117  const FormatToken *Previous = FormatTok->Previous;
1118  switch (FormatTok->Tok.getKind()) {
1119  case tok::at:
1120  nextToken();
1121  if (FormatTok->Tok.is(tok::l_brace)) {
1122  nextToken();
1123  parseBracedList();
1124  break;
1125  }
1126  switch (FormatTok->Tok.getObjCKeywordID()) {
1127  case tok::objc_public:
1128  case tok::objc_protected:
1129  case tok::objc_package:
1130  case tok::objc_private:
1131  return parseAccessSpecifier();
1132  case tok::objc_interface:
1133  case tok::objc_implementation:
1134  return parseObjCInterfaceOrImplementation();
1135  case tok::objc_protocol:
1136  if (parseObjCProtocol())
1137  return;
1138  break;
1139  case tok::objc_end:
1140  return; // Handled by the caller.
1141  case tok::objc_optional:
1142  case tok::objc_required:
1143  nextToken();
1144  addUnwrappedLine();
1145  return;
1146  case tok::objc_autoreleasepool:
1147  nextToken();
1148  if (FormatTok->Tok.is(tok::l_brace)) {
1150  addUnwrappedLine();
1151  parseBlock(/*MustBeDeclaration=*/false);
1152  }
1153  addUnwrappedLine();
1154  return;
1155  case tok::objc_synchronized:
1156  nextToken();
1157  if (FormatTok->Tok.is(tok::l_paren))
1158  // Skip synchronization object
1159  parseParens();
1160  if (FormatTok->Tok.is(tok::l_brace)) {
1162  addUnwrappedLine();
1163  parseBlock(/*MustBeDeclaration=*/false);
1164  }
1165  addUnwrappedLine();
1166  return;
1167  case tok::objc_try:
1168  // This branch isn't strictly necessary (the kw_try case below would
1169  // do this too after the tok::at is parsed above). But be explicit.
1170  parseTryCatch();
1171  return;
1172  default:
1173  break;
1174  }
1175  break;
1176  case tok::kw_enum:
1177  // Ignore if this is part of "template <enum ...".
1178  if (Previous && Previous->is(tok::less)) {
1179  nextToken();
1180  break;
1181  }
1182 
1183  // parseEnum falls through and does not yet add an unwrapped line as an
1184  // enum definition can start a structural element.
1185  if (!parseEnum())
1186  break;
1187  // This only applies for C++.
1188  if (!Style.isCpp()) {
1189  addUnwrappedLine();
1190  return;
1191  }
1192  break;
1193  case tok::kw_typedef:
1194  nextToken();
1195  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1196  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1197  parseEnum();
1198  break;
1199  case tok::kw_struct:
1200  case tok::kw_union:
1201  case tok::kw_class:
1202  // parseRecord falls through and does not yet add an unwrapped line as a
1203  // record declaration or definition can start a structural element.
1204  parseRecord();
1205  // This does not apply for Java and JavaScript.
1206  if (Style.Language == FormatStyle::LK_Java ||
1208  if (FormatTok->is(tok::semi))
1209  nextToken();
1210  addUnwrappedLine();
1211  return;
1212  }
1213  break;
1214  case tok::period:
1215  nextToken();
1216  // In Java, classes have an implicit static member "class".
1217  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1218  FormatTok->is(tok::kw_class))
1219  nextToken();
1220  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1221  FormatTok->Tok.getIdentifierInfo())
1222  // JavaScript only has pseudo keywords, all keywords are allowed to
1223  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1224  nextToken();
1225  break;
1226  case tok::semi:
1227  nextToken();
1228  addUnwrappedLine();
1229  return;
1230  case tok::r_brace:
1231  addUnwrappedLine();
1232  return;
1233  case tok::l_paren:
1234  parseParens();
1235  break;
1236  case tok::kw_operator:
1237  nextToken();
1238  if (FormatTok->isBinaryOperator())
1239  nextToken();
1240  break;
1241  case tok::caret:
1242  nextToken();
1243  if (FormatTok->Tok.isAnyIdentifier() ||
1244  FormatTok->isSimpleTypeSpecifier())
1245  nextToken();
1246  if (FormatTok->is(tok::l_paren))
1247  parseParens();
1248  if (FormatTok->is(tok::l_brace))
1249  parseChildBlock();
1250  break;
1251  case tok::l_brace:
1252  if (!tryToParseBracedList()) {
1253  // A block outside of parentheses must be the last part of a
1254  // structural element.
1255  // FIXME: Figure out cases where this is not true, and add projections
1256  // for them (the one we know is missing are lambdas).
1257  if (Style.BraceWrapping.AfterFunction)
1258  addUnwrappedLine();
1259  FormatTok->Type = TT_FunctionLBrace;
1260  parseBlock(/*MustBeDeclaration=*/false);
1261  addUnwrappedLine();
1262  return;
1263  }
1264  // Otherwise this was a braced init list, and the structural
1265  // element continues.
1266  break;
1267  case tok::kw_try:
1268  // We arrive here when parsing function-try blocks.
1269  parseTryCatch();
1270  return;
1271  case tok::identifier: {
1272  if (FormatTok->is(TT_MacroBlockEnd)) {
1273  addUnwrappedLine();
1274  return;
1275  }
1276 
1277  // Function declarations (as opposed to function expressions) are parsed
1278  // on their own unwrapped line by continuing this loop. Function
1279  // expressions (functions that are not on their own line) must not create
1280  // a new unwrapped line, so they are special cased below.
1281  size_t TokenCount = Line->Tokens.size();
1282  if (Style.Language == FormatStyle::LK_JavaScript &&
1283  FormatTok->is(Keywords.kw_function) &&
1284  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1285  Keywords.kw_async)))) {
1286  tryToParseJSFunction();
1287  break;
1288  }
1289  if ((Style.Language == FormatStyle::LK_JavaScript ||
1290  Style.Language == FormatStyle::LK_Java) &&
1291  FormatTok->is(Keywords.kw_interface)) {
1292  if (Style.Language == FormatStyle::LK_JavaScript) {
1293  // In JavaScript/TypeScript, "interface" can be used as a standalone
1294  // identifier, e.g. in `var interface = 1;`. If "interface" is
1295  // followed by another identifier, it is very like to be an actual
1296  // interface declaration.
1297  unsigned StoredPosition = Tokens->getPosition();
1298  FormatToken *Next = Tokens->getNextToken();
1299  FormatTok = Tokens->setPosition(StoredPosition);
1300  if (Next && !mustBeJSIdent(Keywords, Next)) {
1301  nextToken();
1302  break;
1303  }
1304  }
1305  parseRecord();
1306  addUnwrappedLine();
1307  return;
1308  }
1309 
1310  // See if the following token should start a new unwrapped line.
1311  StringRef Text = FormatTok->TokenText;
1312  nextToken();
1313  if (Line->Tokens.size() == 1 &&
1314  // JS doesn't have macros, and within classes colons indicate fields,
1315  // not labels.
1317  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1318  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1319  parseLabel();
1320  return;
1321  }
1322  // Recognize function-like macro usages without trailing semicolon as
1323  // well as free-standing macros like Q_OBJECT.
1324  bool FunctionLike = FormatTok->is(tok::l_paren);
1325  if (FunctionLike)
1326  parseParens();
1327 
1328  bool FollowedByNewline =
1329  CommentsBeforeNextToken.empty()
1330  ? FormatTok->NewlinesBefore > 0
1331  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1332 
1333  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1334  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1335  addUnwrappedLine();
1336  return;
1337  }
1338  }
1339  break;
1340  }
1341  case tok::equal:
1342  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1343  // TT_JsFatArrow. The always start an expression or a child block if
1344  // followed by a curly.
1345  if (FormatTok->is(TT_JsFatArrow)) {
1346  nextToken();
1347  if (FormatTok->is(tok::l_brace))
1348  parseChildBlock();
1349  break;
1350  }
1351 
1352  nextToken();
1353  if (FormatTok->Tok.is(tok::l_brace)) {
1354  nextToken();
1355  parseBracedList();
1356  } else if (Style.Language == FormatStyle::LK_Proto &&
1357  FormatTok->Tok.is(tok::less)) {
1358  nextToken();
1359  parseBracedList(/*ContinueOnSemicolons=*/false,
1360  /*ClosingBraceKind=*/tok::greater);
1361  }
1362  break;
1363  case tok::l_square:
1364  parseSquare();
1365  break;
1366  case tok::kw_new:
1367  parseNew();
1368  break;
1369  default:
1370  nextToken();
1371  break;
1372  }
1373  } while (!eof());
1374 }
1375 
1376 bool UnwrappedLineParser::tryToParseLambda() {
1377  if (!Style.isCpp()) {
1378  nextToken();
1379  return false;
1380  }
1381  assert(FormatTok->is(tok::l_square));
1382  FormatToken &LSquare = *FormatTok;
1383  if (!tryToParseLambdaIntroducer())
1384  return false;
1385 
1386  while (FormatTok->isNot(tok::l_brace)) {
1387  if (FormatTok->isSimpleTypeSpecifier()) {
1388  nextToken();
1389  continue;
1390  }
1391  switch (FormatTok->Tok.getKind()) {
1392  case tok::l_brace:
1393  break;
1394  case tok::l_paren:
1395  parseParens();
1396  break;
1397  case tok::amp:
1398  case tok::star:
1399  case tok::kw_const:
1400  case tok::comma:
1401  case tok::less:
1402  case tok::greater:
1403  case tok::identifier:
1404  case tok::numeric_constant:
1405  case tok::coloncolon:
1406  case tok::kw_mutable:
1407  nextToken();
1408  break;
1409  case tok::arrow:
1410  FormatTok->Type = TT_LambdaArrow;
1411  nextToken();
1412  break;
1413  default:
1414  return true;
1415  }
1416  }
1417  LSquare.Type = TT_LambdaLSquare;
1418  parseChildBlock();
1419  return true;
1420 }
1421 
1422 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1423  const FormatToken *Previous = FormatTok->Previous;
1424  if (Previous &&
1425  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1426  tok::kw_delete, tok::l_square) ||
1427  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1428  Previous->isSimpleTypeSpecifier())) {
1429  nextToken();
1430  return false;
1431  }
1432  nextToken();
1433  if (FormatTok->is(tok::l_square)) {
1434  return false;
1435  }
1436  parseSquare(/*LambdaIntroducer=*/true);
1437  return true;
1438 }
1439 
1440 void UnwrappedLineParser::tryToParseJSFunction() {
1441  assert(FormatTok->is(Keywords.kw_function) ||
1442  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1443  if (FormatTok->is(Keywords.kw_async))
1444  nextToken();
1445  // Consume "function".
1446  nextToken();
1447 
1448  // Consume * (generator function). Treat it like C++'s overloaded operators.
1449  if (FormatTok->is(tok::star)) {
1450  FormatTok->Type = TT_OverloadedOperator;
1451  nextToken();
1452  }
1453 
1454  // Consume function name.
1455  if (FormatTok->is(tok::identifier))
1456  nextToken();
1457 
1458  if (FormatTok->isNot(tok::l_paren))
1459  return;
1460 
1461  // Parse formal parameter list.
1462  parseParens();
1463 
1464  if (FormatTok->is(tok::colon)) {
1465  // Parse a type definition.
1466  nextToken();
1467 
1468  // Eat the type declaration. For braced inline object types, balance braces,
1469  // otherwise just parse until finding an l_brace for the function body.
1470  if (FormatTok->is(tok::l_brace))
1471  tryToParseBracedList();
1472  else
1473  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1474  nextToken();
1475  }
1476 
1477  if (FormatTok->is(tok::semi))
1478  return;
1479 
1480  parseChildBlock();
1481 }
1482 
1483 bool UnwrappedLineParser::tryToParseBracedList() {
1484  if (FormatTok->BlockKind == BK_Unknown)
1485  calculateBraceTypes();
1486  assert(FormatTok->BlockKind != BK_Unknown);
1487  if (FormatTok->BlockKind == BK_Block)
1488  return false;
1489  nextToken();
1490  parseBracedList();
1491  return true;
1492 }
1493 
1494 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1495  tok::TokenKind ClosingBraceKind) {
1496  bool HasError = false;
1497 
1498  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1499  // replace this by using parseAssigmentExpression() inside.
1500  do {
1501  if (Style.Language == FormatStyle::LK_JavaScript) {
1502  if (FormatTok->is(Keywords.kw_function) ||
1503  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1504  tryToParseJSFunction();
1505  continue;
1506  }
1507  if (FormatTok->is(TT_JsFatArrow)) {
1508  nextToken();
1509  // Fat arrows can be followed by simple expressions or by child blocks
1510  // in curly braces.
1511  if (FormatTok->is(tok::l_brace)) {
1512  parseChildBlock();
1513  continue;
1514  }
1515  }
1516  if (FormatTok->is(tok::l_brace)) {
1517  // Could be a method inside of a braced list `{a() { return 1; }}`.
1518  if (tryToParseBracedList())
1519  continue;
1520  parseChildBlock();
1521  }
1522  }
1523  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1524  nextToken();
1525  return !HasError;
1526  }
1527  switch (FormatTok->Tok.getKind()) {
1528  case tok::caret:
1529  nextToken();
1530  if (FormatTok->is(tok::l_brace)) {
1531  parseChildBlock();
1532  }
1533  break;
1534  case tok::l_square:
1535  tryToParseLambda();
1536  break;
1537  case tok::l_paren:
1538  parseParens();
1539  // JavaScript can just have free standing methods and getters/setters in
1540  // object literals. Detect them by a "{" following ")".
1541  if (Style.Language == FormatStyle::LK_JavaScript) {
1542  if (FormatTok->is(tok::l_brace))
1543  parseChildBlock();
1544  break;
1545  }
1546  break;
1547  case tok::l_brace:
1548  // Assume there are no blocks inside a braced init list apart
1549  // from the ones we explicitly parse out (like lambdas).
1550  FormatTok->BlockKind = BK_BracedInit;
1551  nextToken();
1552  parseBracedList();
1553  break;
1554  case tok::less:
1555  if (Style.Language == FormatStyle::LK_Proto) {
1556  nextToken();
1557  parseBracedList(/*ContinueOnSemicolons=*/false,
1558  /*ClosingBraceKind=*/tok::greater);
1559  } else {
1560  nextToken();
1561  }
1562  break;
1563  case tok::semi:
1564  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1565  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1566  // used for error recovery if we have otherwise determined that this is
1567  // a braced list.
1568  if (Style.Language == FormatStyle::LK_JavaScript) {
1569  nextToken();
1570  break;
1571  }
1572  HasError = true;
1573  if (!ContinueOnSemicolons)
1574  return !HasError;
1575  nextToken();
1576  break;
1577  case tok::comma:
1578  nextToken();
1579  break;
1580  default:
1581  nextToken();
1582  break;
1583  }
1584  } while (!eof());
1585  return false;
1586 }
1587 
1588 void UnwrappedLineParser::parseParens() {
1589  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1590  nextToken();
1591  do {
1592  switch (FormatTok->Tok.getKind()) {
1593  case tok::l_paren:
1594  parseParens();
1595  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1596  parseChildBlock();
1597  break;
1598  case tok::r_paren:
1599  nextToken();
1600  return;
1601  case tok::r_brace:
1602  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1603  return;
1604  case tok::l_square:
1605  tryToParseLambda();
1606  break;
1607  case tok::l_brace:
1608  if (!tryToParseBracedList())
1609  parseChildBlock();
1610  break;
1611  case tok::at:
1612  nextToken();
1613  if (FormatTok->Tok.is(tok::l_brace)) {
1614  nextToken();
1615  parseBracedList();
1616  }
1617  break;
1618  case tok::kw_class:
1619  if (Style.Language == FormatStyle::LK_JavaScript)
1620  parseRecord(/*ParseAsExpr=*/true);
1621  else
1622  nextToken();
1623  break;
1624  case tok::identifier:
1625  if (Style.Language == FormatStyle::LK_JavaScript &&
1626  (FormatTok->is(Keywords.kw_function) ||
1627  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1628  tryToParseJSFunction();
1629  else
1630  nextToken();
1631  break;
1632  default:
1633  nextToken();
1634  break;
1635  }
1636  } while (!eof());
1637 }
1638 
1639 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1640  if (!LambdaIntroducer) {
1641  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1642  if (tryToParseLambda())
1643  return;
1644  }
1645  do {
1646  switch (FormatTok->Tok.getKind()) {
1647  case tok::l_paren:
1648  parseParens();
1649  break;
1650  case tok::r_square:
1651  nextToken();
1652  return;
1653  case tok::r_brace:
1654  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1655  return;
1656  case tok::l_square:
1657  parseSquare();
1658  break;
1659  case tok::l_brace: {
1660  if (!tryToParseBracedList())
1661  parseChildBlock();
1662  break;
1663  }
1664  case tok::at:
1665  nextToken();
1666  if (FormatTok->Tok.is(tok::l_brace)) {
1667  nextToken();
1668  parseBracedList();
1669  }
1670  break;
1671  default:
1672  nextToken();
1673  break;
1674  }
1675  } while (!eof());
1676 }
1677 
1678 void UnwrappedLineParser::parseIfThenElse() {
1679  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1680  nextToken();
1681  if (FormatTok->Tok.is(tok::kw_constexpr))
1682  nextToken();
1683  if (FormatTok->Tok.is(tok::l_paren))
1684  parseParens();
1685  bool NeedsUnwrappedLine = false;
1686  if (FormatTok->Tok.is(tok::l_brace)) {
1687  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1688  parseBlock(/*MustBeDeclaration=*/false);
1689  if (Style.BraceWrapping.BeforeElse)
1690  addUnwrappedLine();
1691  else
1692  NeedsUnwrappedLine = true;
1693  } else {
1694  addUnwrappedLine();
1695  ++Line->Level;
1696  parseStructuralElement();
1697  --Line->Level;
1698  }
1699  if (FormatTok->Tok.is(tok::kw_else)) {
1700  nextToken();
1701  if (FormatTok->Tok.is(tok::l_brace)) {
1702  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1703  parseBlock(/*MustBeDeclaration=*/false);
1704  addUnwrappedLine();
1705  } else if (FormatTok->Tok.is(tok::kw_if)) {
1706  parseIfThenElse();
1707  } else {
1708  addUnwrappedLine();
1709  ++Line->Level;
1710  parseStructuralElement();
1711  if (FormatTok->is(tok::eof))
1712  addUnwrappedLine();
1713  --Line->Level;
1714  }
1715  } else if (NeedsUnwrappedLine) {
1716  addUnwrappedLine();
1717  }
1718 }
1719 
1720 void UnwrappedLineParser::parseTryCatch() {
1721  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1722  nextToken();
1723  bool NeedsUnwrappedLine = false;
1724  if (FormatTok->is(tok::colon)) {
1725  // We are in a function try block, what comes is an initializer list.
1726  nextToken();
1727  while (FormatTok->is(tok::identifier)) {
1728  nextToken();
1729  if (FormatTok->is(tok::l_paren))
1730  parseParens();
1731  if (FormatTok->is(tok::comma))
1732  nextToken();
1733  }
1734  }
1735  // Parse try with resource.
1736  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1737  parseParens();
1738  }
1739  if (FormatTok->is(tok::l_brace)) {
1740  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1741  parseBlock(/*MustBeDeclaration=*/false);
1742  if (Style.BraceWrapping.BeforeCatch) {
1743  addUnwrappedLine();
1744  } else {
1745  NeedsUnwrappedLine = true;
1746  }
1747  } else if (!FormatTok->is(tok::kw_catch)) {
1748  // The C++ standard requires a compound-statement after a try.
1749  // If there's none, we try to assume there's a structuralElement
1750  // and try to continue.
1751  addUnwrappedLine();
1752  ++Line->Level;
1753  parseStructuralElement();
1754  --Line->Level;
1755  }
1756  while (1) {
1757  if (FormatTok->is(tok::at))
1758  nextToken();
1759  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1760  tok::kw___finally) ||
1761  ((Style.Language == FormatStyle::LK_Java ||
1763  FormatTok->is(Keywords.kw_finally)) ||
1764  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1765  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1766  break;
1767  nextToken();
1768  while (FormatTok->isNot(tok::l_brace)) {
1769  if (FormatTok->is(tok::l_paren)) {
1770  parseParens();
1771  continue;
1772  }
1773  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1774  return;
1775  nextToken();
1776  }
1777  NeedsUnwrappedLine = false;
1778  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1779  parseBlock(/*MustBeDeclaration=*/false);
1780  if (Style.BraceWrapping.BeforeCatch)
1781  addUnwrappedLine();
1782  else
1783  NeedsUnwrappedLine = true;
1784  }
1785  if (NeedsUnwrappedLine)
1786  addUnwrappedLine();
1787 }
1788 
1789 void UnwrappedLineParser::parseNamespace() {
1790  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1791 
1792  const FormatToken &InitialToken = *FormatTok;
1793  nextToken();
1794  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1795  nextToken();
1796  if (FormatTok->Tok.is(tok::l_brace)) {
1797  if (ShouldBreakBeforeBrace(Style, InitialToken))
1798  addUnwrappedLine();
1799 
1800  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1802  DeclarationScopeStack.size() > 1);
1803  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1804  // Munch the semicolon after a namespace. This is more common than one would
1805  // think. Puttin the semicolon into its own line is very ugly.
1806  if (FormatTok->Tok.is(tok::semi))
1807  nextToken();
1808  addUnwrappedLine();
1809  }
1810  // FIXME: Add error handling.
1811 }
1812 
1813 void UnwrappedLineParser::parseNew() {
1814  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1815  nextToken();
1816  if (Style.Language != FormatStyle::LK_Java)
1817  return;
1818 
1819  // In Java, we can parse everything up to the parens, which aren't optional.
1820  do {
1821  // There should not be a ;, { or } before the new's open paren.
1822  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1823  return;
1824 
1825  // Consume the parens.
1826  if (FormatTok->is(tok::l_paren)) {
1827  parseParens();
1828 
1829  // If there is a class body of an anonymous class, consume that as child.
1830  if (FormatTok->is(tok::l_brace))
1831  parseChildBlock();
1832  return;
1833  }
1834  nextToken();
1835  } while (!eof());
1836 }
1837 
1838 void UnwrappedLineParser::parseForOrWhileLoop() {
1839  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1840  "'for', 'while' or foreach macro expected");
1841  nextToken();
1842  // JS' for await ( ...
1843  if (Style.Language == FormatStyle::LK_JavaScript &&
1844  FormatTok->is(Keywords.kw_await))
1845  nextToken();
1846  if (FormatTok->Tok.is(tok::l_paren))
1847  parseParens();
1848  if (FormatTok->Tok.is(tok::l_brace)) {
1849  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1850  parseBlock(/*MustBeDeclaration=*/false);
1851  addUnwrappedLine();
1852  } else {
1853  addUnwrappedLine();
1854  ++Line->Level;
1855  parseStructuralElement();
1856  --Line->Level;
1857  }
1858 }
1859 
1860 void UnwrappedLineParser::parseDoWhile() {
1861  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1862  nextToken();
1863  if (FormatTok->Tok.is(tok::l_brace)) {
1864  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1865  parseBlock(/*MustBeDeclaration=*/false);
1866  if (Style.BraceWrapping.IndentBraces)
1867  addUnwrappedLine();
1868  } else {
1869  addUnwrappedLine();
1870  ++Line->Level;
1871  parseStructuralElement();
1872  --Line->Level;
1873  }
1874 
1875  // FIXME: Add error handling.
1876  if (!FormatTok->Tok.is(tok::kw_while)) {
1877  addUnwrappedLine();
1878  return;
1879  }
1880 
1881  nextToken();
1882  parseStructuralElement();
1883 }
1884 
1885 void UnwrappedLineParser::parseLabel() {
1886  nextToken();
1887  unsigned OldLineLevel = Line->Level;
1888  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1889  --Line->Level;
1890  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1891  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1892  parseBlock(/*MustBeDeclaration=*/false);
1893  if (FormatTok->Tok.is(tok::kw_break)) {
1895  addUnwrappedLine();
1896  parseStructuralElement();
1897  }
1898  addUnwrappedLine();
1899  } else {
1900  if (FormatTok->is(tok::semi))
1901  nextToken();
1902  addUnwrappedLine();
1903  }
1904  Line->Level = OldLineLevel;
1905  if (FormatTok->isNot(tok::l_brace)) {
1906  parseStructuralElement();
1907  addUnwrappedLine();
1908  }
1909 }
1910 
1911 void UnwrappedLineParser::parseCaseLabel() {
1912  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1913  // FIXME: fix handling of complex expressions here.
1914  do {
1915  nextToken();
1916  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1917  parseLabel();
1918 }
1919 
1920 void UnwrappedLineParser::parseSwitch() {
1921  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1922  nextToken();
1923  if (FormatTok->Tok.is(tok::l_paren))
1924  parseParens();
1925  if (FormatTok->Tok.is(tok::l_brace)) {
1926  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1927  parseBlock(/*MustBeDeclaration=*/false);
1928  addUnwrappedLine();
1929  } else {
1930  addUnwrappedLine();
1931  ++Line->Level;
1932  parseStructuralElement();
1933  --Line->Level;
1934  }
1935 }
1936 
1937 void UnwrappedLineParser::parseAccessSpecifier() {
1938  nextToken();
1939  // Understand Qt's slots.
1940  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1941  nextToken();
1942  // Otherwise, we don't know what it is, and we'd better keep the next token.
1943  if (FormatTok->Tok.is(tok::colon))
1944  nextToken();
1945  addUnwrappedLine();
1946 }
1947 
1948 bool UnwrappedLineParser::parseEnum() {
1949  // Won't be 'enum' for NS_ENUMs.
1950  if (FormatTok->Tok.is(tok::kw_enum))
1951  nextToken();
1952 
1953  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1954  // declarations. An "enum" keyword followed by a colon would be a syntax
1955  // error and thus assume it is just an identifier.
1956  if (Style.Language == FormatStyle::LK_JavaScript &&
1957  FormatTok->isOneOf(tok::colon, tok::question))
1958  return false;
1959 
1960  // Eat up enum class ...
1961  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1962  nextToken();
1963 
1964  while (FormatTok->Tok.getIdentifierInfo() ||
1965  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1966  tok::greater, tok::comma, tok::question)) {
1967  nextToken();
1968  // We can have macros or attributes in between 'enum' and the enum name.
1969  if (FormatTok->is(tok::l_paren))
1970  parseParens();
1971  if (FormatTok->is(tok::identifier)) {
1972  nextToken();
1973  // If there are two identifiers in a row, this is likely an elaborate
1974  // return type. In Java, this can be "implements", etc.
1975  if (Style.isCpp() && FormatTok->is(tok::identifier))
1976  return false;
1977  }
1978  }
1979 
1980  // Just a declaration or something is wrong.
1981  if (FormatTok->isNot(tok::l_brace))
1982  return true;
1983  FormatTok->BlockKind = BK_Block;
1984 
1985  if (Style.Language == FormatStyle::LK_Java) {
1986  // Java enums are different.
1987  parseJavaEnumBody();
1988  return true;
1989  }
1990  if (Style.Language == FormatStyle::LK_Proto) {
1991  parseBlock(/*MustBeDeclaration=*/true);
1992  return true;
1993  }
1994 
1995  // Parse enum body.
1996  nextToken();
1997  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1998  if (HasError) {
1999  if (FormatTok->is(tok::semi))
2000  nextToken();
2001  addUnwrappedLine();
2002  }
2003  return true;
2004 
2005  // There is no addUnwrappedLine() here so that we fall through to parsing a
2006  // structural element afterwards. Thus, in "enum A {} n, m;",
2007  // "} n, m;" will end up in one unwrapped line.
2008 }
2009 
2010 void UnwrappedLineParser::parseJavaEnumBody() {
2011  // Determine whether the enum is simple, i.e. does not have a semicolon or
2012  // constants with class bodies. Simple enums can be formatted like braced
2013  // lists, contracted to a single line, etc.
2014  unsigned StoredPosition = Tokens->getPosition();
2015  bool IsSimple = true;
2016  FormatToken *Tok = Tokens->getNextToken();
2017  while (Tok) {
2018  if (Tok->is(tok::r_brace))
2019  break;
2020  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2021  IsSimple = false;
2022  break;
2023  }
2024  // FIXME: This will also mark enums with braces in the arguments to enum
2025  // constants as "not simple". This is probably fine in practice, though.
2026  Tok = Tokens->getNextToken();
2027  }
2028  FormatTok = Tokens->setPosition(StoredPosition);
2029 
2030  if (IsSimple) {
2031  nextToken();
2032  parseBracedList();
2033  addUnwrappedLine();
2034  return;
2035  }
2036 
2037  // Parse the body of a more complex enum.
2038  // First add a line for everything up to the "{".
2039  nextToken();
2040  addUnwrappedLine();
2041  ++Line->Level;
2042 
2043  // Parse the enum constants.
2044  while (FormatTok) {
2045  if (FormatTok->is(tok::l_brace)) {
2046  // Parse the constant's class body.
2047  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2048  /*MunchSemi=*/false);
2049  } else if (FormatTok->is(tok::l_paren)) {
2050  parseParens();
2051  } else if (FormatTok->is(tok::comma)) {
2052  nextToken();
2053  addUnwrappedLine();
2054  } else if (FormatTok->is(tok::semi)) {
2055  nextToken();
2056  addUnwrappedLine();
2057  break;
2058  } else if (FormatTok->is(tok::r_brace)) {
2059  addUnwrappedLine();
2060  break;
2061  } else {
2062  nextToken();
2063  }
2064  }
2065 
2066  // Parse the class body after the enum's ";" if any.
2067  parseLevel(/*HasOpeningBrace=*/true);
2068  nextToken();
2069  --Line->Level;
2070  addUnwrappedLine();
2071 }
2072 
2073 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2074  const FormatToken &InitialToken = *FormatTok;
2075  nextToken();
2076 
2077  // The actual identifier can be a nested name specifier, and in macros
2078  // it is often token-pasted.
2079  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2080  tok::kw___attribute, tok::kw___declspec,
2081  tok::kw_alignas) ||
2082  ((Style.Language == FormatStyle::LK_Java ||
2084  FormatTok->isOneOf(tok::period, tok::comma))) {
2085  if (Style.Language == FormatStyle::LK_JavaScript &&
2086  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2087  // JavaScript/TypeScript supports inline object types in
2088  // extends/implements positions:
2089  // class Foo implements {bar: number} { }
2090  nextToken();
2091  if (FormatTok->is(tok::l_brace)) {
2092  tryToParseBracedList();
2093  continue;
2094  }
2095  }
2096  bool IsNonMacroIdentifier =
2097  FormatTok->is(tok::identifier) &&
2098  FormatTok->TokenText != FormatTok->TokenText.upper();
2099  nextToken();
2100  // We can have macros or attributes in between 'class' and the class name.
2101  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2102  parseParens();
2103  }
2104 
2105  // Note that parsing away template declarations here leads to incorrectly
2106  // accepting function declarations as record declarations.
2107  // In general, we cannot solve this problem. Consider:
2108  // class A<int> B() {}
2109  // which can be a function definition or a class definition when B() is a
2110  // macro. If we find enough real-world cases where this is a problem, we
2111  // can parse for the 'template' keyword in the beginning of the statement,
2112  // and thus rule out the record production in case there is no template
2113  // (this would still leave us with an ambiguity between template function
2114  // and class declarations).
2115  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2116  while (!eof()) {
2117  if (FormatTok->is(tok::l_brace)) {
2118  calculateBraceTypes(/*ExpectClassBody=*/true);
2119  if (!tryToParseBracedList())
2120  break;
2121  }
2122  if (FormatTok->Tok.is(tok::semi))
2123  return;
2124  nextToken();
2125  }
2126  }
2127  if (FormatTok->Tok.is(tok::l_brace)) {
2128  if (ParseAsExpr) {
2129  parseChildBlock();
2130  } else {
2131  if (ShouldBreakBeforeBrace(Style, InitialToken))
2132  addUnwrappedLine();
2133 
2134  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2135  /*MunchSemi=*/false);
2136  }
2137  }
2138  // There is no addUnwrappedLine() here so that we fall through to parsing a
2139  // structural element afterwards. Thus, in "class A {} n, m;",
2140  // "} n, m;" will end up in one unwrapped line.
2141 }
2142 
2143 void UnwrappedLineParser::parseObjCMethod() {
2144  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2145  "'(' or identifier expected.");
2146  do {
2147  if (FormatTok->Tok.is(tok::semi)) {
2148  nextToken();
2149  addUnwrappedLine();
2150  return;
2151  } else if (FormatTok->Tok.is(tok::l_brace)) {
2152  parseBlock(/*MustBeDeclaration=*/false);
2153  addUnwrappedLine();
2154  return;
2155  } else {
2156  nextToken();
2157  }
2158  } while (!eof());
2159 }
2160 
2161 void UnwrappedLineParser::parseObjCProtocolList() {
2162  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2163  do {
2164  nextToken();
2165  // Early exit in case someone forgot a close angle.
2166  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2167  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2168  return;
2169  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2170  nextToken(); // Skip '>'.
2171 }
2172 
2173 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2174  do {
2175  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2176  nextToken();
2177  addUnwrappedLine();
2178  break;
2179  }
2180  if (FormatTok->is(tok::l_brace)) {
2181  parseBlock(/*MustBeDeclaration=*/false);
2182  // In ObjC interfaces, nothing should be following the "}".
2183  addUnwrappedLine();
2184  } else if (FormatTok->is(tok::r_brace)) {
2185  // Ignore stray "}". parseStructuralElement doesn't consume them.
2186  nextToken();
2187  addUnwrappedLine();
2188  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2189  nextToken();
2190  parseObjCMethod();
2191  } else {
2192  parseStructuralElement();
2193  }
2194  } while (!eof());
2195 }
2196 
2197 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2198  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2199  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2200  nextToken();
2201  nextToken(); // interface name
2202 
2203  // @interface can be followed by a lightweight generic
2204  // specialization list, then either a base class or a category.
2205  if (FormatTok->Tok.is(tok::less)) {
2206  // Unlike protocol lists, generic parameterizations support
2207  // nested angles:
2208  //
2209  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2210  // NSObject <NSCopying, NSSecureCoding>
2211  //
2212  // so we need to count how many open angles we have left.
2213  unsigned NumOpenAngles = 1;
2214  do {
2215  nextToken();
2216  // Early exit in case someone forgot a close angle.
2217  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2218  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2219  break;
2220  if (FormatTok->Tok.is(tok::less))
2221  ++NumOpenAngles;
2222  else if (FormatTok->Tok.is(tok::greater)) {
2223  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2224  --NumOpenAngles;
2225  }
2226  } while (!eof() && NumOpenAngles != 0);
2227  nextToken(); // Skip '>'.
2228  }
2229  if (FormatTok->Tok.is(tok::colon)) {
2230  nextToken();
2231  nextToken(); // base class name
2232  } else if (FormatTok->Tok.is(tok::l_paren))
2233  // Skip category, if present.
2234  parseParens();
2235 
2236  if (FormatTok->Tok.is(tok::less))
2237  parseObjCProtocolList();
2238 
2239  if (FormatTok->Tok.is(tok::l_brace)) {
2241  addUnwrappedLine();
2242  parseBlock(/*MustBeDeclaration=*/true);
2243  }
2244 
2245  // With instance variables, this puts '}' on its own line. Without instance
2246  // variables, this ends the @interface line.
2247  addUnwrappedLine();
2248 
2249  parseObjCUntilAtEnd();
2250 }
2251 
2252 // Returns true for the declaration/definition form of @protocol,
2253 // false for the expression form.
2254 bool UnwrappedLineParser::parseObjCProtocol() {
2255  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2256  nextToken();
2257 
2258  if (FormatTok->is(tok::l_paren))
2259  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2260  return false;
2261 
2262  // The definition/declaration form,
2263  // @protocol Foo
2264  // - (int)someMethod;
2265  // @end
2266 
2267  nextToken(); // protocol name
2268 
2269  if (FormatTok->Tok.is(tok::less))
2270  parseObjCProtocolList();
2271 
2272  // Check for protocol declaration.
2273  if (FormatTok->Tok.is(tok::semi)) {
2274  nextToken();
2275  addUnwrappedLine();
2276  return true;
2277  }
2278 
2279  addUnwrappedLine();
2280  parseObjCUntilAtEnd();
2281  return true;
2282 }
2283 
2284 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2285  bool IsImport = FormatTok->is(Keywords.kw_import);
2286  assert(IsImport || FormatTok->is(tok::kw_export));
2287  nextToken();
2288 
2289  // Consume the "default" in "export default class/function".
2290  if (FormatTok->is(tok::kw_default))
2291  nextToken();
2292 
2293  // Consume "async function", "function" and "default function", so that these
2294  // get parsed as free-standing JS functions, i.e. do not require a trailing
2295  // semicolon.
2296  if (FormatTok->is(Keywords.kw_async))
2297  nextToken();
2298  if (FormatTok->is(Keywords.kw_function)) {
2299  nextToken();
2300  return;
2301  }
2302 
2303  // For imports, `export *`, `export {...}`, consume the rest of the line up
2304  // to the terminating `;`. For everything else, just return and continue
2305  // parsing the structural element, i.e. the declaration or expression for
2306  // `export default`.
2307  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2308  !FormatTok->isStringLiteral())
2309  return;
2310 
2311  while (!eof()) {
2312  if (FormatTok->is(tok::semi))
2313  return;
2314  if (Line->Tokens.empty()) {
2315  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2316  // import statement should terminate.
2317  return;
2318  }
2319  if (FormatTok->is(tok::l_brace)) {
2320  FormatTok->BlockKind = BK_Block;
2321  nextToken();
2322  parseBracedList();
2323  } else {
2324  nextToken();
2325  }
2326  }
2327 }
2328 
2329 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2330  StringRef Prefix = "") {
2331  llvm::dbgs() << Prefix << "Line(" << Line.Level
2332  << ", FSC=" << Line.FirstStartColumn << ")"
2333  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2334  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2335  E = Line.Tokens.end();
2336  I != E; ++I) {
2337  llvm::dbgs() << I->Tok->Tok.getName() << "["
2338  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2339  << "] ";
2340  }
2341  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2342  E = Line.Tokens.end();
2343  I != E; ++I) {
2344  const UnwrappedLineNode &Node = *I;
2346  I = Node.Children.begin(),
2347  E = Node.Children.end();
2348  I != E; ++I) {
2349  printDebugInfo(*I, "\nChild: ");
2350  }
2351  }
2352  llvm::dbgs() << "\n";
2353 }
2354 
2355 void UnwrappedLineParser::addUnwrappedLine() {
2356  if (Line->Tokens.empty())
2357  return;
2358  LLVM_DEBUG({
2359  if (CurrentLines == &Lines)
2360  printDebugInfo(*Line);
2361  });
2362  CurrentLines->push_back(std::move(*Line));
2363  Line->Tokens.clear();
2364  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2365  Line->FirstStartColumn = 0;
2366  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2367  CurrentLines->append(
2368  std::make_move_iterator(PreprocessorDirectives.begin()),
2369  std::make_move_iterator(PreprocessorDirectives.end()));
2370  PreprocessorDirectives.clear();
2371  }
2372  // Disconnect the current token from the last token on the previous line.
2373  FormatTok->Previous = nullptr;
2374 }
2375 
2376 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2377 
2378 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2379  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2380  FormatTok.NewlinesBefore > 0;
2381 }
2382 
2383 // Checks if \p FormatTok is a line comment that continues the line comment
2384 // section on \p Line.
2385 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2386  const UnwrappedLine &Line,
2387  llvm::Regex &CommentPragmasRegex) {
2388  if (Line.Tokens.empty())
2389  return false;
2390 
2391  StringRef IndentContent = FormatTok.TokenText;
2392  if (FormatTok.TokenText.startswith("//") ||
2393  FormatTok.TokenText.startswith("/*"))
2394  IndentContent = FormatTok.TokenText.substr(2);
2395  if (CommentPragmasRegex.match(IndentContent))
2396  return false;
2397 
2398  // If Line starts with a line comment, then FormatTok continues the comment
2399  // section if its original column is greater or equal to the original start
2400  // column of the line.
2401  //
2402  // Define the min column token of a line as follows: if a line ends in '{' or
2403  // contains a '{' followed by a line comment, then the min column token is
2404  // that '{'. Otherwise, the min column token of the line is the first token of
2405  // the line.
2406  //
2407  // If Line starts with a token other than a line comment, then FormatTok
2408  // continues the comment section if its original column is greater than the
2409  // original start column of the min column token of the line.
2410  //
2411  // For example, the second line comment continues the first in these cases:
2412  //
2413  // // first line
2414  // // second line
2415  //
2416  // and:
2417  //
2418  // // first line
2419  // // second line
2420  //
2421  // and:
2422  //
2423  // int i; // first line
2424  // // second line
2425  //
2426  // and:
2427  //
2428  // do { // first line
2429  // // second line
2430  // int i;
2431  // } while (true);
2432  //
2433  // and:
2434  //
2435  // enum {
2436  // a, // first line
2437  // // second line
2438  // b
2439  // };
2440  //
2441  // The second line comment doesn't continue the first in these cases:
2442  //
2443  // // first line
2444  // // second line
2445  //
2446  // and:
2447  //
2448  // int i; // first line
2449  // // second line
2450  //
2451  // and:
2452  //
2453  // do { // first line
2454  // // second line
2455  // int i;
2456  // } while (true);
2457  //
2458  // and:
2459  //
2460  // enum {
2461  // a, // first line
2462  // // second line
2463  // };
2464  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2465 
2466  // Scan for '{//'. If found, use the column of '{' as a min column for line
2467  // comment section continuation.
2468  const FormatToken *PreviousToken = nullptr;
2469  for (const UnwrappedLineNode &Node : Line.Tokens) {
2470  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2471  isLineComment(*Node.Tok)) {
2472  MinColumnToken = PreviousToken;
2473  break;
2474  }
2475  PreviousToken = Node.Tok;
2476 
2477  // Grab the last newline preceding a token in this unwrapped line.
2478  if (Node.Tok->NewlinesBefore > 0) {
2479  MinColumnToken = Node.Tok;
2480  }
2481  }
2482  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2483  MinColumnToken = PreviousToken;
2484  }
2485 
2486  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2487  MinColumnToken);
2488 }
2489 
2490 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2491  bool JustComments = Line->Tokens.empty();
2493  I = CommentsBeforeNextToken.begin(),
2494  E = CommentsBeforeNextToken.end();
2495  I != E; ++I) {
2496  // Line comments that belong to the same line comment section are put on the
2497  // same line since later we might want to reflow content between them.
2498  // Additional fine-grained breaking of line comment sections is controlled
2499  // by the class BreakableLineCommentSection in case it is desirable to keep
2500  // several line comment sections in the same unwrapped line.
2501  //
2502  // FIXME: Consider putting separate line comment sections as children to the
2503  // unwrapped line instead.
2505  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2506  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2507  addUnwrappedLine();
2508  pushToken(*I);
2509  }
2510  if (NewlineBeforeNext && JustComments)
2511  addUnwrappedLine();
2512  CommentsBeforeNextToken.clear();
2513 }
2514 
2515 void UnwrappedLineParser::nextToken(int LevelDifference) {
2516  if (eof())
2517  return;
2518  flushComments(isOnNewLine(*FormatTok));
2519  pushToken(FormatTok);
2520  FormatToken *Previous = FormatTok;
2521  if (Style.Language != FormatStyle::LK_JavaScript)
2522  readToken(LevelDifference);
2523  else
2524  readTokenWithJavaScriptASI();
2525  FormatTok->Previous = Previous;
2526 }
2527 
2528 void UnwrappedLineParser::distributeComments(
2529  const SmallVectorImpl<FormatToken *> &Comments,
2530  const FormatToken *NextTok) {
2531  // Whether or not a line comment token continues a line is controlled by
2532  // the method continuesLineCommentSection, with the following caveat:
2533  //
2534  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2535  // that each comment line from the trail is aligned with the next token, if
2536  // the next token exists. If a trail exists, the beginning of the maximal
2537  // trail is marked as a start of a new comment section.
2538  //
2539  // For example in this code:
2540  //
2541  // int a; // line about a
2542  // // line 1 about b
2543  // // line 2 about b
2544  // int b;
2545  //
2546  // the two lines about b form a maximal trail, so there are two sections, the
2547  // first one consisting of the single comment "// line about a" and the
2548  // second one consisting of the next two comments.
2549  if (Comments.empty())
2550  return;
2551  bool ShouldPushCommentsInCurrentLine = true;
2552  bool HasTrailAlignedWithNextToken = false;
2553  unsigned StartOfTrailAlignedWithNextToken = 0;
2554  if (NextTok) {
2555  // We are skipping the first element intentionally.
2556  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2557  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2558  HasTrailAlignedWithNextToken = true;
2559  StartOfTrailAlignedWithNextToken = i;
2560  }
2561  }
2562  }
2563  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2564  FormatToken *FormatTok = Comments[i];
2565  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2566  FormatTok->ContinuesLineCommentSection = false;
2567  } else {
2568  FormatTok->ContinuesLineCommentSection =
2569  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2570  }
2571  if (!FormatTok->ContinuesLineCommentSection &&
2572  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2573  ShouldPushCommentsInCurrentLine = false;
2574  }
2575  if (ShouldPushCommentsInCurrentLine) {
2576  pushToken(FormatTok);
2577  } else {
2578  CommentsBeforeNextToken.push_back(FormatTok);
2579  }
2580  }
2581 }
2582 
2583 void UnwrappedLineParser::readToken(int LevelDifference) {
2585  do {
2586  FormatTok = Tokens->getNextToken();
2587  assert(FormatTok);
2588  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2589  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2590  distributeComments(Comments, FormatTok);
2591  Comments.clear();
2592  // If there is an unfinished unwrapped line, we flush the preprocessor
2593  // directives only after that unwrapped line was finished later.
2594  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2595  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2596  assert((LevelDifference >= 0 ||
2597  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2598  "LevelDifference makes Line->Level negative");
2599  Line->Level += LevelDifference;
2600  // Comments stored before the preprocessor directive need to be output
2601  // before the preprocessor directive, at the same level as the
2602  // preprocessor directive, as we consider them to apply to the directive.
2603  flushComments(isOnNewLine(*FormatTok));
2604  parsePPDirective();
2605  }
2606  while (FormatTok->Type == TT_ConflictStart ||
2607  FormatTok->Type == TT_ConflictEnd ||
2608  FormatTok->Type == TT_ConflictAlternative) {
2609  if (FormatTok->Type == TT_ConflictStart) {
2610  conditionalCompilationStart(/*Unreachable=*/false);
2611  } else if (FormatTok->Type == TT_ConflictAlternative) {
2612  conditionalCompilationAlternative();
2613  } else if (FormatTok->Type == TT_ConflictEnd) {
2614  conditionalCompilationEnd();
2615  }
2616  FormatTok = Tokens->getNextToken();
2617  FormatTok->MustBreakBefore = true;
2618  }
2619 
2620  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2621  !Line->InPPDirective) {
2622  continue;
2623  }
2624 
2625  if (!FormatTok->Tok.is(tok::comment)) {
2626  distributeComments(Comments, FormatTok);
2627  Comments.clear();
2628  return;
2629  }
2630 
2631  Comments.push_back(FormatTok);
2632  } while (!eof());
2633 
2634  distributeComments(Comments, nullptr);
2635  Comments.clear();
2636 }
2637 
2638 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2639  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2640  if (MustBreakBeforeNextToken) {
2641  Line->Tokens.back().Tok->MustBreakBefore = true;
2642  MustBreakBeforeNextToken = false;
2643  }
2644 }
2645 
2646 } // end namespace format
2647 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
bool AfterUnion
Wrap union definitions.
Definition: Format.h:728
Indent in all namespaces.
Definition: Format.h:1278
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c.h:60
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:127
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1191
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:1070
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:218
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:742
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:159
Does not indent any directives.
Definition: Format.h:1082
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:107
bool isBinaryOperator() const
Definition: FormatToken.h:416
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:1095
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:133
tok::TokenKind getKind() const
Definition: Token.h:90
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:384
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:700
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:774
Should be used for Java.
Definition: Format.h:1184
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:293
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:326
static void hash_combine(std::size_t &seed, const T &v)
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:1282
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:507
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:319
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:1186
ContinuationIndenter * Indenter
const AnnotatedLine * Line
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:823
bool AfterFunction
Wrap function definitions.
Definition: Format.h:680
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
SourceLocation getEnd() const
do v
Definition: arm_acle.h:78
#define false
Definition: stdbool.h:33
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:310
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's lexer.
Definition: FormatToken.h:663
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:140
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:171
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:48
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:1268
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1201
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:96
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:308
Should be used for TableGen code.
Definition: Format.h:1193
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:97
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:714
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:339
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:652
Indents directives after the hash.
Definition: Format.h:1091
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:61
Represents a complete lambda introducer.
Definition: DeclSpec.h:2532
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:165
bool AfterClass
Wrap class definitions.
Definition: Format.h:634
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1196
StringRef Text
Definition: Format.cpp:1603
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:287
bool isStringLiteral() const
Definition: FormatToken.h:350
SourceLocation getBegin() const
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:696
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:137
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:177
const FormatStyle & Style