clang  10.0.0svn
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
28 public:
29  virtual ~FormatTokenSource() {}
30  virtual FormatToken *getNextToken() = 0;
31 
32  virtual unsigned getPosition() = 0;
33  virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41  bool MustBeDeclaration)
42  : Line(Line), Stack(Stack) {
43  Line.MustBeDeclaration = MustBeDeclaration;
44  Stack.push_back(MustBeDeclaration);
45  }
46  ~ScopedDeclarationState() {
47  Stack.pop_back();
48  if (!Stack.empty())
49  Line.MustBeDeclaration = Stack.back();
50  else
51  Line.MustBeDeclaration = true;
52  }
53 
54 private:
56  std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67  const FormatToken *Previous,
68  const FormatToken *MinColumnToken) {
69  if (!Previous || !MinColumnToken)
70  return false;
71  unsigned MinContinueColumn =
72  MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74  isLineComment(*Previous) &&
75  FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81  FormatToken *&ResetToken)
82  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84  Token(nullptr), PreviousToken(nullptr) {
85  FakeEOF.Tok.startToken();
86  FakeEOF.Tok.setKind(tok::eof);
87  TokenSource = this;
88  Line.Level = 0;
89  Line.InPPDirective = true;
90  }
91 
92  ~ScopedMacroState() override {
93  TokenSource = PreviousTokenSource;
94  ResetToken = Token;
95  Line.InPPDirective = false;
96  Line.Level = PreviousLineLevel;
97  }
98 
99  FormatToken *getNextToken() override {
100  // The \c UnwrappedLineParser guards against this by never calling
101  // \c getNextToken() after it has encountered the first eof token.
102  assert(!eof());
103  PreviousToken = Token;
104  Token = PreviousTokenSource->getNextToken();
105  if (eof())
106  return &FakeEOF;
107  return Token;
108  }
109 
110  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112  FormatToken *setPosition(unsigned Position) override {
113  PreviousToken = nullptr;
114  Token = PreviousTokenSource->setPosition(Position);
115  return Token;
116  }
117 
118 private:
119  bool eof() {
120  return Token && Token->HasUnescapedNewline &&
121  !continuesLineComment(*Token, PreviousToken,
122  /*MinColumnToken=*/PreviousToken);
123  }
124 
125  FormatToken FakeEOF;
127  FormatTokenSource *&TokenSource;
128  FormatToken *&ResetToken;
129  unsigned PreviousLineLevel;
130  FormatTokenSource *PreviousTokenSource;
131 
133  FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
139 public:
141  bool SwitchToPreprocessorLines = false)
142  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143  if (SwitchToPreprocessorLines)
144  Parser.CurrentLines = &Parser.PreprocessorDirectives;
145  else if (!Parser.Line->Tokens.empty())
146  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147  PreBlockLine = std::move(Parser.Line);
148  Parser.Line = llvm::make_unique<UnwrappedLine>();
149  Parser.Line->Level = PreBlockLine->Level;
150  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151  }
152 
154  if (!Parser.Line->Tokens.empty()) {
155  Parser.addUnwrappedLine();
156  }
157  assert(Parser.Line->Tokens.empty());
158  Parser.Line = std::move(PreBlockLine);
159  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160  Parser.MustBreakBeforeNextToken = true;
161  Parser.CurrentLines = OriginalLines;
162  }
163 
164 private:
166 
167  std::unique_ptr<UnwrappedLine> PreBlockLine;
168  SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
172 public:
174  const FormatStyle &Style, unsigned &LineLevel)
175  : CompoundStatementIndenter(Parser, LineLevel,
176  Style.BraceWrapping.AfterControlStatement,
177  Style.BraceWrapping.IndentBraces) {
178  }
180  bool WrapBrace, bool IndentBrace)
181  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182  if (WrapBrace)
183  Parser->addUnwrappedLine();
184  if (IndentBrace)
185  ++LineLevel;
186  }
187  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190  unsigned &LineLevel;
191  unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199  : Tokens(Tokens), Position(-1) {}
200 
201  FormatToken *getNextToken() override {
202  ++Position;
203  return Tokens[Position];
204  }
205 
206  unsigned getPosition() override {
207  assert(Position >= 0);
208  return Position;
209  }
210 
211  FormatToken *setPosition(unsigned P) override {
212  Position = P;
213  return Tokens[Position];
214  }
215 
216  void reset() { Position = -1; }
217 
218 private:
220  int Position;
221 };
222 
223 } // end anonymous namespace
224 
226  const AdditionalKeywords &Keywords,
227  unsigned FirstStartColumn,
229  UnwrappedLineConsumer &Callback)
230  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231  CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232  CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234  IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235  ? IG_Rejected
236  : IG_Inited),
237  IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240  PPBranchLevel = -1;
241  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242  ? IG_Rejected
243  : IG_Inited;
244  IncludeGuardToken = nullptr;
245  Line.reset(new UnwrappedLine);
246  CommentsBeforeNextToken.clear();
247  FormatTok = nullptr;
248  MustBreakBeforeNextToken = false;
249  PreprocessorDirectives.clear();
250  CurrentLines = &Lines;
251  DeclarationScopeStack.clear();
252  PPStack.clear();
253  Line->FirstStartColumn = FirstStartColumn;
254 }
255 
257  IndexedTokenSource TokenSource(AllTokens);
258  Line->FirstStartColumn = FirstStartColumn;
259  do {
260  LLVM_DEBUG(llvm::dbgs() << "----\n");
261  reset();
262  Tokens = &TokenSource;
263  TokenSource.reset();
264 
265  readToken();
266  parseFile();
267 
268  // If we found an include guard then all preprocessor directives (other than
269  // the guard) are over-indented by one.
270  if (IncludeGuard == IG_Found)
271  for (auto &Line : Lines)
272  if (Line.InPPDirective && Line.Level > 0)
273  --Line.Level;
274 
275  // Create line with eof token.
276  pushToken(FormatTok);
277  addUnwrappedLine();
278 
279  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280  E = Lines.end();
281  I != E; ++I) {
282  Callback.consumeUnwrappedLine(*I);
283  }
284  Callback.finishRun();
285  Lines.clear();
286  while (!PPLevelBranchIndex.empty() &&
287  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290  }
291  if (!PPLevelBranchIndex.empty()) {
292  ++PPLevelBranchIndex.back();
293  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295  }
296  } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300  // The top-level context in a file always has declarations, except for pre-
301  // processor directives and JavaScript files.
302  bool MustBeDeclaration =
303  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305  MustBeDeclaration);
306  if (Style.Language == FormatStyle::LK_TextProto)
307  parseBracedList();
308  else
309  parseLevel(/*HasOpeningBrace=*/false);
310  // Make sure to format the remaining tokens.
311  //
312  // LK_TextProto is special since its top-level is parsed as the body of a
313  // braced list, which does not necessarily have natural line separators such
314  // as a semicolon. Comments after the last entry that have been determined to
315  // not belong to that line, as in:
316  // key: value
317  // // endfile comment
318  // do not have a chance to be put on a line of their own until this point.
319  // Here we add this newline before end-of-file comments.
320  if (Style.Language == FormatStyle::LK_TextProto &&
321  !CommentsBeforeNextToken.empty())
322  addUnwrappedLine();
323  flushComments(true);
324  addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
328  bool SwitchLabelEncountered = false;
329  do {
330  tok::TokenKind kind = FormatTok->Tok.getKind();
331  if (FormatTok->Type == TT_MacroBlockBegin) {
332  kind = tok::l_brace;
333  } else if (FormatTok->Type == TT_MacroBlockEnd) {
334  kind = tok::r_brace;
335  }
336 
337  switch (kind) {
338  case tok::comment:
339  nextToken();
340  addUnwrappedLine();
341  break;
342  case tok::l_brace:
343  // FIXME: Add parameter whether this can happen - if this happens, we must
344  // be in a non-declaration context.
345  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
346  continue;
347  parseBlock(/*MustBeDeclaration=*/false);
348  addUnwrappedLine();
349  break;
350  case tok::r_brace:
351  if (HasOpeningBrace)
352  return;
353  nextToken();
354  addUnwrappedLine();
355  break;
356  case tok::kw_default: {
357  unsigned StoredPosition = Tokens->getPosition();
358  FormatToken *Next;
359  do {
360  Next = Tokens->getNextToken();
361  } while (Next && Next->is(tok::comment));
362  FormatTok = Tokens->setPosition(StoredPosition);
363  if (Next && Next->isNot(tok::colon)) {
364  // default not followed by ':' is not a case label; treat it like
365  // an identifier.
366  parseStructuralElement();
367  break;
368  }
369  // Else, if it is 'default:', fall through to the case handling.
370  LLVM_FALLTHROUGH;
371  }
372  case tok::kw_case:
373  if (Style.Language == FormatStyle::LK_JavaScript &&
374  Line->MustBeDeclaration) {
375  // A 'case: string' style field declaration.
376  parseStructuralElement();
377  break;
378  }
379  if (!SwitchLabelEncountered &&
380  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
381  ++Line->Level;
382  SwitchLabelEncountered = true;
383  parseStructuralElement();
384  break;
385  default:
386  parseStructuralElement();
387  break;
388  }
389  } while (!eof());
390 }
391 
// Classifies the '{' at the current token, and every brace nested inside it,
// as either a block (BK_Block) or a braced initializer list (BK_BracedInit).
//
// The function scans forward from the current token until the opening brace
// has been matched or eof is reached, records the decision in each brace
// token's BlockKind, and finally rewinds the token source to where it started.
//
// ExpectClassBody only affects the "'}' followed by ';'" heuristic: when it is
// set, a semicolon after the outermost closing brace does not by itself mark
// that brace pair as a braced list.
392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
393  // We'll parse forward through the tokens until we hit
394  // a closing brace or eof - note that getNextToken() will
395  // parse macros, so this will magically work inside macro
396  // definitions, too.
397  unsigned StoredPosition = Tokens->getPosition();
398  FormatToken *Tok = FormatTok;
399  const FormatToken *PrevTok = Tok->Previous;
400  // Keep a stack of positions of lbrace tokens. We will
401  // update information about whether an lbrace starts a
402  // braced init list or a different block during the loop.
403  SmallVector<FormatToken *, 8> LBraceStack;
404  assert(Tok->Tok.is(tok::l_brace));
405  do {
406  // Get next non-comment token.
407  FormatToken *NextTok;
408  unsigned ReadTokens = 0;
409  do {
410  NextTok = Tokens->getNextToken();
411  ++ReadTokens;
412  } while (NextTok->is(tok::comment));
413 
// Tok is the token being classified; NextTok is the lookahead after it.
414  switch (Tok->Tok.getKind()) {
415  case tok::l_brace:
416  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
417  if (PrevTok->isOneOf(tok::colon, tok::less))
418  // A ':' indicates this code is in a type, or a braced list
419  // following a label in an object literal ({a: {b: 1}}).
420  // A '<' could be an object used in a comparison, but that is nonsense
421  // code (can never return true), so more likely it is a generic type
422  // argument (`X<{a: string; b: number}>`).
423  // The code below could be confused by semicolons between the
424  // individual members in a type member list, which would normally
425  // trigger BK_Block. In both cases, this must be parsed as an inline
426  // braced init.
427  Tok->BlockKind = BK_BracedInit;
428  else if (PrevTok->is(tok::r_paren))
429  // `) { }` can only occur in function or method declarations in JS.
430  Tok->BlockKind = BK_Block;
431  } else {
432  Tok->BlockKind = BK_Unknown;
433  }
434  LBraceStack.push_back(Tok);
435  break;
436  case tok::r_brace:
437  if (LBraceStack.empty())
438  break;
// Only decide here if nothing inside the braces has settled the kind yet.
439  if (LBraceStack.back()->BlockKind == BK_Unknown) {
440  bool ProbablyBracedList = false;
441  if (Style.Language == FormatStyle::LK_Proto) {
442  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
443  } else {
444  // Using OriginalColumn to distinguish between ObjC methods and
445  // binary operators is a bit hacky.
446  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
447  NextTok->OriginalColumn == 0;
448 
449  // If there is a comma, semicolon or right paren after the closing
450  // brace, we assume this is a braced initializer list. Note that
451  // regardless how we mark inner braces here, we will overwrite the
452  // BlockKind later if we parse a braced list (where all blocks
453  // inside are by default braced lists), or when we explicitly detect
454  // blocks (for example while parsing lambdas).
455  // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
456  // braced list in JS.
457  ProbablyBracedList =
458  (Style.Language == FormatStyle::LK_JavaScript &&
459  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
460  Keywords.kw_as)) ||
461  (Style.isCpp() && NextTok->is(tok::l_paren)) ||
462  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
463  tok::r_paren, tok::r_square, tok::l_brace,
464  tok::ellipsis) ||
465  (NextTok->is(tok::identifier) &&
466  !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
467  (NextTok->is(tok::semi) &&
468  (!ExpectClassBody || LBraceStack.size() != 1)) ||
469  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
470  if (NextTok->is(tok::l_square)) {
471  // We can have an array subscript after a braced init
472  // list, but C++11 attributes are expected after blocks.
473  NextTok = Tokens->getNextToken();
474  ++ReadTokens;
475  ProbablyBracedList = NextTok->isNot(tok::l_square);
476  }
477  }
// Both the closing brace and its matching opening brace get the same kind.
478  if (ProbablyBracedList) {
479  Tok->BlockKind = BK_BracedInit;
480  LBraceStack.back()->BlockKind = BK_BracedInit;
481  } else {
482  Tok->BlockKind = BK_Block;
483  LBraceStack.back()->BlockKind = BK_Block;
484  }
485  }
486  LBraceStack.pop_back();
487  break;
488  case tok::identifier:
// Only identifiers tagged as statement macros take part in the marking below.
489  if (!Tok->is(TT_StatementMacro))
490  break;
491  LLVM_FALLTHROUGH;
492  case tok::at:
493  case tok::semi:
494  case tok::kw_if:
495  case tok::kw_while:
496  case tok::kw_for:
497  case tok::kw_switch:
498  case tok::kw_try:
499  case tok::kw___try:
// Any of these tokens inside still-undecided braces marks them as a block.
500  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
501  LBraceStack.back()->BlockKind = BK_Block;
502  break;
503  default:
504  break;
505  }
506  PrevTok = Tok;
507  Tok = NextTok;
508  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
509 
510  // Assume other blocks for all unclosed opening braces.
511  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
512  if (LBraceStack[i]->BlockKind == BK_Unknown)
513  LBraceStack[i]->BlockKind = BK_Block;
514  }
515 
// Rewind so that the real parse starts again at the original token.
516  FormatTok = Tokens->setPosition(StoredPosition);
517 }
518 
/// Mixes the hash of \p v into \p seed. The result depends on both the
/// previous seed and the order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
524 
525 size_t UnwrappedLineParser::computePPHash() const {
526  size_t h = 0;
527  for (const auto &i : PPStack) {
528  hash_combine(h, size_t(i.Kind));
529  hash_combine(h, i.Line);
530  }
531  return h;
532 }
533 
534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
535  bool MunchSemi) {
536  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
537  "'{' or macro block token expected");
538  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
539  FormatTok->BlockKind = BK_Block;
540 
541  size_t PPStartHash = computePPHash();
542 
543  unsigned InitialLevel = Line->Level;
544  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
545 
546  if (MacroBlock && FormatTok->is(tok::l_paren))
547  parseParens();
548 
549  size_t NbPreprocessorDirectives =
550  CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
551  addUnwrappedLine();
552  size_t OpeningLineIndex =
553  CurrentLines->empty()
555  : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
556 
557  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
558  MustBeDeclaration);
559  if (AddLevel)
560  ++Line->Level;
561  parseLevel(/*HasOpeningBrace=*/true);
562 
563  if (eof())
564  return;
565 
566  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
567  : !FormatTok->is(tok::r_brace)) {
568  Line->Level = InitialLevel;
569  FormatTok->BlockKind = BK_Block;
570  return;
571  }
572 
573  size_t PPEndHash = computePPHash();
574 
575  // Munch the closing brace.
576  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
577 
578  if (MacroBlock && FormatTok->is(tok::l_paren))
579  parseParens();
580 
581  if (MunchSemi && FormatTok->Tok.is(tok::semi))
582  nextToken();
583  Line->Level = InitialLevel;
584 
585  if (PPStartHash == PPEndHash) {
586  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
587  if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
588  // Update the opening line to add the forward reference as well
589  (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
590  CurrentLines->size() - 1;
591  }
592  }
593 }
594 
595 static bool isGoogScope(const UnwrappedLine &Line) {
596  // FIXME: Closure-library specific stuff should not be hard-coded but be
597  // configurable.
598  if (Line.Tokens.size() < 4)
599  return false;
600  auto I = Line.Tokens.begin();
601  if (I->Tok->TokenText != "goog")
602  return false;
603  ++I;
604  if (I->Tok->isNot(tok::period))
605  return false;
606  ++I;
607  if (I->Tok->TokenText != "scope")
608  return false;
609  ++I;
610  return I->Tok->is(tok::l_paren);
611 }
612 
613 static bool isIIFE(const UnwrappedLine &Line,
614  const AdditionalKeywords &Keywords) {
615  // Look for the start of an immediately invoked anonymous function.
616  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
617  // This is commonly done in JavaScript to create a new, anonymous scope.
618  // Example: (function() { ... })()
619  if (Line.Tokens.size() < 3)
620  return false;
621  auto I = Line.Tokens.begin();
622  if (I->Tok->isNot(tok::l_paren))
623  return false;
624  ++I;
625  if (I->Tok->isNot(Keywords.kw_function))
626  return false;
627  ++I;
628  return I->Tok->is(tok::l_paren);
629 }
630 
631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
632  const FormatToken &InitialToken) {
633  if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
634  return Style.BraceWrapping.AfterNamespace;
635  if (InitialToken.is(tok::kw_class))
636  return Style.BraceWrapping.AfterClass;
637  if (InitialToken.is(tok::kw_union))
638  return Style.BraceWrapping.AfterUnion;
639  if (InitialToken.is(tok::kw_struct))
640  return Style.BraceWrapping.AfterStruct;
641  return false;
642 }
643 
644 void UnwrappedLineParser::parseChildBlock() {
645  FormatTok->BlockKind = BK_Block;
646  nextToken();
647  {
648  bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
649  (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
650  ScopedLineState LineState(*this);
651  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
652  /*MustBeDeclaration=*/false);
653  Line->Level += SkipIndent ? 0 : 1;
654  parseLevel(/*HasOpeningBrace=*/true);
655  flushComments(isOnNewLine(*FormatTok));
656  Line->Level -= SkipIndent ? 0 : 1;
657  }
658  nextToken();
659 }
660 
661 void UnwrappedLineParser::parsePPDirective() {
662  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
663  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
664 
665  nextToken();
666 
667  if (!FormatTok->Tok.getIdentifierInfo()) {
668  parsePPUnknown();
669  return;
670  }
671 
672  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
673  case tok::pp_define:
674  parsePPDefine();
675  return;
676  case tok::pp_if:
677  parsePPIf(/*IfDef=*/false);
678  break;
679  case tok::pp_ifdef:
680  case tok::pp_ifndef:
681  parsePPIf(/*IfDef=*/true);
682  break;
683  case tok::pp_else:
684  parsePPElse();
685  break;
686  case tok::pp_elif:
687  parsePPElIf();
688  break;
689  case tok::pp_endif:
690  parsePPEndIf();
691  break;
692  default:
693  parsePPUnknown();
694  break;
695  }
696 }
697 
698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
699  size_t Line = CurrentLines->size();
700  if (CurrentLines == &PreprocessorDirectives)
701  Line += Lines.size();
702 
703  if (Unreachable ||
704  (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
705  PPStack.push_back({PP_Unreachable, Line});
706  else
707  PPStack.push_back({PP_Conditional, Line});
708 }
709 
710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
711  ++PPBranchLevel;
712  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
713  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
714  PPLevelBranchIndex.push_back(0);
715  PPLevelBranchCount.push_back(0);
716  }
717  PPChainBranchIndex.push(0);
718  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
719  conditionalCompilationCondition(Unreachable || Skip);
720 }
721 
722 void UnwrappedLineParser::conditionalCompilationAlternative() {
723  if (!PPStack.empty())
724  PPStack.pop_back();
725  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
726  if (!PPChainBranchIndex.empty())
727  ++PPChainBranchIndex.top();
728  conditionalCompilationCondition(
729  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
730  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
731 }
732 
733 void UnwrappedLineParser::conditionalCompilationEnd() {
734  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
735  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
736  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
737  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
738  }
739  }
740  // Guard against #endif's without #if.
741  if (PPBranchLevel > -1)
742  --PPBranchLevel;
743  if (!PPChainBranchIndex.empty())
744  PPChainBranchIndex.pop();
745  if (!PPStack.empty())
746  PPStack.pop_back();
747 }
748 
749 void UnwrappedLineParser::parsePPIf(bool IfDef) {
750  bool IfNDef = FormatTok->is(tok::pp_ifndef);
751  nextToken();
752  bool Unreachable = false;
753  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
754  Unreachable = true;
755  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
756  Unreachable = true;
757  conditionalCompilationStart(Unreachable);
758  FormatToken *IfCondition = FormatTok;
759  // If there's a #ifndef on the first line, and the only lines before it are
760  // comments, it could be an include guard.
761  bool MaybeIncludeGuard = IfNDef;
762  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
763  for (auto &Line : Lines) {
764  if (!Line.Tokens.front().Tok->is(tok::comment)) {
765  MaybeIncludeGuard = false;
766  IncludeGuard = IG_Rejected;
767  break;
768  }
769  }
770  --PPBranchLevel;
771  parsePPUnknown();
772  ++PPBranchLevel;
773  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
774  IncludeGuard = IG_IfNdefed;
775  IncludeGuardToken = IfCondition;
776  }
777 }
778 
779 void UnwrappedLineParser::parsePPElse() {
780  // If a potential include guard has an #else, it's not an include guard.
781  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
782  IncludeGuard = IG_Rejected;
783  conditionalCompilationAlternative();
784  if (PPBranchLevel > -1)
785  --PPBranchLevel;
786  parsePPUnknown();
787  ++PPBranchLevel;
788 }
789 
790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
791 
792 void UnwrappedLineParser::parsePPEndIf() {
793  conditionalCompilationEnd();
794  parsePPUnknown();
795  // If the #endif of a potential include guard is the last thing in the file,
796  // then we found an include guard.
797  unsigned TokenPosition = Tokens->getPosition();
798  FormatToken *PeekNext = AllTokens[TokenPosition];
799  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
800  PeekNext->is(tok::eof) &&
801  Style.IndentPPDirectives != FormatStyle::PPDIS_None)
802  IncludeGuard = IG_Found;
803 }
804 
805 void UnwrappedLineParser::parsePPDefine() {
806  nextToken();
807 
808  if (!FormatTok->Tok.getIdentifierInfo()) {
809  IncludeGuard = IG_Rejected;
810  IncludeGuardToken = nullptr;
811  parsePPUnknown();
812  return;
813  }
814 
815  if (IncludeGuard == IG_IfNdefed &&
816  IncludeGuardToken->TokenText == FormatTok->TokenText) {
817  IncludeGuard = IG_Defined;
818  IncludeGuardToken = nullptr;
819  for (auto &Line : Lines) {
820  if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
821  IncludeGuard = IG_Rejected;
822  break;
823  }
824  }
825  }
826 
827  nextToken();
828  if (FormatTok->Tok.getKind() == tok::l_paren &&
829  FormatTok->WhitespaceRange.getBegin() ==
830  FormatTok->WhitespaceRange.getEnd()) {
831  parseParens();
832  }
833  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
834  Line->Level += PPBranchLevel + 1;
835  addUnwrappedLine();
836  ++Line->Level;
837 
838  // Errors during a preprocessor directive can only affect the layout of the
839  // preprocessor directive, and thus we ignore them. An alternative approach
840  // would be to use the same approach we use on the file level (no
841  // re-indentation if there was a structural error) within the macro
842  // definition.
843  parseFile();
844 }
845 
846 void UnwrappedLineParser::parsePPUnknown() {
847  do {
848  nextToken();
849  } while (!eof());
850  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
851  Line->Level += PPBranchLevel + 1;
852  addUnwrappedLine();
853 }
854 
855 // Here we blacklist certain tokens that are not usually the first token in an
856 // unwrapped line. This is used in attempt to distinguish macro calls without
857 // trailing semicolons from other constructs split to several lines.
858 static bool tokenCanStartNewLine(const clang::Token &Tok) {
859  // Semicolon can be a null-statement, l_square can be a start of a macro or
860  // a C++11 attribute, but this doesn't seem to be common.
861  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
862  Tok.isNot(tok::l_square) &&
863  // Tokens that can only be used as binary operators and a part of
864  // overloaded operator names.
865  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
866  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
867  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
868  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
869  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
870  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
871  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
872  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
873  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
874  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
875  Tok.isNot(tok::lesslessequal) &&
876  // Colon is used in labels, base class lists, initializer lists,
877  // range-based for loops, ternary operator, but should never be the
878  // first token in an unwrapped line.
879  Tok.isNot(tok::colon) &&
880  // 'noexcept' is a trailing annotation.
881  Tok.isNot(tok::kw_noexcept);
882 }
883 
884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
885  const FormatToken *FormatTok) {
886  // FIXME: This returns true for C/C++ keywords like 'struct'.
887  return FormatTok->is(tok::identifier) &&
888  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
889  !FormatTok->isOneOf(
890  Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
891  Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
892  Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
893  Keywords.kw_let, Keywords.kw_var, tok::kw_const,
894  Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
895  Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
896  Keywords.kw_from));
897 }
898 
899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
900  const FormatToken *FormatTok) {
901  return FormatTok->Tok.isLiteral() ||
902  FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
903  mustBeJSIdent(Keywords, FormatTok);
904 }
905 
906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
907 // when encountered after a value (see mustBeJSIdentOrValue).
908 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
909  const FormatToken *FormatTok) {
910  return FormatTok->isOneOf(
911  tok::kw_return, Keywords.kw_yield,
912  // conditionals
913  tok::kw_if, tok::kw_else,
914  // loops
915  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
916  // switch/case
917  tok::kw_switch, tok::kw_case,
918  // exceptions
919  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
920  // declaration
921  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
922  Keywords.kw_async, Keywords.kw_function,
923  // import/export
924  Keywords.kw_import, tok::kw_export);
925 }
926 
927 // readTokenWithJavaScriptASI reads the next token and terminates the current
928 // line if JavaScript Automatic Semicolon Insertion must
929 // happen between the current token and the next token.
930 //
931 // This method is conservative - it cannot cover all edge cases of JavaScript,
932 // but only aims to correctly handle certain well known cases. It *must not*
933 // return true in speculative cases.
934 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
935  FormatToken *Previous = FormatTok;
936  readToken();
937  FormatToken *Next = FormatTok;
938 
939  bool IsOnSameLine =
940  CommentsBeforeNextToken.empty()
941  ? Next->NewlinesBefore == 0
942  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
943  if (IsOnSameLine)
944  return;
945 
946  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
947  bool PreviousStartsTemplateExpr =
948  Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
949  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
950  // If the line contains an '@' sign, the previous token might be an
951  // annotation, which can precede another identifier/value.
952  bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
953  [](UnwrappedLineNode &LineNode) {
954  return LineNode.Tok->is(tok::at);
955  }) != Line->Tokens.end();
956  if (HasAt)
957  return;
958  }
959  if (Next->is(tok::exclaim) && PreviousMustBeValue)
960  return addUnwrappedLine();
961  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
962  bool NextEndsTemplateExpr =
963  Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
964  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
965  (PreviousMustBeValue ||
966  Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
967  tok::minusminus)))
968  return addUnwrappedLine();
969  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
970  isJSDeclOrStmt(Keywords, Next))
971  return addUnwrappedLine();
972 }
973 
// Parses one structural element starting at the current token (which must not
// be a '{').
//
// The function works in two phases:
//  1. A switch on the kind of the first token hands constructs that are
//     identified by their leading token (namespaces, if/for/while/do/switch,
//     labels, try blocks, extern blocks, JavaScript import/export, several
//     kinds of macros, ...) to dedicated parse functions. A `return` inside a
//     case means the element was handled there; a `break` continues with
//     phase 2.
//  2. A loop consumes tokens one by one, delegating nested constructs
//     (parentheses, square brackets, braced lists, records, enums, ...) to
//     sub-parsers, until something ends the element (for example a ';', a
//     '}', a block) or the end of the token stream is reached.
974 void UnwrappedLineParser::parseStructuralElement() {
975  assert(!FormatTok->is(tok::l_brace));
     // In TableGen, an include directive (optionally followed by a string
     // literal) forms a line of its own.
976  if (Style.Language == FormatStyle::LK_TableGen &&
977  FormatTok->is(tok::pp_include)) {
978  nextToken();
979  if (FormatTok->is(tok::string_literal))
980  nextToken();
981  addUnwrappedLine();
982  return;
983  }
     // Phase 1: constructs recognized by their first token.
984  switch (FormatTok->Tok.getKind()) {
985  case tok::kw_asm:
     // For `asm { ... }`, every token up to the closing brace is marked as
     // finalized and both braces are typed as TT_InlineASMBrace.
986  nextToken();
987  if (FormatTok->is(tok::l_brace)) {
988  FormatTok->Type = TT_InlineASMBrace;
989  nextToken();
990  while (FormatTok && FormatTok->isNot(tok::eof)) {
991  if (FormatTok->is(tok::r_brace)) {
992  FormatTok->Type = TT_InlineASMBrace;
993  nextToken();
994  addUnwrappedLine();
995  break;
996  }
997  FormatTok->Finalized = true;
998  nextToken();
999  }
1000  }
1001  break;
1002  case tok::kw_namespace:
1003  parseNamespace();
1004  return;
1005  case tok::kw_public:
1006  case tok::kw_protected:
1007  case tok::kw_private:
     // In Java, JavaScript and C# these keywords are just consumed; otherwise
     // they are parsed as an access specifier.
1008  if (Style.Language == FormatStyle::LK_Java ||
1009  Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1010  nextToken();
1011  else
1012  parseAccessSpecifier();
1013  return;
1014  case tok::kw_if:
1015  parseIfThenElse();
1016  return;
1017  case tok::kw_for:
1018  case tok::kw_while:
1019  parseForOrWhileLoop();
1020  return;
1021  case tok::kw_do:
1022  parseDoWhile();
1023  return;
1024  case tok::kw_switch:
1025  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1026  // 'switch: string' field declaration.
1027  break;
1028  parseSwitch();
1029  return;
1030  case tok::kw_default:
1031  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1032  // 'default: string' field declaration.
1033  break;
1034  nextToken();
1035  if (FormatTok->is(tok::colon)) {
1036  parseLabel();
1037  return;
1038  }
1039  // e.g. "default void f() {}" in a Java interface.
1040  break;
1041  case tok::kw_case:
1042  if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1043  // 'case: string' field declaration.
1044  break;
1045  parseCaseLabel();
1046  return;
1047  case tok::kw_try:
1048  case tok::kw___try:
1049  parseTryCatch();
1050  return;
1051  case tok::kw_extern:
     // `extern "..." { ... }`: the block is parsed as a declaration block; it
     // only adds an indentation level when AfterExternBlock is set.
1052  nextToken();
1053  if (FormatTok->Tok.is(tok::string_literal)) {
1054  nextToken();
1055  if (FormatTok->Tok.is(tok::l_brace)) {
1056  if (Style.BraceWrapping.AfterExternBlock) {
1057  addUnwrappedLine();
1058  parseBlock(/*MustBeDeclaration=*/true);
1059  } else {
1060  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1061  }
1062  addUnwrappedLine();
1063  return;
1064  }
1065  }
1066  break;
1067  case tok::kw_export:
1068  if (Style.Language == FormatStyle::LK_JavaScript) {
1069  parseJavaScriptEs6ImportExport();
1070  return;
1071  }
1072  if (!Style.isCpp())
1073  break;
1074  // Handle C++ "(inline|export) namespace".
1075  LLVM_FALLTHROUGH;
1076  case tok::kw_inline:
1077  nextToken();
1078  if (FormatTok->Tok.is(tok::kw_namespace)) {
1079  parseNamespace();
1080  return;
1081  }
1082  break;
1083  case tok::identifier:
1084  if (FormatTok->is(TT_ForEachMacro)) {
1085  parseForOrWhileLoop();
1086  return;
1087  }
1088  if (FormatTok->is(TT_MacroBlockBegin)) {
1089  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1090  /*MunchSemi=*/false);
1091  return;
1092  }
1093  if (FormatTok->is(Keywords.kw_import)) {
1094  if (Style.Language == FormatStyle::LK_JavaScript) {
1095  parseJavaScriptEs6ImportExport();
1096  return;
1097  }
     // Proto: `import [public] "file" [;]`. If no string literal follows, this
     // returns without adding an unwrapped line.
1098  if (Style.Language == FormatStyle::LK_Proto) {
1099  nextToken();
1100  if (FormatTok->is(tok::kw_public))
1101  nextToken();
1102  if (!FormatTok->is(tok::string_literal))
1103  return;
1104  nextToken();
1105  if (FormatTok->is(tok::semi))
1106  nextToken();
1107  addUnwrappedLine();
1108  return;
1109  }
1110  }
     // The signals/slots keywords (plain and Q-prefixed) followed by ':' form
     // a line of their own in C++.
1111  if (Style.isCpp() &&
1112  FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1113  Keywords.kw_slots, Keywords.kw_qslots)) {
1114  nextToken();
1115  if (FormatTok->is(tok::colon)) {
1116  nextToken();
1117  addUnwrappedLine();
1118  return;
1119  }
1120  }
1121  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1122  parseStatementMacro();
1123  return;
1124  }
1125  if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1126  parseNamespace();
1127  return;
1128  }
1129  // In all other cases, parse the declaration.
1130  break;
1131  default:
1132  break;
1133  }
     // Phase 2: consume tokens until something ends the structural element.
1134  do {
1135  const FormatToken *Previous = FormatTok->Previous;
1136  switch (FormatTok->Tok.getKind()) {
1137  case tok::at:
     // '@' introduces either a braced list (`@{`), a Java `@interface`, or an
     // Objective-C '@' keyword, which is dispatched on below.
1138  nextToken();
1139  if (FormatTok->Tok.is(tok::l_brace)) {
1140  nextToken();
1141  parseBracedList();
1142  break;
1143  } else if (Style.Language == FormatStyle::LK_Java &&
1144  FormatTok->is(Keywords.kw_interface)) {
1145  nextToken();
1146  break;
1147  }
1148  switch (FormatTok->Tok.getObjCKeywordID()) {
1149  case tok::objc_public:
1150  case tok::objc_protected:
1151  case tok::objc_package:
1152  case tok::objc_private:
1153  return parseAccessSpecifier();
1154  case tok::objc_interface:
1155  case tok::objc_implementation:
1156  return parseObjCInterfaceOrImplementation();
1157  case tok::objc_protocol:
1158  if (parseObjCProtocol())
1159  return;
1160  break;
1161  case tok::objc_end:
1162  return; // Handled by the caller.
1163  case tok::objc_optional:
1164  case tok::objc_required:
1165  nextToken();
1166  addUnwrappedLine();
1167  return;
1168  case tok::objc_autoreleasepool:
1169  nextToken();
1170  if (FormatTok->Tok.is(tok::l_brace)) {
1171  if (Style.BraceWrapping.AfterControlStatement)
1172  addUnwrappedLine();
1173  parseBlock(/*MustBeDeclaration=*/false);
1174  }
1175  addUnwrappedLine();
1176  return;
1177  case tok::objc_synchronized:
1178  nextToken();
1179  if (FormatTok->Tok.is(tok::l_paren))
1180  // Skip synchronization object
1181  parseParens();
1182  if (FormatTok->Tok.is(tok::l_brace)) {
1183  if (Style.BraceWrapping.AfterControlStatement)
1184  addUnwrappedLine();
1185  parseBlock(/*MustBeDeclaration=*/false);
1186  }
1187  addUnwrappedLine();
1188  return;
1189  case tok::objc_try:
1190  // This branch isn't strictly necessary (the kw_try case below would
1191  // do this too after the tok::at is parsed above). But be explicit.
1192  parseTryCatch();
1193  return;
1194  default:
1195  break;
1196  }
1197  break;
1198  case tok::kw_enum:
1199  // Ignore if this is part of "template <enum ...".
1200  if (Previous && Previous->is(tok::less)) {
1201  nextToken();
1202  break;
1203  }
1204 
1205  // parseEnum falls through and does not yet add an unwrapped line as an
1206  // enum definition can start a structural element.
1207  if (!parseEnum())
1208  break;
1209  // This only applies for C++.
1210  if (!Style.isCpp()) {
1211  addUnwrappedLine();
1212  return;
1213  }
1214  break;
1215  case tok::kw_typedef:
1216  nextToken();
1217  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1218  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1219  parseEnum();
1220  break;
1221  case tok::kw_struct:
1222  case tok::kw_union:
1223  case tok::kw_class:
1224  // parseRecord falls through and does not yet add an unwrapped line as a
1225  // record declaration or definition can start a structural element.
1226  parseRecord();
1227  // This does not apply for Java, JavaScript and C#.
1228  if (Style.Language == FormatStyle::LK_Java ||
1229  Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
1230  if (FormatTok->is(tok::semi))
1231  nextToken();
1232  addUnwrappedLine();
1233  return;
1234  }
1235  break;
1236  case tok::period:
1237  nextToken();
1238  // In Java, classes have an implicit static member "class".
1239  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1240  FormatTok->is(tok::kw_class))
1241  nextToken();
1242  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1243  FormatTok->Tok.getIdentifierInfo())
1244  // JavaScript only has pseudo keywords, all keywords are allowed to
1245  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1246  nextToken();
1247  break;
1248  case tok::semi:
1249  nextToken();
1250  addUnwrappedLine();
1251  return;
1252  case tok::r_brace:
1253  addUnwrappedLine();
1254  return;
1255  case tok::l_paren:
1256  parseParens();
1257  break;
1258  case tok::kw_operator:
1259  nextToken();
1260  if (FormatTok->isBinaryOperator())
1261  nextToken();
1262  break;
1263  case tok::caret:
     // After '^': an optional identifier or simple type specifier, optional
     // parentheses, and an optional braced body parsed as a child block.
1264  nextToken();
1265  if (FormatTok->Tok.isAnyIdentifier() ||
1266  FormatTok->isSimpleTypeSpecifier())
1267  nextToken();
1268  if (FormatTok->is(tok::l_paren))
1269  parseParens();
1270  if (FormatTok->is(tok::l_brace))
1271  parseChildBlock();
1272  break;
1273  case tok::l_brace:
1274  if (!tryToParseBracedList()) {
1275  // A block outside of parentheses must be the last part of a
1276  // structural element.
1277  // FIXME: Figure out cases where this is not true, and add projections
1278  // for them (the one we know is missing are lambdas).
1279  if (Style.BraceWrapping.AfterFunction)
1280  addUnwrappedLine();
1281  FormatTok->Type = TT_FunctionLBrace;
1282  parseBlock(/*MustBeDeclaration=*/false);
1283  addUnwrappedLine();
1284  return;
1285  }
1286  // Otherwise this was a braced init list, and the structural
1287  // element continues.
1288  break;
1289  case tok::kw_try:
1290  // We arrive here when parsing function-try blocks.
1291  if (Style.BraceWrapping.AfterFunction)
1292  addUnwrappedLine();
1293  parseTryCatch();
1294  return;
1295  case tok::identifier: {
1296  if (FormatTok->is(TT_MacroBlockEnd)) {
1297  addUnwrappedLine();
1298  return;
1299  }
1300 
1301  // Function declarations (as opposed to function expressions) are parsed
1302  // on their own unwrapped line by continuing this loop. Function
1303  // expressions (functions that are not on their own line) must not create
1304  // a new unwrapped line, so they are special cased below.
1305  size_t TokenCount = Line->Tokens.size();
1306  if (Style.Language == FormatStyle::LK_JavaScript &&
1307  FormatTok->is(Keywords.kw_function) &&
1308  (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1309  Keywords.kw_async)))) {
1310  tryToParseJSFunction();
1311  break;
1312  }
1313  if ((Style.Language == FormatStyle::LK_JavaScript ||
1314  Style.Language == FormatStyle::LK_Java) &&
1315  FormatTok->is(Keywords.kw_interface)) {
1316  if (Style.Language == FormatStyle::LK_JavaScript) {
1317  // In JavaScript/TypeScript, "interface" can be used as a standalone
1318  // identifier, e.g. in `var interface = 1;`. If "interface" is
1319  // followed by another identifier, it is very likely to be an actual
1320  // interface declaration.
     // Peek at the following token, then restore the token source position.
1321  unsigned StoredPosition = Tokens->getPosition();
1322  FormatToken *Next = Tokens->getNextToken();
1323  FormatTok = Tokens->setPosition(StoredPosition);
1324  if (Next && !mustBeJSIdent(Keywords, Next)) {
1325  nextToken();
1326  break;
1327  }
1328  }
1329  parseRecord();
1330  addUnwrappedLine();
1331  return;
1332  }
1333 
1334  if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1335  parseStatementMacro();
1336  return;
1337  }
1338 
1339  // See if the following token should start a new unwrapped line.
1340  StringRef Text = FormatTok->TokenText;
1341  nextToken();
1342 
1343  // JS doesn't have macros, and within classes colons indicate fields, not
1344  // labels.
1345  if (Style.Language == FormatStyle::LK_JavaScript)
1346  break;
1347 
     // Only an identifier that is the first token of the line (optionally
     // preceded by a single comment) is considered as a label or macro.
1348  TokenCount = Line->Tokens.size();
1349  if (TokenCount == 1 ||
1350  (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1351  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1352  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1353  parseLabel();
1354  return;
1355  }
1356  // Recognize function-like macro usages without trailing semicolon as
1357  // well as free-standing macros like Q_OBJECT.
1358  bool FunctionLike = FormatTok->is(tok::l_paren);
1359  if (FunctionLike)
1360  parseParens();
1361 
1362  bool FollowedByNewline =
1363  CommentsBeforeNextToken.empty()
1364  ? FormatTok->NewlinesBefore > 0
1365  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1366 
     // Heuristic: an all-uppercase name that is followed by a newline and is
     // either function-like or at least five characters long is a macro.
1367  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1368  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1369  addUnwrappedLine();
1370  return;
1371  }
1372  }
1373  break;
1374  }
1375  case tok::equal:
1376  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1377  // TT_JsFatArrow. They always start an expression or a child block if
1378  // followed by a curly.
1379  if (FormatTok->is(TT_JsFatArrow)) {
1380  nextToken();
1381  if (FormatTok->is(tok::l_brace))
1382  parseChildBlock();
1383  break;
1384  }
1385 
1386  nextToken();
1387  if (FormatTok->Tok.is(tok::l_brace)) {
1388  nextToken();
1389  parseBracedList();
1390  } else if (Style.Language == FormatStyle::LK_Proto &&
1391  FormatTok->Tok.is(tok::less)) {
1392  nextToken();
1393  parseBracedList(/*ContinueOnSemicolons=*/false,
1394  /*ClosingBraceKind=*/tok::greater);
1395  }
1396  break;
1397  case tok::l_square:
1398  parseSquare();
1399  break;
1400  case tok::kw_new:
1401  parseNew();
1402  break;
1403  default:
1404  nextToken();
1405  break;
1406  }
1407  } while (!eof());
1408 }
1409 
1410 bool UnwrappedLineParser::tryToParseLambda() {
1411  if (!Style.isCpp()) {
1412  nextToken();
1413  return false;
1414  }
1415  assert(FormatTok->is(tok::l_square));
1416  FormatToken &LSquare = *FormatTok;
1417  if (!tryToParseLambdaIntroducer())
1418  return false;
1419 
1420  bool SeenArrow = false;
1421 
1422  while (FormatTok->isNot(tok::l_brace)) {
1423  if (FormatTok->isSimpleTypeSpecifier()) {
1424  nextToken();
1425  continue;
1426  }
1427  switch (FormatTok->Tok.getKind()) {
1428  case tok::l_brace:
1429  break;
1430  case tok::l_paren:
1431  parseParens();
1432  break;
1433  case tok::amp:
1434  case tok::star:
1435  case tok::kw_const:
1436  case tok::comma:
1437  case tok::less:
1438  case tok::greater:
1439  case tok::identifier:
1440  case tok::numeric_constant:
1441  case tok::coloncolon:
1442  case tok::kw_mutable:
1443  case tok::kw_noexcept:
1444  nextToken();
1445  break;
1446  // Specialization of a template with an integer parameter can contain
1447  // arithmetic, logical, comparison and ternary operators.
1448  //
1449  // FIXME: This also accepts sequences of operators that are not in the scope
1450  // of a template argument list.
1451  //
1452  // In a C++ lambda a template type can only occur after an arrow. We use
1453  // this as an heuristic to distinguish between Objective-C expressions
1454  // followed by an `a->b` expression, such as:
1455  // ([obj func:arg] + a->b)
1456  // Otherwise the code below would parse as a lambda.
1457  case tok::plus:
1458  case tok::minus:
1459  case tok::exclaim:
1460  case tok::tilde:
1461  case tok::slash:
1462  case tok::percent:
1463  case tok::lessless:
1464  case tok::pipe:
1465  case tok::pipepipe:
1466  case tok::ampamp:
1467  case tok::caret:
1468  case tok::equalequal:
1469  case tok::exclaimequal:
1470  case tok::greaterequal:
1471  case tok::lessequal:
1472  case tok::question:
1473  case tok::colon:
1474  case tok::kw_true:
1475  case tok::kw_false:
1476  if (SeenArrow) {
1477  nextToken();
1478  break;
1479  }
1480  return true;
1481  case tok::arrow:
1482  // This might or might not actually be a lambda arrow (this could be an
1483  // ObjC method invocation followed by a dereferencing arrow). We might
1484  // reset this back to TT_Unknown in TokenAnnotator.
1485  FormatTok->Type = TT_LambdaArrow;
1486  SeenArrow = true;
1487  nextToken();
1488  break;
1489  default:
1490  return true;
1491  }
1492  }
1493  FormatTok->Type = TT_LambdaLBrace;
1494  LSquare.Type = TT_LambdaLSquare;
1495  parseChildBlock();
1496  return true;
1497 }
1498 
1499 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1500  const FormatToken *Previous = FormatTok->Previous;
1501  if (Previous &&
1502  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1503  tok::kw_delete, tok::l_square) ||
1504  FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1505  Previous->isSimpleTypeSpecifier())) {
1506  nextToken();
1507  return false;
1508  }
1509  nextToken();
1510  if (FormatTok->is(tok::l_square)) {
1511  return false;
1512  }
1513  parseSquare(/*LambdaIntroducer=*/true);
1514  return true;
1515 }
1516 
1517 void UnwrappedLineParser::tryToParseJSFunction() {
1518  assert(FormatTok->is(Keywords.kw_function) ||
1519  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1520  if (FormatTok->is(Keywords.kw_async))
1521  nextToken();
1522  // Consume "function".
1523  nextToken();
1524 
1525  // Consume * (generator function). Treat it like C++'s overloaded operators.
1526  if (FormatTok->is(tok::star)) {
1527  FormatTok->Type = TT_OverloadedOperator;
1528  nextToken();
1529  }
1530 
1531  // Consume function name.
1532  if (FormatTok->is(tok::identifier))
1533  nextToken();
1534 
1535  if (FormatTok->isNot(tok::l_paren))
1536  return;
1537 
1538  // Parse formal parameter list.
1539  parseParens();
1540 
1541  if (FormatTok->is(tok::colon)) {
1542  // Parse a type definition.
1543  nextToken();
1544 
1545  // Eat the type declaration. For braced inline object types, balance braces,
1546  // otherwise just parse until finding an l_brace for the function body.
1547  if (FormatTok->is(tok::l_brace))
1548  tryToParseBracedList();
1549  else
1550  while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1551  nextToken();
1552  }
1553 
1554  if (FormatTok->is(tok::semi))
1555  return;
1556 
1557  parseChildBlock();
1558 }
1559 
1560 bool UnwrappedLineParser::tryToParseBracedList() {
1561  if (FormatTok->BlockKind == BK_Unknown)
1562  calculateBraceTypes();
1563  assert(FormatTok->BlockKind != BK_Unknown);
1564  if (FormatTok->BlockKind == BK_Block)
1565  return false;
1566  nextToken();
1567  parseBracedList();
1568  return true;
1569 }
1570 
1571 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1572  tok::TokenKind ClosingBraceKind) {
1573  bool HasError = false;
1574 
1575  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1576  // replace this by using parseAssigmentExpression() inside.
1577  do {
1578  if (Style.Language == FormatStyle::LK_JavaScript) {
1579  if (FormatTok->is(Keywords.kw_function) ||
1580  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1581  tryToParseJSFunction();
1582  continue;
1583  }
1584  if (FormatTok->is(TT_JsFatArrow)) {
1585  nextToken();
1586  // Fat arrows can be followed by simple expressions or by child blocks
1587  // in curly braces.
1588  if (FormatTok->is(tok::l_brace)) {
1589  parseChildBlock();
1590  continue;
1591  }
1592  }
1593  if (FormatTok->is(tok::l_brace)) {
1594  // Could be a method inside of a braced list `{a() { return 1; }}`.
1595  if (tryToParseBracedList())
1596  continue;
1597  parseChildBlock();
1598  }
1599  }
1600  if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1601  nextToken();
1602  return !HasError;
1603  }
1604  switch (FormatTok->Tok.getKind()) {
1605  case tok::caret:
1606  nextToken();
1607  if (FormatTok->is(tok::l_brace)) {
1608  parseChildBlock();
1609  }
1610  break;
1611  case tok::l_square:
1612  tryToParseLambda();
1613  break;
1614  case tok::l_paren:
1615  parseParens();
1616  // JavaScript can just have free standing methods and getters/setters in
1617  // object literals. Detect them by a "{" following ")".
1618  if (Style.Language == FormatStyle::LK_JavaScript) {
1619  if (FormatTok->is(tok::l_brace))
1620  parseChildBlock();
1621  break;
1622  }
1623  break;
1624  case tok::l_brace:
1625  // Assume there are no blocks inside a braced init list apart
1626  // from the ones we explicitly parse out (like lambdas).
1627  FormatTok->BlockKind = BK_BracedInit;
1628  nextToken();
1629  parseBracedList();
1630  break;
1631  case tok::less:
1632  if (Style.Language == FormatStyle::LK_Proto) {
1633  nextToken();
1634  parseBracedList(/*ContinueOnSemicolons=*/false,
1635  /*ClosingBraceKind=*/tok::greater);
1636  } else {
1637  nextToken();
1638  }
1639  break;
1640  case tok::semi:
1641  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1642  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1643  // used for error recovery if we have otherwise determined that this is
1644  // a braced list.
1645  if (Style.Language == FormatStyle::LK_JavaScript) {
1646  nextToken();
1647  break;
1648  }
1649  HasError = true;
1650  if (!ContinueOnSemicolons)
1651  return !HasError;
1652  nextToken();
1653  break;
1654  case tok::comma:
1655  nextToken();
1656  break;
1657  default:
1658  nextToken();
1659  break;
1660  }
1661  } while (!eof());
1662  return false;
1663 }
1664 
1665 void UnwrappedLineParser::parseParens() {
1666  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1667  nextToken();
1668  do {
1669  switch (FormatTok->Tok.getKind()) {
1670  case tok::l_paren:
1671  parseParens();
1672  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1673  parseChildBlock();
1674  break;
1675  case tok::r_paren:
1676  nextToken();
1677  return;
1678  case tok::r_brace:
1679  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1680  return;
1681  case tok::l_square:
1682  tryToParseLambda();
1683  break;
1684  case tok::l_brace:
1685  if (!tryToParseBracedList())
1686  parseChildBlock();
1687  break;
1688  case tok::at:
1689  nextToken();
1690  if (FormatTok->Tok.is(tok::l_brace)) {
1691  nextToken();
1692  parseBracedList();
1693  }
1694  break;
1695  case tok::kw_class:
1696  if (Style.Language == FormatStyle::LK_JavaScript)
1697  parseRecord(/*ParseAsExpr=*/true);
1698  else
1699  nextToken();
1700  break;
1701  case tok::identifier:
1702  if (Style.Language == FormatStyle::LK_JavaScript &&
1703  (FormatTok->is(Keywords.kw_function) ||
1704  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1705  tryToParseJSFunction();
1706  else
1707  nextToken();
1708  break;
1709  default:
1710  nextToken();
1711  break;
1712  }
1713  } while (!eof());
1714 }
1715 
1716 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1717  if (!LambdaIntroducer) {
1718  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1719  if (tryToParseLambda())
1720  return;
1721  }
1722  do {
1723  switch (FormatTok->Tok.getKind()) {
1724  case tok::l_paren:
1725  parseParens();
1726  break;
1727  case tok::r_square:
1728  nextToken();
1729  return;
1730  case tok::r_brace:
1731  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1732  return;
1733  case tok::l_square:
1734  parseSquare();
1735  break;
1736  case tok::l_brace: {
1737  if (!tryToParseBracedList())
1738  parseChildBlock();
1739  break;
1740  }
1741  case tok::at:
1742  nextToken();
1743  if (FormatTok->Tok.is(tok::l_brace)) {
1744  nextToken();
1745  parseBracedList();
1746  }
1747  break;
1748  default:
1749  nextToken();
1750  break;
1751  }
1752  } while (!eof());
1753 }
1754 
1755 void UnwrappedLineParser::parseIfThenElse() {
1756  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1757  nextToken();
1758  if (FormatTok->Tok.is(tok::kw_constexpr))
1759  nextToken();
1760  if (FormatTok->Tok.is(tok::l_paren))
1761  parseParens();
1762  bool NeedsUnwrappedLine = false;
1763  if (FormatTok->Tok.is(tok::l_brace)) {
1764  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1765  parseBlock(/*MustBeDeclaration=*/false);
1766  if (Style.BraceWrapping.BeforeElse)
1767  addUnwrappedLine();
1768  else
1769  NeedsUnwrappedLine = true;
1770  } else {
1771  addUnwrappedLine();
1772  ++Line->Level;
1773  parseStructuralElement();
1774  --Line->Level;
1775  }
1776  if (FormatTok->Tok.is(tok::kw_else)) {
1777  nextToken();
1778  if (FormatTok->Tok.is(tok::l_brace)) {
1779  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1780  parseBlock(/*MustBeDeclaration=*/false);
1781  addUnwrappedLine();
1782  } else if (FormatTok->Tok.is(tok::kw_if)) {
1783  parseIfThenElse();
1784  } else {
1785  addUnwrappedLine();
1786  ++Line->Level;
1787  parseStructuralElement();
1788  if (FormatTok->is(tok::eof))
1789  addUnwrappedLine();
1790  --Line->Level;
1791  }
1792  } else if (NeedsUnwrappedLine) {
1793  addUnwrappedLine();
1794  }
1795 }
1796 
1797 void UnwrappedLineParser::parseTryCatch() {
1798  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1799  nextToken();
1800  bool NeedsUnwrappedLine = false;
1801  if (FormatTok->is(tok::colon)) {
1802  // We are in a function try block, what comes is an initializer list.
1803  nextToken();
1804  while (FormatTok->is(tok::identifier)) {
1805  nextToken();
1806  if (FormatTok->is(tok::l_paren))
1807  parseParens();
1808  if (FormatTok->is(tok::comma))
1809  nextToken();
1810  }
1811  }
1812  // Parse try with resource.
1813  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1814  parseParens();
1815  }
1816  if (FormatTok->is(tok::l_brace)) {
1817  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1818  parseBlock(/*MustBeDeclaration=*/false);
1819  if (Style.BraceWrapping.BeforeCatch) {
1820  addUnwrappedLine();
1821  } else {
1822  NeedsUnwrappedLine = true;
1823  }
1824  } else if (!FormatTok->is(tok::kw_catch)) {
1825  // The C++ standard requires a compound-statement after a try.
1826  // If there's none, we try to assume there's a structuralElement
1827  // and try to continue.
1828  addUnwrappedLine();
1829  ++Line->Level;
1830  parseStructuralElement();
1831  --Line->Level;
1832  }
1833  while (1) {
1834  if (FormatTok->is(tok::at))
1835  nextToken();
1836  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1837  tok::kw___finally) ||
1838  ((Style.Language == FormatStyle::LK_Java ||
1839  Style.Language == FormatStyle::LK_JavaScript) &&
1840  FormatTok->is(Keywords.kw_finally)) ||
1841  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1842  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1843  break;
1844  nextToken();
1845  while (FormatTok->isNot(tok::l_brace)) {
1846  if (FormatTok->is(tok::l_paren)) {
1847  parseParens();
1848  continue;
1849  }
1850  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1851  return;
1852  nextToken();
1853  }
1854  NeedsUnwrappedLine = false;
1855  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1856  parseBlock(/*MustBeDeclaration=*/false);
1857  if (Style.BraceWrapping.BeforeCatch)
1858  addUnwrappedLine();
1859  else
1860  NeedsUnwrappedLine = true;
1861  }
1862  if (NeedsUnwrappedLine)
1863  addUnwrappedLine();
1864 }
1865 
1866 void UnwrappedLineParser::parseNamespace() {
1867  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1868  "'namespace' expected");
1869 
1870  const FormatToken &InitialToken = *FormatTok;
1871  nextToken();
1872  if (InitialToken.is(TT_NamespaceMacro)) {
1873  parseParens();
1874  } else {
1875  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1876  nextToken();
1877  }
1878  if (FormatTok->Tok.is(tok::l_brace)) {
1879  if (ShouldBreakBeforeBrace(Style, InitialToken))
1880  addUnwrappedLine();
1881 
1882  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1883  (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1884  DeclarationScopeStack.size() > 1);
1885  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1886  // Munch the semicolon after a namespace. This is more common than one would
1887  // think. Puttin the semicolon into its own line is very ugly.
1888  if (FormatTok->Tok.is(tok::semi))
1889  nextToken();
1890  addUnwrappedLine();
1891  }
1892  // FIXME: Add error handling.
1893 }
1894 
1895 void UnwrappedLineParser::parseNew() {
1896  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1897  nextToken();
1898  if (Style.Language != FormatStyle::LK_Java)
1899  return;
1900 
1901  // In Java, we can parse everything up to the parens, which aren't optional.
1902  do {
1903  // There should not be a ;, { or } before the new's open paren.
1904  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1905  return;
1906 
1907  // Consume the parens.
1908  if (FormatTok->is(tok::l_paren)) {
1909  parseParens();
1910 
1911  // If there is a class body of an anonymous class, consume that as child.
1912  if (FormatTok->is(tok::l_brace))
1913  parseChildBlock();
1914  return;
1915  }
1916  nextToken();
1917  } while (!eof());
1918 }
1919 
1920 void UnwrappedLineParser::parseForOrWhileLoop() {
1921  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1922  "'for', 'while' or foreach macro expected");
1923  nextToken();
1924  // JS' for await ( ...
1925  if (Style.Language == FormatStyle::LK_JavaScript &&
1926  FormatTok->is(Keywords.kw_await))
1927  nextToken();
1928  if (FormatTok->Tok.is(tok::l_paren))
1929  parseParens();
1930  if (FormatTok->Tok.is(tok::l_brace)) {
1931  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1932  parseBlock(/*MustBeDeclaration=*/false);
1933  addUnwrappedLine();
1934  } else {
1935  addUnwrappedLine();
1936  ++Line->Level;
1937  parseStructuralElement();
1938  --Line->Level;
1939  }
1940 }
1941 
1942 void UnwrappedLineParser::parseDoWhile() {
1943  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1944  nextToken();
1945  if (FormatTok->Tok.is(tok::l_brace)) {
1946  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1947  parseBlock(/*MustBeDeclaration=*/false);
1948  if (Style.BraceWrapping.IndentBraces)
1949  addUnwrappedLine();
1950  } else {
1951  addUnwrappedLine();
1952  ++Line->Level;
1953  parseStructuralElement();
1954  --Line->Level;
1955  }
1956 
1957  // FIXME: Add error handling.
1958  if (!FormatTok->Tok.is(tok::kw_while)) {
1959  addUnwrappedLine();
1960  return;
1961  }
1962 
1963  nextToken();
1964  parseStructuralElement();
1965 }
1966 
1967 void UnwrappedLineParser::parseLabel() {
1968  nextToken();
1969  unsigned OldLineLevel = Line->Level;
1970  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1971  --Line->Level;
1972  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1973  CompoundStatementIndenter Indenter(this, Line->Level,
1974  Style.BraceWrapping.AfterCaseLabel,
1975  Style.BraceWrapping.IndentBraces);
1976  parseBlock(/*MustBeDeclaration=*/false);
1977  if (FormatTok->Tok.is(tok::kw_break)) {
1978  if (Style.BraceWrapping.AfterControlStatement)
1979  addUnwrappedLine();
1980  parseStructuralElement();
1981  }
1982  addUnwrappedLine();
1983  } else {
1984  if (FormatTok->is(tok::semi))
1985  nextToken();
1986  addUnwrappedLine();
1987  }
1988  Line->Level = OldLineLevel;
1989  if (FormatTok->isNot(tok::l_brace)) {
1990  parseStructuralElement();
1991  addUnwrappedLine();
1992  }
1993 }
1994 
1995 void UnwrappedLineParser::parseCaseLabel() {
1996  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1997  // FIXME: fix handling of complex expressions here.
1998  do {
1999  nextToken();
2000  } while (!eof() && !FormatTok->Tok.is(tok::colon));
2001  parseLabel();
2002 }
2003 
2004 void UnwrappedLineParser::parseSwitch() {
2005  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2006  nextToken();
2007  if (FormatTok->Tok.is(tok::l_paren))
2008  parseParens();
2009  if (FormatTok->Tok.is(tok::l_brace)) {
2010  CompoundStatementIndenter Indenter(this, Style, Line->Level);
2011  parseBlock(/*MustBeDeclaration=*/false);
2012  addUnwrappedLine();
2013  } else {
2014  addUnwrappedLine();
2015  ++Line->Level;
2016  parseStructuralElement();
2017  --Line->Level;
2018  }
2019 }
2020 
2021 void UnwrappedLineParser::parseAccessSpecifier() {
2022  nextToken();
2023  // Understand Qt's slots.
2024  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2025  nextToken();
2026  // Otherwise, we don't know what it is, and we'd better keep the next token.
2027  if (FormatTok->Tok.is(tok::colon))
2028  nextToken();
2029  addUnwrappedLine();
2030 }
2031 
2032 bool UnwrappedLineParser::parseEnum() {
2033  // Won't be 'enum' for NS_ENUMs.
2034  if (FormatTok->Tok.is(tok::kw_enum))
2035  nextToken();
2036 
2037  // In TypeScript, "enum" can also be used as property name, e.g. in interface
2038  // declarations. An "enum" keyword followed by a colon would be a syntax
2039  // error and thus assume it is just an identifier.
2040  if (Style.Language == FormatStyle::LK_JavaScript &&
2041  FormatTok->isOneOf(tok::colon, tok::question))
2042  return false;
2043 
2044  // In protobuf, "enum" can be used as a field name.
2045  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2046  return false;
2047 
2048  // Eat up enum class ...
2049  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2050  nextToken();
2051 
2052  while (FormatTok->Tok.getIdentifierInfo() ||
2053  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2054  tok::greater, tok::comma, tok::question)) {
2055  nextToken();
2056  // We can have macros or attributes in between 'enum' and the enum name.
2057  if (FormatTok->is(tok::l_paren))
2058  parseParens();
2059  if (FormatTok->is(tok::identifier)) {
2060  nextToken();
2061  // If there are two identifiers in a row, this is likely an elaborate
2062  // return type. In Java, this can be "implements", etc.
2063  if (Style.isCpp() && FormatTok->is(tok::identifier))
2064  return false;
2065  }
2066  }
2067 
2068  // Just a declaration or something is wrong.
2069  if (FormatTok->isNot(tok::l_brace))
2070  return true;
2071  FormatTok->BlockKind = BK_Block;
2072 
2073  if (Style.Language == FormatStyle::LK_Java) {
2074  // Java enums are different.
2075  parseJavaEnumBody();
2076  return true;
2077  }
2078  if (Style.Language == FormatStyle::LK_Proto) {
2079  parseBlock(/*MustBeDeclaration=*/true);
2080  return true;
2081  }
2082 
2083  // Parse enum body.
2084  nextToken();
2085  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2086  if (HasError) {
2087  if (FormatTok->is(tok::semi))
2088  nextToken();
2089  addUnwrappedLine();
2090  }
2091  return true;
2092 
2093  // There is no addUnwrappedLine() here so that we fall through to parsing a
2094  // structural element afterwards. Thus, in "enum A {} n, m;",
2095  // "} n, m;" will end up in one unwrapped line.
2096 }
2097 
2098 void UnwrappedLineParser::parseJavaEnumBody() {
2099  // Determine whether the enum is simple, i.e. does not have a semicolon or
2100  // constants with class bodies. Simple enums can be formatted like braced
2101  // lists, contracted to a single line, etc.
2102  unsigned StoredPosition = Tokens->getPosition();
2103  bool IsSimple = true;
2104  FormatToken *Tok = Tokens->getNextToken();
2105  while (Tok) {
2106  if (Tok->is(tok::r_brace))
2107  break;
2108  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2109  IsSimple = false;
2110  break;
2111  }
2112  // FIXME: This will also mark enums with braces in the arguments to enum
2113  // constants as "not simple". This is probably fine in practice, though.
2114  Tok = Tokens->getNextToken();
2115  }
2116  FormatTok = Tokens->setPosition(StoredPosition);
2117 
2118  if (IsSimple) {
2119  nextToken();
2120  parseBracedList();
2121  addUnwrappedLine();
2122  return;
2123  }
2124 
2125  // Parse the body of a more complex enum.
2126  // First add a line for everything up to the "{".
2127  nextToken();
2128  addUnwrappedLine();
2129  ++Line->Level;
2130 
2131  // Parse the enum constants.
2132  while (FormatTok) {
2133  if (FormatTok->is(tok::l_brace)) {
2134  // Parse the constant's class body.
2135  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2136  /*MunchSemi=*/false);
2137  } else if (FormatTok->is(tok::l_paren)) {
2138  parseParens();
2139  } else if (FormatTok->is(tok::comma)) {
2140  nextToken();
2141  addUnwrappedLine();
2142  } else if (FormatTok->is(tok::semi)) {
2143  nextToken();
2144  addUnwrappedLine();
2145  break;
2146  } else if (FormatTok->is(tok::r_brace)) {
2147  addUnwrappedLine();
2148  break;
2149  } else {
2150  nextToken();
2151  }
2152  }
2153 
2154  // Parse the class body after the enum's ";" if any.
2155  parseLevel(/*HasOpeningBrace=*/true);
2156  nextToken();
2157  --Line->Level;
2158  addUnwrappedLine();
2159 }
2160 
2161 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2162  const FormatToken &InitialToken = *FormatTok;
2163  nextToken();
2164 
2165  // The actual identifier can be a nested name specifier, and in macros
2166  // it is often token-pasted.
2167  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2168  tok::kw___attribute, tok::kw___declspec,
2169  tok::kw_alignas) ||
2170  ((Style.Language == FormatStyle::LK_Java ||
2171  Style.Language == FormatStyle::LK_JavaScript) &&
2172  FormatTok->isOneOf(tok::period, tok::comma))) {
2173  if (Style.Language == FormatStyle::LK_JavaScript &&
2174  FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2175  // JavaScript/TypeScript supports inline object types in
2176  // extends/implements positions:
2177  // class Foo implements {bar: number} { }
2178  nextToken();
2179  if (FormatTok->is(tok::l_brace)) {
2180  tryToParseBracedList();
2181  continue;
2182  }
2183  }
2184  bool IsNonMacroIdentifier =
2185  FormatTok->is(tok::identifier) &&
2186  FormatTok->TokenText != FormatTok->TokenText.upper();
2187  nextToken();
2188  // We can have macros or attributes in between 'class' and the class name.
2189  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2190  parseParens();
2191  }
2192 
2193  // Note that parsing away template declarations here leads to incorrectly
2194  // accepting function declarations as record declarations.
2195  // In general, we cannot solve this problem. Consider:
2196  // class A<int> B() {}
2197  // which can be a function definition or a class definition when B() is a
2198  // macro. If we find enough real-world cases where this is a problem, we
2199  // can parse for the 'template' keyword in the beginning of the statement,
2200  // and thus rule out the record production in case there is no template
2201  // (this would still leave us with an ambiguity between template function
2202  // and class declarations).
2203  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2204  while (!eof()) {
2205  if (FormatTok->is(tok::l_brace)) {
2206  calculateBraceTypes(/*ExpectClassBody=*/true);
2207  if (!tryToParseBracedList())
2208  break;
2209  }
2210  if (FormatTok->Tok.is(tok::semi))
2211  return;
2212  nextToken();
2213  }
2214  }
2215  if (FormatTok->Tok.is(tok::l_brace)) {
2216  if (ParseAsExpr) {
2217  parseChildBlock();
2218  } else {
2219  if (ShouldBreakBeforeBrace(Style, InitialToken))
2220  addUnwrappedLine();
2221 
2222  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2223  /*MunchSemi=*/false);
2224  }
2225  }
2226  // There is no addUnwrappedLine() here so that we fall through to parsing a
2227  // structural element afterwards. Thus, in "class A {} n, m;",
2228  // "} n, m;" will end up in one unwrapped line.
2229 }
2230 
2231 void UnwrappedLineParser::parseObjCMethod() {
2232  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2233  "'(' or identifier expected.");
2234  do {
2235  if (FormatTok->Tok.is(tok::semi)) {
2236  nextToken();
2237  addUnwrappedLine();
2238  return;
2239  } else if (FormatTok->Tok.is(tok::l_brace)) {
2240  if (Style.BraceWrapping.AfterFunction)
2241  addUnwrappedLine();
2242  parseBlock(/*MustBeDeclaration=*/false);
2243  addUnwrappedLine();
2244  return;
2245  } else {
2246  nextToken();
2247  }
2248  } while (!eof());
2249 }
2250 
2251 void UnwrappedLineParser::parseObjCProtocolList() {
2252  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2253  do {
2254  nextToken();
2255  // Early exit in case someone forgot a close angle.
2256  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2257  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2258  return;
2259  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2260  nextToken(); // Skip '>'.
2261 }
2262 
2263 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2264  do {
2265  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2266  nextToken();
2267  addUnwrappedLine();
2268  break;
2269  }
2270  if (FormatTok->is(tok::l_brace)) {
2271  parseBlock(/*MustBeDeclaration=*/false);
2272  // In ObjC interfaces, nothing should be following the "}".
2273  addUnwrappedLine();
2274  } else if (FormatTok->is(tok::r_brace)) {
2275  // Ignore stray "}". parseStructuralElement doesn't consume them.
2276  nextToken();
2277  addUnwrappedLine();
2278  } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2279  nextToken();
2280  parseObjCMethod();
2281  } else {
2282  parseStructuralElement();
2283  }
2284  } while (!eof());
2285 }
2286 
2287 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2288  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2289  FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2290  nextToken();
2291  nextToken(); // interface name
2292 
2293  // @interface can be followed by a lightweight generic
2294  // specialization list, then either a base class or a category.
2295  if (FormatTok->Tok.is(tok::less)) {
2296  // Unlike protocol lists, generic parameterizations support
2297  // nested angles:
2298  //
2299  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2300  // NSObject <NSCopying, NSSecureCoding>
2301  //
2302  // so we need to count how many open angles we have left.
2303  unsigned NumOpenAngles = 1;
2304  do {
2305  nextToken();
2306  // Early exit in case someone forgot a close angle.
2307  if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2308  FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2309  break;
2310  if (FormatTok->Tok.is(tok::less))
2311  ++NumOpenAngles;
2312  else if (FormatTok->Tok.is(tok::greater)) {
2313  assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2314  --NumOpenAngles;
2315  }
2316  } while (!eof() && NumOpenAngles != 0);
2317  nextToken(); // Skip '>'.
2318  }
2319  if (FormatTok->Tok.is(tok::colon)) {
2320  nextToken();
2321  nextToken(); // base class name
2322  } else if (FormatTok->Tok.is(tok::l_paren))
2323  // Skip category, if present.
2324  parseParens();
2325 
2326  if (FormatTok->Tok.is(tok::less))
2327  parseObjCProtocolList();
2328 
2329  if (FormatTok->Tok.is(tok::l_brace)) {
2330  if (Style.BraceWrapping.AfterObjCDeclaration)
2331  addUnwrappedLine();
2332  parseBlock(/*MustBeDeclaration=*/true);
2333  }
2334 
2335  // With instance variables, this puts '}' on its own line. Without instance
2336  // variables, this ends the @interface line.
2337  addUnwrappedLine();
2338 
2339  parseObjCUntilAtEnd();
2340 }
2341 
2342 // Returns true for the declaration/definition form of @protocol,
2343 // false for the expression form.
2344 bool UnwrappedLineParser::parseObjCProtocol() {
2345  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2346  nextToken();
2347 
2348  if (FormatTok->is(tok::l_paren))
2349  // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2350  return false;
2351 
2352  // The definition/declaration form,
2353  // @protocol Foo
2354  // - (int)someMethod;
2355  // @end
2356 
2357  nextToken(); // protocol name
2358 
2359  if (FormatTok->Tok.is(tok::less))
2360  parseObjCProtocolList();
2361 
2362  // Check for protocol declaration.
2363  if (FormatTok->Tok.is(tok::semi)) {
2364  nextToken();
2365  addUnwrappedLine();
2366  return true;
2367  }
2368 
2369  addUnwrappedLine();
2370  parseObjCUntilAtEnd();
2371  return true;
2372 }
2373 
2374 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2375  bool IsImport = FormatTok->is(Keywords.kw_import);
2376  assert(IsImport || FormatTok->is(tok::kw_export));
2377  nextToken();
2378 
2379  // Consume the "default" in "export default class/function".
2380  if (FormatTok->is(tok::kw_default))
2381  nextToken();
2382 
2383  // Consume "async function", "function" and "default function", so that these
2384  // get parsed as free-standing JS functions, i.e. do not require a trailing
2385  // semicolon.
2386  if (FormatTok->is(Keywords.kw_async))
2387  nextToken();
2388  if (FormatTok->is(Keywords.kw_function)) {
2389  nextToken();
2390  return;
2391  }
2392 
2393  // For imports, `export *`, `export {...}`, consume the rest of the line up
2394  // to the terminating `;`. For everything else, just return and continue
2395  // parsing the structural element, i.e. the declaration or expression for
2396  // `export default`.
2397  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2398  !FormatTok->isStringLiteral())
2399  return;
2400 
2401  while (!eof()) {
2402  if (FormatTok->is(tok::semi))
2403  return;
2404  if (Line->Tokens.empty()) {
2405  // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2406  // import statement should terminate.
2407  return;
2408  }
2409  if (FormatTok->is(tok::l_brace)) {
2410  FormatTok->BlockKind = BK_Block;
2411  nextToken();
2412  parseBracedList();
2413  } else {
2414  nextToken();
2415  }
2416  }
2417 }
2418 
2419 void UnwrappedLineParser::parseStatementMacro() {
2420  nextToken();
2421  if (FormatTok->is(tok::l_paren))
2422  parseParens();
2423  if (FormatTok->is(tok::semi))
2424  nextToken();
2425  addUnwrappedLine();
2426 }
2427 
2428 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2429  StringRef Prefix = "") {
2430  llvm::dbgs() << Prefix << "Line(" << Line.Level
2431  << ", FSC=" << Line.FirstStartColumn << ")"
2432  << (Line.InPPDirective ? " MACRO" : "") << ": ";
2433  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2434  E = Line.Tokens.end();
2435  I != E; ++I) {
2436  llvm::dbgs() << I->Tok->Tok.getName() << "["
2437  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2438  << "] ";
2439  }
2440  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2441  E = Line.Tokens.end();
2442  I != E; ++I) {
2443  const UnwrappedLineNode &Node = *I;
2445  I = Node.Children.begin(),
2446  E = Node.Children.end();
2447  I != E; ++I) {
2448  printDebugInfo(*I, "\nChild: ");
2449  }
2450  }
2451  llvm::dbgs() << "\n";
2452 }
2453 
2454 void UnwrappedLineParser::addUnwrappedLine() {
2455  if (Line->Tokens.empty())
2456  return;
2457  LLVM_DEBUG({
2458  if (CurrentLines == &Lines)
2459  printDebugInfo(*Line);
2460  });
2461  CurrentLines->push_back(std::move(*Line));
2462  Line->Tokens.clear();
2463  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2464  Line->FirstStartColumn = 0;
2465  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2466  CurrentLines->append(
2467  std::make_move_iterator(PreprocessorDirectives.begin()),
2468  std::make_move_iterator(PreprocessorDirectives.end()));
2469  PreprocessorDirectives.clear();
2470  }
2471  // Disconnect the current token from the last token on the previous line.
2472  FormatTok->Previous = nullptr;
2473 }
2474 
2475 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2476 
2477 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2478  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2479  FormatTok.NewlinesBefore > 0;
2480 }
2481 
2482 // Checks if \p FormatTok is a line comment that continues the line comment
2483 // section on \p Line.
2484 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2485  const UnwrappedLine &Line,
2486  llvm::Regex &CommentPragmasRegex) {
2487  if (Line.Tokens.empty())
2488  return false;
2489 
2490  StringRef IndentContent = FormatTok.TokenText;
2491  if (FormatTok.TokenText.startswith("//") ||
2492  FormatTok.TokenText.startswith("/*"))
2493  IndentContent = FormatTok.TokenText.substr(2);
2494  if (CommentPragmasRegex.match(IndentContent))
2495  return false;
2496 
2497  // If Line starts with a line comment, then FormatTok continues the comment
2498  // section if its original column is greater or equal to the original start
2499  // column of the line.
2500  //
2501  // Define the min column token of a line as follows: if a line ends in '{' or
2502  // contains a '{' followed by a line comment, then the min column token is
2503  // that '{'. Otherwise, the min column token of the line is the first token of
2504  // the line.
2505  //
2506  // If Line starts with a token other than a line comment, then FormatTok
2507  // continues the comment section if its original column is greater than the
2508  // original start column of the min column token of the line.
2509  //
2510  // For example, the second line comment continues the first in these cases:
2511  //
2512  // // first line
2513  // // second line
2514  //
2515  // and:
2516  //
2517  // // first line
2518  // // second line
2519  //
2520  // and:
2521  //
2522  // int i; // first line
2523  // // second line
2524  //
2525  // and:
2526  //
2527  // do { // first line
2528  // // second line
2529  // int i;
2530  // } while (true);
2531  //
2532  // and:
2533  //
2534  // enum {
2535  // a, // first line
2536  // // second line
2537  // b
2538  // };
2539  //
2540  // The second line comment doesn't continue the first in these cases:
2541  //
2542  // // first line
2543  // // second line
2544  //
2545  // and:
2546  //
2547  // int i; // first line
2548  // // second line
2549  //
2550  // and:
2551  //
2552  // do { // first line
2553  // // second line
2554  // int i;
2555  // } while (true);
2556  //
2557  // and:
2558  //
2559  // enum {
2560  // a, // first line
2561  // // second line
2562  // };
2563  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2564 
2565  // Scan for '{//'. If found, use the column of '{' as a min column for line
2566  // comment section continuation.
2567  const FormatToken *PreviousToken = nullptr;
2568  for (const UnwrappedLineNode &Node : Line.Tokens) {
2569  if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2570  isLineComment(*Node.Tok)) {
2571  MinColumnToken = PreviousToken;
2572  break;
2573  }
2574  PreviousToken = Node.Tok;
2575 
2576  // Grab the last newline preceding a token in this unwrapped line.
2577  if (Node.Tok->NewlinesBefore > 0) {
2578  MinColumnToken = Node.Tok;
2579  }
2580  }
2581  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2582  MinColumnToken = PreviousToken;
2583  }
2584 
2585  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2586  MinColumnToken);
2587 }
2588 
2589 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2590  bool JustComments = Line->Tokens.empty();
2592  I = CommentsBeforeNextToken.begin(),
2593  E = CommentsBeforeNextToken.end();
2594  I != E; ++I) {
2595  // Line comments that belong to the same line comment section are put on the
2596  // same line since later we might want to reflow content between them.
2597  // Additional fine-grained breaking of line comment sections is controlled
2598  // by the class BreakableLineCommentSection in case it is desirable to keep
2599  // several line comment sections in the same unwrapped line.
2600  //
2601  // FIXME: Consider putting separate line comment sections as children to the
2602  // unwrapped line instead.
2604  continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2605  if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2606  addUnwrappedLine();
2607  pushToken(*I);
2608  }
2609  if (NewlineBeforeNext && JustComments)
2610  addUnwrappedLine();
2611  CommentsBeforeNextToken.clear();
2612 }
2613 
2614 void UnwrappedLineParser::nextToken(int LevelDifference) {
2615  if (eof())
2616  return;
2617  flushComments(isOnNewLine(*FormatTok));
2618  pushToken(FormatTok);
2619  FormatToken *Previous = FormatTok;
2620  if (Style.Language != FormatStyle::LK_JavaScript)
2621  readToken(LevelDifference);
2622  else
2623  readTokenWithJavaScriptASI();
2624  FormatTok->Previous = Previous;
2625 }
2626 
2627 void UnwrappedLineParser::distributeComments(
2628  const SmallVectorImpl<FormatToken *> &Comments,
2629  const FormatToken *NextTok) {
2630  // Whether or not a line comment token continues a line is controlled by
2631  // the method continuesLineCommentSection, with the following caveat:
2632  //
2633  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2634  // that each comment line from the trail is aligned with the next token, if
2635  // the next token exists. If a trail exists, the beginning of the maximal
2636  // trail is marked as a start of a new comment section.
2637  //
2638  // For example in this code:
2639  //
2640  // int a; // line about a
2641  // // line 1 about b
2642  // // line 2 about b
2643  // int b;
2644  //
2645  // the two lines about b form a maximal trail, so there are two sections, the
2646  // first one consisting of the single comment "// line about a" and the
2647  // second one consisting of the next two comments.
2648  if (Comments.empty())
2649  return;
2650  bool ShouldPushCommentsInCurrentLine = true;
2651  bool HasTrailAlignedWithNextToken = false;
2652  unsigned StartOfTrailAlignedWithNextToken = 0;
2653  if (NextTok) {
2654  // We are skipping the first element intentionally.
2655  for (unsigned i = Comments.size() - 1; i > 0; --i) {
2656  if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2657  HasTrailAlignedWithNextToken = true;
2658  StartOfTrailAlignedWithNextToken = i;
2659  }
2660  }
2661  }
2662  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2663  FormatToken *FormatTok = Comments[i];
2664  if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2665  FormatTok->ContinuesLineCommentSection = false;
2666  } else {
2667  FormatTok->ContinuesLineCommentSection =
2668  continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2669  }
2670  if (!FormatTok->ContinuesLineCommentSection &&
2671  (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2672  ShouldPushCommentsInCurrentLine = false;
2673  }
2674  if (ShouldPushCommentsInCurrentLine) {
2675  pushToken(FormatTok);
2676  } else {
2677  CommentsBeforeNextToken.push_back(FormatTok);
2678  }
2679  }
2680 }
2681 
2682 void UnwrappedLineParser::readToken(int LevelDifference) {
2684  do {
2685  FormatTok = Tokens->getNextToken();
2686  assert(FormatTok);
2687  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2688  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2689  distributeComments(Comments, FormatTok);
2690  Comments.clear();
2691  // If there is an unfinished unwrapped line, we flush the preprocessor
2692  // directives only after that unwrapped line was finished later.
2693  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2694  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2695  assert((LevelDifference >= 0 ||
2696  static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2697  "LevelDifference makes Line->Level negative");
2698  Line->Level += LevelDifference;
2699  // Comments stored before the preprocessor directive need to be output
2700  // before the preprocessor directive, at the same level as the
2701  // preprocessor directive, as we consider them to apply to the directive.
2702  if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2703  PPBranchLevel > 0)
2704  Line->Level += PPBranchLevel;
2705  flushComments(isOnNewLine(*FormatTok));
2706  parsePPDirective();
2707  }
2708  while (FormatTok->Type == TT_ConflictStart ||
2709  FormatTok->Type == TT_ConflictEnd ||
2710  FormatTok->Type == TT_ConflictAlternative) {
2711  if (FormatTok->Type == TT_ConflictStart) {
2712  conditionalCompilationStart(/*Unreachable=*/false);
2713  } else if (FormatTok->Type == TT_ConflictAlternative) {
2714  conditionalCompilationAlternative();
2715  } else if (FormatTok->Type == TT_ConflictEnd) {
2716  conditionalCompilationEnd();
2717  }
2718  FormatTok = Tokens->getNextToken();
2719  FormatTok->MustBreakBefore = true;
2720  }
2721 
2722  if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2723  !Line->InPPDirective) {
2724  continue;
2725  }
2726 
2727  if (!FormatTok->Tok.is(tok::comment)) {
2728  distributeComments(Comments, FormatTok);
2729  Comments.clear();
2730  return;
2731  }
2732 
2733  Comments.push_back(FormatTok);
2734  } while (!eof());
2735 
2736  distributeComments(Comments, nullptr);
2737  Comments.clear();
2738 }
2739 
2740 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2741  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2742  if (MustBreakBeforeNextToken) {
2743  Line->Tokens.back().Tok->MustBreakBefore = true;
2744  MustBreakBeforeNextToken = false;
2745  }
2746 }
2747 
2748 } // end namespace format
2749 } // end namespace clang
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
SmallVector< UnwrappedLine, 0 > Children
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, llvm::Regex &CommentPragmasRegex)
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken *> Tokens, UnwrappedLineConsumer &Callback)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
Token Tok
The Token.
Definition: FormatToken.h:133
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:97
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:220
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:165
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:109
bool isBinaryOperator() const
Definition: FormatToken.h:418
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:139
long i
Definition: xmmintrin.h:1456
tok::TokenKind getKind() const
Definition: Token.h:92
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
unsigned Level
The indent level of the UnwrappedLine.
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:386
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:295
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
__SIZE_TYPE__ size_t
The unsigned integer type of the result of the sizeof operator.
Definition: opencl-c-base.h:40
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:57
bool isNot(T Kind) const
Definition: FormatToken.h:328
static void hash_combine(std::size_t &seed, const T &v)
const FormatToken & Tok
static bool isGoogScope(const UnwrappedLine &Line)
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:508
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:321
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
ContinuationIndenter * Indenter
const AnnotatedLine * Line
StateNode * Previous
static const size_t kInvalidIndex
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:129
SourceLocation getEnd() const
do v
Definition: arm_acle.h:64
#define false
Definition: stdbool.h:17
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:312
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:674
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:146
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:24
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:177
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:66
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:39
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:49
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
ast_type_traits::DynTypedNode Node
bool isNot(tok::TokenKind K) const
Definition: Token.h:98
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:310
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:99
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
virtual unsigned getPosition()=0
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:341
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:60
Represents a complete lambda introducer.
Definition: DeclSpec.h:2559
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:171
StringRef Text
Definition: Format.cpp:1712
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:289
bool isStringLiteral() const
Definition: FormatToken.h:352
SourceLocation getBegin() const
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:143
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:183
const FormatStyle & Style