clang  6.0.0svn
TokenAnnotator.cpp
Go to the documentation of this file.
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// \brief A parser that gathers additional information about tokens.
29 ///
30 /// The \c TokenAnnotator tries to match parentheses and square brackets and
31 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
32 /// into template parameter lists.
33 class AnnotatingParser {
34 public:
/// Creates a parser for \p Line, positioned on the line's first token.
///
/// Pushes one initial context (kind tok::unknown, not an expression) onto the
/// context stack and calls resetTokenMetadata() on the first token.
35  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
36  const AdditionalKeywords &Keywords)
37  : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
38  Keywords(Keywords) {
39  Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
40  resetTokenMetadata(CurrentToken);
41  }
42 
43 private:
/// Tries to parse the tokens after a '<' as a template argument or parameter
/// list.
///
/// CurrentToken must be the token right after the '<'. On success the '<' and
/// its '>' are linked through MatchingParen, the '>' is typed
/// TT_TemplateCloser and CurrentToken is advanced past it.
///
/// \returns true if a matching '>' was found; false if the '<' is rejected as
/// a template opener, a nested construct fails to parse, or the tokens run
/// out first.
44  bool parseAngle() {
45  if (!CurrentToken || !CurrentToken->Previous)
46  return false;
    // This '<' has already been recorded as not opening a template.
47  if (NonTemplateLess.count(CurrentToken->Previous))
48  return false;
49 
50  const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
51  if (Previous.Previous) {
    // A '<' directly after a literal is never accepted as a template opener.
52  if (Previous.Previous->Tok.isLiteral())
53  return false;
    // Inside a nested context, a '<' after ')' is only accepted when that ')'
    // closes the parameter list of an overloaded operator.
54  if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
55  (!Previous.Previous->MatchingParen ||
56  !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
57  return false;
58  }
59 
60  FormatToken *Left = CurrentToken->Previous;
61  Left->ParentBracket = Contexts.back().ContextKind;
62  ScopedContextCreator ContextCreator(*this, tok::less, 12);
63 
64  // If this angle is in the context of an expression, we need to be more
65  // hesitant to detect it as opening template parameters.
66  bool InExprContext = Contexts.back().IsExpression;
67 
68  Contexts.back().IsExpression = false;
69  // If there's a template keyword before the opening angle bracket, this is a
70  // template parameter, not an argument.
71  Contexts.back().InTemplateArgument =
72  Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
73 
    // Java: skip a leading '?' right after the '<'.
74  if (Style.Language == FormatStyle::LK_Java &&
75  CurrentToken->is(tok::question))
76  next();
77 
78  while (CurrentToken) {
79  if (CurrentToken->is(tok::greater)) {
80  Left->MatchingParen = CurrentToken;
81  CurrentToken->MatchingParen = Left;
82  CurrentToken->Type = TT_TemplateCloser;
83  next();
84  return true;
85  }
86  if (CurrentToken->is(tok::question) &&
87  Style.Language == FormatStyle::LK_Java) {
88  next();
89  continue;
90  }
    // A closing bracket of another kind, or (outside proto languages) a ':' or
    // '?' while in an expression context, ends the attempt.
91  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
92  (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
93  Style.Language != FormatStyle::LK_Proto &&
94  Style.Language != FormatStyle::LK_TextProto))
95  return false;
96  // If a && or || is found and interpreted as a binary operator, this set
97  // of angles is likely part of something like "a < b && c > d". If the
98  // angles are inside an expression, the ||/&& might also be a binary
99  // operator that was misinterpreted because we are parsing template
100  // parameters.
101  // FIXME: This is getting out of hand, write a decent parser.
102  if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
103  CurrentToken->Previous->is(TT_BinaryOperator) &&
104  Contexts[Contexts.size() - 2].IsExpression &&
105  !Line.startsWith(tok::kw_template))
106  return false;
107  updateParameterCount(Left, CurrentToken);
    // Proto: the token before a ':' (or before a '{' / '<' that does not
    // follow a ':') is typed as a selector name.
108  if (Style.Language == FormatStyle::LK_Proto) {
109  if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
110  if (CurrentToken->is(tok::colon) ||
111  (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
112  Previous->isNot(tok::colon)))
113  Previous->Type = TT_SelectorName;
114  }
115  }
116  if (!consumeToken())
117  return false;
118  }
119  return false;
120  }
121 
/// Parses a parenthesized token sequence; CurrentToken must be the token right
/// after the '('.
///
/// Decides whether the contents form an expression, classifies the '(' (ObjC
/// block, ObjC selector, attribute, function type), links '(' and ')' through
/// MatchingParen and records the packing kind of the contents on the '('.
///
/// \param LookForDecls when true, a '*', '&' or '&&' found between two
/// identifiers and not followed by '=' is retyped as a binary operator; this
/// is done at most once per call.
/// \returns true once the matching ')' has been consumed; false if there is no
/// current token, a ']' or '}' is hit first, a nested construct fails to
/// parse, or the tokens run out.
122  bool parseParens(bool LookForDecls = false) {
123  if (!CurrentToken)
124  return false;
125  FormatToken *Left = CurrentToken->Previous;
126  Left->ParentBracket = Contexts.back().ContextKind;
127  ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
128 
129  // FIXME: This is a bit of a hack. Do better.
130  Contexts.back().ColonIsForRangeExpr =
131  Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
132 
133  bool StartsObjCMethodExpr = false;
134  if (CurrentToken->is(tok::caret)) {
135  // (^ can start a block type.
136  Left->Type = TT_ObjCBlockLParen;
137  } else if (FormatToken *MaybeSel = Left->Previous) {
138  // @selector( starts a selector.
139  if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
140  MaybeSel->Previous->is(tok::at)) {
141  StartsObjCMethodExpr = true;
142  }
143  }
144 
    // Decide whether the parenthesized contents are an expression. The first
    // matching branch wins, so the order of this chain is significant.
145  if (Left->is(TT_OverloadedOperatorLParen)) {
146  Contexts.back().IsExpression = false;
147  } else if (Style.Language == FormatStyle::LK_JavaScript &&
148  (Line.startsWith(Keywords.kw_type, tok::identifier) ||
149  Line.startsWith(tok::kw_export, Keywords.kw_type,
150  tok::identifier))) {
151  // type X = (...);
152  // export type X = (...);
153  Contexts.back().IsExpression = false;
154  } else if (Left->Previous &&
155  (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
156  tok::kw_if, tok::kw_while, tok::l_paren,
157  tok::comma) ||
158  Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
159  Left->Previous->is(TT_BinaryOperator))) {
160  // static_assert, if and while usually contain expressions.
161  Contexts.back().IsExpression = true;
162  } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
163  (Left->Previous->is(Keywords.kw_function) ||
164  (Left->Previous->endsSequence(tok::identifier,
165  Keywords.kw_function)))) {
166  // function(...) or function f(...)
167  Contexts.back().IsExpression = false;
168  } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
169  Left->Previous->is(TT_JsTypeColon)) {
170  // let x: (SomeType);
171  Contexts.back().IsExpression = false;
172  } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
173  Left->Previous->MatchingParen &&
174  Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
175  // This is a parameter list of a lambda expression.
176  Contexts.back().IsExpression = false;
177  } else if (Line.InPPDirective &&
178  (!Left->Previous || !Left->Previous->is(tok::identifier))) {
179  Contexts.back().IsExpression = true;
180  } else if (Contexts[Contexts.size() - 2].CaretFound) {
181  // This is the parameter list of an ObjC block.
182  Contexts.back().IsExpression = false;
183  } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
184  Left->Type = TT_AttributeParen;
185  } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
186  // The first argument to a foreach macro is a declaration.
187  Contexts.back().IsForEachMacro = true;
188  Contexts.back().IsExpression = false;
189  } else if (Left->Previous && Left->Previous->MatchingParen &&
190  Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
191  Contexts.back().IsExpression = false;
192  } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
193  bool IsForOrCatch =
194  Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
195  Contexts.back().IsExpression = !IsForOrCatch;
196  }
197 
198  if (StartsObjCMethodExpr) {
199  Contexts.back().ColonIsObjCMethodExpr = true;
200  Left->Type = TT_ObjCMethodExpr;
201  }
202 
203  bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
204  bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp);
205  bool HasMultipleLines = false;
206  bool HasMultipleParametersOnALine = false;
207  bool MightBeObjCForRangeLoop =
208  Left->Previous && Left->Previous->is(tok::kw_for);
209  while (CurrentToken) {
210  // LookForDecls is set when "if (" has been seen. Check for
211  // 'identifier' '*' 'identifier' followed by not '=' -- this
212  // '*' has to be a binary operator but determineStarAmpUsage() will
213  // categorize it as an unary operator, so set the right type here.
214  if (LookForDecls && CurrentToken->Next) {
215  FormatToken *Prev = CurrentToken->getPreviousNonComment();
216  if (Prev) {
217  FormatToken *PrevPrev = Prev->getPreviousNonComment();
218  FormatToken *Next = CurrentToken->Next;
219  if (PrevPrev && PrevPrev->is(tok::identifier) &&
220  Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
221  CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
222  Prev->Type = TT_BinaryOperator;
223  LookForDecls = false;
224  }
225  }
226  }
227 
228  if (CurrentToken->Previous->is(TT_PointerOrReference) &&
229  CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
230  tok::coloncolon))
231  ProbablyFunctionType = true;
232  if (CurrentToken->is(tok::comma))
233  MightBeFunctionType = false;
234  if (CurrentToken->Previous->is(TT_BinaryOperator))
235  Contexts.back().IsExpression = true;
236  if (CurrentToken->is(tok::r_paren)) {
237  if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
238  (CurrentToken->Next->is(tok::l_paren) ||
239  (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
240  Left->Type = TT_FunctionTypeLParen;
241  Left->MatchingParen = CurrentToken;
242  CurrentToken->MatchingParen = Left;
243 
244  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
245  Left->Previous && Left->Previous->is(tok::l_paren)) {
246  // Detect the case where macros are used to generate lambdas or
247  // function bodies, e.g.:
248  // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
249  for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
250  if (Tok->is(TT_BinaryOperator) &&
251  Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
252  Tok->Type = TT_PointerOrReference;
253  }
254  }
255 
256  if (StartsObjCMethodExpr) {
257  CurrentToken->Type = TT_ObjCMethodExpr;
258  if (Contexts.back().FirstObjCSelectorName) {
259  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
260  Contexts.back().LongestObjCSelectorName;
261  }
262  }
263 
    // The ')' takes over the attribute / Java annotation typing of its '('
    // or of the token before the '('.
264  if (Left->is(TT_AttributeParen))
265  CurrentToken->Type = TT_AttributeParen;
266  if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
267  CurrentToken->Type = TT_JavaAnnotation;
268  if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
269  CurrentToken->Type = TT_LeadingJavaAnnotation;
270 
    // Record how the contents were laid out across lines in the input.
271  if (!HasMultipleLines)
272  Left->PackingKind = PPK_Inconclusive;
273  else if (HasMultipleParametersOnALine)
274  Left->PackingKind = PPK_BinPacked;
275  else
276  Left->PackingKind = PPK_OnePerLine;
277 
278  next();
279  return true;
280  }
281  if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
282  return false;
283 
284  if (CurrentToken->is(tok::l_brace))
285  Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
286  if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
287  !CurrentToken->Next->HasUnescapedNewline &&
288  !CurrentToken->Next->isTrailingComment())
289  HasMultipleParametersOnALine = true;
290  if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
291  CurrentToken->Previous->isSimpleTypeSpecifier()) &&
292  !CurrentToken->is(tok::l_brace))
293  Contexts.back().IsExpression = false;
294  if (CurrentToken->isOneOf(tok::semi, tok::colon))
295  MightBeObjCForRangeLoop = false;
296  if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
297  CurrentToken->Type = TT_ObjCForIn;
298  // When we discover a 'new', we set CanBeExpression to 'false' in order to
299  // parse the type correctly. Reset that after a comma.
300  if (CurrentToken->is(tok::comma))
301  Contexts.back().CanBeExpression = true;
302 
    // Remember the token before consuming it: consumeToken() advances
    // CurrentToken, possibly past a whole nested construct.
303  FormatToken *Tok = CurrentToken;
304  if (!consumeToken())
305  return false;
306  updateParameterCount(Left, Tok);
307  if (CurrentToken && CurrentToken->HasUnescapedNewline)
308  HasMultipleLines = true;
309  }
310  return false;
311  }
312 
/// Parses a square-bracketed token sequence; CurrentToken must be the token
/// right after the '['.
///
/// A '[' that is still TT_Unknown is classified first (structured binding,
/// ObjC method expression, JS computed property name, designated initializer,
/// array initializer or array subscript). Tokens are then consumed up to the
/// matching ']', and both brackets are linked through MatchingParen.
///
/// \returns true once the matching ']' has been consumed; false if there is no
/// current token, a ')' or '}' is hit first, a nested construct fails to
/// parse, or the tokens run out.
313  bool parseSquare() {
314  if (!CurrentToken)
315  return false;
316 
317  // A '[' could be an index subscript (after an identifier or after
318  // ')' or ']'), it could be the start of an Objective-C method
319  // expression, or it could the start of an Objective-C array literal.
320  FormatToken *Left = CurrentToken->Previous;
321  Left->ParentBracket = Contexts.back().ContextKind;
322  FormatToken *Parent = Left->getPreviousNonComment();
323 
324  // Cases where '>' is followed by '['.
325  // In C++, this can happen either in array of templates (foo<int>[10])
326  // or when array is a nested template type (unique_ptr<type1<type2>[]>).
327  bool CppArrayTemplates =
328  Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
329  (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
330  Contexts.back().InTemplateArgument);
331 
332  bool StartsObjCMethodExpr =
333  !CppArrayTemplates && Style.isCpp() &&
334  Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
335  CurrentToken->isNot(tok::l_brace) &&
336  (!Parent ||
337  Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
338  tok::kw_return, tok::kw_throw) ||
339  Parent->isUnaryOperator() ||
340  Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
341  getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
342  bool ColonFound = false;
343 
    // Value handed to the context created below; only the plain array
    // subscript fallback raises it from 1 to 10.
344  unsigned BindingIncrease = 1;
345  if (Left->isCppStructuredBinding(Style)) {
346  Left->Type = TT_StructuredBindingLSquare;
347  } else if (Left->is(TT_Unknown)) {
348  if (StartsObjCMethodExpr) {
349  Left->Type = TT_ObjCMethodExpr;
350  } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
351  Contexts.back().ContextKind == tok::l_brace &&
352  Parent->isOneOf(tok::l_brace, tok::comma)) {
353  Left->Type = TT_JsComputedPropertyName;
354  } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
355  Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
356  Left->Type = TT_DesignatedInitializerLSquare;
357  } else if (CurrentToken->is(tok::r_square) && Parent &&
358  Parent->is(TT_TemplateCloser)) {
359  Left->Type = TT_ArraySubscriptLSquare;
360  } else if (Style.Language == FormatStyle::LK_Proto ||
361  (!CppArrayTemplates && Parent &&
362  Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
363  tok::comma, tok::l_paren, tok::l_square,
364  tok::question, tok::colon, tok::kw_return,
365  // Should only be relevant to JavaScript:
366  tok::kw_default))) {
367  Left->Type = TT_ArrayInitializerLSquare;
368  } else {
369  BindingIncrease = 10;
370  Left->Type = TT_ArraySubscriptLSquare;
371  }
372  }
373 
374  ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
375  Contexts.back().IsExpression = true;
376  if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
377  Parent->is(TT_JsTypeColon))
378  Contexts.back().IsExpression = false;
379 
380  Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
381 
382  while (CurrentToken) {
383  if (CurrentToken->is(tok::r_square)) {
384  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
385  Left->is(TT_ObjCMethodExpr)) {
386  // An ObjC method call is rarely followed by an open parenthesis.
387  // FIXME: Do we incorrectly label ":" with this?
388  StartsObjCMethodExpr = false;
389  Left->Type = TT_Unknown;
390  }
    // "CurrentToken->Previous != Left" excludes an empty "[]".
391  if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
392  CurrentToken->Type = TT_ObjCMethodExpr;
393  // determineStarAmpUsage() thinks that '*' '[' is allocating an
394  // array of pointers, but if '[' starts a selector then '*' is a
395  // binary operator.
396  if (Parent && Parent->is(TT_PointerOrReference))
397  Parent->Type = TT_BinaryOperator;
398  }
399  Left->MatchingParen = CurrentToken;
400  CurrentToken->MatchingParen = Left;
    // Store the longest selector name seen in this context on the first
    // selector name; it is reset to 0 when more than one block parameter was
    // counted on the '['.
401  if (Contexts.back().FirstObjCSelectorName) {
402  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
403  Contexts.back().LongestObjCSelectorName;
404  if (Left->BlockParameterCount > 1)
405  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
406  }
407  next();
408  return true;
409  }
410  if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
411  return false;
412  if (CurrentToken->is(tok::colon)) {
    // A ':' inside what was taken for a subscript or designated initializer
    // reclassifies the '[' as an ObjC method expression.
413  if (Left->isOneOf(TT_ArraySubscriptLSquare,
414  TT_DesignatedInitializerLSquare)) {
415  Left->Type = TT_ObjCMethodExpr;
416  StartsObjCMethodExpr = true;
417  Contexts.back().ColonIsObjCMethodExpr = true;
418  if (Parent && Parent->is(tok::r_paren))
419  Parent->Type = TT_CastRParen;
420  }
421  ColonFound = true;
422  }
    // A ',' before any ':' means this is an array initializer after all.
423  if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
424  !ColonFound)
425  Left->Type = TT_ArrayInitializerLSquare;
426  FormatToken *Tok = CurrentToken;
427  if (!consumeToken())
428  return false;
429  updateParameterCount(Left, Tok);
430  }
431  return false;
432  }
433 
/// Parses a brace-delimited token sequence; CurrentToken is expected to be the
/// token right after the '{'.
///
/// Types the '{' as an ObjC block brace or a dict literal where applicable,
/// types selector names in front of ':' (and, for proto languages, in front of
/// '{' and '<'), and links '{' and '}' through MatchingParen.
///
/// \returns false if a ')' or ']' is hit before the matching '}' or a nested
/// construct fails to parse; true otherwise, including when there is no
/// current token or the tokens run out before a '}' is seen.
434  bool parseBrace() {
435  if (CurrentToken) {
436  FormatToken *Left = CurrentToken->Previous;
437  Left->ParentBracket = Contexts.back().ContextKind;
438 
    // A '^' seen in the enclosing context makes this the brace of an ObjC
    // block; the flag is cleared once it has been used.
439  if (Contexts.back().CaretFound)
440  Left->Type = TT_ObjCBlockLBrace;
441  Contexts.back().CaretFound = false;
442 
443  ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
444  Contexts.back().ColonIsDictLiteral = true;
445  if (Left->BlockKind == BK_BracedInit)
446  Contexts.back().IsExpression = true;
447  if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
448  Left->Previous->is(TT_JsTypeColon))
449  Contexts.back().IsExpression = false;
450 
451  while (CurrentToken) {
452  if (CurrentToken->is(tok::r_brace)) {
453  Left->MatchingParen = CurrentToken;
454  CurrentToken->MatchingParen = Left;
455  next();
456  return true;
457  }
458  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
459  return false;
460  updateParameterCount(Left, CurrentToken);
461  if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
462  FormatToken *Previous = CurrentToken->getPreviousNonComment();
    // Look through a JS optional-type '?' to the token it is attached to.
463  if (Previous->is(TT_JsTypeOptionalQuestion))
464  Previous = Previous->getPreviousNonComment();
465  if (((CurrentToken->is(tok::colon) &&
466  (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
467  Style.Language == FormatStyle::LK_Proto ||
468  Style.Language == FormatStyle::LK_TextProto) &&
469  (Previous->Tok.getIdentifierInfo() ||
470  Previous->is(tok::string_literal)))
471  Previous->Type = TT_SelectorName;
472  if (CurrentToken->is(tok::colon) ||
473  Style.Language == FormatStyle::LK_JavaScript)
474  Left->Type = TT_DictLiteral;
475  }
476  if (CurrentToken->is(tok::comma) &&
477  Style.Language == FormatStyle::LK_JavaScript)
478  Left->Type = TT_DictLiteral;
479  if (!consumeToken())
480  return false;
481  }
482  }
    // Reached when there was nothing to parse or the tokens ran out before a
    // '}' was seen.
483  return true;
484  }
485 
486  void updateParameterCount(FormatToken *Left, FormatToken *Current) {
487  if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
488  ++Left->BlockParameterCount;
489  if (Current->is(tok::comma)) {
490  ++Left->ParameterCount;
491  if (!Left->Role)
492  Left->Role.reset(new CommaSeparatedList(Style));
493  Left->Role->CommaFound(Current);
494  } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
495  Left->ParameterCount = 1;
496  }
497  }
498 
499  bool parseConditional() {
500  while (CurrentToken) {
501  if (CurrentToken->is(tok::colon)) {
502  CurrentToken->Type = TT_ConditionalExpr;
503  next();
504  return true;
505  }
506  if (!consumeToken())
507  return false;
508  }
509  return false;
510  }
511 
512  bool parseTemplateDeclaration() {
513  if (CurrentToken && CurrentToken->is(tok::less)) {
514  CurrentToken->Type = TT_TemplateOpener;
515  next();
516  if (!parseAngle())
517  return false;
518  if (CurrentToken)
519  CurrentToken->Previous->ClosesTemplateDeclaration = true;
520  return true;
521  }
522  return false;
523  }
524 
/// Consumes CurrentToken and annotates it according to its token kind.
///
/// Opening brackets ('(', '[', '{', '<') and the keywords if / while / for
/// recurse into the matching bracket parser, so one call may advance
/// CurrentToken past a whole nested construct.
///
/// \returns false if the token is a ')' or ']', a '}' that is not the first
/// token of the line, a ':' that is the first token of the line, or if a
/// nested parenthesis, square bracket or brace parse fails; true otherwise.
/// The results of parseConditional() and parseTemplateDeclaration() are not
/// propagated.
525  bool consumeToken() {
    // Advance first: from here on Tok is the token being classified and
    // CurrentToken is the one after it.
526  FormatToken *Tok = CurrentToken;
527  next();
528  switch (Tok->Tok.getKind()) {
529  case tok::plus:
530  case tok::minus:
531  if (!Tok->Previous && Line.MustBeDeclaration)
532  Tok->Type = TT_ObjCMethodSpecifier;
533  break;
534  case tok::colon:
535  if (!Tok->Previous)
536  return false;
537  // Colons from ?: are handled in parseConditional().
538  if (Style.Language == FormatStyle::LK_JavaScript) {
539  if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
540  (Contexts.size() == 1 && // switch/case labels
541  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
542  Contexts.back().ContextKind == tok::l_paren || // function params
543  Contexts.back().ContextKind == tok::l_square || // array type
544  (!Contexts.back().IsExpression &&
545  Contexts.back().ContextKind == tok::l_brace) || // object type
546  (Contexts.size() == 1 &&
547  Line.MustBeDeclaration)) { // method/property declaration
548  Contexts.back().IsExpression = false;
549  Tok->Type = TT_JsTypeColon;
550  break;
551  }
552  }
    // The first matching branch decides the meaning of the ':'; the order of
    // this chain is significant.
553  if (Contexts.back().ColonIsDictLiteral ||
554  Style.Language == FormatStyle::LK_Proto ||
555  Style.Language == FormatStyle::LK_TextProto) {
556  Tok->Type = TT_DictLiteral;
557  if (Style.Language == FormatStyle::LK_TextProto) {
558  if (FormatToken *Previous = Tok->getPreviousNonComment())
559  Previous->Type = TT_SelectorName;
560  }
561  } else if (Contexts.back().ColonIsObjCMethodExpr ||
562  Line.startsWith(TT_ObjCMethodSpecifier)) {
563  Tok->Type = TT_ObjCMethodExpr;
564  const FormatToken *BeforePrevious = Tok->Previous->Previous;
565  if (!BeforePrevious ||
566  !(BeforePrevious->is(TT_CastRParen) ||
567  (BeforePrevious->is(TT_ObjCMethodExpr) &&
568  BeforePrevious->is(tok::colon))) ||
569  BeforePrevious->is(tok::r_square) ||
570  Contexts.back().LongestObjCSelectorName == 0) {
571  Tok->Previous->Type = TT_SelectorName;
    // Track the widest selector name and the first one in this context.
572  if (Tok->Previous->ColumnWidth >
573  Contexts.back().LongestObjCSelectorName)
574  Contexts.back().LongestObjCSelectorName =
575  Tok->Previous->ColumnWidth;
576  if (!Contexts.back().FirstObjCSelectorName)
577  Contexts.back().FirstObjCSelectorName = Tok->Previous;
578  }
579  } else if (Contexts.back().ColonIsForRangeExpr) {
580  Tok->Type = TT_RangeBasedForLoopColon;
581  } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
582  Tok->Type = TT_BitFieldColon;
583  } else if (Contexts.size() == 1 &&
584  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
585  if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
586  tok::kw_noexcept))
587  Tok->Type = TT_CtorInitializerColon;
588  else
589  Tok->Type = TT_InheritanceColon;
590  } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
591  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
592  // This handles a special macro in ObjC code where selectors including
593  // the colon are passed as macro arguments.
594  Tok->Type = TT_ObjCMethodExpr;
595  } else if (Contexts.back().ContextKind == tok::l_paren) {
596  Tok->Type = TT_InlineASMColon;
597  }
598  break;
599  case tok::pipe:
600  case tok::amp:
601  // | and & in declarations/type expressions represent union and
602  // intersection types, respectively.
603  if (Style.Language == FormatStyle::LK_JavaScript &&
604  !Contexts.back().IsExpression)
605  Tok->Type = TT_JsTypeOperator;
606  break;
607  case tok::kw_if:
608  case tok::kw_while:
    // Skip the 'constexpr' of "if constexpr (".
609  if (Tok->is(tok::kw_if) && CurrentToken &&
610  CurrentToken->is(tok::kw_constexpr))
611  next();
612  if (CurrentToken && CurrentToken->is(tok::l_paren)) {
613  next();
614  if (!parseParens(/*LookForDecls=*/true))
615  return false;
616  }
617  break;
618  case tok::kw_for:
619  if (Style.Language == FormatStyle::LK_JavaScript) {
620  // x.for and {for: ...}
621  if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
622  (Tok->Next && Tok->Next->is(tok::colon)))
623  break;
624  // JS' for await ( ...
625  if (CurrentToken && CurrentToken->is(Keywords.kw_await))
626  next();
627  }
628  Contexts.back().ColonIsForRangeExpr = true;
    // Step over the token after 'for' (and optional 'await') without checking
    // that it is a '(', then parse the loop header.
629  next();
630  if (!parseParens())
631  return false;
632  break;
633  case tok::l_paren:
634  // When faced with 'operator()()', the kw_operator handler incorrectly
635  // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
636  // the first two parens OverloadedOperators and the second l_paren an
637  // OverloadedOperatorLParen.
638  if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
639  Tok->Previous->MatchingParen &&
640  Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
641  Tok->Previous->Type = TT_OverloadedOperator;
642  Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
643  Tok->Type = TT_OverloadedOperatorLParen;
644  }
645 
646  if (!parseParens())
647  return false;
    // A top-level, non-expression parenthesis on a declaration line marks the
    // line as a possible function declaration.
648  if (Line.MustBeDeclaration && Contexts.size() == 1 &&
649  !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
650  (!Tok->Previous ||
651  !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
652  TT_LeadingJavaAnnotation)))
653  Line.MightBeFunctionDecl = true;
654  break;
655  case tok::l_square:
656  if (!parseSquare())
657  return false;
658  break;
659  case tok::l_brace:
660  if (Style.Language == FormatStyle::LK_TextProto) {
661  FormatToken *Previous = Tok->getPreviousNonComment();
662  if (Previous && Previous->Type != TT_DictLiteral)
663  Previous->Type = TT_SelectorName;
664  }
665  if (!parseBrace())
666  return false;
667  break;
668  case tok::less:
669  if (parseAngle()) {
670  Tok->Type = TT_TemplateOpener;
671  if (Style.Language == FormatStyle::LK_TextProto) {
672  FormatToken *Previous = Tok->getPreviousNonComment();
673  if (Previous && Previous->Type != TT_DictLiteral)
674  Previous->Type = TT_SelectorName;
675  }
676  } else {
    // Not a template: treat '<' as a binary operator, remember the decision,
    // and resume right after the '<'.
677  Tok->Type = TT_BinaryOperator;
678  NonTemplateLess.insert(Tok);
679  CurrentToken = Tok;
680  next();
681  }
682  break;
683  case tok::r_paren:
684  case tok::r_square:
685  return false;
686  case tok::r_brace:
687  // Lines can start with '}'.
688  if (Tok->Previous)
689  return false;
690  break;
691  case tok::greater:
692  Tok->Type = TT_BinaryOperator;
693  break;
694  case tok::kw_operator:
    // Consume the operator's name up to its parameter list; operator-like
    // tokens on the way are retyped as TT_OverloadedOperator.
695  while (CurrentToken &&
696  !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
697  if (CurrentToken->isOneOf(tok::star, tok::amp))
698  CurrentToken->Type = TT_PointerOrReference;
699  consumeToken();
700  if (CurrentToken &&
701  CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
702  tok::comma))
703  CurrentToken->Previous->Type = TT_OverloadedOperator;
704  }
705  if (CurrentToken) {
706  CurrentToken->Type = TT_OverloadedOperatorLParen;
707  if (CurrentToken->Previous->is(TT_BinaryOperator))
708  CurrentToken->Previous->Type = TT_OverloadedOperator;
709  }
710  break;
711  case tok::question:
712  if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
713  Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
714  tok::r_brace)) {
715  // Question marks before semicolons, colons, etc. indicate optional
716  // types (fields, parameters), e.g.
717  // function(x?: string, y?) {...}
718  // class X { y?; }
719  Tok->Type = TT_JsTypeOptionalQuestion;
720  break;
721  }
722  // Declarations cannot be conditional expressions, this can only be part
723  // of a type declaration.
724  if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
725  Style.Language == FormatStyle::LK_JavaScript)
726  break;
727  parseConditional();
728  break;
729  case tok::kw_template:
730  parseTemplateDeclaration();
731  break;
732  case tok::comma:
733  if (Contexts.back().InCtorInitializer)
734  Tok->Type = TT_CtorInitializerComma;
735  else if (Contexts.back().InInheritanceList)
736  Tok->Type = TT_InheritanceComma;
737  else if (Contexts.back().FirstStartOfName &&
738  (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
739  Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
740  Line.IsMultiVariableDeclStmt = true;
741  }
    // In a foreach macro only the first argument is a declaration; everything
    // after the comma is treated as an expression.
742  if (Contexts.back().IsForEachMacro)
743  Contexts.back().IsExpression = true;
744  break;
745  case tok::identifier:
746  if (Tok->isOneOf(Keywords.kw___has_include,
747  Keywords.kw___has_include_next)) {
748  parseHasInclude();
749  }
750  break;
751  default:
752  break;
753  }
754  return true;
755  }
756 
757  void parseIncludeDirective() {
758  if (CurrentToken && CurrentToken->is(tok::less)) {
759  next();
760  while (CurrentToken) {
761  // Mark tokens up to the trailing line comments as implicit string
762  // literals.
763  if (CurrentToken->isNot(tok::comment) &&
764  !CurrentToken->TokenText.startswith("//"))
765  CurrentToken->Type = TT_ImplicitStringLiteral;
766  next();
767  }
768  }
769  }
770 
771  void parseWarningOrError() {
772  next();
773  // We still want to format the whitespace left of the first token of the
774  // warning or error.
775  next();
776  while (CurrentToken) {
777  CurrentToken->Type = TT_ImplicitStringLiteral;
778  next();
779  }
780  }
781 
782  void parsePragma() {
783  next(); // Consume "pragma".
784  if (CurrentToken &&
785  CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
786  bool IsMark = CurrentToken->is(Keywords.kw_mark);
787  next(); // Consume "mark".
788  next(); // Consume first token (so we fix leading whitespace).
789  while (CurrentToken) {
790  if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
791  CurrentToken->Type = TT_ImplicitStringLiteral;
792  next();
793  }
794  }
795  }
796 
797  void parseHasInclude() {
798  if (!CurrentToken || !CurrentToken->is(tok::l_paren))
799  return;
800  next(); // '('
801  parseIncludeDirective();
802  next(); // ')'
803  }
804 
805  LineType parsePreprocessorDirective() {
806  bool IsFirstToken = CurrentToken->IsFirst;
808  next();
809  if (!CurrentToken)
810  return Type;
811 
812  if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
813  // JavaScript files can contain shebang lines of the form:
814  // #!/usr/bin/env node
815  // Treat these like C++ #include directives.
816  while (CurrentToken) {
817  // Tokens cannot be comments here.
818  CurrentToken->Type = TT_ImplicitStringLiteral;
819  next();
820  }
821  return LT_ImportStatement;
822  }
823 
824  if (CurrentToken->Tok.is(tok::numeric_constant)) {
825  CurrentToken->SpacesRequiredBefore = 1;
826  return Type;
827  }
828  // Hashes in the middle of a line can lead to any strange token
829  // sequence.
830  if (!CurrentToken->Tok.getIdentifierInfo())
831  return Type;
832  switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
833  case tok::pp_include:
834  case tok::pp_include_next:
835  case tok::pp_import:
836  next();
837  parseIncludeDirective();
838  Type = LT_ImportStatement;
839  break;
840  case tok::pp_error:
841  case tok::pp_warning:
842  parseWarningOrError();
843  break;
844  case tok::pp_pragma:
845  parsePragma();
846  break;
847  case tok::pp_if:
848  case tok::pp_elif:
849  Contexts.back().IsExpression = true;
850  parseLine();
851  break;
852  default:
853  break;
854  }
855  while (CurrentToken) {
856  FormatToken *Tok = CurrentToken;
857  next();
858  if (Tok->is(tok::l_paren))
859  parseParens();
860  else if (Tok->isOneOf(Keywords.kw___has_include,
861  Keywords.kw___has_include_next))
862  parseHasInclude();
863  }
864  return Type;
865  }
866 
867 public:
868  LineType parseLine() {
869  NonTemplateLess.clear();
870  if (CurrentToken->is(tok::hash))
871  return parsePreprocessorDirective();
872 
873  // Directly allow to 'import <string-literal>' to support protocol buffer
874  // definitions (github.com/google/protobuf) or missing "#" (either way we
875  // should not break the line).
876  IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
877  if ((Style.Language == FormatStyle::LK_Java &&
878  CurrentToken->is(Keywords.kw_package)) ||
879  (Info && Info->getPPKeywordID() == tok::pp_import &&
880  CurrentToken->Next &&
881  CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
882  tok::kw_static))) {
883  next();
884  parseIncludeDirective();
885  return LT_ImportStatement;
886  }
887 
888  // If this line starts and ends in '<' and '>', respectively, it is likely
889  // part of "#define <a/b.h>".
890  if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
891  parseIncludeDirective();
892  return LT_ImportStatement;
893  }
894 
895  // In .proto files, top-level options are very similar to import statements
896  // and should not be line-wrapped.
897  if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
898  CurrentToken->is(Keywords.kw_option)) {
899  next();
900  if (CurrentToken && CurrentToken->is(tok::identifier))
901  return LT_ImportStatement;
902  }
903 
904  bool KeywordVirtualFound = false;
905  bool ImportStatement = false;
906 
907  // import {...} from '...';
908  if (Style.Language == FormatStyle::LK_JavaScript &&
909  CurrentToken->is(Keywords.kw_import))
910  ImportStatement = true;
911 
912  while (CurrentToken) {
913  if (CurrentToken->is(tok::kw_virtual))
914  KeywordVirtualFound = true;
915  if (Style.Language == FormatStyle::LK_JavaScript) {
916  // export {...} from '...';
917  // An export followed by "from 'some string';" is a re-export from
918  // another module identified by a URI and is treated as a
919  // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
920  // Just "export {...};" or "export class ..." should not be treated as
921  // an import in this sense.
922  if (Line.First->is(tok::kw_export) &&
923  CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
924  CurrentToken->Next->isStringLiteral())
925  ImportStatement = true;
926  if (isClosureImportStatement(*CurrentToken))
927  ImportStatement = true;
928  }
929  if (!consumeToken())
930  return LT_Invalid;
931  }
932  if (KeywordVirtualFound)
933  return LT_VirtualFunctionDecl;
934  if (ImportStatement)
935  return LT_ImportStatement;
936 
937  if (Line.startsWith(TT_ObjCMethodSpecifier)) {
938  if (Contexts.back().FirstObjCSelectorName)
939  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
940  Contexts.back().LongestObjCSelectorName;
941  return LT_ObjCMethodDecl;
942  }
943 
944  return LT_Other;
945  }
946 
947 private:
948  bool isClosureImportStatement(const FormatToken &Tok) {
949  // FIXME: Closure-library specific stuff should not be hard-coded but be
950  // configurable.
951  return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
952  Tok.Next->Next &&
953  (Tok.Next->Next->TokenText == "module" ||
954  Tok.Next->Next->TokenText == "provide" ||
955  Tok.Next->Next->TokenText == "require" ||
956  Tok.Next->Next->TokenText == "forwardDeclare") &&
957  Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
958  }
959 
960  void resetTokenMetadata(FormatToken *Token) {
961  if (!Token)
962  return;
963 
964  // Reset token type in case we have already looked at it and then
965  // recovered from an error (e.g. failure to find the matching >).
966  if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
967  TT_FunctionLBrace, TT_ImplicitStringLiteral,
968  TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
969  TT_OverloadedOperator, TT_RegexLiteral,
970  TT_TemplateString, TT_ObjCStringLiteral))
971  CurrentToken->Type = TT_Unknown;
972  CurrentToken->Role.reset();
973  CurrentToken->MatchingParen = nullptr;
974  CurrentToken->FakeLParens.clear();
975  CurrentToken->FakeRParens = 0;
976  }
977 
978  void next() {
979  if (CurrentToken) {
980  CurrentToken->NestingLevel = Contexts.size() - 1;
981  CurrentToken->BindingStrength = Contexts.back().BindingStrength;
982  modifyContext(*CurrentToken);
983  determineTokenType(*CurrentToken);
984  CurrentToken = CurrentToken->Next;
985  }
986 
987  resetTokenMetadata(CurrentToken);
988  }
989 
990  /// \brief A struct to hold information valid in a specific context, e.g.
991  /// a pair of parenthesis.
992  struct Context {
993  Context(tok::TokenKind ContextKind, unsigned BindingStrength,
994  bool IsExpression)
995  : ContextKind(ContextKind), BindingStrength(BindingStrength),
996  IsExpression(IsExpression) {}
997 
999  unsigned BindingStrength;
1002  bool ColonIsForRangeExpr = false;
1003  bool ColonIsDictLiteral = false;
1005  FormatToken *FirstObjCSelectorName = nullptr;
1006  FormatToken *FirstStartOfName = nullptr;
1007  bool CanBeExpression = true;
1008  bool InTemplateArgument = false;
1009  bool InCtorInitializer = false;
1010  bool InInheritanceList = false;
1011  bool CaretFound = false;
1012  bool IsForEachMacro = false;
1013  };
1014 
1015  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
1016  /// of each instance.
1017  struct ScopedContextCreator {
1018  AnnotatingParser &P;
1019 
1020  ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1021  unsigned Increase)
1022  : P(P) {
1023  P.Contexts.push_back(Context(ContextKind,
1024  P.Contexts.back().BindingStrength + Increase,
1025  P.Contexts.back().IsExpression));
1026  }
1027 
1028  ~ScopedContextCreator() { P.Contexts.pop_back(); }
1029  };
1030 
1031  void modifyContext(const FormatToken &Current) {
1032  if (Current.getPrecedence() == prec::Assignment &&
1033  !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1034  // Type aliases use `type X = ...;` in TypeScript and can be exported
1035  // using `export type ...`.
1036  !(Style.Language == FormatStyle::LK_JavaScript &&
1037  (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1038  Line.startsWith(tok::kw_export, Keywords.kw_type,
1039  tok::identifier))) &&
1040  (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1041  Contexts.back().IsExpression = true;
1042  if (!Line.startsWith(TT_UnaryOperator)) {
1043  for (FormatToken *Previous = Current.Previous;
1044  Previous && Previous->Previous &&
1045  !Previous->Previous->isOneOf(tok::comma, tok::semi);
1046  Previous = Previous->Previous) {
1047  if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
1048  Previous = Previous->MatchingParen;
1049  if (!Previous)
1050  break;
1051  }
1052  if (Previous->opensScope())
1053  break;
1054  if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1055  Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1056  Previous->Previous && Previous->Previous->isNot(tok::equal))
1057  Previous->Type = TT_PointerOrReference;
1058  }
1059  }
1060  } else if (Current.is(tok::lessless) &&
1061  (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1062  Contexts.back().IsExpression = true;
1063  } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1064  Contexts.back().IsExpression = true;
1065  } else if (Current.is(TT_TrailingReturnArrow)) {
1066  Contexts.back().IsExpression = false;
1067  } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1068  Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1069  } else if (Current.Previous &&
1070  Current.Previous->is(TT_CtorInitializerColon)) {
1071  Contexts.back().IsExpression = true;
1072  Contexts.back().InCtorInitializer = true;
1073  } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1074  Contexts.back().InInheritanceList = true;
1075  } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
1076  for (FormatToken *Previous = Current.Previous;
1077  Previous && Previous->isOneOf(tok::star, tok::amp);
1078  Previous = Previous->Previous)
1079  Previous->Type = TT_PointerOrReference;
1080  if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1081  Contexts.back().IsExpression = false;
1082  } else if (Current.is(tok::kw_new)) {
1083  Contexts.back().CanBeExpression = false;
1084  } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
1085  // This should be the condition or increment in a for-loop.
1086  Contexts.back().IsExpression = true;
1087  }
1088  }
1089 
1090  void determineTokenType(FormatToken &Current) {
1091  if (!Current.is(TT_Unknown))
1092  // The token type is already known.
1093  return;
1094 
1095  if (Style.Language == FormatStyle::LK_JavaScript) {
1096  if (Current.is(tok::exclaim)) {
1097  if (Current.Previous &&
1098  (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
1099  tok::r_paren, tok::r_square,
1100  tok::r_brace) ||
1101  Current.Previous->Tok.isLiteral())) {
1102  Current.Type = TT_JsNonNullAssertion;
1103  return;
1104  }
1105  if (Current.Next &&
1106  Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1107  Current.Type = TT_JsNonNullAssertion;
1108  return;
1109  }
1110  }
1111  }
1112 
1113  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1114  // function declaration have been found. In this case, 'Current' is a
1115  // trailing token of this declaration and thus cannot be a name.
1116  if (Current.is(Keywords.kw_instanceof)) {
1117  Current.Type = TT_BinaryOperator;
1118  } else if (isStartOfName(Current) &&
1119  (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1120  Contexts.back().FirstStartOfName = &Current;
1121  Current.Type = TT_StartOfName;
1122  } else if (Current.is(tok::semi)) {
1123  // Reset FirstStartOfName after finding a semicolon so that a for loop
1124  // with multiple increment statements is not confused with a for loop
1125  // having multiple variable declarations.
1126  Contexts.back().FirstStartOfName = nullptr;
1127  } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1128  AutoFound = true;
1129  } else if (Current.is(tok::arrow) &&
1130  Style.Language == FormatStyle::LK_Java) {
1131  Current.Type = TT_LambdaArrow;
1132  } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1133  Current.NestingLevel == 0) {
1134  Current.Type = TT_TrailingReturnArrow;
1135  } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1136  Current.Type = determineStarAmpUsage(Current,
1137  Contexts.back().CanBeExpression &&
1138  Contexts.back().IsExpression,
1139  Contexts.back().InTemplateArgument);
1140  } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1141  Current.Type = determinePlusMinusCaretUsage(Current);
1142  if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1143  Contexts.back().CaretFound = true;
1144  } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1145  Current.Type = determineIncrementUsage(Current);
1146  } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1147  Current.Type = TT_UnaryOperator;
1148  } else if (Current.is(tok::question)) {
1149  if (Style.Language == FormatStyle::LK_JavaScript &&
1150  Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1151  // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1152  // on the interface, not a ternary expression.
1153  Current.Type = TT_JsTypeOptionalQuestion;
1154  } else {
1155  Current.Type = TT_ConditionalExpr;
1156  }
1157  } else if (Current.isBinaryOperator() &&
1158  (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
1159  Current.Type = TT_BinaryOperator;
1160  } else if (Current.is(tok::comment)) {
1161  if (Current.TokenText.startswith("/*")) {
1162  if (Current.TokenText.endswith("*/"))
1163  Current.Type = TT_BlockComment;
1164  else
1165  // The lexer has for some reason determined a comment here. But we
1166  // cannot really handle it, if it isn't properly terminated.
1167  Current.Tok.setKind(tok::unknown);
1168  } else {
1169  Current.Type = TT_LineComment;
1170  }
1171  } else if (Current.is(tok::r_paren)) {
1172  if (rParenEndsCast(Current))
1173  Current.Type = TT_CastRParen;
1174  if (Current.MatchingParen && Current.Next &&
1175  !Current.Next->isBinaryOperator() &&
1176  !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1177  tok::comma, tok::period, tok::arrow,
1178  tok::coloncolon))
1179  if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1180  // Make sure this isn't the return type of an Obj-C block declaration
1181  if (AfterParen->Tok.isNot(tok::caret)) {
1182  if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1183  if (BeforeParen->is(tok::identifier) &&
1184  BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1185  (!BeforeParen->Previous ||
1186  BeforeParen->Previous->ClosesTemplateDeclaration))
1187  Current.Type = TT_FunctionAnnotationRParen;
1188  }
1189  }
1190  } else if (Current.is(tok::at) && Current.Next &&
1191  Style.Language != FormatStyle::LK_JavaScript &&
1192  Style.Language != FormatStyle::LK_Java) {
1193  // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1194  // marks declarations and properties that need special formatting.
1195  switch (Current.Next->Tok.getObjCKeywordID()) {
1196  case tok::objc_interface:
1197  case tok::objc_implementation:
1198  case tok::objc_protocol:
1199  Current.Type = TT_ObjCDecl;
1200  break;
1201  case tok::objc_property:
1202  Current.Type = TT_ObjCProperty;
1203  break;
1204  default:
1205  break;
1206  }
1207  } else if (Current.is(tok::period)) {
1208  FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1209  if (PreviousNoComment &&
1210  PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1211  Current.Type = TT_DesignatedInitializerPeriod;
1212  else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1213  Current.Previous->isOneOf(TT_JavaAnnotation,
1214  TT_LeadingJavaAnnotation)) {
1215  Current.Type = Current.Previous->Type;
1216  }
1217  } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1218  Current.Previous &&
1219  !Current.Previous->isOneOf(tok::equal, tok::at) &&
1220  Line.MightBeFunctionDecl && Contexts.size() == 1) {
1221  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1222  // function declaration have been found.
1223  Current.Type = TT_TrailingAnnotation;
1224  } else if ((Style.Language == FormatStyle::LK_Java ||
1225  Style.Language == FormatStyle::LK_JavaScript) &&
1226  Current.Previous) {
1227  if (Current.Previous->is(tok::at) &&
1228  Current.isNot(Keywords.kw_interface)) {
1229  const FormatToken &AtToken = *Current.Previous;
1230  const FormatToken *Previous = AtToken.getPreviousNonComment();
1231  if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1232  Current.Type = TT_LeadingJavaAnnotation;
1233  else
1234  Current.Type = TT_JavaAnnotation;
1235  } else if (Current.Previous->is(tok::period) &&
1236  Current.Previous->isOneOf(TT_JavaAnnotation,
1237  TT_LeadingJavaAnnotation)) {
1238  Current.Type = Current.Previous->Type;
1239  }
1240  }
1241  }
1242 
1243  /// \brief Take a guess at whether \p Tok starts a name of a function or
1244  /// variable declaration.
1245  ///
1246  /// This is a heuristic based on whether \p Tok is an identifier following
1247  /// something that is likely a type.
1248  bool isStartOfName(const FormatToken &Tok) {
1249  if (Tok.isNot(tok::identifier) || !Tok.Previous)
1250  return false;
1251 
1252  if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1253  Keywords.kw_as))
1254  return false;
1255  if (Style.Language == FormatStyle::LK_JavaScript &&
1256  Tok.Previous->is(Keywords.kw_in))
1257  return false;
1258 
1259  // Skip "const" as it does not have an influence on whether this is a name.
1260  FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1261  while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1262  PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1263 
1264  if (!PreviousNotConst)
1265  return false;
1266 
1267  bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1268  PreviousNotConst->Previous &&
1269  PreviousNotConst->Previous->is(tok::hash);
1270 
1271  if (PreviousNotConst->is(TT_TemplateCloser))
1272  return PreviousNotConst && PreviousNotConst->MatchingParen &&
1273  PreviousNotConst->MatchingParen->Previous &&
1274  PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1275  PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1276 
1277  if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1278  PreviousNotConst->MatchingParen->Previous &&
1279  PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1280  return true;
1281 
1282  return (!IsPPKeyword &&
1283  PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1284  PreviousNotConst->is(TT_PointerOrReference) ||
1285  PreviousNotConst->isSimpleTypeSpecifier();
1286  }
1287 
1288  /// \brief Determine whether ')' is ending a cast.
1289  bool rParenEndsCast(const FormatToken &Tok) {
1290  // C-style casts are only used in C++ and Java.
1291  if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
1292  return false;
1293 
1294  // Empty parens aren't casts and there are no casts at the end of the line.
1295  if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1296  return false;
1297 
1298  FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1299  if (LeftOfParens) {
1300  // If there is a closing parenthesis left of the current parentheses,
1301  // look past it as these might be chained casts.
1302  if (LeftOfParens->is(tok::r_paren)) {
1303  if (!LeftOfParens->MatchingParen ||
1304  !LeftOfParens->MatchingParen->Previous)
1305  return false;
1306  LeftOfParens = LeftOfParens->MatchingParen->Previous;
1307  }
1308 
1309  // If there is an identifier (or with a few exceptions a keyword) right
1310  // before the parentheses, this is unlikely to be a cast.
1311  if (LeftOfParens->Tok.getIdentifierInfo() &&
1312  !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1313  tok::kw_delete))
1314  return false;
1315 
1316  // Certain other tokens right before the parentheses are also signals that
1317  // this cannot be a cast.
1318  if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1319  TT_TemplateCloser, tok::ellipsis))
1320  return false;
1321  }
1322 
1323  if (Tok.Next->is(tok::question))
1324  return false;
1325 
1326  // As Java has no function types, a "(" after the ")" likely means that this
1327  // is a cast.
1328  if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1329  return true;
1330 
1331  // If a (non-string) literal follows, this is likely a cast.
1332  if (Tok.Next->isNot(tok::string_literal) &&
1333  (Tok.Next->Tok.isLiteral() ||
1334  Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1335  return true;
1336 
1337  // Heuristically try to determine whether the parentheses contain a type.
1338  bool ParensAreType =
1339  !Tok.Previous ||
1340  Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1341  Tok.Previous->isSimpleTypeSpecifier();
1342  bool ParensCouldEndDecl =
1343  Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1344  if (ParensAreType && !ParensCouldEndDecl)
1345  return true;
1346 
1347  // At this point, we heuristically assume that there are no casts at the
1348  // start of the line. We assume that we have found most cases where there
1349  // are by the logic above, e.g. "(void)x;".
1350  if (!LeftOfParens)
1351  return false;
1352 
1353  // Certain token types inside the parentheses mean that this can't be a
1354  // cast.
1355  for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1356  Token = Token->Next)
1357  if (Token->is(TT_BinaryOperator))
1358  return false;
1359 
1360  // If the following token is an identifier or 'this', this is a cast. All
1361  // cases where this can be something else are handled above.
1362  if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1363  return true;
1364 
1365  if (!Tok.Next->Next)
1366  return false;
1367 
1368  // If the next token after the parenthesis is a unary operator, assume
1369  // that this is cast, unless there are unexpected tokens inside the
1370  // parenthesis.
1371  bool NextIsUnary =
1372  Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1373  if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1374  !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1375  return false;
1376  // Search for unexpected tokens.
1377  for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1378  Prev = Prev->Previous) {
1379  if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1380  return false;
1381  }
1382  return true;
1383  }
1384 
1385  /// \brief Return the type of the given token assuming it is * or &.
1386  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1387  bool InTemplateArgument) {
1388  if (Style.Language == FormatStyle::LK_JavaScript)
1389  return TT_BinaryOperator;
1390 
1391  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1392  if (!PrevToken)
1393  return TT_UnaryOperator;
1394 
1395  const FormatToken *NextToken = Tok.getNextNonComment();
1396  if (!NextToken ||
1397  NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
1398  (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1399  return TT_PointerOrReference;
1400 
1401  if (PrevToken->is(tok::coloncolon))
1402  return TT_PointerOrReference;
1403 
1404  if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1405  tok::comma, tok::semi, tok::kw_return, tok::colon,
1406  tok::equal, tok::kw_delete, tok::kw_sizeof,
1407  tok::kw_throw) ||
1408  PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1409  TT_UnaryOperator, TT_CastRParen))
1410  return TT_UnaryOperator;
1411 
1412  if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1413  return TT_PointerOrReference;
1414  if (NextToken->is(tok::kw_operator) && !IsExpression)
1415  return TT_PointerOrReference;
1416  if (NextToken->isOneOf(tok::comma, tok::semi))
1417  return TT_PointerOrReference;
1418 
1419  if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {
1420  FormatToken *TokenBeforeMatchingParen =
1421  PrevToken->MatchingParen->getPreviousNonComment();
1422  if (TokenBeforeMatchingParen &&
1423  TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
1424  return TT_PointerOrReference;
1425  }
1426 
1427  if (PrevToken->Tok.isLiteral() ||
1428  PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1429  tok::kw_false, tok::r_brace) ||
1430  NextToken->Tok.isLiteral() ||
1431  NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1432  NextToken->isUnaryOperator() ||
1433  // If we know we're in a template argument, there are no named
1434  // declarations. Thus, having an identifier on the right-hand side
1435  // indicates a binary operator.
1436  (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1437  return TT_BinaryOperator;
1438 
1439  // "&&(" is quite unlikely to be two successive unary "&".
1440  if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1441  return TT_BinaryOperator;
1442 
1443  // This catches some cases where evaluation order is used as control flow:
1444  // aaa && aaa->f();
1445  const FormatToken *NextNextToken = NextToken->getNextNonComment();
1446  if (NextNextToken && NextNextToken->is(tok::arrow))
1447  return TT_BinaryOperator;
1448 
1449  // It is very unlikely that we are going to find a pointer or reference type
1450  // definition on the RHS of an assignment.
1451  if (IsExpression && !Contexts.back().CaretFound)
1452  return TT_BinaryOperator;
1453 
1454  return TT_PointerOrReference;
1455  }
1456 
1457  TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1458  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1459  if (!PrevToken)
1460  return TT_UnaryOperator;
1461 
1462  if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) &&
1463  !PrevToken->is(tok::exclaim))
1464  // There aren't any trailing unary operators except for TypeScript's
1465  // non-null operator (!). Thus, this must be squence of leading operators.
1466  return TT_UnaryOperator;
1467 
1468  // Use heuristics to recognize unary operators.
1469  if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1470  tok::question, tok::colon, tok::kw_return,
1471  tok::kw_case, tok::at, tok::l_brace))
1472  return TT_UnaryOperator;
1473 
1474  // There can't be two consecutive binary operators.
1475  if (PrevToken->is(TT_BinaryOperator))
1476  return TT_UnaryOperator;
1477 
1478  // Fall back to marking the token as binary operator.
1479  return TT_BinaryOperator;
1480  }
1481 
1482  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1483  TokenType determineIncrementUsage(const FormatToken &Tok) {
1484  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1485  if (!PrevToken || PrevToken->is(TT_CastRParen))
1486  return TT_UnaryOperator;
1487  if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1488  return TT_TrailingUnaryOperator;
1489 
1490  return TT_UnaryOperator;
1491  }
1492 
1493  SmallVector<Context, 8> Contexts;
1494 
1495  const FormatStyle &Style;
1496  AnnotatedLine &Line;
1497  FormatToken *CurrentToken;
1498  bool AutoFound;
1499  const AdditionalKeywords &Keywords;
1500 
1501  // Set of "<" tokens that do not open a template parameter list. If parseAngle
1502  // determines that a specific token can't be a template opener, it will make
1503  // same decision irrespective of the decisions for tokens leading up to it.
1504  // Store this information to prevent this from causing exponential runtime.
1505  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1506 };
1507 
1508 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1509 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1510 
1511 /// \brief Parses binary expressions by inserting fake parenthesis based on
1512 /// operator precedence.
1513 class ExpressionParser {
1514 public:
1515  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1516  AnnotatedLine &Line)
1517  : Style(Style), Keywords(Keywords), Current(Line.First) {}
1518 
1519  /// \brief Parse expressions with the given operatore precedence.
1520  void parse(int Precedence = 0) {
1521  // Skip 'return' and ObjC selector colons as they are not part of a binary
1522  // expression.
1523  while (Current && (Current->is(tok::kw_return) ||
1524  (Current->is(tok::colon) &&
1525  Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1526  next();
1527 
1528  if (!Current || Precedence > PrecedenceArrowAndPeriod)
1529  return;
1530 
1531  // Conditional expressions need to be parsed separately for proper nesting.
1532  if (Precedence == prec::Conditional) {
1533  parseConditionalExpr();
1534  return;
1535  }
1536 
1537  // Parse unary operators, which all have a higher precedence than binary
1538  // operators.
1539  if (Precedence == PrecedenceUnaryOperator) {
1540  parseUnaryOperator();
1541  return;
1542  }
1543 
1544  FormatToken *Start = Current;
1545  FormatToken *LatestOperator = nullptr;
1546  unsigned OperatorIndex = 0;
1547 
1548  while (Current) {
1549  // Consume operators with higher precedence.
1550  parse(Precedence + 1);
1551 
1552  int CurrentPrecedence = getCurrentPrecedence();
1553 
1554  if (Current && Current->is(TT_SelectorName) &&
1555  Precedence == CurrentPrecedence) {
1556  if (LatestOperator)
1557  addFakeParenthesis(Start, prec::Level(Precedence));
1558  Start = Current;
1559  }
1560 
1561  // At the end of the line or when an operator with higher precedence is
1562  // found, insert fake parenthesis and return.
1563  if (!Current ||
1564  (Current->closesScope() &&
1565  (Current->MatchingParen || Current->is(TT_TemplateString))) ||
1566  (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1567  (CurrentPrecedence == prec::Conditional &&
1568  Precedence == prec::Assignment && Current->is(tok::colon))) {
1569  break;
1570  }
1571 
1572  // Consume scopes: (), [], <> and {}
1573  if (Current->opensScope()) {
1574  // In fragment of a JavaScript template string can look like '}..${' and
1575  // thus close a scope and open a new one at the same time.
1576  while (Current && (!Current->closesScope() || Current->opensScope())) {
1577  next();
1578  parse();
1579  }
1580  next();
1581  } else {
1582  // Operator found.
1583  if (CurrentPrecedence == Precedence) {
1584  if (LatestOperator)
1585  LatestOperator->NextOperator = Current;
1586  LatestOperator = Current;
1587  Current->OperatorIndex = OperatorIndex;
1588  ++OperatorIndex;
1589  }
1590  next(/*SkipPastLeadingComments=*/Precedence > 0);
1591  }
1592  }
1593 
1594  if (LatestOperator && (Current || Precedence > 0)) {
1595  // LatestOperator->LastOperator = true;
1596  if (Precedence == PrecedenceArrowAndPeriod) {
1597  // Call expressions don't have a binary operator precedence.
1598  addFakeParenthesis(Start, prec::Unknown);
1599  } else {
1600  addFakeParenthesis(Start, prec::Level(Precedence));
1601  }
1602  }
1603  }
1604 
1605 private:
1606  /// \brief Gets the precedence (+1) of the given token for binary operators
1607  /// and other tokens that we treat like binary operators.
1608  int getCurrentPrecedence() {
1609  if (Current) {
1610  const FormatToken *NextNonComment = Current->getNextNonComment();
1611  if (Current->is(TT_ConditionalExpr))
1612  return prec::Conditional;
1613  if (NextNonComment && Current->is(TT_SelectorName) &&
1614  (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
1615  ((Style.Language == FormatStyle::LK_Proto ||
1616  Style.Language == FormatStyle::LK_TextProto) &&
1617  NextNonComment->is(tok::less))))
1618  return prec::Assignment;
1619  if (Current->is(TT_JsComputedPropertyName))
1620  return prec::Assignment;
1621  if (Current->is(TT_LambdaArrow))
1622  return prec::Comma;
1623  if (Current->is(TT_JsFatArrow))
1624  return prec::Assignment;
1625  if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
1626  (Current->is(tok::comment) && NextNonComment &&
1627  NextNonComment->is(TT_SelectorName)))
1628  return 0;
1629  if (Current->is(TT_RangeBasedForLoopColon))
1630  return prec::Comma;
1631  if ((Style.Language == FormatStyle::LK_Java ||
1632  Style.Language == FormatStyle::LK_JavaScript) &&
1633  Current->is(Keywords.kw_instanceof))
1634  return prec::Relational;
1635  if (Style.Language == FormatStyle::LK_JavaScript &&
1636  Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
1637  return prec::Relational;
1638  if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1639  return Current->getPrecedence();
1640  if (Current->isOneOf(tok::period, tok::arrow))
1641  return PrecedenceArrowAndPeriod;
1642  if ((Style.Language == FormatStyle::LK_Java ||
1643  Style.Language == FormatStyle::LK_JavaScript) &&
1644  Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1645  Keywords.kw_throws))
1646  return 0;
1647  }
1648  return -1;
1649  }
1650 
1651  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1652  Start->FakeLParens.push_back(Precedence);
1653  if (Precedence > prec::Unknown)
1654  Start->StartsBinaryExpression = true;
1655  if (Current) {
1656  FormatToken *Previous = Current->Previous;
1657  while (Previous->is(tok::comment) && Previous->Previous)
1658  Previous = Previous->Previous;
1659  ++Previous->FakeRParens;
1660  if (Precedence > prec::Unknown)
1661  Previous->EndsBinaryExpression = true;
1662  }
1663  }
1664 
1665  /// \brief Parse unary operator expressions and surround them with fake
1666  /// parentheses if appropriate.
1667  void parseUnaryOperator() {
  // Collect the leading run of unary operator tokens.
  // NOTE(review): the declaration of Tokens is not visible in this listing.
1669  while (Current && Current->is(TT_UnaryOperator)) {
1670  Tokens.push_back(Current);
1671  next();
1672  }
  // Parse whatever follows the collected operators.
1673  parse(PrecedenceArrowAndPeriod);
  // Open one fake parenthesis at each collected operator, last operator first.
1674  for (FormatToken *Token : llvm::reverse(Tokens))
1675  // The actual precedence doesn't matter.
1676  addFakeParenthesis(Token, prec::Unknown);
1677  }
1678 
1679  void parseConditionalExpr() {
1680  while (Current && Current->isTrailingComment()) {
1681  next();
1682  }
1683  FormatToken *Start = Current;
1684  parse(prec::LogicalOr);
1685  if (!Current || !Current->is(tok::question))
1686  return;
1687  next();
1688  parse(prec::Assignment);
1689  if (!Current || Current->isNot(TT_ConditionalExpr))
1690  return;
1691  next();
1692  parse(prec::Assignment);
1693  addFakeParenthesis(Start, prec::Conditional);
1694  }
1695 
1696  void next(bool SkipPastLeadingComments = true) {
1697  if (Current)
1698  Current = Current->Next;
1699  while (Current &&
1700  (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1701  Current->isTrailingComment())
1702  Current = Current->Next;
1703  }
1704 
1705  const FormatStyle &Style;
1706  const AdditionalKeywords &Keywords;
1707  FormatToken *Current;
1708 };
1709 
1710 } // end anonymous namespace
1711 
// Body of setCommentLineLevels (name taken from the recursive call at the end
// of the loop). NOTE(review): the signature and the first part of the loop
// header are not visible in this listing.
//
// A line made up only of comment tokens takes over the level of the line
// tracked in NextNonCommentLine when both start in the same original column.
1714  const AnnotatedLine *NextNonCommentLine = nullptr;
1716  E = Lines.rend();
1717  I != E; ++I) {
  // A line counts as a comment line when every one of its tokens is a comment.
1718  bool CommentLine = true;
1719  for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
1720  if (!Tok->is(tok::comment)) {
1721  CommentLine = false;
1722  break;
1723  }
1724  }
1725 
1726  if (NextNonCommentLine && CommentLine) {
1727  // If the comment is currently aligned with the line immediately following
1728  // it, that's probably intentional and we should keep it.
1729  bool AlignedWithNextLine =
1730  NextNonCommentLine->First->NewlinesBefore <= 1 &&
1731  NextNonCommentLine->First->OriginalColumn ==
1732  (*I)->First->OriginalColumn;
1733  if (AlignedWithNextLine)
1734  (*I)->Level = NextNonCommentLine->Level;
1735  } else {
  // Remember this line as the reference, unless it starts with '}'.
1736  NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1737  }
1738 
  // Apply the same treatment to the child lines of this line.
1739  setCommentLineLevels((*I)->Children);
1740  }
1741 }
1742 
1743 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
1744  unsigned Result = 0;
1745  for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
1746  Result = std::max(Result, Tok->NestingLevel);
1747  return Result;
1748 }
1749 
// Body of annotate (name taken from the recursive call on the child lines).
// NOTE(review): the signature and the first part of the loop header are not
// visible in this listing.
//
// Child lines are annotated first, then the line itself is run through
// AnnotatingParser and, unless it ends up as LT_Invalid, ExpressionParser.
1752  E = Line.Children.end();
1753  I != E; ++I) {
1754  annotate(**I);
1755  }
1756  AnnotatingParser Parser(Style, Line, Keywords);
1757  Line.Type = Parser.parseLine();
1758 
1759  // With very deep nesting, ExpressionParser uses lots of stack and the
1760  // formatting algorithm is very slow. We're not going to do a good job here
1761  // anyway - it's probably generated code being formatted by mistake.
1762  // Just skip the whole line.
1763  if (maxNestingDepth(Line) > 50)
1764  Line.Type = LT_Invalid;
1765 
  // Invalid lines get no expression parsing and no further classification.
1766  if (Line.Type == LT_Invalid)
1767  return;
1768 
1769  ExpressionParser ExprParser(Style, Keywords, Line);
1770  ExprParser.parse();
1771 
  // Lines starting with an Objective-C specific token get a dedicated type.
1772  if (Line.startsWith(TT_ObjCMethodSpecifier))
1773  Line.Type = LT_ObjCMethodDecl;
1774  else if (Line.startsWith(TT_ObjCDecl))
1775  Line.Type = LT_ObjCDecl;
1776  else if (Line.startsWith(TT_ObjCProperty))
1777  Line.Type = LT_ObjCProperty;
1778 
  // The first token may only be broken before if a break is mandatory.
1779  Line.First->SpacesRequiredBefore = 1;
1780  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1781 }
1782 
1783 // This function heuristically determines whether 'Current' starts the name of a
1784 // function declaration.
1785 static bool isFunctionDeclarationName(const FormatToken &Current,
1786  const AnnotatedLine &Line) {
1787  auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
1788  for (; Next; Next = Next->Next) {
1789  if (Next->is(TT_OverloadedOperatorLParen))
1790  return Next;
1791  if (Next->is(TT_OverloadedOperator))
1792  continue;
1793  if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1794  // For 'new[]' and 'delete[]'.
1795  if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next &&
1796  Next->Next->Next->is(tok::r_square))
1797  Next = Next->Next->Next;
1798  continue;
1799  }
1800 
1801  break;
1802  }
1803  return nullptr;
1804  };
1805 
1806  // Find parentheses of parameter list.
1807  const FormatToken *Next = Current.Next;
1808  if (Current.is(tok::kw_operator)) {
1809  if (Current.Previous && Current.Previous->is(tok::coloncolon))
1810  return false;
1811  Next = skipOperatorName(Next);
1812  } else {
1813  if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1814  return false;
1815  for (; Next; Next = Next->Next) {
1816  if (Next->is(TT_TemplateOpener)) {
1817  Next = Next->MatchingParen;
1818  } else if (Next->is(tok::coloncolon)) {
1819  Next = Next->Next;
1820  if (!Next)
1821  return false;
1822  if (Next->is(tok::kw_operator)) {
1823  Next = skipOperatorName(Next->Next);
1824  break;
1825  }
1826  if (!Next->is(tok::identifier))
1827  return false;
1828  } else if (Next->is(tok::l_paren)) {
1829  break;
1830  } else {
1831  return false;
1832  }
1833  }
1834  }
1835 
1836  // Check whether parameter list can belong to a function declaration.
1837  if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
1838  return false;
1839  // If the lines ends with "{", this is likely an function definition.
1840  if (Line.Last->is(tok::l_brace))
1841  return true;
1842  if (Next->Next == Next->MatchingParen)
1843  return true; // Empty parentheses.
1844  // If there is an &/&& after the r_paren, this is likely a function.
1845  if (Next->MatchingParen->Next &&
1846  Next->MatchingParen->Next->is(TT_PointerOrReference))
1847  return true;
1848  for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
1849  Tok = Tok->Next) {
1850  if (Tok->is(tok::l_paren) && Tok->MatchingParen) {
1851  Tok = Tok->MatchingParen;
1852  continue;
1853  }
1854  if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1855  Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
1856  return true;
1857  if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
1858  Tok->Tok.isLiteral())
1859  return false;
1860  }
1861  return false;
1862 }
1863 
// Decides whether 'Line', which must be marked as a possible function
// declaration, requires a break for its return type.
// NOTE(review): the first condition, the switch header and several case labels
// are not visible in this listing; the comments below cover only what is
// shown.
1864 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
1865  assert(Line.MightBeFunctionDecl);
1866 
  // Never break when this (partially visible) condition holds; its visible
  // part requires the line to be nested (Level > 0).
1870  Line.Level > 0)
1871  return false;
1872 
1875  return false;
1876  case FormatStyle::RTBS_All:
1878  return true;
  // In this case only lines that might be function definitions break.
1881  return Line.mightBeFunctionDefinition();
1882  }
1883 
1884  return false;
1885 }
1886 
// Body of calculateFormattingInformation (name taken from the recursive call
// on the child lines). NOTE(review): the signature and a few other lines are
// not visible in this listing.
//
// For every token after the first one this computes the required spaces, the
// break flags, the accumulated length and the split penalty; afterwards it
// assigns the indent level of every token.
1889  E = Line.Children.end();
1890  I != E; ++I) {
1891  calculateFormattingInformation(**I);
1892  }
1893 
1894  Line.First->TotalLength =
1896  : Line.FirstStartColumn + Line.First->ColumnWidth;
1897  FormatToken *Current = Line.First->Next;
1898  bool InFunctionDecl = Line.MightBeFunctionDecl;
1899  while (Current) {
1900  if (isFunctionDeclarationName(*Current, Line))
1901  Current->Type = TT_FunctionDeclarationName;
1902  if (Current->is(TT_LineComment)) {
1903  if (Current->Previous->BlockKind == BK_BracedInit &&
1904  Current->Previous->opensScope())
1905  Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1906  else
1908 
1909  // If we find a trailing comment, iterate backwards to determine whether
1910  // it seems to relate to a specific parameter. If so, break before that
1911  // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1912  // to the previous line in:
1913  // SomeFunction(a,
1914  // b, // comment
1915  // c);
1916  if (!Current->HasUnescapedNewline) {
1917  for (FormatToken *Parameter = Current->Previous; Parameter;
1918  Parameter = Parameter->Previous) {
1919  if (Parameter->isOneOf(tok::comment, tok::r_brace))
1920  break;
1921  if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1922  if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
1923  Parameter->HasUnescapedNewline)
1924  Parameter->MustBreakBefore = true;
1925  break;
1926  }
1927  }
1928  }
1929  } else if (Current->SpacesRequiredBefore == 0 &&
1930  spaceRequiredBefore(Line, *Current)) {
1931  Current->SpacesRequiredBefore = 1;
1932  }
1933 
  // A break that was already required is never removed here.
1934  Current->MustBreakBefore =
1935  Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1936 
1937  if (!Current->MustBreakBefore && InFunctionDecl &&
1938  Current->is(TT_FunctionDeclarationName))
1939  Current->MustBreakBefore = mustBreakForReturnType(Line);
1940 
1941  Current->CanBreakBefore =
1942  Current->MustBreakBefore || canBreakBefore(Line, *Current);
  // Size contributed by a single child line attached to the previous token.
1943  unsigned ChildSize = 0;
1944  if (Current->Previous->Children.size() == 1) {
1945  FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1946  ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1947  : LastOfChild.TotalLength + 1;
1948  }
1949  const FormatToken *Prev = Current->Previous;
  // When a break is unavoidable or the token is multiline, a full ColumnLimit
  // is added to the accumulated length instead of the token's actual width.
1950  if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
1951  (Prev->Children.size() == 1 &&
1952  Prev->Children[0]->First->MustBreakBefore) ||
1953  Current->IsMultiline)
1954  Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
1955  else
1956  Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
1957  ChildSize + Current->SpacesRequiredBefore;
1958 
  // From the constructor initializer colon onwards, InFunctionDecl is false.
1959  if (Current->is(TT_CtorInitializerColon))
1960  InFunctionDecl = false;
1961 
1962  // FIXME: Only calculate this if CanBreakBefore is true once static
1963  // initializers etc. are sorted out.
1964  // FIXME: Move magic numbers to a better place.
1965  Current->SplitPenalty = 20 * Current->BindingStrength +
1966  splitPenalty(Line, *Current, InFunctionDecl);
1967 
1968  Current = Current->Next;
1969  }
1970 
1971  calculateUnbreakableTailLengths(Line);
  // Second pass: assign the indent level of every token, starting from the
  // level of the line.
1972  unsigned IndentLevel = Line.Level;
1973  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1974  if (Current->Role)
1975  Current->Role->precomputeFormattingInfos(Current);
1976  if (Current->MatchingParen &&
1978  assert(IndentLevel > 0);
1979  --IndentLevel;
1980  }
1981  Current->IndentLevel = IndentLevel;
1982  if (Current->opensBlockOrBlockTypeList(Style))
1983  ++IndentLevel;
1984  }
1985 
1986  DEBUG({ printDebugInfo(Line); });
1987 }
1988 
1989 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1990  unsigned UnbreakableTailLength = 0;
1991  FormatToken *Current = Line.Last;
1992  while (Current) {
1993  Current->UnbreakableTailLength = UnbreakableTailLength;
1994  if (Current->CanBreakBefore ||
1995  Current->isOneOf(tok::comment, tok::string_literal)) {
1996  UnbreakableTailLength = 0;
1997  } else {
1998  UnbreakableTailLength +=
1999  Current->ColumnWidth + Current->SpacesRequiredBefore;
2000  }
2001  Current = Current->Previous;
2002  }
2003 }
2004 
// Returns the penalty for breaking the line between 'Tok.Previous' (Left) and
// 'Tok' (Right). Rules are checked in order and the first match wins.
// 'InFunctionDecl' is the caller's flag for being inside a function
// declaration.
// NOTE(review): several lines of this function are not visible in this
// listing; the comments below cover only what is shown.
2005 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
2006  const FormatToken &Tok,
2007  bool InFunctionDecl) {
2008  const FormatToken &Left = *Tok.Previous;
2009  const FormatToken &Right = Tok;
2010 
2011  if (Left.is(tok::semi))
2012  return 0;
2013 
  // Language-specific penalties. NOTE(review): the condition opening the first
  // branch is not visible; the 'else if' below handles JavaScript.
2015  if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
2016  return 1;
2017  if (Right.is(Keywords.kw_implements))
2018  return 2;
2019  if (Left.is(tok::comma) && Left.NestingLevel == 0)
2020  return 3;
2021  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2022  if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
2023  return 100;
2024  if (Left.is(TT_JsTypeColon))
2025  return 35;
2026  if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2027  (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2028  return 100;
2029  // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
2030  if (Left.opensScope() && Right.closesScope())
2031  return 200;
2032  }
2033 
2034  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2035  return 1;
2036  if (Right.is(tok::l_square)) {
2038  return 1;
2039  if (Left.is(tok::r_square))
2040  return 200;
2041  // Slightly prefer formatting local lambda definitions like functions.
2042  if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
2043  return 35;
2044  if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2045  TT_ArrayInitializerLSquare,
2046  TT_DesignatedInitializerLSquare))
2047  return 500;
2048  }
2049 
2050  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2051  Right.is(tok::kw_operator)) {
2052  if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
2053  return 3;
2054  if (Left.is(TT_StartOfName))
2055  return 110;
2056  if (InFunctionDecl && Right.NestingLevel == 0)
2058  return 200;
2059  }
2060  if (Right.is(TT_PointerOrReference))
2061  return 190;
2062  if (Right.is(TT_LambdaArrow))
2063  return 110;
2064  if (Left.is(tok::equal) && Right.is(tok::l_brace))
2065  return 160;
2066  if (Left.is(TT_CastRParen))
2067  return 100;
2068  if (Left.is(tok::coloncolon) ||
2069  (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
2070  return 500;
2071  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
2072  return 5000;
2073  if (Left.is(tok::comment))
2074  return 1000;
2075 
2076  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
2077  TT_CtorInitializerColon))
2078  return 2;
2079 
2080  if (Right.isMemberAccess()) {
2081  // Breaking before the "./->" of a chained call/member access is reasonably
2082  // cheap, as formatting those with one call per line is generally
2083  // desirable. In particular, it should be cheaper to break before the call
2084  // than it is to break inside a call's parameters, which could lead to weird
2085  // "hanging" indents. The exception is the very last "./->" to support this
2086  // frequent pattern:
2087  //
2088  // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
2089  // dddddddd);
2090  //
2091  // which might otherwise be blown up onto many lines. Here, clang-format
2092  // won't produce "hanging" indents anyway as there is no other trailing
2093  // call.
2094  //
2095  // Also apply a higher penalty if it is not a call, as that might lead to a
2096  // wrapping like:
2097  //
2098  // aaaaaaa
2099  // .aaaaaaaaa.bbbbbbbb(cccccccc);
2100  return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
2101  ? 150
2102  : 35;
2103  }
2104 
2105  if (Right.is(TT_TrailingAnnotation) &&
2106  (!Right.Next || Right.Next->isNot(tok::l_paren))) {
2107  // Moving trailing annotations to the next line is fine for ObjC method
2108  // declarations.
2109  if (Line.startsWith(TT_ObjCMethodSpecifier))
2110  return 10;
2111  // Generally, breaking before a trailing annotation is bad unless it is
2112  // function-like. It seems to be especially preferable to keep standard
2113  // annotations (i.e. "const", "final" and "override") on the same line.
2114  // Use a slightly higher penalty after ")" so that annotations like
2115  // "const override" are kept together.
2116  bool is_short_annotation = Right.TokenText.size() < 10;
2117  return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
2118  }
2119 
2120  // In for-loops, prefer breaking at ',' and ';'.
2121  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
2122  return 4;
2123 
2124  // In Objective-C method expressions, prefer breaking before "param:" over
2125  // breaking after it.
2126  if (Right.is(TT_SelectorName))
2127  return 0;
2128  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
2129  return Line.MightBeFunctionDecl ? 50 : 500;
2130 
2131  if (Left.is(tok::l_paren) && InFunctionDecl &&
2133  return 100;
2134  if (Left.is(tok::l_paren) && Left.Previous &&
2135  (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
2136  Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
2137  return 1000;
2138  if (Left.is(tok::equal) && InFunctionDecl)
2139  return 110;
2140  if (Right.is(tok::r_brace))
2141  return 1;
2142  if (Left.is(TT_TemplateOpener))
2143  return 100;
2144  if (Left.opensScope()) {
2146  return 0;
2148  : 19;
2149  }
2150  if (Left.is(TT_JavaAnnotation))
2151  return 50;
2152 
2153  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
2154  Left.Previous->isLabelString() &&
2155  (Left.NextOperator || Left.OperatorIndex != 0))
2156  return 50;
2157  if (Right.is(tok::plus) && Left.isLabelString() &&
2158  (Right.NextOperator || Right.OperatorIndex != 0))
2159  return 25;
2160  if (Left.is(tok::comma))
2161  return 1;
2162  if (Right.is(tok::lessless) && Left.isLabelString() &&
2163  (Right.NextOperator || Right.OperatorIndex != 1))
2164  return 25;
2165  if (Right.is(tok::lessless)) {
2166  // Breaking at a << is really cheap.
2167  if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
2168  // Slightly prefer to break before the first one in log-like statements.
2169  return 2;
2170  return 1;
2171  }
2172  if (Left.is(TT_ConditionalExpr))
2173  return prec::Conditional;
  // Fall back to the operator precedence of Left, or of Right if Left has
  // none.
2174  prec::Level Level = Left.getPrecedence();
2175  if (Level == prec::Unknown)
2176  Level = Right.getPrecedence();
2177  if (Level == prec::Assignment)
2179  if (Level != prec::Unknown)
2180  return Level;
2181 
2182  return 3;
2183 }
2184 
// Decides whether a space is required between the adjacent tokens 'Left' and
// 'Right' on 'Line'. Rules are checked in order and the first match wins; if
// no rule matches, a space is required.
// NOTE(review): a number of lines of this function are not visible in this
// listing; the comments below cover only what is shown.
2185 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2186  const FormatToken &Left,
2187  const FormatToken &Right) {
2188  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2189  return true;
2190  if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
2191  return true;
2193  Left.Tok.getObjCKeywordID() == tok::objc_property)
2194  return true;
2195  if (Right.is(tok::hashhash))
2196  return Left.is(tok::hash);
2197  if (Left.isOneOf(tok::hashhash, tok::hash))
2198  return Right.is(tok::hash);
2199  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
2201  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2202  return (Right.is(TT_CastRParen) ||
2203  (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2206  if (Right.isOneOf(tok::semi, tok::comma))
2207  return false;
2208  if (Right.is(tok::less) && Line.Type == LT_ObjCDecl &&
2210  return true;
2211  if (Right.is(tok::less) && Left.is(tok::kw_template))
2213  if (Left.isOneOf(tok::exclaim, tok::tilde))
2214  return false;
2215  if (Left.is(tok::at) &&
2216  Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
2217  tok::numeric_constant, tok::l_paren, tok::l_brace,
2218  tok::kw_true, tok::kw_false))
2219  return false;
2220  if (Left.is(tok::colon))
2221  return !Left.is(TT_ObjCMethodExpr);
2222  if (Left.is(tok::coloncolon))
2223  return false;
2224  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
2225  return false;
2226  if (Right.is(tok::ellipsis))
2227  return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
2228  Left.Previous->is(tok::kw_case));
2229  if (Left.is(tok::l_square) && Right.is(tok::amp))
2230  return false;
  // Space before a pointer or reference token.
2231  if (Right.is(TT_PointerOrReference)) {
2232  if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
2233  if (!Left.MatchingParen)
2234  return true;
2235  FormatToken *TokenBeforeMatchingParen =
2237  if (!TokenBeforeMatchingParen ||
2238  !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
2239  return true;
2240  }
2241  return (Left.Tok.isLiteral() ||
2242  (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
2244  (Line.IsMultiVariableDeclStmt &&
2245  (Left.NestingLevel == 0 ||
2246  (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
2247  }
2248  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
2249  (!Left.is(TT_PointerOrReference) ||
2251  !Line.IsMultiVariableDeclStmt)))
2252  return true;
  // Space after a pointer or reference token.
2253  if (Left.is(TT_PointerOrReference))
2254  return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
2255  (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
2256  !Right.is(TT_StartOfName)) ||
2257  (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
2258  (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
2259  tok::l_paren) &&
2261  !Line.IsMultiVariableDeclStmt) &&
2262  Left.Previous &&
2263  !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
2264  if (Right.is(tok::star) && Left.is(tok::l_paren))
2265  return false;
  // Spaces just inside square brackets depend on the bracket kind and on the
  // SpacesInContainerLiterals / SpacesInSquareBrackets style options.
2266  if (Left.is(tok::l_square))
2267  return (Left.is(TT_ArrayInitializerLSquare) &&
2268  Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
2269  (Left.isOneOf(TT_ArraySubscriptLSquare,
2270  TT_StructuredBindingLSquare) &&
2271  Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
2272  if (Right.is(tok::r_square))
2273  return Right.MatchingParen &&
2275  Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
2277  Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
2278  TT_StructuredBindingLSquare)));
2279  if (Right.is(tok::l_square) &&
2280  !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2281  TT_DesignatedInitializerLSquare,
2282  TT_StructuredBindingLSquare) &&
2283  !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
2284  return false;
2285  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
2286  return !Left.Children.empty(); // No spaces in "{}".
  // Inside braces that are not blocks, spacing follows Cpp11BracedListStyle.
2287  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
2288  (Right.is(tok::r_brace) && Right.MatchingParen &&
2289  Right.MatchingParen->BlockKind != BK_Block))
2290  return !Style.Cpp11BracedListStyle;
  // No space after a block comment that ends in "=*/".
2291  if (Left.is(TT_BlockComment))
2292  return !Left.TokenText.endswith("=*/");
  // Space before an opening parenthesis.
2293  if (Right.is(tok::l_paren)) {
2294  if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
2295  return true;
2296  return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
2298  (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
2299  tok::kw_switch, tok::kw_case, TT_ForEachMacro,
2300  TT_ObjCForIn) ||
2301  Left.endsSequence(tok::kw_constexpr, tok::kw_if) ||
2302  (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
2303  tok::kw_new, tok::kw_delete) &&
2304  (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
2306  (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
2307  Left.is(tok::r_paren)) &&
2308  Line.Type != LT_PreprocessorDirective);
2309  }
2310  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
2311  return false;
2312  if (Right.is(TT_UnaryOperator))
2313  return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
2314  (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
2315  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
2316  tok::r_paren) ||
2317  Left.isSimpleTypeSpecifier()) &&
2318  Right.is(tok::l_brace) && Right.getNextNonComment() &&
2319  Right.BlockKind != BK_Block)
2320  return false;
2321  if (Left.is(tok::period) || Right.is(tok::period))
2322  return false;
2323  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
2324  return false;
2325  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
2326  Left.MatchingParen->Previous &&
2327  Left.MatchingParen->Previous->is(tok::period))
2328  // A.<B<C<...>>>DoSomething();
2329  return false;
2330  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2331  return false;
2332  return true;
2333 }
2334 
// Decides whether a space is required before 'Right', i.e. between
// 'Right.Previous' and 'Right'. Language-specific rules are checked first,
// then rules shared by all languages; if none of them matches, the decision is
// delegated to spaceRequiredBetween().
// NOTE(review): several lines of this function are not visible in this
// listing; the comments below cover only what is shown.
2335 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2336  const FormatToken &Right) {
2337  const FormatToken &Left = *Right.Previous;
2338  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2339  return true; // Never ever merge two identifiers.
  // Language-specific rules.
2340  if (Style.isCpp()) {
2341  if (Left.is(tok::kw_operator))
2342  return Right.is(tok::coloncolon);
2343  } else if (Style.Language == FormatStyle::LK_Proto ||
2345  if (Right.is(tok::period) &&
2346  Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2347  Keywords.kw_repeated, Keywords.kw_extend))
2348  return true;
2349  if (Right.is(tok::l_paren) &&
2350  Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2351  return true;
2352  if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
2353  return true;
2354  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2355  if (Left.is(TT_JsFatArrow))
2356  return true;
2357  // for await ( ...
2358  if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
2359  Left.Previous->is(tok::kw_for))
2360  return true;
2361  if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
2362  Right.MatchingParen) {
2363  const FormatToken *Next = Right.MatchingParen->getNextNonComment();
2364  // An async arrow function, for example: `x = async () => foo();`,
2365  // as opposed to calling a function called async: `x = async();`
2366  if (Next && Next->is(TT_JsFatArrow))
2367  return true;
2368  }
2369  if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2370  (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2371  return false;
2372  // In tagged template literals ("html`bar baz`"), there is no space between
2373  // the tag identifier and the template string. getIdentifierInfo makes sure
2374  // that the identifier is not a pseudo keyword like `yield`, either.
2375  if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
2376  Right.is(TT_TemplateString))
2377  return false;
2378  if (Right.is(tok::star) &&
2379  Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2380  return false;
2381  if (Right.isOneOf(tok::l_brace, tok::l_square) &&
2382  Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
2383  Keywords.kw_extends, Keywords.kw_implements))
2384  return true;
2385  if (Right.is(tok::l_paren)) {
2386  // JS methods can use some keywords as names (e.g. `delete()`).
2387  if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
2388  return false;
2389  // Valid JS method names can include keywords, e.g. `foo.delete()` or
2390  // `bar.instanceof()`. Recognize call positions by preceding period.
2391  if (Left.Previous && Left.Previous->is(tok::period) &&
2392  Left.Tok.getIdentifierInfo())
2393  return false;
2394  // Additional unary JavaScript operators that need a space after.
2395  if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
2396  tok::kw_void))
2397  return true;
2398  }
2399  if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2400  tok::kw_const) ||
2401  // "of" is only a keyword if it appears after another identifier
2402  // (e.g. as "const x of y" in a for loop), or after a destructuring
2403  // operation (const [x, y] of z, const {a, b} of c).
2404  (Left.is(Keywords.kw_of) && Left.Previous &&
2405  (Left.Previous->Tok.getIdentifierInfo() ||
2406  Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
2407  (!Left.Previous || !Left.Previous->is(tok::period)))
2408  return true;
2409  if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
2410  Left.Previous->is(tok::period) && Right.is(tok::l_paren))
2411  return false;
2412  if (Left.is(Keywords.kw_as) &&
2413  Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
2414  return true;
2415  if (Left.is(tok::kw_default) && Left.Previous &&
2416  Left.Previous->is(tok::kw_export))
2417  return true;
2418  if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2419  return true;
2420  if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2421  return false;
2422  if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2423  return false;
2424  if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2425  Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2426  return false;
2427  if (Left.is(tok::ellipsis))
2428  return false;
2429  if (Left.is(TT_TemplateCloser) &&
2430  !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2431  Keywords.kw_implements, Keywords.kw_extends))
2432  // Type assertions ('<type>expr') are not followed by whitespace. Other
2433  // locations that should have whitespace following are identified by the
2434  // above set of follower tokens.
2435  return false;
2436  if (Right.is(TT_JsNonNullAssertion))
2437  return false;
2438  if (Left.is(TT_JsNonNullAssertion) &&
2439  Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
2440  return true; // "x! as string", "x! in y"
2441  } else if (Style.Language == FormatStyle::LK_Java) {
2442  if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2443  return true;
2444  if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2446  if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2447  tok::kw_protected) ||
2448  Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2449  Keywords.kw_native)) &&
2450  Right.is(TT_TemplateOpener))
2451  return true;
2452  }
  // Rules shared by all languages.
  // After an implicit string literal a space is required only if the
  // whitespace range before Right is non-empty.
2453  if (Left.is(TT_ImplicitStringLiteral))
2454  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2455  if (Line.Type == LT_ObjCMethodDecl) {
2456  if (Left.is(TT_ObjCMethodSpecifier))
2457  return true;
2458  if (Left.is(tok::r_paren) && Right.is(tok::identifier))
2459  // Don't space between ')' and <id>
2460  return false;
2461  }
2462  if (Line.Type == LT_ObjCProperty &&
2463  (Right.is(tok::equal) || Left.is(tok::equal)))
2464  return false;
2465 
2466  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2467  Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2468  return true;
2469  if (Right.is(TT_OverloadedOperatorLParen))
2471  if (Left.is(tok::comma))
2472  return true;
2473  if (Right.is(tok::comma))
2474  return false;
2475  if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
2476  return true;
  // Space before a colon.
2477  if (Right.is(tok::colon)) {
2478  if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2479  !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2480  return false;
2481  if (Right.is(TT_ObjCMethodExpr))
2482  return false;
2483  if (Left.is(tok::question))
2484  return false;
2485  if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2486  return false;
2487  if (Right.is(TT_DictLiteral))
2489  return true;
2490  }
2491  if (Left.is(TT_UnaryOperator))
2492  return Right.is(TT_BinaryOperator);
2493 
2494  // If the next token is a binary operator or a selector name, we have
2495  // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2496  if (Left.is(TT_CastRParen))
2497  return Style.SpaceAfterCStyleCast ||
2498  Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2499 
2500  if (Left.is(tok::greater) && Right.is(tok::greater))
2501  return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2503  if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
2504  Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2505  (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
2506  return false;
2508  Right.getPrecedence() == prec::Assignment)
2509  return false;
2510  if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
2511  // Generally don't remove existing spaces between an identifier and "::".
2512  // The identifier might actually be a macro name such as ALWAYS_INLINE. If
2513  // this turns out to be too lenient, add analysis of the identifier itself.
2514  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2515  if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2516  return (Left.is(TT_TemplateOpener) &&
2518  !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
2519  tok::kw___super, TT_TemplateCloser,
2520  TT_TemplateOpener));
  // Exactly one of "Left opens a template" / "Right closes a template" holds:
  // spacing follows the SpacesInAngles style option.
2521  if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2522  return Style.SpacesInAngles;
2523  // Space before TT_StructuredBindingLSquare.
2524  if (Right.is(TT_StructuredBindingLSquare))
2525  return !Left.isOneOf(tok::amp, tok::ampamp) ||
2527  // Space before & or && following a TT_StructuredBindingLSquare.
2528  if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
2529  Right.isOneOf(tok::amp, tok::ampamp))
2531  if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2532  (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2533  !Right.is(tok::r_paren)))
2534  return true;
2535  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2536  Right.isNot(TT_FunctionTypeLParen))
2538  if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2539  Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2540  return false;
2541  if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2542  Line.startsWith(tok::hash))
2543  return true;
2544  if (Right.is(TT_TrailingUnaryOperator))
2545  return false;
2546  if (Left.is(TT_RegexLiteral))
2547  return false;
  // No rule above matched: fall back to the token-pair rules.
2548  return spaceRequiredBetween(Line, Left, Right);
2549 }
2550 
2551 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2552 static bool isAllmanBrace(const FormatToken &Tok) {
2553  return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2554  !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2555 }
2556 
// Returns true when a line break is required immediately before 'Right' on
// 'Line'; returns false when none of the rules below forces one.
//
// 'Left' is the token preceding 'Right'. Right.Previous is dereferenced
// unconditionally, so 'Right' must have a predecessor.
//
// NOTE(review): the embedded listing numbers skip in several places in this
// copy (e.g. 2562 -> 2564, 2588 -> 2591, 2641 -> 2643), so some statements
// appear without their opening condition. Check the complete upstream file
// before relying on any individual rule shown here.
2557 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2558  const FormatToken &Right) {
2559  const FormatToken &Left = *Right.Previous;
  // More than one newline before 'Right' means the input had an empty line
  // here; keep a break whenever the style keeps any empty lines at all.
2560  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2561  return true;
2562 
2564  // FIXME: This might apply to other languages and token kinds.
2565  if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
2566  Left.Previous->is(tok::string_literal))
2567  return true;
2568  if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2569  Left.Previous && Left.Previous->is(tok::equal) &&
2570  Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2571  tok::kw_const) &&
2572  // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2573  // above.
2574  !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2575  // Object literals on the top level of a file are treated as "enum-style".
2576  // Each key/value pair is put on a separate line, instead of bin-packing.
2577  return true;
2578  if (Left.is(tok::l_brace) && Line.Level == 0 &&
2579  (Line.startsWith(tok::kw_enum) ||
2580  Line.startsWith(tok::kw_const, tok::kw_enum) ||
2581  Line.startsWith(tok::kw_export, tok::kw_enum) ||
2582  Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
2583  // JavaScript top-level enum key/value pairs are put on separate lines
2584  // instead of bin-packing.
2585  return true;
2586  if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2587  !Left.Children.empty())
2588  // Support AllowShortFunctionsOnASingleLine for JavaScript.
2591  (Left.NestingLevel == 0 && Line.Level == 0 &&
2594  } else if (Style.Language == FormatStyle::LK_Java) {
2595  if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2596  Right.Next->is(tok::string_literal))
2597  return true;
2598  } else if (Style.Language == FormatStyle::LK_Cpp ||
2601  if (Left.isStringLiteral() && Right.isStringLiteral())
2602  return true;
2603  }
2604 
2605  // If the last token before a '}', ']', or ')' is a comma or a trailing
2606  // comment, the intention is to insert a line break after it in order to make
2607  // shuffling around entries easier. Import statements, especially in
2608  // JavaScript, can be an exception to this rule.
2610  const FormatToken *BeforeClosingBrace = nullptr;
2611  if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
2613  Left.is(tok::l_paren))) &&
2614  Left.BlockKind != BK_Block && Left.MatchingParen)
2615  BeforeClosingBrace = Left.MatchingParen->Previous;
2616  else if (Right.MatchingParen &&
2617  (Right.MatchingParen->isOneOf(tok::l_brace,
2618  TT_ArrayInitializerLSquare) ||
2620  Right.MatchingParen->is(tok::l_paren))))
2621  BeforeClosingBrace = &Left;
2622  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2623  BeforeClosingBrace->isTrailingComment()))
2624  return true;
2625  }
2626 
  // A comment forces a break only if it already started on a new line in the
  // input, and not directly after a braced-init brace or a constructor
  // initializer colon.
2627  if (Right.is(tok::comment))
2628  return Left.BlockKind != BK_BracedInit &&
2629  Left.isNot(TT_CtorInitializerColon) &&
2630  (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2631  if (Left.isTrailingComment())
2632  return true;
2633  if (Right.Previous->IsUnterminatedLiteral)
2634  return true;
2635  if (Right.is(tok::lessless) && Right.Next &&
2636  Right.Previous->is(tok::string_literal) &&
2637  Right.Next->is(tok::string_literal))
2638  return true;
2639  if (Right.Previous->ClosesTemplateDeclaration &&
2640  Right.Previous->MatchingParen &&
2641  Right.Previous->MatchingParen->NestingLevel == 0 &&
2643  return true;
2644  if (Right.is(TT_CtorInitializerComma) &&
2647  return true;
2648  if (Right.is(TT_CtorInitializerColon) &&
2651  return true;
2652  // Break only if we have multiple inheritance.
2653  if (Style.BreakBeforeInheritanceComma && Right.is(TT_InheritanceComma))
2654  return true;
2655  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2656  // Raw string literals are special wrt. line breaks. The author has made a
2657  // deliberate choice and might have aligned the contents of the string
2658  // literal accordingly. Thus, we try to keep existing line breaks.
2659  return Right.NewlinesBefore > 0;
2660  if ((Right.Previous->is(tok::l_brace) ||
2661  (Right.Previous->is(tok::less) && Right.Previous->Previous &&
2662  Right.Previous->Previous->is(tok::equal))) &&
2663  Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
2664  // Don't put enums or option definitions onto single lines in protocol
2665  // buffers.
2666  return true;
2667  }
2668  if (Right.is(TT_InlineASMBrace))
2669  return Right.HasUnescapedNewline;
2670  if (isAllmanBrace(Left) || isAllmanBrace(Right))
2671  return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2672  (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
2674  (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2675  (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2676  if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2677  return true;
2678 
2681  Left.is(TT_LeadingJavaAnnotation) &&
2682  Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2683  (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2684  return true;
2685 
  // No rule demanded a break.
2686  return false;
2687 }
2688 
// Returns true when a line break is permitted immediately before 'Right' on
// 'Line', and false when breaking there is disallowed.
//
// 'Left' is the token preceding 'Right'. Right.Previous is dereferenced
// unconditionally, so 'Right' must have a predecessor. The rules are tested
// in order and the first one that matches decides the result.
//
// NOTE(review): the embedded listing numbers skip in several places in this
// copy (e.g. 2693 -> 2695, 2720 -> 2722, 2754 -> 2756, 2843 -> 2845), so some
// conditions appear without their return statement or opening line. Check the
// complete upstream file before relying on any individual rule shown here.
2689 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
2690  const FormatToken &Right) {
2691  const FormatToken &Left = *Right.Previous;
2692 
2693  // Language-specific stuff.
2695  if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2696  Keywords.kw_implements))
2697  return false;
2698  if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2699  Keywords.kw_implements))
2700  return true;
2701  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2702  const FormatToken *NonComment = Right.getPreviousNonComment();
2703  if (NonComment &&
2704  NonComment->isOneOf(
2705  tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
2706  tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
2707  tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
2708  Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
2709  Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
2710  return false; // Otherwise automatic semicolon insertion would trigger.
2711  if (Left.Tok.getIdentifierInfo() &&
2712  Right.startsSequence(tok::l_square, tok::r_square))
2713  return false; // breaking in "foo[]" creates illegal TS type syntax.
2714  if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
2715  return false;
2716  if (Left.is(TT_JsTypeColon))
2717  return true;
2718  if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
2719  return false;
2720  if (Left.is(Keywords.kw_in))
2722  if (Right.is(Keywords.kw_in))
2724  if (Right.is(Keywords.kw_as))
2725  return false; // must not break before as in 'x as type' casts
2726  if (Left.is(Keywords.kw_as))
2727  return true;
2728  if (Left.is(TT_JsNonNullAssertion))
2729  return true;
2730  if (Left.is(Keywords.kw_declare) &&
2731  Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
2732  Keywords.kw_function, tok::kw_class, tok::kw_enum,
2733  Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
2734  Keywords.kw_let, tok::kw_const))
2735  // See grammar for 'declare' statements at:
2736  // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
2737  return false;
2738  if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
2739  Right.isOneOf(tok::identifier, tok::string_literal))
2740  return false; // must not break in "module foo { ...}"
2741  if (Right.is(TT_TemplateString) && Right.closesScope())
2742  return false;
2743  if (Left.is(TT_TemplateString) && Left.opensScope())
2744  return true;
2745  }
2746 
  // Rules that apply regardless of language.
2747  if (Left.is(tok::at))
2748  return false;
2749  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
2750  return false;
2751  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
2752  return !Right.is(tok::l_paren);
2753  if (Right.is(TT_PointerOrReference))
2754  return Line.IsMultiVariableDeclStmt ||
2756  (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
2757  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2758  Right.is(tok::kw_operator))
2759  return true;
2760  if (Left.is(TT_PointerOrReference))
2761  return false;
2762  if (Right.isTrailingComment())
2763  // We rely on MustBreakBefore being set correctly here as we should not
2764  // change the "binding" behavior of a comment.
2765  // The first comment in a braced list is always interpreted as belonging to
2766  // the first list element. Otherwise, it should be placed outside of the
2767  // list.
2768  return Left.BlockKind == BK_BracedInit ||
2769  (Left.is(TT_CtorInitializerColon) &&
2771  if (Left.is(tok::question) && Right.is(tok::colon))
2772  return false;
2773  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
2775  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
2777  if (Right.is(TT_InheritanceColon))
2778  return true;
2779  if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
2780  Left.isNot(TT_SelectorName))
2781  return true;
2782  if (Right.is(tok::colon) &&
2783  !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
2784  return false;
2785  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))
2786  return true;
2787  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
2788  Right.Next->is(TT_ObjCMethodExpr)))
2789  return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
2790  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
2791  return true;
2792  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
2793  return true;
2794  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
2795  TT_OverloadedOperator))
2796  return false;
2797  if (Left.is(TT_RangeBasedForLoopColon))
2798  return true;
2799  if (Right.is(TT_RangeBasedForLoopColon))
2800  return false;
2801  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
2802  return true;
2803  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
2804  Left.is(tok::kw_operator))
2805  return false;
2806  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
2807  Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
2808  return false;
2809  if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
2810  return false;
2811  if (Left.is(tok::l_paren) && Left.Previous &&
2812  (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
2813  return false;
2814  if (Right.is(TT_ImplicitStringLiteral))
2815  return false;
2816 
2817  if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
2818  return false;
2819  if (Right.is(tok::r_square) && Right.MatchingParen &&
2820  Right.MatchingParen->is(TT_LambdaLSquare))
2821  return false;
2822 
2823  // We only break before r_brace if there was a corresponding break before
2824  // the l_brace, which is tracked by BreakBeforeClosingBrace.
2825  if (Right.is(tok::r_brace))
2826  return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
2827 
2828  // Allow breaking after a trailing annotation, e.g. after a method
2829  // declaration.
2830  if (Left.is(TT_TrailingAnnotation))
2831  return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
2832  tok::less, tok::coloncolon);
2833 
2834  if (Right.is(tok::kw___attribute))
2835  return true;
2836 
2837  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
2838  return true;
2839 
2840  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2841  return true;
2842 
2843  if (Left.is(TT_CtorInitializerColon))
2845  if (Right.is(TT_CtorInitializerColon))
2847  if (Left.is(TT_CtorInitializerComma) &&
2849  return false;
2850  if (Right.is(TT_CtorInitializerComma) &&
2852  return true;
2853  if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
2854  return false;
2855  if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
2856  return true;
2857  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
2858  (Left.is(tok::less) && Right.is(tok::less)))
2859  return false;
2860  if (Right.is(TT_BinaryOperator) &&
2863  Right.getPrecedence() != prec::Assignment))
2864  return true;
2865  if (Left.is(TT_ArrayInitializerLSquare))
2866  return true;
2867  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
2868  return true;
2869  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
2870  !Left.isOneOf(tok::arrowstar, tok::lessless) &&
2873  Left.getPrecedence() == prec::Assignment))
2874  return true;
  // Fallback: a break is allowed only around the token kinds listed here.
2875  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
2876  tok::kw_class, tok::kw_struct, tok::comment) ||
2877  Right.isMemberAccess() ||
2878  Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
2879  tok::colon, tok::l_square, tok::at) ||
2880  (Left.is(tok::r_paren) &&
2881  Right.isOneOf(tok::identifier, tok::kw_const)) ||
2882  (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
2883  (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
2884 }
2885 
2886 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
2887  llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
2888  const FormatToken *Tok = Line.First;
2889  while (Tok) {
2890  llvm::errs() << " M=" << Tok->MustBreakBefore
2891  << " C=" << Tok->CanBreakBefore
2892  << " T=" << getTokenTypeName(Tok->Type)
2893  << " S=" << Tok->SpacesRequiredBefore
2894  << " B=" << Tok->BlockParameterCount
2895  << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
2896  << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
2897  << " PPK=" << Tok->PackingKind << " FakeLParens=";
2898  for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
2899  llvm::errs() << Tok->FakeLParens[i] << "/";
2900  llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
2901  llvm::errs() << " Text='" << Tok->TokenText << "'\n";
2902  if (!Tok->Next)
2903  assert(Tok == Line.Last);
2904  Tok = Tok->Next;
2905  }
2906  llvm::errs() << "----\n";
2907 }
2908 
2909 } // namespace format
2910 } // namespace clang
Always break after the return type of top-level definitions.
Definition: Format.h:328
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers...
Definition: FormatToken.h:325
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:228
unsigned PenaltyBreakBeforeFirstCallParameter
The penalty for breaking a function call after call(.
Definition: Format.h:1324
Token Tok
The Token.
Definition: FormatToken.h:124
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1209
Defines the SourceManager interface.
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:204
Break constructor initializers before the colon and commas, and align the commas with the colon...
Definition: Format.h:810
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:215
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
bool isMemberAccess() const
Returns true if this is a "." or "->" accessing a member.
Definition: FormatToken.h:367
bool isFunctionLikeKeyword() const
Returns true if this is a keyword that can be used like a function call (e.g.
Definition: FormatToken.h:401
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:456
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
bool SpaceAfterTemplateKeyword
If true, a space will be inserted after the 'template' keyword.
Definition: Format.h:1449
PointerAlignmentStyle PointerAlignment
Pointer and reference alignment style.
Definition: Format.h:1362
Align pointer to the left.
Definition: Format.h:1348
Should be used for C, C++.
Definition: Format.h:1200
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:153
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:621
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:211
bool isBinaryOperator() const
Definition: FormatToken.h:389
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:130
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:283
Break after operators.
Definition: Format.h:406
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool CanBeExpression
tok::TokenKind ContextKind
unsigned UnbreakableTailLength
The length of following tokens until the next natural split point, or the next token that can be brok...
Definition: FormatToken.h:219
bool closesScope() const
Returns whether Tok is )]} or a template closing >.
Definition: FormatToken.h:359
bool JavaScriptWrapImports
Whether to wrap JavaScript import/export statements.
Definition: Format.h:1179
Always break after the return type.
Definition: Format.h:288
unsigned SplitPenalty
Penalty for inserting a line break before this token.
Definition: FormatToken.h:234
bool ColonIsForRangeExpr
prec::Level getPrecedence() const
Definition: FormatToken.h:443
unsigned ParameterCount
Number of parameters, if this is "(", "[" or "<".
Definition: FormatToken.h:192
unsigned FakeRParens
Insert this many fake ) after this token for correct indentation.
Definition: FormatToken.h:250
bool SpaceInEmptyParentheses
If true, spaces may be inserted into ().
Definition: Format.h:1507
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:182
Should be used for Java.
Definition: Format.h:1202
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:280
Always break after the return type of function definitions.
Definition: Format.h:316
Only merge functions defined inside a class.
Definition: Format.h:207
This file implements a token annotator, i.e.
bool BreakAfterJavaFieldAnnotations
Break after each annotation on a field in Java files.
Definition: Format.h:830
bool ConstructorInitializerAllOnOneLineOrOnePerLine
If the constructor initializers don't fit on a line, put each initializer on its own line...
Definition: Format.h:901
unsigned OperatorIndex
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, contains the 0-based operator index.
Definition: FormatToken.h:260
unsigned SpacesRequiredBefore
The number of spaces that should be inserted before this token.
Definition: FormatToken.h:179
bool isNot(T Kind) const
Definition: FormatToken.h:313
bool SpacesInParentheses
If true, spaces will be inserted after ( and before ).
Definition: Format.h:1555
const FormatToken & Tok
ReturnTypeBreakingStyle AlwaysBreakAfterReturnType
The function declaration return type breaking style to use.
Definition: Format.h:336
unsigned BlockParameterCount
Number of parameters that are nested blocks, if this is "(", "[" or "<".
Definition: FormatToken.h:196
bool InInheritanceList
bool SpaceBeforeAssignmentOperators
If false, spaces will be removed before assignment operators.
Definition: Format.h:1457
SpaceBeforeParensOptions SpaceBeforeParens
Defines in which cases to put a space before opening parentheses.
Definition: Format.h:1495
unsigned PenaltyBreakAssignment
The penalty for breaking around an assignment operator.
Definition: Format.h:1321
const char * getName() const
Definition: Token.h:166
Always put a space before opening parentheses, except when it's prohibited by the syntax rules (in fu...
Definition: Format.h:1491
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
unsigned ColumnLimit
The column limit.
Definition: Format.h:840
Never merge functions into a single line.
Definition: Format.h:193
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:448
AnnotatingParser & P
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:77
NodeId Parent
Definition: ASTDiff.cpp:192
bool isLabelString() const
Returns true if this is a string literal that's like a label, e.g.
Definition: FormatToken.h:421
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:306
Break after return type automatically.
Definition: Format.h:272
Only merge empty functions.
Definition: Format.h:215
Should be used for JavaScript.
Definition: Format.h:1204
const AnnotatedLine * Line
StateNode * Previous
SmallVector< AnnotatedLine *, 0 > Children
ParameterPackingKind PackingKind
If this is an opening parenthesis, how are the parameters packed?
Definition: FormatToken.h:207
bool SpacesInContainerLiterals
If true, spaces are inserted inside container literals (e.g.
Definition: Format.h:1541
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:778
bool SpacesInAngles
If true, spaces will be inserted after < and before > in template argument lists. ...
Definition: Format.h:1532
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:120
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine *> &Lines)
Adapts the indent levels of comment lines to the indent of the subsequent line.
static unsigned maxNestingDepth(const AnnotatedLine &Line)
FormatToken * FirstStartOfName
SourceLocation getEnd() const
bool isTrailingComment() const
Definition: FormatToken.h:394
void annotate(AnnotatedLine &Line)
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:63
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:25
static bool isAllmanBrace(const FormatToken &Tok)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:297
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Break constructor initializers after the colon and commas.
Definition: Format.h:817
bool BreakBeforeTernaryOperators
If true, ternary operators will be placed after line breaks.
Definition: Format.h:792
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:137
bool AlwaysBreakTemplateDeclarations
If true, always break after the template<...> of a template declaration.
Definition: Format.h:359
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool SpacesInCStyleCastParentheses
If true, spaces may be inserted into C style casts.
Definition: Format.h:1548
bool IsUnterminatedLiteral
Set to true if this token is an unterminated literal.
Definition: FormatToken.h:171
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
bool SpacesInSquareBrackets
If true, spaces will be inserted after [ and before ].
Definition: Format.h:1564
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:168
SmallVector< prec::Level, 4 > FakeLParens
Stores the number of required fake parentheses and the corresponding operator precedence.
Definition: FormatToken.h:248
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
Never put a space before opening parentheses.
Definition: Format.h:1469
unsigned PenaltyReturnTypeOnItsOwnLine
Penalty for putting the return type of a function onto its own line.
Definition: Format.h:1340
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:231
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1219
bool ColonIsObjCMethodExpr
Dataflow Directional Tag Classes.
ShortFunctionStyle AllowShortFunctionsOnASingleLine
Dependent on the value, int f() { return 0; } can be put on a single line.
Definition: Format.h:239
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:146
Use C++03-compatible syntax.
Definition: Format.h:1569
Always break after the return types of top-level functions.
Definition: Format.h:301
bool IsExpression
Use features of C++11, C++14 and C++1z (e.g.
Definition: Format.h:1572
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:937
static bool isFunctionDeclarationName(const FormatToken &Current, const AnnotatedLine &Line)
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:669
void calculateFormattingInformation(AnnotatedLine &Line)
bool SpaceAfterCStyleCast
If true, a space is inserted after C style casts.
Definition: Format.h:1442
bool BreakBeforeInheritanceComma
If true, in the class inheritance expression clang-format will break before : and ...
Definition: Format.h:860
Should be used for Objective-C, Objective-C++.
Definition: Format.h:1206
LanguageStandard Standard
Format compatible with this standard, e.g.
Definition: Format.h:1579
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, points to the next operator.
Definition: FormatToken.h:264
BreakConstructorInitializersStyle BreakConstructorInitializers
The constructor initializers style to use.
Definition: Format.h:821
char __ovld __cnfn max(char x, char y)
Returns y if x < y, otherwise it returns x.
unsigned BindingStrength
bool ClosesTemplateDeclaration
true if this is the ">" of "template<..>".
Definition: FormatToken.h:185
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:277
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:287
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:318
FormatToken * FirstObjCSelectorName
bool InTemplateArgument
bool ColonIsDictLiteral
The parameter type of a method or function.
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:434
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.h:465
Break before operators.
Definition: Format.h:430
bool ObjCSpaceBeforeProtocolList
Add a space in front of an Objective-C protocol list, i.e.
Definition: Format.h:1318
unsigned SpacesBeforeTrailingComments
The number of spaces before trailing line comments (// - comments).
Definition: Format.h:1523
bool opensScope() const
Returns whether Tok is ([{ or a template opening <.
Definition: FormatToken.h:352
bool AllowShortBlocksOnASingleLine
Allows contracting simple braced statements to a single line.
Definition: Format.h:174
unsigned LongestObjCSelectorName
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:162
bool AfterClass
Wrap class definitions.
Definition: Format.h:591
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1214
prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus11)
Return the precedence of the specified binary operator token.
Align pointer to the right.
Definition: Format.h:1353
unsigned BindingStrength
The binding strength of a token.
Definition: FormatToken.h:224
bool CaretFound
bool InCtorInitializer
bool isStringLiteral() const
Definition: FormatToken.h:329
SourceLocation getBegin() const
unsigned MaxEmptyLinesToKeep
The maximum number of consecutive empty lines to keep.
Definition: Format.h:1263
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:134
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:174
bool PartOfMultiVariableDeclStmt
Is this token part of a DeclStmt defining multiple variables?
Definition: FormatToken.h:269
bool IsForEachMacro
const FormatStyle & Style