clang  8.0.0svn
TokenAnnotator.cpp
Go to the documentation of this file.
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// Returns \c true if the token can be used as an identifier in
29 /// an Objective-C \c @selector, \c false otherwise.
30 ///
31 /// Because getFormattingLangOpts() always lexes source code as
32 /// Objective-C++, C++ keywords like \c new and \c delete are
33 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
34 ///
35 /// For Objective-C and Objective-C++, both identifiers and keywords
36 /// are valid inside @selector(...) (or a macro which
37 /// invokes @selector(...)). So, we treat any identifier or
38 /// keyword as a potential Objective-C selector component.
39 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
40  return Tok.Tok.getIdentifierInfo() != nullptr;
41 }
42 
43 /// A parser that gathers additional information about tokens.
44 ///
45 /// The \c TokenAnnotator tries to match parentheses and square brackets and
46 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
47 /// into template parameter lists.
48 class AnnotatingParser {
49 public:
50  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
51  const AdditionalKeywords &Keywords)
52  : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
53  Keywords(Keywords) {
54  Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
55  resetTokenMetadata(CurrentToken);
56  }
57 
58 private:
59  bool parseAngle() {
60  if (!CurrentToken || !CurrentToken->Previous)
61  return false;
62  if (NonTemplateLess.count(CurrentToken->Previous))
63  return false;
64 
65  const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
66  if (Previous.Previous) {
67  if (Previous.Previous->Tok.isLiteral())
68  return false;
69  if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
70  (!Previous.Previous->MatchingParen ||
71  !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
72  return false;
73  }
74 
75  FormatToken *Left = CurrentToken->Previous;
76  Left->ParentBracket = Contexts.back().ContextKind;
77  ScopedContextCreator ContextCreator(*this, tok::less, 12);
78 
79  // If this angle is in the context of an expression, we need to be more
80  // hesitant to detect it as opening template parameters.
81  bool InExprContext = Contexts.back().IsExpression;
82 
83  Contexts.back().IsExpression = false;
84  // If there's a template keyword before the opening angle bracket, this is a
85  // template parameter, not an argument.
86  Contexts.back().InTemplateArgument =
87  Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
88 
89  if (Style.Language == FormatStyle::LK_Java &&
90  CurrentToken->is(tok::question))
91  next();
92 
93  while (CurrentToken) {
94  if (CurrentToken->is(tok::greater)) {
95  Left->MatchingParen = CurrentToken;
96  CurrentToken->MatchingParen = Left;
97  // In TT_Proto, we must distignuish between:
98  // map<key, value>
99  // msg < item: data >
100  // msg: < item: data >
101  // In TT_TextProto, map<key, value> does not occur.
102  if (Style.Language == FormatStyle::LK_TextProto ||
103  (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
104  Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
105  CurrentToken->Type = TT_DictLiteral;
106  else
107  CurrentToken->Type = TT_TemplateCloser;
108  next();
109  return true;
110  }
111  if (CurrentToken->is(tok::question) &&
112  Style.Language == FormatStyle::LK_Java) {
113  next();
114  continue;
115  }
116  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
117  (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
118  Style.Language != FormatStyle::LK_Proto &&
119  Style.Language != FormatStyle::LK_TextProto))
120  return false;
121  // If a && or || is found and interpreted as a binary operator, this set
122  // of angles is likely part of something like "a < b && c > d". If the
123  // angles are inside an expression, the ||/&& might also be a binary
124  // operator that was misinterpreted because we are parsing template
125  // parameters.
126  // FIXME: This is getting out of hand, write a decent parser.
127  if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
128  CurrentToken->Previous->is(TT_BinaryOperator) &&
129  Contexts[Contexts.size() - 2].IsExpression &&
130  !Line.startsWith(tok::kw_template))
131  return false;
132  updateParameterCount(Left, CurrentToken);
133  if (Style.Language == FormatStyle::LK_Proto) {
134  if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
135  if (CurrentToken->is(tok::colon) ||
136  (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
137  Previous->isNot(tok::colon)))
138  Previous->Type = TT_SelectorName;
139  }
140  }
141  if (!consumeToken())
142  return false;
143  }
144  return false;
145  }
146 
/// Annotates a parenthesized token range. On entry the opening '(' is
/// CurrentToken->Previous; tokens are consumed until the matching ')' is found.
///
/// Along the way this decides whether the parens hold an expression, detects
/// function pointer/reference/block types, Objective-C @selector(...) and
/// for-in loops, and records how the parameters are packed across lines.
///
/// \param LookForDecls If true, the first sequence of identifier, then one of
/// '*', '&' or '&&', then identifier that is not followed by '=' gets its
/// operator marked as TT_BinaryOperator.
/// \returns true once the matching ')' has been linked to the '(' through
/// MatchingParen; false if there is no current token, a ']' or '}' is hit
/// first, consumeToken() fails, or the tokens run out.
147  bool parseParens(bool LookForDecls = false) {
148  if (!CurrentToken)
149  return false;
// Left is the token before CurrentToken, i.e. the opening '('.
150  FormatToken *Left = CurrentToken->Previous;
151  Left->ParentBracket = Contexts.back().ContextKind;
152  ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
153 
154  // FIXME: This is a bit of a hack. Do better.
155  Contexts.back().ColonIsForRangeExpr =
156  Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
157 
158  bool StartsObjCMethodExpr = false;
159  if (FormatToken *MaybeSel = Left->Previous) {
160  // @selector( starts a selector.
161  if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
162  MaybeSel->Previous->is(tok::at)) {
163  StartsObjCMethodExpr = true;
164  }
165  }
166 
// Decide whether the paren contents are an expression, mostly from the
// tokens that precede the '('. Only the first matching case applies.
167  if (Left->is(TT_OverloadedOperatorLParen)) {
168  Contexts.back().IsExpression = false;
169  } else if (Style.Language == FormatStyle::LK_JavaScript &&
170  (Line.startsWith(Keywords.kw_type, tok::identifier) ||
171  Line.startsWith(tok::kw_export, Keywords.kw_type,
172  tok::identifier))) {
173  // type X = (...);
174  // export type X = (...);
175  Contexts.back().IsExpression = false;
176  } else if (Left->Previous &&
177  (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
178  tok::kw_if, tok::kw_while, tok::l_paren,
179  tok::comma) ||
180  Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
181  Left->Previous->is(TT_BinaryOperator))) {
182  // static_assert, if and while usually contain expressions.
183  Contexts.back().IsExpression = true;
184  } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
185  (Left->Previous->is(Keywords.kw_function) ||
186  (Left->Previous->endsSequence(tok::identifier,
187  Keywords.kw_function)))) {
188  // function(...) or function f(...)
189  Contexts.back().IsExpression = false;
190  } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
191  Left->Previous->is(TT_JsTypeColon)) {
192  // let x: (SomeType);
193  Contexts.back().IsExpression = false;
194  } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
195  Left->Previous->MatchingParen &&
196  Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
197  // This is a parameter list of a lambda expression.
198  Contexts.back().IsExpression = false;
199  } else if (Line.InPPDirective &&
200  (!Left->Previous || !Left->Previous->is(tok::identifier))) {
201  Contexts.back().IsExpression = true;
// NOTE(review): Contexts[size() - 2] is presumably the context enclosing
// these parens, assuming ScopedContextCreator pushed a new entry - confirm.
202  } else if (Contexts[Contexts.size() - 2].CaretFound) {
203  // This is the parameter list of an ObjC block.
204  Contexts.back().IsExpression = false;
205  } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
206  Left->Type = TT_AttributeParen;
207  } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
208  // The first argument to a foreach macro is a declaration.
209  Contexts.back().IsForEachMacro = true;
210  Contexts.back().IsExpression = false;
211  } else if (Left->Previous && Left->Previous->MatchingParen &&
212  Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
213  Contexts.back().IsExpression = false;
214  } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
215  bool IsForOrCatch =
216  Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
217  Contexts.back().IsExpression = !IsForOrCatch;
218  }
219 
220  if (StartsObjCMethodExpr) {
221  Contexts.back().ColonIsObjCMethodExpr = true;
222  Left->Type = TT_ObjCMethodExpr;
223  }
224 
225  // MightBeFunctionType and ProbablyFunctionType are used for
226  // function pointer and reference types as well as Objective-C
227  // block types:
228  //
229  // void (*FunctionPointer)(void);
230  // void (&FunctionReference)(void);
231  // void (^ObjCBlock)(void);
232  bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
233  bool ProbablyFunctionType =
234  CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
// Packing bookkeeping: a newline seen inside the parens, and a comma whose
// following token stays on the same line.
235  bool HasMultipleLines = false;
236  bool HasMultipleParametersOnALine = false;
237  bool MightBeObjCForRangeLoop =
238  Left->Previous && Left->Previous->is(tok::kw_for);
239  FormatToken *PossibleObjCForInToken = nullptr;
240  while (CurrentToken) {
241  // LookForDecls is set when "if (" has been seen. Check for
242  // 'identifier' '*' 'identifier' followed by not '=' -- this
243  // '*' has to be a binary operator but determineStarAmpUsage() will
244  // categorize it as an unary operator, so set the right type here.
245  if (LookForDecls && CurrentToken->Next) {
246  FormatToken *Prev = CurrentToken->getPreviousNonComment();
247  if (Prev) {
248  FormatToken *PrevPrev = Prev->getPreviousNonComment();
249  FormatToken *Next = CurrentToken->Next;
250  if (PrevPrev && PrevPrev->is(tok::identifier) &&
251  Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
252  CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
253  Prev->Type = TT_BinaryOperator;
// Only the first such sequence is reclassified.
254  LookForDecls = false;
255  }
256  }
257  }
258 
259  if (CurrentToken->Previous->is(TT_PointerOrReference) &&
260  CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
261  tok::coloncolon))
262  ProbablyFunctionType = true;
263  if (CurrentToken->is(tok::comma))
264  MightBeFunctionType = false;
265  if (CurrentToken->Previous->is(TT_BinaryOperator))
266  Contexts.back().IsExpression = true;
// Matching ')' reached: finalize the annotations on both parens and return.
267  if (CurrentToken->is(tok::r_paren)) {
268  if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
269  (CurrentToken->Next->is(tok::l_paren) ||
270  (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
271  Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
272  : TT_FunctionTypeLParen;
273  Left->MatchingParen = CurrentToken;
274  CurrentToken->MatchingParen = Left;
275 
276  if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
277  Left->Previous && Left->Previous->is(tok::l_paren)) {
278  // Detect the case where macros are used to generate lambdas or
279  // function bodies, e.g.:
280  // auto my_lambda = MARCO((Type *type, int i) { .. body .. });
281  for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
282  if (Tok->is(TT_BinaryOperator) &&
283  Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
284  Tok->Type = TT_PointerOrReference;
285  }
286  }
287 
288  if (StartsObjCMethodExpr) {
289  CurrentToken->Type = TT_ObjCMethodExpr;
290  if (Contexts.back().FirstObjCSelectorName) {
291  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
292  Contexts.back().LongestObjCSelectorName;
293  }
294  }
295 
// The closing paren takes over attribute / Java annotation types.
296  if (Left->is(TT_AttributeParen))
297  CurrentToken->Type = TT_AttributeParen;
298  if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
299  CurrentToken->Type = TT_JavaAnnotation;
300  if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
301  CurrentToken->Type = TT_LeadingJavaAnnotation;
302 
// No newline inside the parens: inconclusive. Otherwise bin-packed if some
// comma was followed by a token on the same line, else one per line.
303  if (!HasMultipleLines)
304  Left->PackingKind = PPK_Inconclusive;
305  else if (HasMultipleParametersOnALine)
306  Left->PackingKind = PPK_BinPacked;
307  else
308  Left->PackingKind = PPK_OnePerLine;
309 
310  next();
311  return true;
312  }
// A closing bracket of another kind means the brackets are unbalanced.
313  if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
314  return false;
315 
316  if (CurrentToken->is(tok::l_brace))
317  Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
318  if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
319  !CurrentToken->Next->HasUnescapedNewline &&
320  !CurrentToken->Next->isTrailingComment())
321  HasMultipleParametersOnALine = true;
322  if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
323  CurrentToken->Previous->isSimpleTypeSpecifier()) &&
324  !CurrentToken->is(tok::l_brace))
325  Contexts.back().IsExpression = false;
// A ';' or ':' rules out an ObjC for-in loop; undo any tentative 'in' tag.
326  if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
327  MightBeObjCForRangeLoop = false;
328  if (PossibleObjCForInToken) {
329  PossibleObjCForInToken->Type = TT_Unknown;
330  PossibleObjCForInToken = nullptr;
331  }
332  }
333  if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
334  PossibleObjCForInToken = CurrentToken;
335  PossibleObjCForInToken->Type = TT_ObjCForIn;
336  }
337  // When we discover a 'new', we set CanBeExpression to 'false' in order to
338  // parse the type correctly. Reset that after a comma.
339  if (CurrentToken->is(tok::comma))
340  Contexts.back().CanBeExpression = true;
341 
// Remember the token being consumed; consumeToken() advances CurrentToken.
342  FormatToken *Tok = CurrentToken;
343  if (!consumeToken())
344  return false;
345  updateParameterCount(Left, Tok);
346  if (CurrentToken && CurrentToken->HasUnescapedNewline)
347  HasMultipleLines = true;
348  }
349  return false;
350  }
351 
352  bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
353  if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
354  return false;
355  const FormatToken *AttrTok = Tok.Next->Next;
356  if (!AttrTok)
357  return false;
358  // C++17 '[[using ns: foo, bar(baz, blech)]]'
359  // We assume nobody will name an ObjC variable 'using'.
360  if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
361  return true;
362  if (AttrTok->isNot(tok::identifier))
363  return false;
364  while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
365  // ObjC message send. We assume nobody will use : in a C++11 attribute
366  // specifier parameter, although this is technically valid:
367  // [[foo(:)]]
368  if (AttrTok->is(tok::colon) ||
369  AttrTok->startsSequence(tok::identifier, tok::identifier))
370  return false;
371  if (AttrTok->is(tok::ellipsis))
372  return true;
373  AttrTok = AttrTok->Next;
374  }
375  return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
376  }
377 
/// Annotates a square-bracketed token range. On entry the opening '[' is
/// CurrentToken->Previous.
///
/// The '[' is classified first (structured binding, Objective-C method
/// expression, C++11 attribute, JavaScript computed property name, designated
/// initializer, array subscript, proto extension or array initializer); the
/// tokens up to the matching ']' are then consumed, and the classification
/// may still be revised while doing so.
///
/// \returns true once the matching ']' has been linked to the '[' through
/// MatchingParen; false if there is no current token, a ')' or '}' is hit
/// first, consumeToken() fails, or the tokens run out.
378  bool parseSquare() {
379  if (!CurrentToken)
380  return false;
381 
382  // A '[' could be an index subscript (after an identifier or after
383  // ')' or ']'), it could be the start of an Objective-C method
384  // expression, it could the start of an Objective-C array literal,
385  // or it could be a C++ attribute specifier [[foo::bar]].
386  FormatToken *Left = CurrentToken->Previous;
387  Left->ParentBracket = Contexts.back().ContextKind;
// Parent is the nearest non-comment token before the '[' (may be null).
388  FormatToken *Parent = Left->getPreviousNonComment();
389 
390  // Cases where '>' is followed by '['.
391  // In C++, this can happen either in array of templates (foo<int>[10])
392  // or when array is a nested template type (unique_ptr<type1<type2>[]>).
393  bool CppArrayTemplates =
394  Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
395  (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
396  Contexts.back().InTemplateArgument);
397 
// Also true for any '[' nested inside an attribute specifier.
398  bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
399  Contexts.back().InCpp11AttributeSpecifier;
400 
401  bool StartsObjCMethodExpr =
402  !CppArrayTemplates && Style.isCpp() && !IsCpp11AttributeSpecifier &&
403  Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
404  !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
405  (!Parent ||
406  Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
407  tok::kw_return, tok::kw_throw) ||
408  Parent->isUnaryOperator() ||
409  // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
410  Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
411  getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
412  bool ColonFound = false;
413 
// Passed to ScopedContextCreator below; array subscripts and proto
// extensions use 10 instead of the default 1.
414  unsigned BindingIncrease = 1;
415  if (Left->isCppStructuredBinding(Style)) {
416  Left->Type = TT_StructuredBindingLSquare;
// Only classify a '[' that has not been given a type yet.
417  } else if (Left->is(TT_Unknown)) {
418  if (StartsObjCMethodExpr) {
419  Left->Type = TT_ObjCMethodExpr;
420  } else if (IsCpp11AttributeSpecifier) {
421  Left->Type = TT_AttributeSquare;
422  } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
423  Contexts.back().ContextKind == tok::l_brace &&
424  Parent->isOneOf(tok::l_brace, tok::comma)) {
425  Left->Type = TT_JsComputedPropertyName;
426  } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
427  Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
428  Left->Type = TT_DesignatedInitializerLSquare;
429  } else if (CurrentToken->is(tok::r_square) && Parent &&
430  Parent->is(TT_TemplateCloser)) {
431  Left->Type = TT_ArraySubscriptLSquare;
432  } else if (Style.Language == FormatStyle::LK_Proto ||
433  Style.Language == FormatStyle::LK_TextProto) {
434  // Square braces in LK_Proto can either be message field attributes:
435  //
436  // optional Aaa aaa = 1 [
437  // (aaa) = aaa
438  // ];
439  //
440  // extensions 123 [
441  // (aaa) = aaa
442  // ];
443  //
444  // or text proto extensions (in options):
445  //
446  // option (Aaa.options) = {
447  // [type.type/type] {
448  // key: value
449  // }
450  // }
451  //
452  // or repeated fields (in options):
453  //
454  // option (Aaa.options) = {
455  // keys: [ 1, 2, 3 ]
456  // }
457  //
458  // In the first and the third case we want to spread the contents inside
459  // the square braces; in the second we want to keep them inline.
460  Left->Type = TT_ArrayInitializerLSquare;
461  if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
462  tok::equal) &&
463  !Left->endsSequence(tok::l_square, tok::numeric_constant,
464  tok::identifier) &&
465  !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
466  Left->Type = TT_ProtoExtensionLSquare;
467  BindingIncrease = 10;
468  }
469  } else if (!CppArrayTemplates && Parent &&
470  Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
471  tok::comma, tok::l_paren, tok::l_square,
472  tok::question, tok::colon, tok::kw_return,
473  // Should only be relevant to JavaScript:
474  tok::kw_default)) {
475  Left->Type = TT_ArrayInitializerLSquare;
476  } else {
477  BindingIncrease = 10;
478  Left->Type = TT_ArraySubscriptLSquare;
479  }
480  }
481 
482  ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
483  Contexts.back().IsExpression = true;
484  if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
485  Parent->is(TT_JsTypeColon))
486  Contexts.back().IsExpression = false;
487 
488  Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
489  Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
490 
491  while (CurrentToken) {
// Matching ']' reached: finalize the annotations on both brackets and return.
492  if (CurrentToken->is(tok::r_square)) {
493  if (IsCpp11AttributeSpecifier)
494  CurrentToken->Type = TT_AttributeSquare;
495  else if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
496  Left->is(TT_ObjCMethodExpr)) {
497  // An ObjC method call is rarely followed by an open parenthesis.
498  // FIXME: Do we incorrectly label ":" with this?
499  StartsObjCMethodExpr = false;
500  Left->Type = TT_Unknown;
501  }
// Skipped for an empty "[]", where the token before ']' is the '[' itself.
502  if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
503  CurrentToken->Type = TT_ObjCMethodExpr;
504  // If we haven't seen a colon yet, make sure the last identifier
505  // before the r_square is tagged as a selector name component.
506  if (!ColonFound && CurrentToken->Previous &&
507  CurrentToken->Previous->is(TT_Unknown) &&
508  canBeObjCSelectorComponent(*CurrentToken->Previous))
509  CurrentToken->Previous->Type = TT_SelectorName;
510  // determineStarAmpUsage() thinks that '*' '[' is allocating an
511  // array of pointers, but if '[' starts a selector then '*' is a
512  // binary operator.
513  if (Parent && Parent->is(TT_PointerOrReference))
514  Parent->Type = TT_BinaryOperator;
515  }
516  Left->MatchingParen = CurrentToken;
517  CurrentToken->MatchingParen = Left;
518  // FirstObjCSelectorName is set when a colon is found. This does
519  // not work, however, when the method has no parameters.
520  // Here, we set FirstObjCSelectorName when the end of the method call is
521  // reached, in case it was not set already.
522  if (!Contexts.back().FirstObjCSelectorName) {
523  FormatToken* Previous = CurrentToken->getPreviousNonComment();
524  if (Previous && Previous->is(TT_SelectorName)) {
525  Previous->ObjCSelectorNameParts = 1;
526  Contexts.back().FirstObjCSelectorName = Previous;
527  }
528  } else {
529  Left->ParameterCount =
530  Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
531  }
532  if (Contexts.back().FirstObjCSelectorName) {
533  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
534  Contexts.back().LongestObjCSelectorName;
// With more than one block argument, the longest selector name is reset to 0.
535  if (Left->BlockParameterCount > 1)
536  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
537  }
538  next();
539  return true;
540  }
// A closing bracket of another kind means the brackets are unbalanced.
541  if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
542  return false;
543  if (CurrentToken->is(tok::colon)) {
544  if (IsCpp11AttributeSpecifier &&
545  CurrentToken->endsSequence(tok::colon, tok::identifier,
546  tok::kw_using)) {
547  // Remember that this is a [[using ns: foo]] C++ attribute, so we
548  // don't add a space before the colon (unlike other colons).
549  CurrentToken->Type = TT_AttributeColon;
// A colon inside what was typed as a subscript or designated initializer
// reclassifies the '[' as an ObjC method expression.
550  } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
551  TT_DesignatedInitializerLSquare)) {
552  Left->Type = TT_ObjCMethodExpr;
553  StartsObjCMethodExpr = true;
554  Contexts.back().ColonIsObjCMethodExpr = true;
555  if (Parent && Parent->is(tok::r_paren))
556  // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
557  Parent->Type = TT_CastRParen;
558  }
559  ColonFound = true;
560  }
// A comma before any colon: retype the '[' as an array initializer.
561  if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
562  !ColonFound)
563  Left->Type = TT_ArrayInitializerLSquare;
// Remember the token being consumed; consumeToken() advances CurrentToken.
564  FormatToken *Tok = CurrentToken;
565  if (!consumeToken())
566  return false;
567  updateParameterCount(Left, Tok);
568  }
569  return false;
570  }
571 
572  bool parseBrace() {
573  if (CurrentToken) {
574  FormatToken *Left = CurrentToken->Previous;
575  Left->ParentBracket = Contexts.back().ContextKind;
576 
577  if (Contexts.back().CaretFound)
578  Left->Type = TT_ObjCBlockLBrace;
579  Contexts.back().CaretFound = false;
580 
581  ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
582  Contexts.back().ColonIsDictLiteral = true;
583  if (Left->BlockKind == BK_BracedInit)
584  Contexts.back().IsExpression = true;
585  if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
586  Left->Previous->is(TT_JsTypeColon))
587  Contexts.back().IsExpression = false;
588 
589  while (CurrentToken) {
590  if (CurrentToken->is(tok::r_brace)) {
591  Left->MatchingParen = CurrentToken;
592  CurrentToken->MatchingParen = Left;
593  next();
594  return true;
595  }
596  if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
597  return false;
598  updateParameterCount(Left, CurrentToken);
599  if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
600  FormatToken *Previous = CurrentToken->getPreviousNonComment();
601  if (Previous->is(TT_JsTypeOptionalQuestion))
602  Previous = Previous->getPreviousNonComment();
603  if ((CurrentToken->is(tok::colon) &&
604  (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
605  Style.Language == FormatStyle::LK_Proto ||
606  Style.Language == FormatStyle::LK_TextProto) {
607  Left->Type = TT_DictLiteral;
608  if (Previous->Tok.getIdentifierInfo() ||
609  Previous->is(tok::string_literal))
610  Previous->Type = TT_SelectorName;
611  }
612  if (CurrentToken->is(tok::colon) ||
613  Style.Language == FormatStyle::LK_JavaScript)
614  Left->Type = TT_DictLiteral;
615  }
616  if (CurrentToken->is(tok::comma) &&
617  Style.Language == FormatStyle::LK_JavaScript)
618  Left->Type = TT_DictLiteral;
619  if (!consumeToken())
620  return false;
621  }
622  }
623  return true;
624  }
625 
626  void updateParameterCount(FormatToken *Left, FormatToken *Current) {
627  // For ObjC methods, the number of parameters is calculated differently as
628  // method declarations have a different structure (the parameters are not
629  // inside a bracket scope).
630  if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
631  ++Left->BlockParameterCount;
632  if (Current->is(tok::comma)) {
633  ++Left->ParameterCount;
634  if (!Left->Role)
635  Left->Role.reset(new CommaSeparatedList(Style));
636  Left->Role->CommaFound(Current);
637  } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
638  Left->ParameterCount = 1;
639  }
640  }
641 
642  bool parseConditional() {
643  while (CurrentToken) {
644  if (CurrentToken->is(tok::colon)) {
645  CurrentToken->Type = TT_ConditionalExpr;
646  next();
647  return true;
648  }
649  if (!consumeToken())
650  return false;
651  }
652  return false;
653  }
654 
655  bool parseTemplateDeclaration() {
656  if (CurrentToken && CurrentToken->is(tok::less)) {
657  CurrentToken->Type = TT_TemplateOpener;
658  next();
659  if (!parseAngle())
660  return false;
661  if (CurrentToken)
662  CurrentToken->Previous->ClosesTemplateDeclaration = true;
663  return true;
664  }
665  return false;
666  }
667 
668  bool consumeToken() {
669  FormatToken *Tok = CurrentToken;
670  next();
671  switch (Tok->Tok.getKind()) {
672  case tok::plus:
673  case tok::minus:
674  if (!Tok->Previous && Line.MustBeDeclaration)
675  Tok->Type = TT_ObjCMethodSpecifier;
676  break;
677  case tok::colon:
678  if (!Tok->Previous)
679  return false;
680  // Colons from ?: are handled in parseConditional().
681  if (Style.Language == FormatStyle::LK_JavaScript) {
682  if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
683  (Contexts.size() == 1 && // switch/case labels
684  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
685  Contexts.back().ContextKind == tok::l_paren || // function params
686  Contexts.back().ContextKind == tok::l_square || // array type
687  (!Contexts.back().IsExpression &&
688  Contexts.back().ContextKind == tok::l_brace) || // object type
689  (Contexts.size() == 1 &&
690  Line.MustBeDeclaration)) { // method/property declaration
691  Contexts.back().IsExpression = false;
692  Tok->Type = TT_JsTypeColon;
693  break;
694  }
695  }
696  if (Contexts.back().ColonIsDictLiteral ||
697  Style.Language == FormatStyle::LK_Proto ||
698  Style.Language == FormatStyle::LK_TextProto) {
699  Tok->Type = TT_DictLiteral;
700  if (Style.Language == FormatStyle::LK_TextProto) {
701  if (FormatToken *Previous = Tok->getPreviousNonComment())
702  Previous->Type = TT_SelectorName;
703  }
704  } else if (Contexts.back().ColonIsObjCMethodExpr ||
705  Line.startsWith(TT_ObjCMethodSpecifier)) {
706  Tok->Type = TT_ObjCMethodExpr;
707  const FormatToken *BeforePrevious = Tok->Previous->Previous;
708  // Ensure we tag all identifiers in method declarations as
709  // TT_SelectorName.
710  bool UnknownIdentifierInMethodDeclaration =
711  Line.startsWith(TT_ObjCMethodSpecifier) &&
712  Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
713  if (!BeforePrevious ||
714  // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
715  !(BeforePrevious->is(TT_CastRParen) ||
716  (BeforePrevious->is(TT_ObjCMethodExpr) &&
717  BeforePrevious->is(tok::colon))) ||
718  BeforePrevious->is(tok::r_square) ||
719  Contexts.back().LongestObjCSelectorName == 0 ||
720  UnknownIdentifierInMethodDeclaration) {
721  Tok->Previous->Type = TT_SelectorName;
722  if (!Contexts.back().FirstObjCSelectorName)
723  Contexts.back().FirstObjCSelectorName = Tok->Previous;
724  else if (Tok->Previous->ColumnWidth >
725  Contexts.back().LongestObjCSelectorName)
726  Contexts.back().LongestObjCSelectorName =
727  Tok->Previous->ColumnWidth;
728  Tok->Previous->ParameterIndex =
729  Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
730  ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
731  }
732  } else if (Contexts.back().ColonIsForRangeExpr) {
733  Tok->Type = TT_RangeBasedForLoopColon;
734  } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
735  Tok->Type = TT_BitFieldColon;
736  } else if (Contexts.size() == 1 &&
737  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
738  if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
739  tok::kw_noexcept))
740  Tok->Type = TT_CtorInitializerColon;
741  else
742  Tok->Type = TT_InheritanceColon;
743  } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
744  (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
745  (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
746  Tok->Next->Next->is(tok::colon)))) {
747  // This handles a special macro in ObjC code where selectors including
748  // the colon are passed as macro arguments.
749  Tok->Type = TT_ObjCMethodExpr;
750  } else if (Contexts.back().ContextKind == tok::l_paren) {
751  Tok->Type = TT_InlineASMColon;
752  }
753  break;
754  case tok::pipe:
755  case tok::amp:
756  // | and & in declarations/type expressions represent union and
757  // intersection types, respectively.
758  if (Style.Language == FormatStyle::LK_JavaScript &&
759  !Contexts.back().IsExpression)
760  Tok->Type = TT_JsTypeOperator;
761  break;
762  case tok::kw_if:
763  case tok::kw_while:
764  if (Tok->is(tok::kw_if) && CurrentToken &&
765  CurrentToken->is(tok::kw_constexpr))
766  next();
767  if (CurrentToken && CurrentToken->is(tok::l_paren)) {
768  next();
769  if (!parseParens(/*LookForDecls=*/true))
770  return false;
771  }
772  break;
773  case tok::kw_for:
774  if (Style.Language == FormatStyle::LK_JavaScript) {
775  // x.for and {for: ...}
776  if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
777  (Tok->Next && Tok->Next->is(tok::colon)))
778  break;
779  // JS' for await ( ...
780  if (CurrentToken && CurrentToken->is(Keywords.kw_await))
781  next();
782  }
783  Contexts.back().ColonIsForRangeExpr = true;
784  next();
785  if (!parseParens())
786  return false;
787  break;
788  case tok::l_paren:
789  // When faced with 'operator()()', the kw_operator handler incorrectly
790  // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
791  // the first two parens OverloadedOperators and the second l_paren an
792  // OverloadedOperatorLParen.
793  if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
794  Tok->Previous->MatchingParen &&
795  Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
796  Tok->Previous->Type = TT_OverloadedOperator;
797  Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
798  Tok->Type = TT_OverloadedOperatorLParen;
799  }
800 
801  if (!parseParens())
802  return false;
803  if (Line.MustBeDeclaration && Contexts.size() == 1 &&
804  !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
805  (!Tok->Previous ||
806  !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
807  TT_LeadingJavaAnnotation)))
808  Line.MightBeFunctionDecl = true;
809  break;
810  case tok::l_square:
811  if (!parseSquare())
812  return false;
813  break;
814  case tok::l_brace:
815  if (Style.Language == FormatStyle::LK_TextProto) {
816  FormatToken *Previous = Tok->getPreviousNonComment();
817  if (Previous && Previous->Type != TT_DictLiteral)
818  Previous->Type = TT_SelectorName;
819  }
820  if (!parseBrace())
821  return false;
822  break;
823  case tok::less:
824  if (parseAngle()) {
825  Tok->Type = TT_TemplateOpener;
826  // In TT_Proto, we must distinguish between:
827  // map<key, value>
828  // msg < item: data >
829  // msg: < item: data >
830  // In TT_TextProto, map<key, value> does not occur.
831  if (Style.Language == FormatStyle::LK_TextProto ||
832  (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
833  Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
834  Tok->Type = TT_DictLiteral;
835  FormatToken *Previous = Tok->getPreviousNonComment();
836  if (Previous && Previous->Type != TT_DictLiteral)
837  Previous->Type = TT_SelectorName;
838  }
839  } else {
840  Tok->Type = TT_BinaryOperator;
841  NonTemplateLess.insert(Tok);
842  CurrentToken = Tok;
843  next();
844  }
845  break;
846  case tok::r_paren:
847  case tok::r_square:
848  return false;
849  case tok::r_brace:
850  // Lines can start with '}'.
851  if (Tok->Previous)
852  return false;
853  break;
854  case tok::greater:
855  if (Style.Language != FormatStyle::LK_TextProto)
856  Tok->Type = TT_BinaryOperator;
857  break;
858  case tok::kw_operator:
859  if (Style.Language == FormatStyle::LK_TextProto ||
860  Style.Language == FormatStyle::LK_Proto)
861  break;
862  while (CurrentToken &&
863  !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
864  if (CurrentToken->isOneOf(tok::star, tok::amp))
865  CurrentToken->Type = TT_PointerOrReference;
866  consumeToken();
867  if (CurrentToken &&
868  CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
869  tok::comma))
870  CurrentToken->Previous->Type = TT_OverloadedOperator;
871  }
872  if (CurrentToken) {
873  CurrentToken->Type = TT_OverloadedOperatorLParen;
874  if (CurrentToken->Previous->is(TT_BinaryOperator))
875  CurrentToken->Previous->Type = TT_OverloadedOperator;
876  }
877  break;
878  case tok::question:
879  if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
880  Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
881  tok::r_brace)) {
882  // Question marks before semicolons, colons, etc. indicate optional
883  // types (fields, parameters), e.g.
884  // function(x?: string, y?) {...}
885  // class X { y?; }
886  Tok->Type = TT_JsTypeOptionalQuestion;
887  break;
888  }
889  // Declarations cannot be conditional expressions, this can only be part
890  // of a type declaration.
891  if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
892  Style.Language == FormatStyle::LK_JavaScript)
893  break;
894  parseConditional();
895  break;
896  case tok::kw_template:
897  parseTemplateDeclaration();
898  break;
899  case tok::comma:
900  if (Contexts.back().InCtorInitializer)
901  Tok->Type = TT_CtorInitializerComma;
902  else if (Contexts.back().InInheritanceList)
903  Tok->Type = TT_InheritanceComma;
904  else if (Contexts.back().FirstStartOfName &&
905  (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
906  Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
907  Line.IsMultiVariableDeclStmt = true;
908  }
909  if (Contexts.back().IsForEachMacro)
910  Contexts.back().IsExpression = true;
911  break;
912  case tok::identifier:
913  if (Tok->isOneOf(Keywords.kw___has_include,
914  Keywords.kw___has_include_next)) {
915  parseHasInclude();
916  }
917  break;
918  default:
919  break;
920  }
921  return true;
922  }
923 
924  void parseIncludeDirective() {
925  if (CurrentToken && CurrentToken->is(tok::less)) {
926  next();
927  while (CurrentToken) {
928  // Mark tokens up to the trailing line comments as implicit string
929  // literals.
930  if (CurrentToken->isNot(tok::comment) &&
931  !CurrentToken->TokenText.startswith("//"))
932  CurrentToken->Type = TT_ImplicitStringLiteral;
933  next();
934  }
935  }
936  }
937 
938  void parseWarningOrError() {
939  next();
940  // We still want to format the whitespace left of the first token of the
941  // warning or error.
942  next();
943  while (CurrentToken) {
944  CurrentToken->Type = TT_ImplicitStringLiteral;
945  next();
946  }
947  }
948 
949  void parsePragma() {
950  next(); // Consume "pragma".
951  if (CurrentToken &&
952  CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
953  bool IsMark = CurrentToken->is(Keywords.kw_mark);
954  next(); // Consume "mark".
955  next(); // Consume first token (so we fix leading whitespace).
956  while (CurrentToken) {
957  if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
958  CurrentToken->Type = TT_ImplicitStringLiteral;
959  next();
960  }
961  }
962  }
963 
964  void parseHasInclude() {
965  if (!CurrentToken || !CurrentToken->is(tok::l_paren))
966  return;
967  next(); // '('
968  parseIncludeDirective();
969  next(); // ')'
970  }
971 
/// Annotates a line whose current token is '#'; \c parseLine() delegates
/// here in that case.
///
/// The '#' is consumed first. Depending on what follows, the rest of the
/// line is handed to one of the directive-specific helpers; any tokens left
/// afterwards are scanned for parentheses and \c __has_include uses.
///
/// \returns \c LT_ImportStatement for include/import directives and for
/// JavaScript shebang lines, otherwise the value held in \c Type.
972  LineType parsePreprocessorDirective() {
973  bool IsFirstToken = CurrentToken->IsFirst;
      // NOTE(review): `Type` is read and assigned below, but its declaration
      // is not visible in this listing (embedded line number 974 is skipped
      // here). Confirm the declaration and its initial value upstream.
975  next();
976  if (!CurrentToken)
977  return Type;
978 
979  if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
980  // JavaScript files can contain shebang lines of the form:
981  // #!/usr/bin/env node
982  // Treat these like C++ #include directives.
983  while (CurrentToken) {
984  // Tokens cannot be comments here.
985  CurrentToken->Type = TT_ImplicitStringLiteral;
986  next();
987  }
988  return LT_ImportStatement;
989  }
990 
      // '#' directly followed by a number: only force a single space before
      // the number and leave the rest of the line unannotated.
991  if (CurrentToken->Tok.is(tok::numeric_constant)) {
992  CurrentToken->SpacesRequiredBefore = 1;
993  return Type;
994  }
995  // Hashes in the middle of a line can lead to any strange token
996  // sequence.
997  if (!CurrentToken->Tok.getIdentifierInfo())
998  return Type;
      // Dispatch on the directive keyword that follows the '#'.
999  switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1000  case tok::pp_include:
1001  case tok::pp_include_next:
1002  case tok::pp_import:
1003  next();
1004  parseIncludeDirective();
1005  Type = LT_ImportStatement;
1006  break;
1007  case tok::pp_error:
1008  case tok::pp_warning:
1009  parseWarningOrError();
1010  break;
1011  case tok::pp_pragma:
1012  parsePragma();
1013  break;
1014  case tok::pp_if:
1015  case tok::pp_elif:
      // The condition is annotated like a regular expression line. The
      // value returned by parseLine() is intentionally not used here.
1016  Contexts.back().IsExpression = true;
1017  parseLine();
1018  break;
1019  default:
1020  break;
1021  }
      // Walk whatever is left on the line, annotating parenthesized groups
      // and __has_include / __has_include_next arguments.
1022  while (CurrentToken) {
1023  FormatToken *Tok = CurrentToken;
1024  next();
1025  if (Tok->is(tok::l_paren))
1026  parseParens();
1027  else if (Tok->isOneOf(Keywords.kw___has_include,
1028  Keywords.kw___has_include_next))
1029  parseHasInclude();
1030  }
1031  return Type;
1032  }
1033 
1034 public:
1035  LineType parseLine() {
1036  NonTemplateLess.clear();
1037  if (CurrentToken->is(tok::hash))
1038  return parsePreprocessorDirective();
1039 
1040  // Directly allow to 'import <string-literal>' to support protocol buffer
1041  // definitions (github.com/google/protobuf) or missing "#" (either way we
1042  // should not break the line).
1043  IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1044  if ((Style.Language == FormatStyle::LK_Java &&
1045  CurrentToken->is(Keywords.kw_package)) ||
1046  (Info && Info->getPPKeywordID() == tok::pp_import &&
1047  CurrentToken->Next &&
1048  CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1049  tok::kw_static))) {
1050  next();
1051  parseIncludeDirective();
1052  return LT_ImportStatement;
1053  }
1054 
1055  // If this line starts and ends in '<' and '>', respectively, it is likely
1056  // part of "#define <a/b.h>".
1057  if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1058  parseIncludeDirective();
1059  return LT_ImportStatement;
1060  }
1061 
1062  // In .proto files, top-level options are very similar to import statements
1063  // and should not be line-wrapped.
1064  if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1065  CurrentToken->is(Keywords.kw_option)) {
1066  next();
1067  if (CurrentToken && CurrentToken->is(tok::identifier))
1068  return LT_ImportStatement;
1069  }
1070 
1071  bool KeywordVirtualFound = false;
1072  bool ImportStatement = false;
1073 
1074  // import {...} from '...';
1075  if (Style.Language == FormatStyle::LK_JavaScript &&
1076  CurrentToken->is(Keywords.kw_import))
1077  ImportStatement = true;
1078 
1079  while (CurrentToken) {
1080  if (CurrentToken->is(tok::kw_virtual))
1081  KeywordVirtualFound = true;
1082  if (Style.Language == FormatStyle::LK_JavaScript) {
1083  // export {...} from '...';
1084  // An export followed by "from 'some string';" is a re-export from
1085  // another module identified by a URI and is treated as a
1086  // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1087  // Just "export {...};" or "export class ..." should not be treated as
1088  // an import in this sense.
1089  if (Line.First->is(tok::kw_export) &&
1090  CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1091  CurrentToken->Next->isStringLiteral())
1092  ImportStatement = true;
1093  if (isClosureImportStatement(*CurrentToken))
1094  ImportStatement = true;
1095  }
1096  if (!consumeToken())
1097  return LT_Invalid;
1098  }
1099  if (KeywordVirtualFound)
1100  return LT_VirtualFunctionDecl;
1101  if (ImportStatement)
1102  return LT_ImportStatement;
1103 
1104  if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1105  if (Contexts.back().FirstObjCSelectorName)
1106  Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1107  Contexts.back().LongestObjCSelectorName;
1108  return LT_ObjCMethodDecl;
1109  }
1110 
1111  return LT_Other;
1112  }
1113 
1114 private:
1115  bool isClosureImportStatement(const FormatToken &Tok) {
1116  // FIXME: Closure-library specific stuff should not be hard-coded but be
1117  // configurable.
1118  return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1119  Tok.Next->Next &&
1120  (Tok.Next->Next->TokenText == "module" ||
1121  Tok.Next->Next->TokenText == "provide" ||
1122  Tok.Next->Next->TokenText == "require" ||
1123  Tok.Next->Next->TokenText == "forwardDeclare") &&
1124  Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1125  }
1126 
1127  void resetTokenMetadata(FormatToken *Token) {
1128  if (!Token)
1129  return;
1130 
1131  // Reset token type in case we have already looked at it and then
1132  // recovered from an error (e.g. failure to find the matching >).
1133  if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
1134  TT_FunctionLBrace, TT_ImplicitStringLiteral,
1135  TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
1136  TT_OverloadedOperator, TT_RegexLiteral,
1137  TT_TemplateString, TT_ObjCStringLiteral))
1138  CurrentToken->Type = TT_Unknown;
1139  CurrentToken->Role.reset();
1140  CurrentToken->MatchingParen = nullptr;
1141  CurrentToken->FakeLParens.clear();
1142  CurrentToken->FakeRParens = 0;
1143  }
1144 
1145  void next() {
1146  if (CurrentToken) {
1147  CurrentToken->NestingLevel = Contexts.size() - 1;
1148  CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1149  modifyContext(*CurrentToken);
1150  determineTokenType(*CurrentToken);
1151  CurrentToken = CurrentToken->Next;
1152  }
1153 
1154  resetTokenMetadata(CurrentToken);
1155  }
1156 
1157  /// A struct to hold information valid in a specific context, e.g.
1158  /// a pair of parentheses.
      ///
      /// Instances live on the \c Contexts stack; the innermost one is
      /// \c Contexts.back().
      // NOTE(review): this listing skips embedded line numbers 1165-1168,
      // 1171 and 1180, so some member declarations are not visible here. The
      // constructor initializes ContextKind, BindingStrength and
      // IsExpression, and other code reads LongestObjCSelectorName. Confirm
      // the full member list against the upstream source.
1159  struct Context {
1160  Context(tok::TokenKind ContextKind, unsigned BindingStrength,
1161  bool IsExpression)
1162  : ContextKind(ContextKind), BindingStrength(BindingStrength),
1163  IsExpression(IsExpression) {}
1164 
      // Set while handling a 'for' keyword, before its parentheses are parsed.
1169  bool ColonIsForRangeExpr = false;
1170  bool ColonIsDictLiteral = false;
      // Token that receives LongestObjCSelectorName once an Objective-C
      // method declaration line has been parsed.
1172  FormatToken *FirstObjCSelectorName = nullptr;
      // Most recent token typed TT_StartOfName in this context; cleared again
      // when a ';' is typed.
1173  FormatToken *FirstStartOfName = nullptr;
      // Cleared when 'new' is seen; '*' and '&' are only classified as being
      // in an expression while this and IsExpression are both true.
1174  bool CanBeExpression = true;
1175  bool InTemplateArgument = false;
      // Set on the token following a TT_CtorInitializerColon.
1176  bool InCtorInitializer = false;
      // Set on the token following a TT_InheritanceColon.
1177  bool InInheritanceList = false;
      // Set once a '^' has been typed as a unary operator in this context.
1178  bool CaretFound = false;
      // When set, a ',' switches the context to expression mode.
1179  bool IsForEachMacro = false;
1181  };
1182 
1183  /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1184  /// of each instance.
1185  struct ScopedContextCreator {
1186  AnnotatingParser &P;
1187 
1188  ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1189  unsigned Increase)
1190  : P(P) {
1191  P.Contexts.push_back(Context(ContextKind,
1192  P.Contexts.back().BindingStrength + Increase,
1193  P.Contexts.back().IsExpression));
1194  }
1195 
1196  ~ScopedContextCreator() { P.Contexts.pop_back(); }
1197  };
1198 
/// Updates the innermost context (\c Contexts.back()) according to the token
/// that is being left behind.
///
/// \c next() calls this for each token right before
/// \c determineTokenType(). The branches form one if/else-if chain, so at
/// most one of them applies per token.
///
/// \param Current the token the parser is about to step past.
1199  void modifyContext(const FormatToken &Current) {
      // An assignment-precedence operator switches the context to expression
      // mode, except on lines starting with 'template', 'using' or 'return',
      // in TypeScript type aliases, and directly after 'operator'.
1200  if (Current.getPrecedence() == prec::Assignment &&
1201  !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1202  // Type aliases use `type X = ...;` in TypeScript and can be exported
1203  // using `export type ...`.
1204  !(Style.Language == FormatStyle::LK_JavaScript &&
1205  (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1206  Line.startsWith(tok::kw_export, Keywords.kw_type,
1207  tok::identifier))) &&
1208  (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1209  Contexts.back().IsExpression = true;
1210  if (!Line.startsWith(TT_UnaryOperator)) {
      // Walk backwards over the tokens left of the assignment and retype
      // '*', '&' and '&&' that were tagged as binary/unary operators to
      // TT_PointerOrReference. The walk stops when the token before
      // `Previous` is ',' or ';', at a scope opener, or at a closing
      // bracket without a recorded match; bracketed groups are skipped.
1211  for (FormatToken *Previous = Current.Previous;
1212  Previous && Previous->Previous &&
1213  !Previous->Previous->isOneOf(tok::comma, tok::semi);
1214  Previous = Previous->Previous) {
1215  if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
1216  Previous = Previous->MatchingParen;
1217  if (!Previous)
1218  break;
1219  }
1220  if (Previous->opensScope())
1221  break;
1222  if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1223  Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1224  Previous->Previous && Previous->Previous->isNot(tok::equal))
1225  Previous->Type = TT_PointerOrReference;
1226  }
1227  }
1228  } else if (Current.is(tok::lessless) &&
1229  (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1230  Contexts.back().IsExpression = true;
1231  } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1232  Contexts.back().IsExpression = true;
1233  } else if (Current.is(TT_TrailingReturnArrow)) {
1234  Contexts.back().IsExpression = false;
1235  } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
      // Expression mode is only entered here when formatting Java; for every
      // other language it is switched off.
1236  Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1237  } else if (Current.Previous &&
1238  Current.Previous->is(TT_CtorInitializerColon)) {
1239  Contexts.back().IsExpression = true;
1240  Contexts.back().InCtorInitializer = true;
1241  } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1242  Contexts.back().InInheritanceList = true;
1243  } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
      // Any run of '*' / '&' directly before ')', '>' or ',' is retyped as
      // pointer/reference.
1244  for (FormatToken *Previous = Current.Previous;
1245  Previous && Previous->isOneOf(tok::star, tok::amp);
1246  Previous = Previous->Previous)
1247  Previous->Type = TT_PointerOrReference;
      // Note: the check uses the outermost context (Contexts.front()), while
      // the flag is cleared on the innermost one.
1248  if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1249  Contexts.back().IsExpression = false;
1250  } else if (Current.is(tok::kw_new)) {
1251  Contexts.back().CanBeExpression = false;
1252  } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
1253  // This should be the condition or increment in a for-loop.
1254  Contexts.back().IsExpression = true;
1255  }
1256  }
1257 
/// Assigns a \c TokenType to \p Current unless it already has one.
///
/// \c next() calls this for each token right after \c modifyContext().
/// After the JavaScript '!' special case, the checks form a single
/// if/else-if chain: the first matching branch decides, and a token that
/// matches none of them keeps \c TT_Unknown.
///
/// \param Current the token the parser is about to step past.
1258  void determineTokenType(FormatToken &Current) {
1259  if (!Current.is(TT_Unknown))
1260  // The token type is already known.
1261  return;
1262 
1263  if (Style.Language == FormatStyle::LK_JavaScript) {
      // A '!' that follows an operand-like token (identifier, 'namespace',
      // closing bracket, literal) or that precedes a binary operator / 'as'
      // is a postfix non-null assertion rather than a logical negation.
1264  if (Current.is(tok::exclaim)) {
1265  if (Current.Previous &&
1266  (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
1267  tok::r_paren, tok::r_square,
1268  tok::r_brace) ||
1269  Current.Previous->Tok.isLiteral())) {
1270  Current.Type = TT_JsNonNullAssertion;
1271  return;
1272  }
1273  if (Current.Next &&
1274  Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1275  Current.Type = TT_JsNonNullAssertion;
1276  return;
1277  }
1278  }
1279  }
1280 
1281  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1282  // function declaration have been found. In this case, 'Current' is a
1283  // trailing token of this declaration and thus cannot be a name.
1284  if (Current.is(Keywords.kw_instanceof)) {
1285  Current.Type = TT_BinaryOperator;
1286  } else if (isStartOfName(Current) &&
1287  (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1288  Contexts.back().FirstStartOfName = &Current;
1289  Current.Type = TT_StartOfName;
1290  } else if (Current.is(tok::semi)) {
1291  // Reset FirstStartOfName after finding a semicolon so that a for loop
1292  // with multiple increment statements is not confused with a for loop
1293  // having multiple variable declarations.
1294  Contexts.back().FirstStartOfName = nullptr;
1295  } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
      // Remembered so that a later top-level '->' can be recognized as a
      // trailing return type arrow. The 'auto' token itself is not retyped.
1296  AutoFound = true;
1297  } else if (Current.is(tok::arrow) &&
1298  Style.Language == FormatStyle::LK_Java) {
1299  Current.Type = TT_LambdaArrow;
1300  } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1301  Current.NestingLevel == 0) {
1302  Current.Type = TT_TrailingReturnArrow;
1303  } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1304  Current.Type = determineStarAmpUsage(Current,
1305  Contexts.back().CanBeExpression &&
1306  Contexts.back().IsExpression,
1307  Contexts.back().InTemplateArgument);
1308  } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1309  Current.Type = determinePlusMinusCaretUsage(Current);
1310  if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1311  Contexts.back().CaretFound = true;
1312  } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1313  Current.Type = determineIncrementUsage(Current);
1314  } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1315  Current.Type = TT_UnaryOperator;
1316  } else if (Current.is(tok::question)) {
1317  if (Style.Language == FormatStyle::LK_JavaScript &&
1318  Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1319  // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1320  // on the interface, not a ternary expression.
1321  Current.Type = TT_JsTypeOptionalQuestion;
1322  } else {
1323  Current.Type = TT_ConditionalExpr;
1324  }
1325  } else if (Current.isBinaryOperator() &&
1326  (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1327  (!Current.is(tok::greater) &&
1328  Style.Language != FormatStyle::LK_TextProto)) {
1329  Current.Type = TT_BinaryOperator;
1330  } else if (Current.is(tok::comment)) {
1331  if (Current.TokenText.startswith("/*")) {
1332  if (Current.TokenText.endswith("*/"))
1333  Current.Type = TT_BlockComment;
1334  else
1335  // The lexer has for some reason determined a comment here. But we
1336  // cannot really handle it, if it isn't properly terminated.
1337  Current.Tok.setKind(tok::unknown);
1338  } else {
1339  Current.Type = TT_LineComment;
1340  }
1341  } else if (Current.is(tok::r_paren)) {
1342  if (rParenEndsCast(Current))
1343  Current.Type = TT_CastRParen;
      // Checked independently of the cast result above, so this can replace
      // TT_CastRParen: if the identifier before the matching '(' is written
      // in all upper case and is either the first token or follows a token
      // that closes a template declaration, the ')' ends a function
      // annotation.
1344  if (Current.MatchingParen && Current.Next &&
1345  !Current.Next->isBinaryOperator() &&
1346  !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1347  tok::comma, tok::period, tok::arrow,
1348  tok::coloncolon))
1349  if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1350  // Make sure this isn't the return type of an Obj-C block declaration
1351  if (AfterParen->Tok.isNot(tok::caret)) {
1352  if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1353  if (BeforeParen->is(tok::identifier) &&
1354  BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1355  (!BeforeParen->Previous ||
1356  BeforeParen->Previous->ClosesTemplateDeclaration))
1357  Current.Type = TT_FunctionAnnotationRParen;
1358  }
1359  }
1360  } else if (Current.is(tok::at) && Current.Next &&
1361  Style.Language != FormatStyle::LK_JavaScript &&
1362  Style.Language != FormatStyle::LK_Java) {
1363  // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1364  // marks declarations and properties that need special formatting.
1365  switch (Current.Next->Tok.getObjCKeywordID()) {
1366  case tok::objc_interface:
1367  case tok::objc_implementation:
1368  case tok::objc_protocol:
1369  Current.Type = TT_ObjCDecl;
1370  break;
1371  case tok::objc_property:
1372  Current.Type = TT_ObjCProperty;
1373  break;
1374  default:
1375  break;
1376  }
1377  } else if (Current.is(tok::period)) {
      // A '.' right after ',' or '{' (ignoring comments) starts a designated
      // initializer; in Java, a '.' after an annotation token inherits that
      // annotation type.
1378  FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1379  if (PreviousNoComment &&
1380  PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1381  Current.Type = TT_DesignatedInitializerPeriod;
1382  else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1383  Current.Previous->isOneOf(TT_JavaAnnotation,
1384  TT_LeadingJavaAnnotation)) {
1385  Current.Type = Current.Previous->Type;
1386  }
1387  } else if (canBeObjCSelectorComponent(Current) &&
1388  // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1389  Current.Previous && Current.Previous->is(TT_CastRParen) &&
1390  Current.Previous->MatchingParen &&
1391  Current.Previous->MatchingParen->Previous &&
1392  Current.Previous->MatchingParen->Previous->is(
1393  TT_ObjCMethodSpecifier)) {
1394  // This is the first part of an Objective-C selector name. (If there's no
1395  // colon after this, this is the only place which annotates the identifier
1396  // as a selector.)
1397  Current.Type = TT_SelectorName;
1398  } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1399  Current.Previous &&
1400  !Current.Previous->isOneOf(tok::equal, tok::at) &&
1401  Line.MightBeFunctionDecl && Contexts.size() == 1) {
1402  // Line.MightBeFunctionDecl can only be true after the parentheses of a
1403  // function declaration have been found.
1404  Current.Type = TT_TrailingAnnotation;
1405  } else if ((Style.Language == FormatStyle::LK_Java ||
1406  Style.Language == FormatStyle::LK_JavaScript) &&
1407  Current.Previous) {
      // Java / JavaScript annotations: the token after '@' (unless it is
      // 'interface') is a leading annotation when nothing, or another
      // leading annotation, precedes the '@', and a regular annotation
      // otherwise. A token after an annotation-typed '.' inherits that type.
1408  if (Current.Previous->is(tok::at) &&
1409  Current.isNot(Keywords.kw_interface)) {
1410  const FormatToken &AtToken = *Current.Previous;
1411  const FormatToken *Previous = AtToken.getPreviousNonComment();
1412  if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1413  Current.Type = TT_LeadingJavaAnnotation;
1414  else
1415  Current.Type = TT_JavaAnnotation;
1416  } else if (Current.Previous->is(tok::period) &&
1417  Current.Previous->isOneOf(TT_JavaAnnotation,
1418  TT_LeadingJavaAnnotation)) {
1419  Current.Type = Current.Previous->Type;
1420  }
1421  }
1422  }
1423 
1424  /// Take a guess at whether \p Tok starts a name of a function or
1425  /// variable declaration.
1426  ///
1427  /// This is a heuristic based on whether \p Tok is an identifier following
1428  /// something that is likely a type.
1429  bool isStartOfName(const FormatToken &Tok) {
1430  if (Tok.isNot(tok::identifier) || !Tok.Previous)
1431  return false;
1432 
1433  if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1434  Keywords.kw_as))
1435  return false;
1436  if (Style.Language == FormatStyle::LK_JavaScript &&
1437  Tok.Previous->is(Keywords.kw_in))
1438  return false;
1439 
1440  // Skip "const" as it does not have an influence on whether this is a name.
1441  FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1442  while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1443  PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1444 
1445  if (!PreviousNotConst)
1446  return false;
1447 
1448  bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1449  PreviousNotConst->Previous &&
1450  PreviousNotConst->Previous->is(tok::hash);
1451 
1452  if (PreviousNotConst->is(TT_TemplateCloser))
1453  return PreviousNotConst && PreviousNotConst->MatchingParen &&
1454  PreviousNotConst->MatchingParen->Previous &&
1455  PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1456  PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1457 
1458  if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1459  PreviousNotConst->MatchingParen->Previous &&
1460  PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1461  return true;
1462 
1463  return (!IsPPKeyword &&
1464  PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1465  PreviousNotConst->is(TT_PointerOrReference) ||
1466  PreviousNotConst->isSimpleTypeSpecifier();
1467  }
1468 
1469  /// Determine whether ')' is ending a cast.
      ///
      /// This is a heuristic that looks at the token before the matching '(',
      /// the tokens between the parentheses and the tokens after the ')'.
      /// The checks run in order and the first one that applies decides.
      ///
      /// \param Tok the ')' token to classify.
      /// \returns \c true if the parentheses are judged to be a C-style cast.
1470  bool rParenEndsCast(const FormatToken &Tok) {
1471  // C-style casts are only used in C++ and Java.
1472  if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
1473  return false;
1474 
1475  // Empty parens aren't casts and there are no casts at the end of the line.
      // The same check also rejects a ')' without a recorded matching '('.
1476  if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1477  return false;
1478 
1479  FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1480  if (LeftOfParens) {
1481  // If there is a closing parenthesis left of the current parentheses,
1482  // look past it as these might be chained casts.
1483  if (LeftOfParens->is(tok::r_paren)) {
1484  if (!LeftOfParens->MatchingParen ||
1485  !LeftOfParens->MatchingParen->Previous)
1486  return false;
1487  LeftOfParens = LeftOfParens->MatchingParen->Previous;
1488  }
1489 
1490  // If there is an identifier (or with a few exceptions a keyword) right
1491  // before the parentheses, this is unlikely to be a cast.
1492  if (LeftOfParens->Tok.getIdentifierInfo() &&
1493  !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1494  tok::kw_delete))
1495  return false;
1496 
1497  // Certain other tokens right before the parentheses are also signals that
1498  // this cannot be a cast.
1499  if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1500  TT_TemplateCloser, tok::ellipsis))
1501  return false;
1502  }
1503 
      // "(x) ? ..." is the condition of a conditional expression.
1504  if (Tok.Next->is(tok::question))
1505  return false;
1506 
1507  // As Java has no function types, a "(" after the ")" likely means that this
1508  // is a cast.
1509  if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1510  return true;
1511 
1512  // If a (non-string) literal follows, this is likely a cast.
      // 'sizeof' and 'alignof' after the ')' are treated the same way.
1513  if (Tok.Next->isNot(tok::string_literal) &&
1514  (Tok.Next->Tok.isLiteral() ||
1515  Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1516  return true;
1517 
1518  // Heuristically try to determine whether the parentheses contain a type.
      // Only the last token inside the parentheses is inspected.
1519  bool ParensAreType =
1520  !Tok.Previous ||
1521  Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1522  Tok.Previous->isSimpleTypeSpecifier();
1523  bool ParensCouldEndDecl =
1524  Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1525  if (ParensAreType && !ParensCouldEndDecl)
1526  return true;
1527 
1528  // At this point, we heuristically assume that there are no casts at the
1529  // start of the line. We assume that we have found most cases where there
1530  // are by the logic above, e.g. "(void)x;".
1531  if (!LeftOfParens)
1532  return false;
1533 
1534  // Certain token types inside the parentheses mean that this can't be a
1535  // cast.
1536  for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1537  Token = Token->Next)
1538  if (Token->is(TT_BinaryOperator))
1539  return false;
1540 
1541  // If the following token is an identifier or 'this', this is a cast. All
1542  // cases where this can be something else are handled above.
1543  if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1544  return true;
1545 
      // The remaining checks need the token after the next one.
1546  if (!Tok.Next->Next)
1547  return false;
1548 
1549  // If the next token after the parenthesis is a unary operator, assume
1550  // that this is cast, unless there are unexpected tokens inside the
1551  // parenthesis.
      // A following '+' is excluded, and the operand after the operator must
      // be an identifier or a numeric constant.
1552  bool NextIsUnary =
1553  Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1554  if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1555  !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1556  return false;
1557  // Search for unexpected tokens.
      // Only 'const', identifiers and '::' are accepted inside the parens.
1558  for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1559  Prev = Prev->Previous) {
1560  if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1561  return false;
1562  }
1563  return true;
1564  }
1565 
1566  /// Return the type of the given token assuming it is * or &.
      ///
      /// \c determineTokenType() also calls this for '&&'. The decision is a
      /// heuristic based on the neighbouring non-comment tokens; the checks
      /// run in order and the first one that applies decides.
      ///
      /// \param Tok the '*', '&' or '&&' token to classify.
      /// \param IsExpression whether the surrounding context is currently
      /// treated as an expression.
      /// \param InTemplateArgument whether the token is inside a template
      /// argument.
      /// \returns \c TT_UnaryOperator, \c TT_BinaryOperator or
      /// \c TT_PointerOrReference.
1567  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1568  bool InTemplateArgument) {
1569  if (Style.Language == FormatStyle::LK_JavaScript)
1570  return TT_BinaryOperator;
1571 
      // Nothing in front of the token: it can only be a prefix operator.
1572  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1573  if (!PrevToken)
1574  return TT_UnaryOperator;
1575 
      // At the end of the line, before '->', '=' or 'const', or before a '{'
      // that is the last token, the token is taken to belong to a type.
1576  const FormatToken *NextToken = Tok.getNextNonComment();
1577  if (!NextToken ||
1578  NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
1579  (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1580  return TT_PointerOrReference;
1581 
1582  if (PrevToken->is(tok::coloncolon))
1583  return TT_PointerOrReference;
1584 
      // After one of these tokens an operand has to follow, so the token is
      // a prefix operator.
1585  if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1586  tok::comma, tok::semi, tok::kw_return, tok::colon,
1587  tok::equal, tok::kw_delete, tok::kw_sizeof,
1588  tok::kw_throw) ||
1589  PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1590  TT_UnaryOperator, TT_CastRParen))
1591  return TT_UnaryOperator;
1592 
1593  if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1594  return TT_PointerOrReference;
1595  if (NextToken->is(tok::kw_operator) && !IsExpression)
1596  return TT_PointerOrReference;
1597  if (NextToken->isOneOf(tok::comma, tok::semi))
1598  return TT_PointerOrReference;
1599 
      // "typeof(...) *" and "decltype(...) &" name types.
1600  if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {
1601  FormatToken *TokenBeforeMatchingParen =
1602  PrevToken->MatchingParen->getPreviousNonComment();
1603  if (TokenBeforeMatchingParen &&
1604  TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
1605  return TT_PointerOrReference;
1606  }
1607 
      // A literal, closing bracket or boolean keyword on the left, or a
      // literal, boolean keyword or unary operator on the right, indicates a
      // binary operator.
1608  if (PrevToken->Tok.isLiteral() ||
1609  PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1610  tok::kw_false, tok::r_brace) ||
1611  NextToken->Tok.isLiteral() ||
1612  NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1613  NextToken->isUnaryOperator() ||
1614  // If we know we're in a template argument, there are no named
1615  // declarations. Thus, having an identifier on the right-hand side
1616  // indicates a binary operator.
1617  (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1618  return TT_BinaryOperator;
1619 
1620  // "&&(" is quite unlikely to be two successive unary "&".
1621  if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1622  return TT_BinaryOperator;
1623 
1624  // This catches some cases where evaluation order is used as control flow:
1625  // aaa && aaa->f();
1626  const FormatToken *NextNextToken = NextToken->getNextNonComment();
1627  if (NextNextToken && NextNextToken->is(tok::arrow))
1628  return TT_BinaryOperator;
1629 
1630  // It is very unlikely that we are going to find a pointer or reference type
1631  // definition on the RHS of an assignment.
      // The exception is a context in which a unary '^' has been seen.
1632  if (IsExpression && !Contexts.back().CaretFound)
1633  return TT_BinaryOperator;
1634 
1635  return TT_PointerOrReference;
1636  }
1637 
1638  TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1639  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1640  if (!PrevToken)
1641  return TT_UnaryOperator;
1642 
1643  if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
1644  // This must be a sequence of leading unary operators.
1645  return TT_UnaryOperator;
1646 
1647  // Use heuristics to recognize unary operators.
1648  if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1649  tok::question, tok::colon, tok::kw_return,
1650  tok::kw_case, tok::at, tok::l_brace))
1651  return TT_UnaryOperator;
1652 
1653  // There can't be two consecutive binary operators.
1654  if (PrevToken->is(TT_BinaryOperator))
1655  return TT_UnaryOperator;
1656 
1657  // Fall back to marking the token as binary operator.
1658  return TT_BinaryOperator;
1659  }
1660 
1661  /// Determine whether ++/-- are pre- or post-increments/-decrements.
1662  TokenType determineIncrementUsage(const FormatToken &Tok) {
1663  const FormatToken *PrevToken = Tok.getPreviousNonComment();
1664  if (!PrevToken || PrevToken->is(TT_CastRParen))
1665  return TT_UnaryOperator;
1666  if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1667  return TT_TrailingUnaryOperator;
1668 
1669  return TT_UnaryOperator;
1670  }
1671 
1672  SmallVector<Context, 8> Contexts;
1673 
1674  const FormatStyle &Style;
1675  AnnotatedLine &Line;
1676  FormatToken *CurrentToken;
1677  bool AutoFound;
1678  const AdditionalKeywords &Keywords;
1679 
1680  // Set of "<" tokens that do not open a template parameter list. If parseAngle
1681  // determines that a specific token can't be a template opener, it will make
1682  // same decision irrespective of the decisions for tokens leading up to it.
1683  // Store this information to prevent this from causing exponential runtime.
1684  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1685 };
1686 
1687 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1688 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1689 
1690 /// Parses binary expressions by inserting fake parenthesis based on
1691 /// operator precedence.
1692 class ExpressionParser {
1693 public:
1694  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1695  AnnotatedLine &Line)
1696  : Style(Style), Keywords(Keywords), Current(Line.First) {}
1697 
1698  /// Parse expressions with the given operator precedence.
1699  void parse(int Precedence = 0) {
1700  // Skip 'return' and ObjC selector colons as they are not part of a binary
1701  // expression.
1702  while (Current && (Current->is(tok::kw_return) ||
1703  (Current->is(tok::colon) &&
1704  Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1705  next();
1706 
1707  if (!Current || Precedence > PrecedenceArrowAndPeriod)
1708  return;
1709 
1710  // Conditional expressions need to be parsed separately for proper nesting.
1711  if (Precedence == prec::Conditional) {
1712  parseConditionalExpr();
1713  return;
1714  }
1715 
1716  // Parse unary operators, which all have a higher precedence than binary
1717  // operators.
1718  if (Precedence == PrecedenceUnaryOperator) {
1719  parseUnaryOperator();
1720  return;
1721  }
1722 
1723  FormatToken *Start = Current;
1724  FormatToken *LatestOperator = nullptr;
1725  unsigned OperatorIndex = 0;
1726 
1727  while (Current) {
1728  // Consume operators with higher precedence.
1729  parse(Precedence + 1);
1730 
1731  int CurrentPrecedence = getCurrentPrecedence();
1732 
1733  if (Current && Current->is(TT_SelectorName) &&
1734  Precedence == CurrentPrecedence) {
1735  if (LatestOperator)
1736  addFakeParenthesis(Start, prec::Level(Precedence));
1737  Start = Current;
1738  }
1739 
1740  // At the end of the line or when an operator with higher precedence is
1741  // found, insert fake parenthesis and return.
1742  if (!Current ||
1743  (Current->closesScope() &&
1744  (Current->MatchingParen || Current->is(TT_TemplateString))) ||
1745  (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1746  (CurrentPrecedence == prec::Conditional &&
1747  Precedence == prec::Assignment && Current->is(tok::colon))) {
1748  break;
1749  }
1750 
1751  // Consume scopes: (), [], <> and {}
1752  if (Current->opensScope()) {
1753  // In fragment of a JavaScript template string can look like '}..${' and
1754  // thus close a scope and open a new one at the same time.
1755  while (Current && (!Current->closesScope() || Current->opensScope())) {
1756  next();
1757  parse();
1758  }
1759  next();
1760  } else {
1761  // Operator found.
1762  if (CurrentPrecedence == Precedence) {
1763  if (LatestOperator)
1764  LatestOperator->NextOperator = Current;
1765  LatestOperator = Current;
1766  Current->OperatorIndex = OperatorIndex;
1767  ++OperatorIndex;
1768  }
1769  next(/*SkipPastLeadingComments=*/Precedence > 0);
1770  }
1771  }
1772 
1773  if (LatestOperator && (Current || Precedence > 0)) {
1774  // LatestOperator->LastOperator = true;
1775  if (Precedence == PrecedenceArrowAndPeriod) {
1776  // Call expressions don't have a binary operator precedence.
1777  addFakeParenthesis(Start, prec::Unknown);
1778  } else {
1779  addFakeParenthesis(Start, prec::Level(Precedence));
1780  }
1781  }
1782  }
1783 
1784 private:
1785  /// Gets the precedence (+1) of the given token for binary operators
1786  /// and other tokens that we treat like binary operators.
1787  int getCurrentPrecedence() {
1788  if (Current) {
1789  const FormatToken *NextNonComment = Current->getNextNonComment();
1790  if (Current->is(TT_ConditionalExpr))
1791  return prec::Conditional;
1792  if (NextNonComment && Current->is(TT_SelectorName) &&
1793  (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
1794  ((Style.Language == FormatStyle::LK_Proto ||
1795  Style.Language == FormatStyle::LK_TextProto) &&
1796  NextNonComment->is(tok::less))))
1797  return prec::Assignment;
1798  if (Current->is(TT_JsComputedPropertyName))
1799  return prec::Assignment;
1800  if (Current->is(TT_LambdaArrow))
1801  return prec::Comma;
1802  if (Current->is(TT_JsFatArrow))
1803  return prec::Assignment;
1804  if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
1805  (Current->is(tok::comment) && NextNonComment &&
1806  NextNonComment->is(TT_SelectorName)))
1807  return 0;
1808  if (Current->is(TT_RangeBasedForLoopColon))
1809  return prec::Comma;
1810  if ((Style.Language == FormatStyle::LK_Java ||
1811  Style.Language == FormatStyle::LK_JavaScript) &&
1812  Current->is(Keywords.kw_instanceof))
1813  return prec::Relational;
1814  if (Style.Language == FormatStyle::LK_JavaScript &&
1815  Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
1816  return prec::Relational;
1817  if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1818  return Current->getPrecedence();
1819  if (Current->isOneOf(tok::period, tok::arrow))
1820  return PrecedenceArrowAndPeriod;
1821  if ((Style.Language == FormatStyle::LK_Java ||
1822  Style.Language == FormatStyle::LK_JavaScript) &&
1823  Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1824  Keywords.kw_throws))
1825  return 0;
1826  }
1827  return -1;
1828  }
1829 
1830  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1831  Start->FakeLParens.push_back(Precedence);
1832  if (Precedence > prec::Unknown)
1833  Start->StartsBinaryExpression = true;
1834  if (Current) {
1835  FormatToken *Previous = Current->Previous;
1836  while (Previous->is(tok::comment) && Previous->Previous)
1837  Previous = Previous->Previous;
1838  ++Previous->FakeRParens;
1839  if (Precedence > prec::Unknown)
1840  Previous->EndsBinaryExpression = true;
1841  }
1842  }
1843 
1844  /// Parse unary operator expressions and surround them with fake
1845  /// parentheses if appropriate.
1846  void parseUnaryOperator() {
1848  while (Current && Current->is(TT_UnaryOperator)) {
1849  Tokens.push_back(Current);
1850  next();
1851  }
1852  parse(PrecedenceArrowAndPeriod);
1853  for (FormatToken *Token : llvm::reverse(Tokens))
1854  // The actual precedence doesn't matter.
1855  addFakeParenthesis(Token, prec::Unknown);
1856  }
1857 
1858  void parseConditionalExpr() {
1859  while (Current && Current->isTrailingComment()) {
1860  next();
1861  }
1862  FormatToken *Start = Current;
1863  parse(prec::LogicalOr);
1864  if (!Current || !Current->is(tok::question))
1865  return;
1866  next();
1867  parse(prec::Assignment);
1868  if (!Current || Current->isNot(TT_ConditionalExpr))
1869  return;
1870  next();
1871  parse(prec::Assignment);
1872  addFakeParenthesis(Start, prec::Conditional);
1873  }
1874 
1875  void next(bool SkipPastLeadingComments = true) {
1876  if (Current)
1877  Current = Current->Next;
1878  while (Current &&
1879  (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1880  Current->isTrailingComment())
1881  Current = Current->Next;
1882  }
1883 
1884  const FormatStyle &Style;
1885  const AdditionalKeywords &Keywords;
1886  FormatToken *Current;
1887 };
1888 
1889 } // end anonymous namespace
1890 
1893  const AnnotatedLine *NextNonCommentLine = nullptr;
1895  E = Lines.rend();
1896  I != E; ++I) {
1897  bool CommentLine = true;
1898  for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
1899  if (!Tok->is(tok::comment)) {
1900  CommentLine = false;
1901  break;
1902  }
1903  }
1904 
1905  // If the comment is currently aligned with the line immediately following
1906  // it, that's probably intentional and we should keep it.
1907  if (NextNonCommentLine && CommentLine &&
1908  NextNonCommentLine->First->NewlinesBefore <= 1 &&
1909  NextNonCommentLine->First->OriginalColumn ==
1910  (*I)->First->OriginalColumn) {
1911  // Align comments for preprocessor lines with the # in column 0.
1912  // Otherwise, align with the next line.
1913  (*I)->Level = (NextNonCommentLine->Type == LT_PreprocessorDirective ||
1914  NextNonCommentLine->Type == LT_ImportStatement)
1915  ? 0
1916  : NextNonCommentLine->Level;
1917  } else {
1918  NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1919  }
1920 
1921  setCommentLineLevels((*I)->Children);
1922  }
1923 }
1924 
1925 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
1926  unsigned Result = 0;
1927  for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
1928  Result = std::max(Result, Tok->NestingLevel);
1929  return Result;
1930 }
1931 
1934  E = Line.Children.end();
1935  I != E; ++I) {
1936  annotate(**I);
1937  }
1938  AnnotatingParser Parser(Style, Line, Keywords);
1939  Line.Type = Parser.parseLine();
1940 
1941  // With very deep nesting, ExpressionParser uses lots of stack and the
1942  // formatting algorithm is very slow. We're not going to do a good job here
1943  // anyway - it's probably generated code being formatted by mistake.
1944  // Just skip the whole line.
1945  if (maxNestingDepth(Line) > 50)
1946  Line.Type = LT_Invalid;
1947 
1948  if (Line.Type == LT_Invalid)
1949  return;
1950 
1951  ExpressionParser ExprParser(Style, Keywords, Line);
1952  ExprParser.parse();
1953 
1954  if (Line.startsWith(TT_ObjCMethodSpecifier))
1955  Line.Type = LT_ObjCMethodDecl;
1956  else if (Line.startsWith(TT_ObjCDecl))
1957  Line.Type = LT_ObjCDecl;
1958  else if (Line.startsWith(TT_ObjCProperty))
1959  Line.Type = LT_ObjCProperty;
1960 
1961  Line.First->SpacesRequiredBefore = 1;
1962  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1963 }
1964 
1965 // This function heuristically determines whether 'Current' starts the name of a
1966 // function declaration.
1967 static bool isFunctionDeclarationName(const FormatToken &Current,
1968  const AnnotatedLine &Line) {
1969  auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
1970  for (; Next; Next = Next->Next) {
1971  if (Next->is(TT_OverloadedOperatorLParen))
1972  return Next;
1973  if (Next->is(TT_OverloadedOperator))
1974  continue;
1975  if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1976  // For 'new[]' and 'delete[]'.
1977  if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next &&
1978  Next->Next->Next->is(tok::r_square))
1979  Next = Next->Next->Next;
1980  continue;
1981  }
1982 
1983  break;
1984  }
1985  return nullptr;
1986  };
1987 
1988  // Find parentheses of parameter list.
1989  const FormatToken *Next = Current.Next;
1990  if (Current.is(tok::kw_operator)) {
1991  if (Current.Previous && Current.Previous->is(tok::coloncolon))
1992  return false;
1993  Next = skipOperatorName(Next);
1994  } else {
1995  if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1996  return false;
1997  for (; Next; Next = Next->Next) {
1998  if (Next->is(TT_TemplateOpener)) {
1999  Next = Next->MatchingParen;
2000  } else if (Next->is(tok::coloncolon)) {
2001  Next = Next->Next;
2002  if (!Next)
2003  return false;
2004  if (Next->is(tok::kw_operator)) {
2005  Next = skipOperatorName(Next->Next);
2006  break;
2007  }
2008  if (!Next->is(tok::identifier))
2009  return false;
2010  } else if (Next->is(tok::l_paren)) {
2011  break;
2012  } else {
2013  return false;
2014  }
2015  }
2016  }
2017 
2018  // Check whether parameter list can belong to a function declaration.
2019  if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
2020  return false;
2021  // If the lines ends with "{", this is likely an function definition.
2022  if (Line.Last->is(tok::l_brace))
2023  return true;
2024  if (Next->Next == Next->MatchingParen)
2025  return true; // Empty parentheses.
2026  // If there is an &/&& after the r_paren, this is likely a function.
2027  if (Next->MatchingParen->Next &&
2028  Next->MatchingParen->Next->is(TT_PointerOrReference))
2029  return true;
2030  for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
2031  Tok = Tok->Next) {
2032  if (Tok->is(tok::l_paren) && Tok->MatchingParen) {
2033  Tok = Tok->MatchingParen;
2034  continue;
2035  }
2036  if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
2037  Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
2038  return true;
2039  if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
2040  Tok->Tok.isLiteral())
2041  return false;
2042  }
2043  return false;
2044 }
2045 
2046 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2047  assert(Line.MightBeFunctionDecl);
2048 
2052  Line.Level > 0)
2053  return false;
2054 
2057  return false;
2058  case FormatStyle::RTBS_All:
2060  return true;
2063  return Line.mightBeFunctionDefinition();
2064  }
2065 
2066  return false;
2067 }
2068 
2071  E = Line.Children.end();
2072  I != E; ++I) {
2073  calculateFormattingInformation(**I);
2074  }
2075 
2076  Line.First->TotalLength =
2078  : Line.FirstStartColumn + Line.First->ColumnWidth;
2079  FormatToken *Current = Line.First->Next;
2080  bool InFunctionDecl = Line.MightBeFunctionDecl;
2081  while (Current) {
2082  if (isFunctionDeclarationName(*Current, Line))
2083  Current->Type = TT_FunctionDeclarationName;
2084  if (Current->is(TT_LineComment)) {
2085  if (Current->Previous->BlockKind == BK_BracedInit &&
2086  Current->Previous->opensScope())
2087  Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
2088  else
2090 
2091  // If we find a trailing comment, iterate backwards to determine whether
2092  // it seems to relate to a specific parameter. If so, break before that
2093  // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
2094  // to the previous line in:
2095  // SomeFunction(a,
2096  // b, // comment
2097  // c);
2098  if (!Current->HasUnescapedNewline) {
2099  for (FormatToken *Parameter = Current->Previous; Parameter;
2100  Parameter = Parameter->Previous) {
2101  if (Parameter->isOneOf(tok::comment, tok::r_brace))
2102  break;
2103  if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
2104  if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
2105  Parameter->HasUnescapedNewline)
2106  Parameter->MustBreakBefore = true;
2107  break;
2108  }
2109  }
2110  }
2111  } else if (Current->SpacesRequiredBefore == 0 &&
2112  spaceRequiredBefore(Line, *Current)) {
2113  Current->SpacesRequiredBefore = 1;
2114  }
2115 
2116  Current->MustBreakBefore =
2117  Current->MustBreakBefore || mustBreakBefore(Line, *Current);
2118 
2119  if (!Current->MustBreakBefore && InFunctionDecl &&
2120  Current->is(TT_FunctionDeclarationName))
2121  Current->MustBreakBefore = mustBreakForReturnType(Line);
2122 
2123  Current->CanBreakBefore =
2124  Current->MustBreakBefore || canBreakBefore(Line, *Current);
2125  unsigned ChildSize = 0;
2126  if (Current->Previous->Children.size() == 1) {
2127  FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
2128  ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
2129  : LastOfChild.TotalLength + 1;
2130  }
2131  const FormatToken *Prev = Current->Previous;
2132  if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
2133  (Prev->Children.size() == 1 &&
2134  Prev->Children[0]->First->MustBreakBefore) ||
2135  Current->IsMultiline)
2136  Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
2137  else
2138  Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
2139  ChildSize + Current->SpacesRequiredBefore;
2140 
2141  if (Current->is(TT_CtorInitializerColon))
2142  InFunctionDecl = false;
2143 
2144  // FIXME: Only calculate this if CanBreakBefore is true once static
2145  // initializers etc. are sorted out.
2146  // FIXME: Move magic numbers to a better place.
2147 
2148  // Reduce penalty for aligning ObjC method arguments using the colon
2149  // alignment as this is the canonical way (still prefer fitting everything
2150  // into one line if possible). Trying to fit a whole expression into one
2151  // line should not force other line breaks (e.g. when ObjC method
2152  // expression is a part of other expression).
2153  Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
2155  Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
2156  if (Current->ParameterIndex == 1)
2157  Current->SplitPenalty += 5 * Current->BindingStrength;
2158  } else {
2159  Current->SplitPenalty += 20 * Current->BindingStrength;
2160  }
2161 
2162  Current = Current->Next;
2163  }
2164 
2165  calculateUnbreakableTailLengths(Line);
2166  unsigned IndentLevel = Line.Level;
2167  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
2168  if (Current->Role)
2169  Current->Role->precomputeFormattingInfos(Current);
2170  if (Current->MatchingParen &&
2172  assert(IndentLevel > 0);
2173  --IndentLevel;
2174  }
2175  Current->IndentLevel = IndentLevel;
2176  if (Current->opensBlockOrBlockTypeList(Style))
2177  ++IndentLevel;
2178  }
2179 
2180  LLVM_DEBUG({ printDebugInfo(Line); });
2181 }
2182 
2183 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
2184  unsigned UnbreakableTailLength = 0;
2185  FormatToken *Current = Line.Last;
2186  while (Current) {
2187  Current->UnbreakableTailLength = UnbreakableTailLength;
2188  if (Current->CanBreakBefore ||
2189  Current->isOneOf(tok::comment, tok::string_literal)) {
2190  UnbreakableTailLength = 0;
2191  } else {
2192  UnbreakableTailLength +=
2193  Current->ColumnWidth + Current->SpacesRequiredBefore;
2194  }
2195  Current = Current->Previous;
2196  }
2197 }
2198 
2199 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
2200  const FormatToken &Tok,
2201  bool InFunctionDecl) {
2202  const FormatToken &Left = *Tok.Previous;
2203  const FormatToken &Right = Tok;
2204 
2205  if (Left.is(tok::semi))
2206  return 0;
2207 
2209  if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
2210  return 1;
2211  if (Right.is(Keywords.kw_implements))
2212  return 2;
2213  if (Left.is(tok::comma) && Left.NestingLevel == 0)
2214  return 3;
2215  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2216  if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
2217  return 100;
2218  if (Left.is(TT_JsTypeColon))
2219  return 35;
2220  if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2221  (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2222  return 100;
2223  // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
2224  if (Left.opensScope() && Right.closesScope())
2225  return 200;
2226  }
2227 
2228  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2229  return 1;
2230  if (Right.is(tok::l_square)) {
2232  return 1;
2233  if (Left.is(tok::r_square))
2234  return 200;
2235  // Slightly prefer formatting local lambda definitions like functions.
2236  if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
2237  return 35;
2238  if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2239  TT_ArrayInitializerLSquare,
2240  TT_DesignatedInitializerLSquare, TT_AttributeSquare))
2241  return 500;
2242  }
2243 
2244  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2245  Right.is(tok::kw_operator)) {
2246  if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
2247  return 3;
2248  if (Left.is(TT_StartOfName))
2249  return 110;
2250  if (InFunctionDecl && Right.NestingLevel == 0)
2252  return 200;
2253  }
2254  if (Right.is(TT_PointerOrReference))
2255  return 190;
2256  if (Right.is(TT_LambdaArrow))
2257  return 110;
2258  if (Left.is(tok::equal) && Right.is(tok::l_brace))
2259  return 160;
2260  if (Left.is(TT_CastRParen))
2261  return 100;
2262  if (Left.is(tok::coloncolon) ||
2263  (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
2264  return 500;
2265  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
2266  return 5000;
2267  if (Left.is(tok::comment))
2268  return 1000;
2269 
2270  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
2271  TT_CtorInitializerColon))
2272  return 2;
2273 
2274  if (Right.isMemberAccess()) {
2275  // Breaking before the "./->" of a chained call/member access is reasonably
2276  // cheap, as formatting those with one call per line is generally
2277  // desirable. In particular, it should be cheaper to break before the call
2278  // than it is to break inside a call's parameters, which could lead to weird
2279  // "hanging" indents. The exception is the very last "./->" to support this
2280  // frequent pattern:
2281  //
2282  // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
2283  // dddddddd);
2284  //
2285  // which might otherwise be blown up onto many lines. Here, clang-format
2286  // won't produce "hanging" indents anyway as there is no other trailing
2287  // call.
2288  //
2289  // Also apply higher penalty is not a call as that might lead to a wrapping
2290  // like:
2291  //
2292  // aaaaaaa
2293  // .aaaaaaaaa.bbbbbbbb(cccccccc);
2294  return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
2295  ? 150
2296  : 35;
2297  }
2298 
2299  if (Right.is(TT_TrailingAnnotation) &&
2300  (!Right.Next || Right.Next->isNot(tok::l_paren))) {
2301  // Moving trailing annotations to the next line is fine for ObjC method
2302  // declarations.
2303  if (Line.startsWith(TT_ObjCMethodSpecifier))
2304  return 10;
2305  // Generally, breaking before a trailing annotation is bad unless it is
2306  // function-like. It seems to be especially preferable to keep standard
2307  // annotations (i.e. "const", "final" and "override") on the same line.
2308  // Use a slightly higher penalty after ")" so that annotations like
2309  // "const override" are kept together.
2310  bool is_short_annotation = Right.TokenText.size() < 10;
2311  return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
2312  }
2313 
2314  // In for-loops, prefer breaking at ',' and ';'.
2315  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
2316  return 4;
2317 
2318  // In Objective-C method expressions, prefer breaking before "param:" over
2319  // breaking after it.
2320  if (Right.is(TT_SelectorName))
2321  return 0;
2322  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
2323  return Line.MightBeFunctionDecl ? 50 : 500;
2324 
2325  // In Objective-C type declarations, avoid breaking after the category's
2326  // open paren (we'll prefer breaking after the protocol list's opening
2327  // angle bracket, if present).
2328  if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
2329  Left.Previous->isOneOf(tok::identifier, tok::greater))
2330  return 500;
2331 
2332  if (Left.is(tok::l_paren) && InFunctionDecl &&
2334  return 100;
2335  if (Left.is(tok::l_paren) && Left.Previous &&
2336  (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
2337  Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
2338  return 1000;
2339  if (Left.is(tok::equal) && InFunctionDecl)
2340  return 110;
2341  if (Right.is(tok::r_brace))
2342  return 1;
2343  if (Left.is(TT_TemplateOpener))
2344  return 100;
2345  if (Left.opensScope()) {
2347  return 0;
2348  if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
2349  return 19;
2351  : 19;
2352  }
2353  if (Left.is(TT_JavaAnnotation))
2354  return 50;
2355 
2356  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
2357  Left.Previous->isLabelString() &&
2358  (Left.NextOperator || Left.OperatorIndex != 0))
2359  return 50;
2360  if (Right.is(tok::plus) && Left.isLabelString() &&
2361  (Right.NextOperator || Right.OperatorIndex != 0))
2362  return 25;
2363  if (Left.is(tok::comma))
2364  return 1;
2365  if (Right.is(tok::lessless) && Left.isLabelString() &&
2366  (Right.NextOperator || Right.OperatorIndex != 1))
2367  return 25;
2368  if (Right.is(tok::lessless)) {
2369  // Breaking at a << is really cheap.
2370  if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
2371  // Slightly prefer to break before the first one in log-like statements.
2372  return 2;
2373  return 1;
2374  }
2375  if (Left.ClosesTemplateDeclaration)
2377  if (Left.is(TT_ConditionalExpr))
2378  return prec::Conditional;
2379  prec::Level Level = Left.getPrecedence();
2380  if (Level == prec::Unknown)
2381  Level = Right.getPrecedence();
2382  if (Level == prec::Assignment)
2384  if (Level != prec::Unknown)
2385  return Level;
2386 
2387  return 3;
2388 }
2389 
2390 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2391  const FormatToken &Left,
2392  const FormatToken &Right) {
2393  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2394  return true;
2395  if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
2396  return true;
2398  Left.Tok.getObjCKeywordID() == tok::objc_property)
2399  return true;
2400  if (Right.is(tok::hashhash))
2401  return Left.is(tok::hash);
2402  if (Left.isOneOf(tok::hashhash, tok::hash))
2403  return Right.is(tok::hash);
2404  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
2406  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2407  return (Right.is(TT_CastRParen) ||
2408  (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2411  if (Right.isOneOf(tok::semi, tok::comma))
2412  return false;
2413  if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
2414  bool IsLightweightGeneric =
2415  Right.MatchingParen && Right.MatchingParen->Next &&
2416  Right.MatchingParen->Next->is(tok::colon);
2417  return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
2418  }
2419  if (Right.is(tok::less) && Left.is(tok::kw_template))
2421  if (Left.isOneOf(tok::exclaim, tok::tilde))
2422  return false;
2423  if (Left.is(tok::at) &&
2424  Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
2425  tok::numeric_constant, tok::l_paren, tok::l_brace,
2426  tok::kw_true, tok::kw_false))
2427  return false;
2428  if (Left.is(tok::colon))
2429  return !Left.is(TT_ObjCMethodExpr);
2430  if (Left.is(tok::coloncolon))
2431  return false;
2432  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
2435  (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
2436  // Format empty list as `<>`.
2437  if (Left.is(tok::less) && Right.is(tok::greater))
2438  return false;
2439  return !Style.Cpp11BracedListStyle;
2440  }
2441  return false;
2442  }
2443  if (Right.is(tok::ellipsis))
2444  return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
2445  Left.Previous->is(tok::kw_case));
2446  if (Left.is(tok::l_square) && Right.is(tok::amp))
2447  return false;
2448  if (Right.is(TT_PointerOrReference)) {
2449  if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
2450  if (!Left.MatchingParen)
2451  return true;
2452  FormatToken *TokenBeforeMatchingParen =
2454  if (!TokenBeforeMatchingParen ||
2455  !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
2456  return true;
2457  }
2458  return (Left.Tok.isLiteral() ||
2459  (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
2461  (Line.IsMultiVariableDeclStmt &&
2462  (Left.NestingLevel == 0 ||
2463  (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
2464  }
2465  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
2466  (!Left.is(TT_PointerOrReference) ||
2468  !Line.IsMultiVariableDeclStmt)))
2469  return true;
2470  if (Left.is(TT_PointerOrReference))
2471  return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
2472  (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
2473  !Right.is(TT_StartOfName)) ||
2474  (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
2475  (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
2476  tok::l_paren) &&
2478  !Line.IsMultiVariableDeclStmt) &&
2479  Left.Previous &&
2480  !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
2481  if (Right.is(tok::star) && Left.is(tok::l_paren))
2482  return false;
2483  const auto SpaceRequiredForArrayInitializerLSquare =
2484  [](const FormatToken &LSquareTok, const FormatStyle &Style) {
2489  LSquareTok.endsSequence(tok::l_square, tok::colon,
2490  TT_SelectorName));
2491  };
2492  if (Left.is(tok::l_square))
2493  return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
2494  SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
2495  (Left.isOneOf(TT_ArraySubscriptLSquare,
2496  TT_StructuredBindingLSquare) &&
2497  Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
2498  if (Right.is(tok::r_square))
2499  return Right.MatchingParen &&
2500  ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
2501  SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
2502  Style)) ||
2504  Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
2505  TT_StructuredBindingLSquare)) ||
2506  Right.MatchingParen->is(TT_AttributeParen));
2507  if (Right.is(tok::l_square) &&
2508  !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2509  TT_DesignatedInitializerLSquare,
2510  TT_StructuredBindingLSquare, TT_AttributeSquare) &&
2511  !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
2512  return false;
2513  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
2514  return !Left.Children.empty(); // No spaces in "{}".
2515  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
2516  (Right.is(tok::r_brace) && Right.MatchingParen &&
2517  Right.MatchingParen->BlockKind != BK_Block))
2518  return !Style.Cpp11BracedListStyle;
2519  if (Left.is(TT_BlockComment))
2520  return !Left.TokenText.endswith("=*/");
2521  if (Right.is(tok::l_paren)) {
2522  if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
2523  (Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
2524  return true;
2525  return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
2527  (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
2528  tok::kw_switch, tok::kw_case, TT_ForEachMacro,
2529  TT_ObjCForIn) ||
2530  Left.endsSequence(tok::kw_constexpr, tok::kw_if) ||
2531  (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
2532  tok::kw_new, tok::kw_delete) &&
2533  (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
2535  (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
2536  Left.is(tok::r_paren)) &&
2537  Line.Type != LT_PreprocessorDirective);
2538  }
2539  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
2540  return false;
2541  if (Right.is(TT_UnaryOperator))
2542  return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
2543  (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
2544  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
2545  tok::r_paren) ||
2546  Left.isSimpleTypeSpecifier()) &&
2547  Right.is(tok::l_brace) && Right.getNextNonComment() &&
2548  Right.BlockKind != BK_Block)
2549  return false;
2550  if (Left.is(tok::period) || Right.is(tok::period))
2551  return false;
2552  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
2553  return false;
2554  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
2555  Left.MatchingParen->Previous &&
2556  Left.MatchingParen->Previous->is(tok::period))
2557  // A.<B<C<...>>>DoSomething();
2558  return false;
2559  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2560  return false;
2561  if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at))
2562  // Objective-C dictionary literal -> no space after opening brace.
2563  return false;
2564  if (Right.is(tok::r_brace) && Right.MatchingParen &&
2565  Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at))
2566  // Objective-C dictionary literal -> no space before closing brace.
2567  return false;
2568  return true;
2569 }
2570 
2571 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2572  const FormatToken &Right) {
2573  const FormatToken &Left = *Right.Previous;
2574  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2575  return true; // Never ever merge two identifiers.
2576  if (Style.isCpp()) {
2577  if (Left.is(tok::kw_operator))
2578  return Right.is(tok::coloncolon);
2579  if (Right.is(tok::l_brace) && Right.BlockKind == BK_BracedInit &&
2581  return true;
2582  } else if (Style.Language == FormatStyle::LK_Proto ||
2584  if (Right.is(tok::period) &&
2585  Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2586  Keywords.kw_repeated, Keywords.kw_extend))
2587  return true;
2588  if (Right.is(tok::l_paren) &&
2589  Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2590  return true;
2591  if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
2592  return true;
2593  // Slashes occur in text protocol extension syntax: [type/type] { ... }.
2594  if (Left.is(tok::slash) || Right.is(tok::slash))
2595  return false;
2596  if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
2597  Right.isOneOf(tok::l_brace, tok::less))
2598  return !Style.Cpp11BracedListStyle;
2599  // A percent is probably part of a formatting specification, such as %lld.
2600  if (Left.is(tok::percent))
2601  return false;
2602  // Preserve the existence of a space before a percent for cases like 0x%04x
2603  // and "%d %d"
2604  if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
2605  return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin();
2606  } else if (Style.Language == FormatStyle::LK_JavaScript) {
2607  if (Left.is(TT_JsFatArrow))
2608  return true;
2609  // for await ( ...
2610  if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
2611  Left.Previous->is(tok::kw_for))
2612  return true;
2613  if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
2614  Right.MatchingParen) {
2615  const FormatToken *Next = Right.MatchingParen->getNextNonComment();
2616  // An async arrow function, for example: `x = async () => foo();`,
2617  // as opposed to calling a function called async: `x = async();`
2618  if (Next && Next->is(TT_JsFatArrow))
2619  return true;
2620  }
2621  if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2622  (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2623  return false;
2624  // In tagged template literals ("html`bar baz`"), there is no space between
2625  // the tag identifier and the template string. getIdentifierInfo makes sure
2626  // that the identifier is not a pseudo keyword like `yield`, either.
2627  if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
2628  Right.is(TT_TemplateString))
2629  return false;
2630  if (Right.is(tok::star) &&
2631  Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2632  return false;
2633  if (Right.isOneOf(tok::l_brace, tok::l_square) &&
2634  Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
2635  Keywords.kw_extends, Keywords.kw_implements))
2636  return true;
2637  if (Right.is(tok::l_paren)) {
2638  // JS methods can use some keywords as names (e.g. `delete()`).
2639  if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
2640  return false;
2641  // Valid JS method names can include keywords, e.g. `foo.delete()` or
2642  // `bar.instanceof()`. Recognize call positions by preceding period.
2643  if (Left.Previous && Left.Previous->is(tok::period) &&
2644  Left.Tok.getIdentifierInfo())
2645  return false;
2646  // Additional unary JavaScript operators that need a space after.
2647  if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
2648  tok::kw_void))
2649  return true;
2650  }
2651  if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2652  tok::kw_const) ||
2653  // "of" is only a keyword if it appears after another identifier
2654  // (e.g. as "const x of y" in a for loop), or after a destructuring
2655  // operation (const [x, y] of z, const {a, b} of c).
2656  (Left.is(Keywords.kw_of) && Left.Previous &&
2657  (Left.Previous->Tok.is(tok::identifier) ||
2658  Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
2659  (!Left.Previous || !Left.Previous->is(tok::period)))
2660  return true;
2661  if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
2662  Left.Previous->is(tok::period) && Right.is(tok::l_paren))
2663  return false;
2664  if (Left.is(Keywords.kw_as) &&
2665  Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
2666  return true;
2667  if (Left.is(tok::kw_default) && Left.Previous &&
2668  Left.Previous->is(tok::kw_export))
2669  return true;
2670  if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2671  return true;
2672  if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2673  return false;
2674  if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2675  return false;
2676  if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2677  Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2678  return false;
2679  if (Left.is(tok::ellipsis))
2680  return false;
2681  if (Left.is(TT_TemplateCloser) &&
2682  !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2683  Keywords.kw_implements, Keywords.kw_extends))
2684  // Type assertions ('<type>expr') are not followed by whitespace. Other
2685  // locations that should have whitespace following are identified by the
2686  // above set of follower tokens.
2687  return false;
2688  if (Right.is(TT_JsNonNullAssertion))
2689  return false;
2690  if (Left.is(TT_JsNonNullAssertion) &&
2691  Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
2692  return true; // "x! as string", "x! in y"
2693  } else if (Style.Language == FormatStyle::LK_Java) {
2694  if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2695  return true;
2696  if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2698  if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2699  tok::kw_protected) ||
2700  Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2701  Keywords.kw_native)) &&
2702  Right.is(TT_TemplateOpener))
2703  return true;
2704  }
2705  if (Left.is(TT_ImplicitStringLiteral))
2706  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2707  if (Line.Type == LT_ObjCMethodDecl) {
2708  if (Left.is(TT_ObjCMethodSpecifier))
2709  return true;
2710  if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
2711  // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
2712  // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
2713  // method declaration.
2714  return false;
2715  }
2716  if (Line.Type == LT_ObjCProperty &&
2717  (Right.is(tok::equal) || Left.is(tok::equal)))
2718  return false;
2719 
2720  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2721  Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2722  return true;
2723  if (Right.is(TT_OverloadedOperatorLParen))
2725  if (Left.is(tok::comma))
2726  return true;
2727  if (Right.is(tok::comma))
2728  return false;
2729  if (Right.is(TT_ObjCBlockLParen))
2730  return true;
2731  if (Right.is(TT_CtorInitializerColon))
2733  if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
2734  return false;
2735  if (Right.is(TT_RangeBasedForLoopColon) &&
2737  return false;
2738  if (Right.is(tok::colon)) {
2739  if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2740  !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2741  return false;
2742  if (Right.is(TT_ObjCMethodExpr))
2743  return false;
2744  if (Left.is(tok::question))
2745  return false;
2746  if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2747  return false;
2748  if (Right.is(TT_DictLiteral))
2750  if (Right.is(TT_AttributeColon))
2751  return false;
2752  return true;
2753  }
2754  if (Left.is(TT_UnaryOperator))
2755  return Right.is(TT_BinaryOperator);
2756 
2757  // If the next token is a binary operator or a selector name, we have
2758  // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2759  if (Left.is(TT_CastRParen))
2760  return Style.SpaceAfterCStyleCast ||
2761  Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2762 
2763  if (Left.is(tok::greater) && Right.is(tok::greater)) {
2765  (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
2766  return !Style.Cpp11BracedListStyle;
2767  return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2769  }
2770  if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
2771  Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2772  (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
2773  return false;
2775  Right.getPrecedence() == prec::Assignment)
2776  return false;
2777  if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
2778  // Generally don't remove existing spaces between an identifier and "::".
2779  // The identifier might actually be a macro name such as ALWAYS_INLINE. If
2780  // this turns out to be too lenient, add analysis of the identifier itself.
2781  return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2782  if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2783  return (Left.is(TT_TemplateOpener) &&
2785  !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
2786  tok::kw___super, TT_TemplateCloser,
2787  TT_TemplateOpener)) ||
2788  (Left.is(tok ::l_paren) && Style.SpacesInParentheses);
2789  if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2790  return Style.SpacesInAngles;
2791  // Space before TT_StructuredBindingLSquare.
2792  if (Right.is(TT_StructuredBindingLSquare))
2793  return !Left.isOneOf(tok::amp, tok::ampamp) ||
2795  // Space before & or && following a TT_StructuredBindingLSquare.
2796  if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
2797  Right.isOneOf(tok::amp, tok::ampamp))
2799  if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2800  (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2801  !Right.is(tok::r_paren)))
2802  return true;
2803  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2804  Right.isNot(TT_FunctionTypeLParen))
2806  if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2807  Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2808  return false;
2809  if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2810  Line.startsWith(tok::hash))
2811  return true;
2812  if (Right.is(TT_TrailingUnaryOperator))
2813  return false;
2814  if (Left.is(TT_RegexLiteral))
2815  return false;
2816  return spaceRequiredBetween(Line, Left, Right);
2817 }
2818 
2819 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2820 static bool isAllmanBrace(const FormatToken &Tok) {
2821  return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2822  !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2823 }
2824 
2825 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2826  const FormatToken &Right) {
2827  const FormatToken &Left = *Right.Previous;
2828  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2829  return true;
2830 
2832  // FIXME: This might apply to other languages and token kinds.
2833  if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
2834  Left.Previous->is(tok::string_literal))
2835  return true;
2836  if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2837  Left.Previous && Left.Previous->is(tok::equal) &&
2838  Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2839  tok::kw_const) &&
2840  // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2841  // above.
2842  !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2843  // Object literals on the top level of a file are treated as "enum-style".
2844  // Each key/value pair is put on a separate line, instead of bin-packing.
2845  return true;
2846  if (Left.is(tok::l_brace) && Line.Level == 0 &&
2847  (Line.startsWith(tok::kw_enum) ||
2848  Line.startsWith(tok::kw_const, tok::kw_enum) ||
2849  Line.startsWith(tok::kw_export, tok::kw_enum) ||
2850  Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
2851  // JavaScript top-level enum key/value pairs are put on separate lines
2852  // instead of bin-packing.
2853  return true;
2854  if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2855  !Left.Children.empty())
2856  // Support AllowShortFunctionsOnASingleLine for JavaScript.
2859  (Left.NestingLevel == 0 && Line.Level == 0 &&
2862  } else if (Style.Language == FormatStyle::LK_Java) {
2863  if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2864  Right.Next->is(tok::string_literal))
2865  return true;
2866  } else if (Style.Language == FormatStyle::LK_Cpp ||
2870  if (Left.isStringLiteral() && Right.isStringLiteral())
2871  return true;
2872  }
2873 
2874  // If the last token before a '}', ']', or ')' is a comma or a trailing
2875  // comment, the intention is to insert a line break after it in order to make
2876  // shuffling around entries easier. Import statements, especially in
2877  // JavaScript, can be an exception to this rule.
2879  const FormatToken *BeforeClosingBrace = nullptr;
2880  if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
2882  Left.is(tok::l_paren))) &&
2883  Left.BlockKind != BK_Block && Left.MatchingParen)
2884  BeforeClosingBrace = Left.MatchingParen->Previous;
2885  else if (Right.MatchingParen &&
2886  (Right.MatchingParen->isOneOf(tok::l_brace,
2887  TT_ArrayInitializerLSquare) ||
2889  Right.MatchingParen->is(tok::l_paren))))
2890  BeforeClosingBrace = &Left;
2891  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2892  BeforeClosingBrace->isTrailingComment()))
2893  return true;
2894  }
2895 
2896  if (Right.is(tok::comment))
2897  return Left.BlockKind != BK_BracedInit &&
2898  Left.isNot(TT_CtorInitializerColon) &&
2899  (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2900  if (Left.isTrailingComment())
2901  return true;
2902  if (Right.Previous->IsUnterminatedLiteral)
2903  return true;
2904  if (Right.is(tok::lessless) && Right.Next &&
2905  Right.Previous->is(tok::string_literal) &&
2906  Right.Next->is(tok::string_literal))
2907  return true;
2908  if (Right.Previous->ClosesTemplateDeclaration &&
2909  Right.Previous->MatchingParen &&
2910  Right.Previous->MatchingParen->NestingLevel == 0 &&
2912  return true;
2913  if (Right.is(TT_CtorInitializerComma) &&
2916  return true;
2917  if (Right.is(TT_CtorInitializerColon) &&
2920  return true;
2921  // Break only if we have multiple inheritance.
2923  Right.is(TT_InheritanceComma))
2924  return true;
2925  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2926  // Multiline raw string literals are special wrt. line breaks. The author
2927  // has made a deliberate choice and might have aligned the contents of the
2928  // string literal accordingly. Thus, we try keep existing line breaks.
2929  return Right.IsMultiline && Right.NewlinesBefore > 0;
2930  if ((Right.Previous->is(tok::l_brace) ||
2931  (Right.Previous->is(tok::less) && Right.Previous->Previous &&
2932  Right.Previous->Previous->is(tok::equal))) &&
2933  Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
2934  // Don't put enums or option definitions onto single lines in protocol
2935  // buffers.
2936  return true;
2937  }
2938  if (Right.is(TT_InlineASMBrace))
2939  return Right.HasUnescapedNewline;
2940  if (isAllmanBrace(Left) || isAllmanBrace(Right))
2941  return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2942  (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
2944  (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2945  (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2946  if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2947  return true;
2948 
2951  Left.is(TT_LeadingJavaAnnotation) &&
2952  Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2953  (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2954  return true;
2955 
2956  if (Right.is(TT_ProtoExtensionLSquare))
2957  return true;
2958 
2959  // In text proto instances if a submessage contains at least 2 entries and at
2960  // least one of them is a submessage, like A { ... B { ... } ... },
2961  // put all of the entries of A on separate lines by forcing the selector of
2962  // the submessage B to be put on a newline.
2963  //
2964  // Example: these can stay on one line:
2965  // a { scalar_1: 1 scalar_2: 2 }
2966  // a { b { key: value } }
2967  //
2968  // and these entries need to be on a new line even if putting them all in one
2969  // line is under the column limit:
2970  // a {
2971  // scalar: 1
2972  // b { key: value }
2973  // }
2974  //
2975  // We enforce this by breaking before a submessage field that has previous
2976  // siblings, *and* breaking before a field that follows a submessage field.
2977  //
2978  // Be careful to exclude the case [proto.ext] { ... } since the `]` is
2979  // the TT_SelectorName there, but we don't want to break inside the brackets.
2980  //
2981  // Another edge case is @submessage { key: value }, which is a common
2982  // substitution placeholder. In this case we want to keep `@` and `submessage`
2983  // together.
2984  //
2985  // We ensure elsewhere that extensions are always on their own line.
2988  Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
2989  // Keep `@submessage` together in:
2990  // @submessage { key: value }
2991  if (Right.Previous && Right.Previous->is(tok::at))
2992  return false;
2993  // Look for the scope opener after selector in cases like:
2994  // selector { ...
2995  // selector: { ...
2996  // selector: @base { ...
2997  FormatToken *LBrace = Right.Next;
2998  if (LBrace && LBrace->is(tok::colon)) {
2999  LBrace = LBrace->Next;
3000  if (LBrace && LBrace->is(tok::at)) {
3001  LBrace = LBrace->Next;
3002  if (LBrace)
3003  LBrace = LBrace->Next;
3004  }
3005  }
3006  if (LBrace &&
3007  // The scope opener is one of {, [, <:
3008  // selector { ... }
3009  // selector [ ... ]
3010  // selector < ... >
3011  //
3012  // In case of selector { ... }, the l_brace is TT_DictLiteral.
3013  // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
3014  // so we check for immediately following r_brace.
3015  ((LBrace->is(tok::l_brace) &&
3016  (LBrace->is(TT_DictLiteral) ||
3017  (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
3018  LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
3019  // If Left.ParameterCount is 0, then this submessage entry is not the
3020  // first in its parent submessage, and we want to break before this entry.
3021  // If Left.ParameterCount is greater than 0, then its parent submessage
3022  // might contain 1 or more entries and we want to break before this entry
3023  // if it contains at least 2 entries. We deal with this case later by
3024  // detecting and breaking before the next entry in the parent submessage.
3025  if (Left.ParameterCount == 0)
3026  return true;
3027  // However, if this submessage is the first entry in its parent
3028  // submessage, Left.ParameterCount might be 1 in some cases.
3029  // We deal with this case later by detecting an entry
3030  // following a closing paren of this submessage.
3031  }
3032 
3033  // If this is an entry immediately following a submessage, it will be
3034  // preceded by a closing paren of that submessage, like in:
3035  // left---. .---right
3036  // v v
3037  // sub: { ... } key: value
3038  // If there was a comment between `}` an `key` above, then `key` would be
3039  // put on a new line anyways.
3040  if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
3041  return true;
3042  }
3043 
3044  return false;
3045 }
3046 
3047 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
3048  const FormatToken &Right) {
3049  const FormatToken &Left = *Right.Previous;
3050 
3051  // Language-specific stuff.
3053  if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3054  Keywords.kw_implements))
3055  return false;
3056  if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3057  Keywords.kw_implements))
3058  return true;
3059  } else if (Style.Language == FormatStyle::LK_JavaScript) {
3060  const FormatToken *NonComment = Right.getPreviousNonComment();
3061  if (NonComment &&
3062  NonComment->isOneOf(
3063  tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
3064  tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
3065  tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
3066  Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
3067  Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
3068  return false; // Otherwise automatic semicolon insertion would trigger.
3069  if (Right.NestingLevel == 0 &&
3070  (Left.Tok.getIdentifierInfo() ||
3071  Left.isOneOf(tok::r_square, tok::r_paren)) &&
3072  Right.isOneOf(tok::l_square, tok::l_paren))
3073  return false; // Otherwise automatic semicolon insertion would trigger.
3074  if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
3075  return false;
3076  if (Left.is(TT_JsTypeColon))
3077  return true;
3078  // Don't wrap between ":" and "!" of a strict prop init ("field!: type;").
3079  if (Left.is(tok::exclaim) && Right.is(tok::colon))
3080  return false;
3081  if (Right.is(Keywords.kw_is))
3082  return false;
3083  if (Left.is(Keywords.kw_in))
3085  if (Right.is(Keywords.kw_in))
3087  if (Right.is(Keywords.kw_as))
3088  return false; // must not break before as in 'x as type' casts
3089  if (Left.is(Keywords.kw_as))
3090  return true;
3091  if (Left.is(TT_JsNonNullAssertion))
3092  return true;
3093  if (Left.is(Keywords.kw_declare) &&
3094  Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
3095  Keywords.kw_function, tok::kw_class, tok::kw_enum,
3096  Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
3097  Keywords.kw_let, tok::kw_const))
3098  // See grammar for 'declare' statements at:
3099  // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
3100  return false;
3101  if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
3102  Right.isOneOf(tok::identifier, tok::string_literal))
3103  return false; // must not break in "module foo { ...}"
3104  if (Right.is(TT_TemplateString) && Right.closesScope())
3105  return false;
3106  if (Left.is(TT_TemplateString) && Left.opensScope())
3107  return true;
3108  }
3109 
3110  if (Left.is(tok::at))
3111  return false;
3112  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
3113  return false;
3114  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
3115  return !Right.is(tok::l_paren);
3116  if (Right.is(TT_PointerOrReference))
3117  return Line.IsMultiVariableDeclStmt ||
3119  (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
3120  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
3121  Right.is(tok::kw_operator))
3122  return true;
3123  if (Left.is(TT_PointerOrReference))
3124  return false;
3125  if (Right.isTrailingComment())
3126  // We rely on MustBreakBefore being set correctly here as we should not
3127  // change the "binding" behavior of a comment.
3128  // The first comment in a braced lists is always interpreted as belonging to
3129  // the first list element. Otherwise, it should be placed outside of the
3130  // list.
3131  return Left.BlockKind == BK_BracedInit ||
3132  (Left.is(TT_CtorInitializerColon) &&
3134  if (Left.is(tok::question) && Right.is(tok::colon))
3135  return false;
3136  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
3138  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
3140  if (Left.is(TT_InheritanceColon))
3142  if (Right.is(TT_InheritanceColon))
3144  if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
3145  Left.isNot(TT_SelectorName))
3146  return true;
3147 
3148  if (Right.is(tok::colon) &&
3149  !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
3150  return false;
3151  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
3155  return false;
3156  // Prevent cases like:
3157  //
3158  // submessage:
3159  // { key: valueeeeeeeeeeee }
3160  //
3161  // when the snippet does not fit into one line.
3162  // Prefer:
3163  //
3164  // submessage: {
3165  // key: valueeeeeeeeeeee
3166  // }
3167  //
3168  // instead, even if it is longer by one line.
3169  //
3170  // Note that this allows allows the "{" to go over the column limit
3171  // when the column limit is just between ":" and "{", but that does
3172  // not happen too often and alternative formattings in this case are
3173  // not much better.
3174  //
3175  // The code covers the cases:
3176  //
3177  // submessage: { ... }
3178  // submessage: < ... >
3179  // repeated: [ ... ]
3180  if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
3181  Right.is(TT_DictLiteral)) ||
3182  Right.is(TT_ArrayInitializerLSquare))
3183  return false;
3184  }
3185  return true;
3186  }
3187  if (Right.is(tok::r_square) && Right.MatchingParen &&
3188  Right.MatchingParen->is(TT_ProtoExtensionLSquare))
3189  return false;
3190  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
3191  Right.Next->is(TT_ObjCMethodExpr)))
3192  return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
3193  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
3194  return true;
3195  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
3196  return true;
3197  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
3198  TT_OverloadedOperator))
3199  return false;
3200  if (Left.is(TT_RangeBasedForLoopColon))
3201  return true;
3202  if (Right.is(TT_RangeBasedForLoopColon))
3203  return false;
3204  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
3205  return true;
3206  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
3207  Left.is(tok::kw_operator))
3208  return false;
3209  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
3210  Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
3211  return false;
3212  if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
3214  return false;
3215  if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
3216  return false;
3217  if (Left.is(tok::l_paren) && Left.Previous &&
3218  (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
3219  return false;
3220  if (Right.is(TT_ImplicitStringLiteral))
3221  return false;
3222 
3223  if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
3224  return false;
3225  if (Right.is(tok::r_square) && Right.MatchingParen &&
3226  Right.MatchingParen->is(TT_LambdaLSquare))
3227  return false;
3228 
3229  // We only break before r_brace if there was a corresponding break before
3230  // the l_brace, which is tracked by BreakBeforeClosingBrace.
3231  if (Right.is(tok::r_brace))
3232  return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
3233 
3234  // Allow breaking after a trailing annotation, e.g. after a method
3235  // declaration.
3236  if (Left.is(TT_TrailingAnnotation))
3237  return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
3238  tok::less, tok::coloncolon);
3239 
3240  if (Right.is(tok::kw___attribute) ||
3241  (Right.is(tok::l_square) && Right.is(TT_AttributeSquare)))
3242  return true;
3243 
3244  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
3245  return true;
3246 
3247  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
3248  return true;
3249 
3250  if (Left.is(TT_CtorInitializerColon))
3252  if (Right.is(TT_CtorInitializerColon))
3254  if (Left.is(TT_CtorInitializerComma) &&
3256  return false;
3257  if (Right.is(TT_CtorInitializerComma) &&
3259  return true;
3260  if (Left.is(TT_InheritanceComma) &&
3262  return false;
3263  if (Right.is(TT_InheritanceComma) &&
3265  return true;
3266  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
3267  (Left.is(tok::less) && Right.is(tok::less)))
3268  return false;
3269  if (Right.is(TT_BinaryOperator) &&
3272  Right.getPrecedence() != prec::Assignment))
3273  return true;
3274  if (Left.is(TT_ArrayInitializerLSquare))
3275  return true;
3276  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
3277  return true;
3278  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
3279  !Left.isOneOf(tok::arrowstar, tok::lessless) &&
3282  Left.getPrecedence() == prec::Assignment))
3283  return true;
3284  if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
3285  (Left.is(tok::r_square) && Right.is(TT_AttributeSquare)))
3286  return false;
3287  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
3288  tok::kw_class, tok::kw_struct, tok::comment) ||
3289  Right.isMemberAccess() ||
3290  Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
3291  tok::colon, tok::l_square, tok::at) ||
3292  (Left.is(tok::r_paren) &&
3293  Right.isOneOf(tok::identifier, tok::kw_const)) ||
3294  (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
3295  (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
3296 }
3297 
3298 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
3299  llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
3300  const FormatToken *Tok = Line.First;
3301  while (Tok) {
3302  llvm::errs() << " M=" << Tok->MustBreakBefore
3303  << " C=" << Tok->CanBreakBefore
3304  << " T=" << getTokenTypeName(Tok->Type)
3305  << " S=" << Tok->SpacesRequiredBefore
3306  << " B=" << Tok->BlockParameterCount
3307  << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
3308  << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
3309  << " PPK=" << Tok->PackingKind << " FakeLParens=";
3310  for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
3311  llvm::errs() << Tok->FakeLParens[i] << "/";
3312  llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
3313  llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
3314  llvm::errs() << " Text='" << Tok->TokenText << "'\n";
3315  if (!Tok->Next)
3316  assert(Tok == Line.Last);
3317  Tok = Tok->Next;
3318  }
3319  llvm::errs() << "----\n";
3320 }
3321 
3322 } // namespace format
3323 } // namespace clang
Always break after template declaration.
Definition: Format.h:387
Always break after the return type of top-level definitions.
Definition: Format.h:330
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers...
Definition: FormatToken.h:346
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:231
Break inheritance list after the colon and commas.
Definition: Format.h:922
bool AlwaysBreakBeforeMultilineStrings
If true, always break before multiline string literals.
Definition: Format.h:352
unsigned PenaltyBreakBeforeFirstCallParameter
The penalty for breaking a function call after call(.
Definition: Format.h:1338
Token Tok
The Token.
Definition: FormatToken.h:127
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1191
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
Defines the SourceManager interface.
bool SpaceBeforeRangeBasedForLoopColon
If false, spaces will be removed before range-based for loop colon.
Definition: Format.h:1570
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:207
Break constructor initializers before the colon and commas, and align the commas with the colon...
Definition: Format.h:855
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:218
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
bool isMemberAccess() const
Returns true if this is a "." or "->" accessing a member.
Definition: FormatToken.h:394
bool isFunctionLikeKeyword() const
Returns true if this is a keyword that can be used like a function call (e.g.
Definition: FormatToken.h:428
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:484
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:57
bool SpaceAfterTemplateKeyword
If true, a space will be inserted after the 'template' keyword.
Definition: Format.h:1490
PointerAlignmentStyle PointerAlignment
Pointer and reference alignment style.
Definition: Format.h:1379
Align pointer to the left.
Definition: Format.h:1365
Should be used for C, C++.
Definition: Format.h:1182
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:156
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:664
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:214
bool isBinaryOperator() const
Definition: FormatToken.h:416
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:133
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:296
Break after operators.
Definition: Format.h:449
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:113
bool CanBeExpression
tok::TokenKind ContextKind
unsigned UnbreakableTailLength
The length of following tokens until the next natural split point, or the next token that can be brok...
Definition: FormatToken.h:222
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:384
bool JavaScriptWrapImports
Whether to wrap JavaScript import/export statements.
Definition: Format.h:1161
Always break after the return type.
Definition: Format.h:290
unsigned SplitPenalty
Penalty for inserting a line break before this token.
Definition: FormatToken.h:237
bool ColonIsForRangeExpr
prec::Level getPrecedence() const
Definition: FormatToken.h:470
unsigned ParameterCount
Number of parameters, if this is "(", "[" or "<".
Definition: FormatToken.h:195
unsigned FakeRParens
Insert this many fake ) after this token for correct indentation.
Definition: FormatToken.h:263
bool SpaceInEmptyParentheses
If true, spaces may be inserted into ().
Definition: Format.h:1582
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:185
Should be used for Java.
Definition: Format.h:1184
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:293
Always break after the return type of function definitions.
Definition: Format.h:318
Only merge functions defined inside a class.
Definition: Format.h:209
This file implements a token annotator, i.e.
bool BreakAfterJavaFieldAnnotations
Break after each annotation on a field in Java files.
Definition: Format.h:875
bool ConstructorInitializerAllOnOneLineOrOnePerLine
If the constructor initializers don't fit on a line, put each initializer on its own line...
Definition: Format.h:967
unsigned OperatorIndex
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, contains the 0-based operator index.
Definition: FormatToken.h:273
unsigned SpacesRequiredBefore
The number of spaces that should be inserted before this token.
Definition: FormatToken.h:182
bool isNot(T Kind) const
Definition: FormatToken.h:326
bool SpacesInParentheses
If true, spaces will be inserted after ( and before ).
Definition: Format.h:1630
const FormatToken & Tok
ReturnTypeBreakingStyle AlwaysBreakAfterReturnType
The function declaration return type breaking style to use.
Definition: Format.h:338
unsigned BlockParameterCount
Number of parameters that are nested blocks, if this is "(", "[" or "<".
Definition: FormatToken.h:199
bool InInheritanceList
bool SpaceBeforeAssignmentOperators
If false, spaces will be removed before assignment operators.
Definition: Format.h:1498
SpaceBeforeParensOptions SpaceBeforeParens
Defines in which cases to put a space before opening parentheses.
Definition: Format.h:1562
bool SpaceBeforeCtorInitializerColon
If false, spaces will be removed before constructor initializer colon.
Definition: Format.h:1517
unsigned PenaltyBreakAssignment
The penalty for breaking around an assignment operator.
Definition: Format.h:1335
const char * getName() const
Definition: Token.h:166
Always put a space before opening parentheses, except when it's prohibited by the syntax rules (in fu...
Definition: Format.h:1558
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
unsigned ColumnLimit
The column limit.
Definition: Format.h:885
Never merge functions into a single line.
Definition: Format.h:195
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:476
AnnotatingParser & P
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:79
NodeId Parent
Definition: ASTDiff.cpp:192
bool isLabelString() const
Returns true if this is a string literal that's like a label, e.g.
Definition: FormatToken.h:448
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:319
Break after return type automatically.
Definition: Format.h:274
Only merge empty functions.
Definition: Format.h:217
Should be used for JavaScript.
Definition: Format.h:1186
const AnnotatedLine * Line
StateNode * Previous
SmallVector< AnnotatedLine *, 0 > Children
ParameterPackingKind PackingKind
If this is an opening parenthesis, how are the parameters packed?
Definition: FormatToken.h:210
bool SpacesInContainerLiterals
If true, spaces are inserted inside container literals (e.g.
Definition: Format.h:1616
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:823
bool SpacesInAngles
If true, spaces will be inserted after < and before > in template argument lists. ...
Definition: Format.h:1607
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine *> &Lines)
Adapts the indent levels of comment lines to the indent of the subsequent line.
static unsigned maxNestingDepth(const AnnotatedLine &Line)
FormatToken * FirstStartOfName
SourceLocation getEnd() const
bool isTrailingComment() const
Definition: FormatToken.h:421
void annotate(AnnotatedLine &Line)
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:65
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:25
static bool isAllmanBrace(const FormatToken &Tok)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:310
bool InCpp11AttributeSpecifier
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Break constructor initializers after the colon and commas.
Definition: Format.h:862
bool BreakBeforeTernaryOperators
If true, ternary operators will be placed after line breaks.
Definition: Format.h:837
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:140
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool SpacesInCStyleCastParentheses
If true, spaces may be inserted into C style casts.
Definition: Format.h:1623
bool IsUnterminatedLiteral
Set to true if this token is an unterminated literal.
Definition: FormatToken.h:174
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
bool SpacesInSquareBrackets
If true, spaces will be inserted after [ and before ].
Definition: Format.h:1639
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:171
SmallVector< prec::Level, 4 > FakeLParens
Stores the number of required fake parentheses and the corresponding operator precedence.
Definition: FormatToken.h:261
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:67
BreakTemplateDeclarationsStyle AlwaysBreakTemplateDeclarations
The template declaration breaking style to use.
Definition: Format.h:391
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:48
Never put a space before opening parentheses.
Definition: Format.h:1536
unsigned PenaltyReturnTypeOnItsOwnLine
Penalty for putting the return type of a function onto its own line.
Definition: Format.h:1357
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:234
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1201
bool ColonIsObjCMethodExpr
Dataflow Directional Tag Classes.
ShortFunctionStyle AllowShortFunctionsOnASingleLine
Dependent on the value, int f() { return 0; } can be put on a single line.
Definition: Format.h:241
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:149
BreakInheritanceListStyle BreakInheritanceList
The inheritance list style to use.
Definition: Format.h:926
Use C++03-compatible syntax.
Definition: Format.h:1644
Always break after the return types of top-level functions.
Definition: Format.h:303
bool IsExpression
Use features of C++11, C++14 and C++1z (e.g.
Definition: Format.h:1647
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:1003
static bool isFunctionDeclarationName(const FormatToken &Current, const AnnotatedLine &Line)
bool SpaceBeforeCpp11BracedList
If true, a space will be inserted before a C++11 braced list used to initialize an object (after the ...
Definition: Format.h:1509
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:714
Break inheritance list before the colon and commas, and align the commas with the colon...
Definition: Format.h:914
void calculateFormattingInformation(AnnotatedLine &Line)
bool SpaceAfterCStyleCast
If true, a space is inserted after C style casts.
Definition: Format.h:1483
Should be used for Objective-C, Objective-C++.
Definition: Format.h:1188
LanguageStandard Standard
Format compatible with this standard, e.g.
Definition: Format.h:1654
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, points to the next operator.
Definition: FormatToken.h:277
BreakConstructorInitializersStyle BreakConstructorInitializers
The constructor initializers style to use.
Definition: Format.h:866
unsigned BindingStrength
bool ClosesTemplateDeclaration
true if this is the ">" of "template<..>".
Definition: FormatToken.h:188
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:290
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:300
FormatToken * FirstObjCSelectorName
bool InTemplateArgument
bool ColonIsDictLiteral
The parameter type of a method or function.
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:477
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.h:493
Break before operators.
Definition: Format.h:473
bool ObjCSpaceBeforeProtocolList
Add a space in front of an Objective-C protocol list, i.e.
Definition: Format.h:1332
unsigned SpacesBeforeTrailingComments
The number of spaces before trailing line comments (// - comments).
Definition: Format.h:1598
bool opensScope() const
Returns whether Tok is ([{ or an opening < of a template or in protos.
Definition: FormatToken.h:374
bool SpaceBeforeInheritanceColon
If false, spaces will be removed before inheritance colon.
Definition: Format.h:1524
bool AllowShortBlocksOnASingleLine
Allows contracting simple braced statements to a single line.
Definition: Format.h:176
unsigned LongestObjCSelectorName
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:165
unsigned PenaltyBreakTemplateDeclaration
The penalty for breaking after template declaration.
Definition: Format.h:1350
__DEVICE__ int max(int __a, int __b)
bool AfterClass
Wrap class definitions.
Definition: Format.h:634
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1196
prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus11)
Return the precedence of the specified binary operator token.
Align pointer to the right.
Definition: Format.h:1370
unsigned BindingStrength
The binding strength of a token.
Definition: FormatToken.h:227
bool CaretFound
bool InCtorInitializer
bool isStringLiteral() const
Definition: FormatToken.h:350
SourceLocation getBegin() const
unsigned MaxEmptyLinesToKeep
The maximum number of consecutive empty lines to keep.
Definition: Format.h:1245
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:137
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:177
bool PartOfMultiVariableDeclStmt
Is this token part of a DeclStmt defining multiple variables?
Definition: FormatToken.h:282
unsigned ParameterIndex
The 0-based index of the parameter/argument.
Definition: FormatToken.h:254
bool IsForEachMacro
const FormatStyle & Style