clang  8.0.0svn
FormatToken.h
Go to the documentation of this file.
1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the declaration of the FormatToken, a wrapper
12 /// around Token with additional information related to formatting.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
18 
21 #include "clang/Format/Format.h"
22 #include "clang/Lex/Lexer.h"
23 #include <memory>
24 #include <unordered_set>
25 
26 namespace clang {
27 namespace format {
28 
29 #define LIST_TOKEN_TYPES \
30  TYPE(ArrayInitializerLSquare) \
31  TYPE(ArraySubscriptLSquare) \
32  TYPE(AttributeColon) \
33  TYPE(AttributeParen) \
34  TYPE(AttributeSquare) \
35  TYPE(BinaryOperator) \
36  TYPE(BitFieldColon) \
37  TYPE(BlockComment) \
38  TYPE(CastRParen) \
39  TYPE(ConditionalExpr) \
40  TYPE(ConflictAlternative) \
41  TYPE(ConflictEnd) \
42  TYPE(ConflictStart) \
43  TYPE(CtorInitializerColon) \
44  TYPE(CtorInitializerComma) \
45  TYPE(DesignatedInitializerLSquare) \
46  TYPE(DesignatedInitializerPeriod) \
47  TYPE(DictLiteral) \
48  TYPE(ForEachMacro) \
49  TYPE(FunctionAnnotationRParen) \
50  TYPE(FunctionDeclarationName) \
51  TYPE(FunctionLBrace) \
52  TYPE(FunctionTypeLParen) \
53  TYPE(ImplicitStringLiteral) \
54  TYPE(InheritanceColon) \
55  TYPE(InheritanceComma) \
56  TYPE(InlineASMBrace) \
57  TYPE(InlineASMColon) \
58  TYPE(JavaAnnotation) \
59  TYPE(JsComputedPropertyName) \
60  TYPE(JsExponentiation) \
61  TYPE(JsExponentiationEqual) \
62  TYPE(JsFatArrow) \
63  TYPE(JsNonNullAssertion) \
64  TYPE(JsTypeColon) \
65  TYPE(JsTypeOperator) \
66  TYPE(JsTypeOptionalQuestion) \
67  TYPE(LambdaArrow) \
68  TYPE(LambdaLSquare) \
69  TYPE(LeadingJavaAnnotation) \
70  TYPE(LineComment) \
71  TYPE(MacroBlockBegin) \
72  TYPE(MacroBlockEnd) \
73  TYPE(ObjCBlockLBrace) \
74  TYPE(ObjCBlockLParen) \
75  TYPE(ObjCDecl) \
76  TYPE(ObjCForIn) \
77  TYPE(ObjCMethodExpr) \
78  TYPE(ObjCMethodSpecifier) \
79  TYPE(ObjCProperty) \
80  TYPE(ObjCStringLiteral) \
81  TYPE(OverloadedOperator) \
82  TYPE(OverloadedOperatorLParen) \
83  TYPE(PointerOrReference) \
84  TYPE(PureVirtualSpecifier) \
85  TYPE(RangeBasedForLoopColon) \
86  TYPE(RegexLiteral) \
87  TYPE(SelectorName) \
88  TYPE(StartOfName) \
89  TYPE(StatementMacro) \
90  TYPE(StructuredBindingLSquare) \
91  TYPE(TemplateCloser) \
92  TYPE(TemplateOpener) \
93  TYPE(TemplateString) \
94  TYPE(ProtoExtensionLSquare) \
95  TYPE(TrailingAnnotation) \
96  TYPE(TrailingReturnArrow) \
97  TYPE(TrailingUnaryOperator) \
98  TYPE(UnaryOperator) \
99  TYPE(Unknown)
100 
101 enum TokenType {
102 #define TYPE(X) TT_##X,
104 #undef TYPE
106 };
107 
108 /// Determines the name of a token type.
109 const char *getTokenTypeName(TokenType Type);
110 
111 // Represents what type of block a set of braces opens.
113 
114 // The packing kind of a function's parameters.
116 
118 
119 class TokenRole;
120 class AnnotatedLine;
121 
122 /// A wrapper around a \c Token storing information about the
123 /// whitespace characters preceding it.
124 struct FormatToken {
126 
127  /// The \c Token.
129 
130  /// The number of newlines immediately before the \c Token.
131  ///
132  /// This can be used to determine what the user wrote in the original code
133  /// and thereby e.g. leave an empty line between two function definitions.
134  unsigned NewlinesBefore = 0;
135 
136  /// Whether there is at least one unescaped newline before the \c
137  /// Token.
138  bool HasUnescapedNewline = false;
139 
140  /// The range of the whitespace immediately preceding the \c Token.
142 
143  /// The offset just past the last '\n' in this token's leading
144  /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
145  unsigned LastNewlineOffset = 0;
146 
147  /// The width of the non-whitespace parts of the token (or its first
148  /// line for multi-line tokens) in columns.
149  /// We need this to correctly measure number of columns a token spans.
150  unsigned ColumnWidth = 0;
151 
152  /// Contains the width in columns of the last line of a multi-line
153  /// token.
154  unsigned LastLineColumnWidth = 0;
155 
156  /// Whether the token text contains newlines (escaped or not).
157  bool IsMultiline = false;
158 
159  /// Indicates that this is the first token of the file.
160  bool IsFirst = false;
161 
162  /// Whether there must be a line break before this token.
163  ///
164  /// This happens for example when a preprocessor directive ended directly
165  /// before the token.
166  bool MustBreakBefore = false;
167 
168  /// The raw text of the token.
169  ///
170  /// Contains the raw token text without leading whitespace and without leading
171  /// escaped newlines.
172  StringRef TokenText;
173 
174  /// Set to \c true if this token is an unterminated literal.
176 
177  /// Contains the kind of block if this token is a brace.
179 
180  TokenType Type = TT_Unknown;
181 
182  /// The number of spaces that should be inserted before this token.
183  unsigned SpacesRequiredBefore = 0;
184 
185  /// \c true if it is allowed to break before this token.
186  bool CanBreakBefore = false;
187 
188  /// \c true if this is the ">" of "template<..>".
190 
191  /// Number of parameters, if this is "(", "[" or "<".
192  unsigned ParameterCount = 0;
193 
194  /// Number of parameters that are nested blocks,
195  /// if this is "(", "[" or "<".
196  unsigned BlockParameterCount = 0;
197 
198  /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
199  /// the surrounding bracket.
201 
202  /// A token can have a special role that can carry extra information
203  /// about the token's formatting.
204  std::unique_ptr<TokenRole> Role;
205 
206  /// If this is an opening parenthesis, how are the parameters packed?
208 
209  /// The total length of the unwrapped line up to and including this
210  /// token.
211  unsigned TotalLength = 0;
212 
213  /// The original 0-based column of this token, including expanded tabs.
214  /// The configured TabWidth is used as tab width.
215  unsigned OriginalColumn = 0;
216 
217  /// The length of following tokens until the next natural split point,
218  /// or the next token that can be broken.
219  unsigned UnbreakableTailLength = 0;
220 
221  // FIXME: Come up with a 'cleaner' concept.
222  /// The binding strength of a token. This is a combined value of
223  /// operator precedence, parenthesis nesting, etc.
224  unsigned BindingStrength = 0;
225 
226  /// The nesting level of this token, i.e. the number of surrounding (),
227  /// [], {} or <>.
228  unsigned NestingLevel = 0;
229 
230  /// The indent level of this token. Copied from the surrounding line.
231  unsigned IndentLevel = 0;
232 
233  /// Penalty for inserting a line break before this token.
234  unsigned SplitPenalty = 0;
235 
236  /// If this is the first ObjC selector name in an ObjC method
237  /// definition or call, this contains the length of the longest name.
238  ///
239  /// This being set to 0 means that the selectors should not be colon-aligned,
240  /// e.g. because several of them are block-type.
242 
243  /// If this is the first ObjC selector name in an ObjC method
244  /// definition or call, this contains the number of parts that the whole
245  /// selector consists of.
246  unsigned ObjCSelectorNameParts = 0;
247 
248  /// The 0-based index of the parameter/argument. For ObjC it is set
249  /// for the selector name token.
250  /// For now calculated only for ObjC.
251  unsigned ParameterIndex = 0;
252 
253  /// Stores the number of required fake parentheses and the
254  /// corresponding operator precedence.
255  ///
256  /// If multiple fake parentheses start at a token, this vector stores them in
257  /// reverse order, i.e. inner fake parenthesis first.
259  /// Insert this many fake ) after this token for correct indentation.
260  unsigned FakeRParens = 0;
261 
262  /// \c true if this token starts a binary expression, i.e. has at least
263  /// one fake l_paren with a precedence greater than prec::Unknown.
265  /// \c true if this token ends a binary expression.
266  bool EndsBinaryExpression = false;
267 
268  /// If this is an operator (or "."/"->") in a sequence of operators
269  /// with the same precedence, contains the 0-based operator index.
270  unsigned OperatorIndex = 0;
271 
272  /// If this is an operator (or "."/"->") in a sequence of operators
273  /// with the same precedence, points to the next operator.
275 
276  /// Is this token part of a \c DeclStmt defining multiple variables?
277  ///
278  /// Only set if \c Type == \c TT_StartOfName.
280 
281  /// Does this line comment continue a line comment section?
282  ///
283  /// Only set to true if \c Type == \c TT_LineComment.
285 
286  /// If this is a bracket, this points to the matching one.
288 
289  /// The previous token in the unwrapped line.
290  FormatToken *Previous = nullptr;
291 
292  /// The next token in the unwrapped line.
293  FormatToken *Next = nullptr;
294 
295  /// If this token starts a block, this contains all the unwrapped lines
296  /// in it.
298 
299  /// Stores the formatting decision for the token once it was made.
301 
302  /// If \c true, this token has been fully formatted (indented and
303  /// potentially re-formatted inside), and we do not allow further formatting
304  /// changes.
305  bool Finalized = false;
306 
307  bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
308  bool is(TokenType TT) const { return Type == TT; }
309  bool is(const IdentifierInfo *II) const {
310  return II && II == Tok.getIdentifierInfo();
311  }
312  bool is(tok::PPKeywordKind Kind) const {
313  return Tok.getIdentifierInfo() &&
315  }
316  template <typename A, typename B> bool isOneOf(A K1, B K2) const {
317  return is(K1) || is(K2);
318  }
319  template <typename A, typename B, typename... Ts>
320  bool isOneOf(A K1, B K2, Ts... Ks) const {
321  return is(K1) || isOneOf(K2, Ks...);
322  }
323  template <typename T> bool isNot(T Kind) const { return !is(Kind); }
324 
325  bool closesScopeAfterBlock() const {
326  if (BlockKind == BK_Block)
327  return true;
328  if (closesScope())
329  return Previous->closesScopeAfterBlock();
330  return false;
331  }
332 
333  /// \c true if this token starts a sequence with the given tokens in order,
334  /// following the ``Next`` pointers, ignoring comments.
335  template <typename A, typename... Ts>
336  bool startsSequence(A K1, Ts... Tokens) const {
337  return startsSequenceInternal(K1, Tokens...);
338  }
339 
340  /// \c true if this token ends a sequence with the given tokens in order,
341  /// following the ``Previous`` pointers, ignoring comments.
342  template <typename A, typename... Ts>
343  bool endsSequence(A K1, Ts... Tokens) const {
344  return endsSequenceInternal(K1, Tokens...);
345  }
346 
347  bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
348 
350  return Tok.isObjCAtKeyword(Kind);
351  }
352 
353  bool isAccessSpecifier(bool ColonRequired = true) const {
354  return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
355  (!ColonRequired || (Next && Next->is(tok::colon)));
356  }
357 
358  /// Determine whether the token is a simple-type-specifier.
359  bool isSimpleTypeSpecifier() const;
360 
361  bool isObjCAccessSpecifier() const {
362  return is(tok::at) && Next &&
363  (Next->isObjCAtKeyword(tok::objc_public) ||
364  Next->isObjCAtKeyword(tok::objc_protected) ||
365  Next->isObjCAtKeyword(tok::objc_package) ||
366  Next->isObjCAtKeyword(tok::objc_private));
367  }
368 
369  /// Returns whether \p Tok is ([{ or an opening < of a template or in
370  /// protos.
371  bool opensScope() const {
372  if (is(TT_TemplateString) && TokenText.endswith("${"))
373  return true;
374  if (is(TT_DictLiteral) && is(tok::less))
375  return true;
376  return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
377  TT_TemplateOpener);
378  }
379  /// Returns whether \p Tok is )]} or a closing > of a template or in
380  /// protos.
381  bool closesScope() const {
382  if (is(TT_TemplateString) && TokenText.startswith("}"))
383  return true;
384  if (is(TT_DictLiteral) && is(tok::greater))
385  return true;
386  return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
387  TT_TemplateCloser);
388  }
389 
390  /// Returns \c true if this is a "." or "->" accessing a member.
391  bool isMemberAccess() const {
392  return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
393  !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
394  TT_LambdaArrow);
395  }
396 
397  bool isUnaryOperator() const {
398  switch (Tok.getKind()) {
399  case tok::plus:
400  case tok::plusplus:
401  case tok::minus:
402  case tok::minusminus:
403  case tok::exclaim:
404  case tok::tilde:
405  case tok::kw_sizeof:
406  case tok::kw_alignof:
407  return true;
408  default:
409  return false;
410  }
411  }
412 
413  bool isBinaryOperator() const {
414  // Comma is a binary operator, but does not behave as such wrt. formatting.
415  return getPrecedence() > prec::Comma;
416  }
417 
418  bool isTrailingComment() const {
419  return is(tok::comment) &&
420  (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
421  }
422 
423  /// Returns \c true if this is a keyword that can be used
424  /// like a function call (e.g. sizeof, typeid, ...).
425  bool isFunctionLikeKeyword() const {
426  switch (Tok.getKind()) {
427  case tok::kw_throw:
428  case tok::kw_typeid:
429  case tok::kw_return:
430  case tok::kw_sizeof:
431  case tok::kw_alignof:
432  case tok::kw_alignas:
433  case tok::kw_decltype:
434  case tok::kw_noexcept:
435  case tok::kw_static_assert:
436  case tok::kw___attribute:
437  return true;
438  default:
439  return false;
440  }
441  }
442 
443  /// Returns \c true if this is a string literal that's like a label,
444  /// e.g. ends with "=" or ":".
445  bool isLabelString() const {
446  if (!is(tok::string_literal))
447  return false;
448  StringRef Content = TokenText;
449  if (Content.startswith("\"") || Content.startswith("'"))
450  Content = Content.drop_front(1);
451  if (Content.endswith("\"") || Content.endswith("'"))
452  Content = Content.drop_back(1);
453  Content = Content.trim();
454  return Content.size() > 1 &&
455  (Content.back() == ':' || Content.back() == '=');
456  }
457 
458  /// Returns actual token start location without leading escaped
459  /// newlines and whitespace.
460  ///
461  /// This can be different to Tok.getLocation(), which includes leading escaped
462  /// newlines.
464  return WhitespaceRange.getEnd();
465  }
466 
468  return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
469  /*CPlusPlus11=*/true);
470  }
471 
472  /// Returns the previous token ignoring comments.
474  FormatToken *Tok = Previous;
475  while (Tok && Tok->is(tok::comment))
476  Tok = Tok->Previous;
477  return Tok;
478  }
479 
480  /// Returns the next token ignoring comments.
482  const FormatToken *Tok = Next;
483  while (Tok && Tok->is(tok::comment))
484  Tok = Tok->Next;
485  return Tok;
486  }
487 
488  /// Returns \c true if this token starts a block-type list, i.e. a
489  /// list that should be indented with a block indent.
491  if (is(TT_TemplateString) && opensScope())
492  return true;
493  return is(TT_ArrayInitializerLSquare) ||
494  is(TT_ProtoExtensionLSquare) ||
495  (is(tok::l_brace) &&
496  (BlockKind == BK_Block || is(TT_DictLiteral) ||
497  (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
498  (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
500  }
501 
502  /// Returns whether the token is the left square bracket of a C++
503  /// structured binding declaration.
505  if (!Style.isCpp() || isNot(tok::l_square))
506  return false;
507  const FormatToken *T = this;
508  do {
509  T = T->getPreviousNonComment();
510  } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
511  tok::ampamp));
512  return T && T->is(tok::kw_auto);
513  }
514 
515  /// Same as opensBlockOrBlockTypeList, but for the closing token.
517  if (is(TT_TemplateString) && closesScope())
518  return true;
519  return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
520  }
521 
522  /// Return the actual namespace token, if this token starts a namespace
523  /// block.
525  const FormatToken *NamespaceTok = this;
526  if (is(tok::comment))
527  NamespaceTok = NamespaceTok->getNextNonComment();
528  // Detect "(inline|export)? namespace" in the beginning of a line.
529  if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export))
530  NamespaceTok = NamespaceTok->getNextNonComment();
531  return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok
532  : nullptr;
533  }
534 
535 private:
536  // Disallow copying.
537  FormatToken(const FormatToken &) = delete;
538  void operator=(const FormatToken &) = delete;
539 
540  template <typename A, typename... Ts>
541  bool startsSequenceInternal(A K1, Ts... Tokens) const {
542  if (is(tok::comment) && Next)
543  return Next->startsSequenceInternal(K1, Tokens...);
544  return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
545  }
546 
547  template <typename A> bool startsSequenceInternal(A K1) const {
548  if (is(tok::comment) && Next)
549  return Next->startsSequenceInternal(K1);
550  return is(K1);
551  }
552 
553  template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
554  if (is(tok::comment) && Previous)
555  return Previous->endsSequenceInternal(K1);
556  return is(K1);
557  }
558 
559  template <typename A, typename... Ts>
560  bool endsSequenceInternal(A K1, Ts... Tokens) const {
561  if (is(tok::comment) && Previous)
562  return Previous->endsSequenceInternal(K1, Tokens...);
563  return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
564  }
565 };
566 
568 struct LineState;
569 
570 class TokenRole {
571 public:
572  TokenRole(const FormatStyle &Style) : Style(Style) {}
573  virtual ~TokenRole();
574 
575  /// After the \c TokenAnnotator has finished annotating all the tokens,
576  /// this function precomputes required information for formatting.
577  virtual void precomputeFormattingInfos(const FormatToken *Token);
578 
579  /// Apply the special formatting that the given role demands.
580  ///
581  /// Assumes that the token having this role is already formatted.
582  ///
583  /// Continues formatting from \p State leaving indentation to \p Indenter and
584  /// returns the total penalty that this formatting incurs.
585  virtual unsigned formatFromToken(LineState &State,
587  bool DryRun) {
588  return 0;
589  }
590 
591  /// Same as \c formatFromToken, but assumes that the first token has
592  /// already been set thereby deciding on the first line break.
593  virtual unsigned formatAfterToken(LineState &State,
595  bool DryRun) {
596  return 0;
597  }
598 
599  /// Notifies the \c Role that a comma was found.
600  virtual void CommaFound(const FormatToken *Token) {}
601 
602 protected:
604 };
605 
607 public:
609  : TokenRole(Style), HasNestedBracedList(false) {}
610 
611  void precomputeFormattingInfos(const FormatToken *Token) override;
612 
613  unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
614  bool DryRun) override;
615 
616  unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
617  bool DryRun) override;
618 
619  /// Adds \p Token as the next comma to the \c CommaSeparated list.
620  void CommaFound(const FormatToken *Token) override {
621  Commas.push_back(Token);
622  }
623 
624 private:
625  /// A struct that holds information on how to format a given list with
626  /// a specific number of columns.
627  struct ColumnFormat {
628  /// The number of columns to use.
629  unsigned Columns;
630 
631  /// The total width in characters.
632  unsigned TotalWidth;
633 
634  /// The number of lines required for this format.
635  unsigned LineCount;
636 
637  /// The size of each column in characters.
638  SmallVector<unsigned, 8> ColumnSizes;
639  };
640 
641  /// Calculate which \c ColumnFormat fits best into
642  /// \p RemainingCharacters.
643  const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
644 
645  /// The ordered \c FormatTokens making up the commas of this list.
647 
648  /// The length of each of the list's items in characters including the
649  /// trailing comma.
650  SmallVector<unsigned, 8> ItemLengths;
651 
652  /// Precomputed formats that can be used for this list.
654 
655  bool HasNestedBracedList;
656 };
657 
658 /// Encapsulates keywords that are context sensitive or for languages not
659 /// properly supported by Clang's lexer.
662  kw_final = &IdentTable.get("final");
663  kw_override = &IdentTable.get("override");
664  kw_in = &IdentTable.get("in");
665  kw_of = &IdentTable.get("of");
666  kw_CF_ENUM = &IdentTable.get("CF_ENUM");
667  kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
668  kw_NS_ENUM = &IdentTable.get("NS_ENUM");
669  kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
670 
671  kw_as = &IdentTable.get("as");
672  kw_async = &IdentTable.get("async");
673  kw_await = &IdentTable.get("await");
674  kw_declare = &IdentTable.get("declare");
675  kw_finally = &IdentTable.get("finally");
676  kw_from = &IdentTable.get("from");
677  kw_function = &IdentTable.get("function");
678  kw_get = &IdentTable.get("get");
679  kw_import = &IdentTable.get("import");
680  kw_infer = &IdentTable.get("infer");
681  kw_is = &IdentTable.get("is");
682  kw_let = &IdentTable.get("let");
683  kw_module = &IdentTable.get("module");
684  kw_readonly = &IdentTable.get("readonly");
685  kw_set = &IdentTable.get("set");
686  kw_type = &IdentTable.get("type");
687  kw_typeof = &IdentTable.get("typeof");
688  kw_var = &IdentTable.get("var");
689  kw_yield = &IdentTable.get("yield");
690 
691  kw_abstract = &IdentTable.get("abstract");
692  kw_assert = &IdentTable.get("assert");
693  kw_extends = &IdentTable.get("extends");
694  kw_implements = &IdentTable.get("implements");
695  kw_instanceof = &IdentTable.get("instanceof");
696  kw_interface = &IdentTable.get("interface");
697  kw_native = &IdentTable.get("native");
698  kw_package = &IdentTable.get("package");
699  kw_synchronized = &IdentTable.get("synchronized");
700  kw_throws = &IdentTable.get("throws");
701  kw___except = &IdentTable.get("__except");
702  kw___has_include = &IdentTable.get("__has_include");
703  kw___has_include_next = &IdentTable.get("__has_include_next");
704 
705  kw_mark = &IdentTable.get("mark");
706 
707  kw_extend = &IdentTable.get("extend");
708  kw_option = &IdentTable.get("option");
709  kw_optional = &IdentTable.get("optional");
710  kw_repeated = &IdentTable.get("repeated");
711  kw_required = &IdentTable.get("required");
712  kw_returns = &IdentTable.get("returns");
713 
714  kw_signals = &IdentTable.get("signals");
715  kw_qsignals = &IdentTable.get("Q_SIGNALS");
716  kw_slots = &IdentTable.get("slots");
717  kw_qslots = &IdentTable.get("Q_SLOTS");
718 
719  // Keep this at the end of the constructor to make sure everything here is
720  // already initialized.
721  JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
722  {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
723  kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
724  kw_set, kw_type, kw_typeof, kw_var, kw_yield,
725  // Keywords from the Java section.
726  kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
727  }
728 
729  // Context sensitive keywords.
741 
742  // JavaScript keywords.
762 
763  // Java keywords.
774 
775  // Pragma keywords.
777 
778  // Proto keywords.
785 
786  // QT keywords.
791 
792  /// Returns \c true if \p Tok is a true JavaScript identifier, returns
793  /// \c false if it is a keyword or a pseudo keyword.
795  return Tok.is(tok::identifier) &&
796  JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
797  JsExtraKeywords.end();
798  }
799 
800 private:
801  /// The JavaScript keywords beyond the C++ keyword set.
802  std::unordered_set<IdentifierInfo *> JsExtraKeywords;
803 };
804 
805 } // namespace format
806 } // namespace clang
807 
808 #endif
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers...
Definition: FormatToken.h:343
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:228
Token Tok
The Token.
Definition: FormatToken.h:128
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1232
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:95
CommaSeparatedList(const FormatStyle &Style)
Definition: FormatToken.h:608
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:204
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:215
bool isMemberAccess() const
Returns true if this is a "." or "->" accessing a member.
Definition: FormatToken.h:391
bool isFunctionLikeKeyword() const
Returns true if this is a keyword that can be used like a function call (e.g.
Definition: FormatToken.h:425
The base class of the type hierarchy.
Definition: Type.h:1415
bool isUnaryOperator() const
Definition: FormatToken.h:397
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:481
const FormatToken * getNamespaceToken() const
Return the actual namespace token, if this token starts a namespace block.
Definition: FormatToken.h:524
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:157
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:160
unsigned ObjCSelectorNameParts
If this is the first ObjC selector name in an ObjC method definition or call, this contains the numbe...
Definition: FormatToken.h:246
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
Definition: TokenKinds.h:79
bool isAccessSpecifier(bool ColonRequired=true) const
Definition: FormatToken.h:353
bool EndsBinaryExpression
true if this token ends a binary expression.
Definition: FormatToken.h:266
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:211
bool isBinaryOperator() const
Definition: FormatToken.h:413
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:134
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:293
tok::TokenKind getKind() const
Definition: Token.h:90
unsigned UnbreakableTailLength
The length of following tokens until the next natural split point, or the next token that can be brok...
Definition: FormatToken.h:219
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:381
unsigned SplitPenalty
Penalty for inserting a line break before this token.
Definition: FormatToken.h:234
prec::Level getPrecedence() const
Definition: FormatToken.h:467
One of these records is kept for each identifier that is lexed.
unsigned ParameterCount
Number of parameters, if this is "(", "[" or "<".
Definition: FormatToken.h:192
unsigned FakeRParens
Insert this many fake ) after this token for correct indentation.
Definition: FormatToken.h:260
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:186
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:290
AdditionalKeywords(IdentifierTable &IdentTable)
Definition: FormatToken.h:661
bool StartsBinaryExpression
true if this token starts a binary expression, i.e.
Definition: FormatToken.h:264
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
unsigned LongestObjCSelectorName
If this is the first ObjC selector name in an ObjC method definition or call, this contains the lengt...
Definition: FormatToken.h:241
unsigned OperatorIndex
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, contains the 0-based operator index.
Definition: FormatToken.h:270
bool IsJavaScriptIdentifier(const FormatToken &Tok) const
Returns true if Tok is a true JavaScript identifier, returns false if it is a keyword or a pseudo key...
Definition: FormatToken.h:794
unsigned SpacesRequiredBefore
The number of spaces that should be inserted before this token.
Definition: FormatToken.h:183
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:58
bool isNot(T Kind) const
Definition: FormatToken.h:323
bool closesBlockOrBlockTypeList(const FormatStyle &Style) const
Same as opensBlockOrBlockTypeList, but for the closing token.
Definition: FormatToken.h:516
unsigned BlockParameterCount
Number of parameters that are nested blocks, if this is "(", "[" or "<".
Definition: FormatToken.h:196
void CommaFound(const FormatToken *Token) override
Adds Token as the next comma to the CommaSeparated list.
Definition: FormatToken.h:620
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:504
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:473
SourceLocation getStartOfNonWhitespace() const
Returns actual token start location without leading escaped newlines and whitespace.
Definition: FormatToken.h:463
bool isLabelString() const
Returns true if this is a string literal that's like a label, e.g.
Definition: FormatToken.h:445
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:316
virtual void CommaFound(const FormatToken *Token)
Notifies the Role that a comma was found.
Definition: FormatToken.h:600
The current state when indenting an unwrapped line.
ContinuationIndenter * Indenter
Implements an efficient mapping from strings to IdentifierInfo nodes.
ParameterPackingKind PackingKind
If this is an opening parenthesis, how are the parameters packed?
Definition: FormatToken.h:207
PPKeywordKind
Provides a namespace for preprocessor keywords which start with a '#' at the beginning of the line...
Definition: TokenKinds.h:33
IdentifierInfo * kw___has_include_next
Definition: FormatToken.h:740
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:124
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Defines and computes precedence levels for binary/ternary operators.
SourceLocation getEnd() const
bool isObjCAccessSpecifier() const
Definition: FormatToken.h:361
bool isTrailingComment() const
Definition: FormatToken.h:418
TokenRole(const FormatStyle &Style)
Definition: FormatToken.h:572
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
unsigned LastNewlineOffset
The offset just past the last '\n' in this token's leading whitespace (relative to WhiteSpaceStart)...
Definition: FormatToken.h:145
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:25
#define false
Definition: stdbool.h:33
Kind
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
bool closesScopeAfterBlock() const
Definition: FormatToken.h:325
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:307
Various functions to configurably format source code.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:177
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:660
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:141
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
tok::TokenKind ParentBracket
If this is a bracket ("<", "(", "[" or "{"), contains the kind of the surrounding bracket...
Definition: FormatToken.h:200
bool IsUnterminatedLiteral
Set to true if this token is an unterminated literal.
Definition: FormatToken.h:175
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:172
bool is(TokenType TT) const
Definition: FormatToken.h:308
SmallVector< prec::Level, 4 > FakeLParens
Stores the number of required fake parentheses and the corresponding operator precedence.
Definition: FormatToken.h:258
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:50
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:231
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:1242
Dataflow Directional Tag Classes.
bool is(const IdentifierInfo *II) const
Definition: FormatToken.h:309
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:150
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:305
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:1005
virtual unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun)
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.h:593
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, points to the next operator.
Definition: FormatToken.h:274
bool ClosesTemplateDeclaration
true if this is the ">" of "template<..>".
Definition: FormatToken.h:189
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:287
bool isOneOf(A K1, B K2, Ts... Ks) const
Definition: FormatToken.h:320
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:297
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:336
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.h:490
bool opensScope() const
Returns whether Tok is ([{ or an opening < of a template or in protos.
Definition: FormatToken.h:371
const FormatStyle & Style
Definition: FormatToken.h:603
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:166
virtual unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun)
Apply the special formatting that the given role demands.
Definition: FormatToken.h:585
Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/).
Definition: Format.h:1237
prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus11)
Return the precedence of the specified binary operator token.
A trivial tuple used to represent a source range.
unsigned BindingStrength
The binding strength of a token.
Definition: FormatToken.h:224
FormatDecision Decision
Stores the formatting decision for the token once it was made.
Definition: FormatToken.h:300
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:284
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:349
bool isStringLiteral() const
Definition: FormatToken.h:347
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:138
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:178
bool PartOfMultiVariableDeclStmt
Is this token part of a DeclStmt defining multiple variables?
Definition: FormatToken.h:279
unsigned ParameterIndex
The 0-based index of the parameter/argument.
Definition: FormatToken.h:251
unsigned LastLineColumnWidth
Contains the width in columns of the last line of a multi-line token.
Definition: FormatToken.h:154
bool is(tok::PPKeywordKind Kind) const
Definition: FormatToken.h:312
const FormatStyle & Style
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:29