clang  14.0.0git
FormatTokenLexer.cpp
Go to the documentation of this file.
1 //===--- FormatTokenLexer.cpp - Lex FormatTokens -------------*- C++ ----*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements FormatTokenLexer, which tokenizes a source file
11 /// into a FormatToken stream suitable for ClangFormat.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatTokenLexer.h"
16 #include "FormatToken.h"
19 #include "clang/Format/Format.h"
20 #include "llvm/Support/Regex.h"
21 
22 namespace clang {
23 namespace format {
24 
26  const SourceManager &SourceMgr, FileID ID, unsigned Column,
28  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
29  IdentifierTable &IdentTable)
30  : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
31  Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
32  Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
33  Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
34  FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
35  MacroBlockEndRegex(Style.MacroBlockEnd) {
36  Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr,
37  getFormattingLangOpts(Style)));
38  Lex->SetKeepWhitespaceMode(true);
39 
40  for (const std::string &ForEachMacro : Style.ForEachMacros) {
41  auto Identifier = &IdentTable.get(ForEachMacro);
42  Macros.insert({Identifier, TT_ForEachMacro});
43  }
44  for (const std::string &IfMacro : Style.IfMacros) {
45  auto Identifier = &IdentTable.get(IfMacro);
46  Macros.insert({Identifier, TT_IfMacro});
47  }
48  for (const std::string &AttributeMacro : Style.AttributeMacros) {
49  auto Identifier = &IdentTable.get(AttributeMacro);
50  Macros.insert({Identifier, TT_AttributeMacro});
51  }
52  for (const std::string &StatementMacro : Style.StatementMacros) {
53  auto Identifier = &IdentTable.get(StatementMacro);
54  Macros.insert({Identifier, TT_StatementMacro});
55  }
56  for (const std::string &TypenameMacro : Style.TypenameMacros) {
57  auto Identifier = &IdentTable.get(TypenameMacro);
58  Macros.insert({Identifier, TT_TypenameMacro});
59  }
60  for (const std::string &NamespaceMacro : Style.NamespaceMacros) {
61  auto Identifier = &IdentTable.get(NamespaceMacro);
62  Macros.insert({Identifier, TT_NamespaceMacro});
63  }
64  for (const std::string &WhitespaceSensitiveMacro :
66  auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
67  Macros.insert({Identifier, TT_UntouchableMacroFunc});
68  }
69  for (const std::string &StatementAttributeLikeMacro :
71  auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
72  Macros.insert({Identifier, TT_StatementAttributeLikeMacro});
73  }
74 }
75 
77  assert(Tokens.empty());
78  assert(FirstInLineIndex == 0);
79  do {
80  Tokens.push_back(getNextToken());
81  if (Style.isJavaScript()) {
82  tryParseJSRegexLiteral();
83  handleTemplateStrings();
84  }
86  tryParsePythonComment();
87  tryMergePreviousTokens();
88  if (Style.isCSharp())
89  // This needs to come after tokens have been merged so that C#
90  // string literals are correctly identified.
91  handleCSharpVerbatimAndInterpolatedStrings();
92  if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
93  FirstInLineIndex = Tokens.size() - 1;
94  } while (Tokens.back()->Tok.isNot(tok::eof));
95  return Tokens;
96 }
97 
98 void FormatTokenLexer::tryMergePreviousTokens() {
99  if (tryMerge_TMacro())
100  return;
101  if (tryMergeConflictMarkers())
102  return;
103  if (tryMergeLessLess())
104  return;
105  if (tryMergeForEach())
106  return;
107  if (Style.isCpp() && tryTransformTryUsageForC())
108  return;
109 
110  if (Style.isJavaScript() || Style.isCSharp()) {
111  static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
112  tok::question};
113  static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
114  tok::period};
115  static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
116 
117  if (tryMergeTokens(FatArrow, TT_FatArrow))
118  return;
119  if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
120  // Treat like the "||" operator (as opposed to the ternary ?).
121  Tokens.back()->Tok.setKind(tok::pipepipe);
122  return;
123  }
124  if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
125  // Treat like a regular "." access.
126  Tokens.back()->Tok.setKind(tok::period);
127  return;
128  }
129  if (tryMergeNullishCoalescingEqual()) {
130  return;
131  }
132  }
133 
134  if (Style.isCSharp()) {
135  static const tok::TokenKind CSharpNullConditionalLSquare[] = {
136  tok::question, tok::l_square};
137 
138  if (tryMergeCSharpKeywordVariables())
139  return;
140  if (tryMergeCSharpStringLiteral())
141  return;
142  if (tryTransformCSharpForEach())
143  return;
144  if (tryMergeTokens(CSharpNullConditionalLSquare,
145  TT_CSharpNullConditionalLSquare)) {
146  // Treat like a regular "[" operator.
147  Tokens.back()->Tok.setKind(tok::l_square);
148  return;
149  }
150  }
151 
152  if (tryMergeNSStringLiteral())
153  return;
154 
155  if (Style.isJavaScript()) {
156  static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
157  static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
158  tok::equal};
159  static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
160  tok::greaterequal};
161  static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
162  static const tok::TokenKind JSExponentiationEqual[] = {tok::star,
163  tok::starequal};
164  static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
165  static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
166 
167  // FIXME: Investigate what token type gives the correct operator priority.
168  if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
169  return;
170  if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
171  return;
172  if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
173  return;
174  if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
175  return;
176  if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
177  Tokens.back()->Tok.setKind(tok::starequal);
178  return;
179  }
180  if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
181  tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
182  // Treat like the "=" assignment operator.
183  Tokens.back()->Tok.setKind(tok::equal);
184  return;
185  }
186  if (tryMergeJSPrivateIdentifier())
187  return;
188  }
189 
190  if (Style.Language == FormatStyle::LK_Java) {
191  static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
192  tok::greater, tok::greater, tok::greaterequal};
193  if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
194  return;
195  }
196 }
197 
198 bool FormatTokenLexer::tryMergeNSStringLiteral() {
199  if (Tokens.size() < 2)
200  return false;
201  auto &At = *(Tokens.end() - 2);
202  auto &String = *(Tokens.end() - 1);
203  if (!At->is(tok::at) || !String->is(tok::string_literal))
204  return false;
205  At->Tok.setKind(tok::string_literal);
206  At->TokenText = StringRef(At->TokenText.begin(),
207  String->TokenText.end() - At->TokenText.begin());
208  At->ColumnWidth += String->ColumnWidth;
209  At->setType(TT_ObjCStringLiteral);
210  Tokens.erase(Tokens.end() - 1);
211  return true;
212 }
213 
214 bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
215  // Merges #idenfier into a single identifier with the text #identifier
216  // but the token tok::identifier.
217  if (Tokens.size() < 2)
218  return false;
219  auto &Hash = *(Tokens.end() - 2);
220  auto &Identifier = *(Tokens.end() - 1);
221  if (!Hash->is(tok::hash) || !Identifier->is(tok::identifier))
222  return false;
223  Hash->Tok.setKind(tok::identifier);
224  Hash->TokenText =
225  StringRef(Hash->TokenText.begin(),
226  Identifier->TokenText.end() - Hash->TokenText.begin());
227  Hash->ColumnWidth += Identifier->ColumnWidth;
228  Hash->setType(TT_JsPrivateIdentifier);
229  Tokens.erase(Tokens.end() - 1);
230  return true;
231 }
232 
233 // Search for verbatim or interpolated string literals @"ABC" or
234 // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to
235 // prevent splitting of @, $ and ".
236 // Merging of multiline verbatim strings with embedded '"' is handled in
237 // handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
238 bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
239  if (Tokens.size() < 2)
240  return false;
241 
242  // Interpolated strings could contain { } with " characters inside.
243  // $"{x ?? "null"}"
244  // should not be split into $"{x ?? ", null, "}" but should treated as a
245  // single string-literal.
246  //
247  // We opt not to try and format expressions inside {} within a C#
248  // interpolated string. Formatting expressions within an interpolated string
249  // would require similar work as that done for JavaScript template strings
250  // in `handleTemplateStrings()`.
251  auto &CSharpInterpolatedString = *(Tokens.end() - 2);
252  if (CSharpInterpolatedString->getType() == TT_CSharpStringLiteral &&
253  (CSharpInterpolatedString->TokenText.startswith(R"($")") ||
254  CSharpInterpolatedString->TokenText.startswith(R"($@")"))) {
255  int UnmatchedOpeningBraceCount = 0;
256 
257  auto TokenTextSize = CSharpInterpolatedString->TokenText.size();
258  for (size_t Index = 0; Index < TokenTextSize; ++Index) {
259  char C = CSharpInterpolatedString->TokenText[Index];
260  if (C == '{') {
261  // "{{" inside an interpolated string is an escaped '{' so skip it.
262  if (Index + 1 < TokenTextSize &&
263  CSharpInterpolatedString->TokenText[Index + 1] == '{') {
264  ++Index;
265  continue;
266  }
267  ++UnmatchedOpeningBraceCount;
268  } else if (C == '}') {
269  // "}}" inside an interpolated string is an escaped '}' so skip it.
270  if (Index + 1 < TokenTextSize &&
271  CSharpInterpolatedString->TokenText[Index + 1] == '}') {
272  ++Index;
273  continue;
274  }
275  --UnmatchedOpeningBraceCount;
276  }
277  }
278 
279  if (UnmatchedOpeningBraceCount > 0) {
280  auto &NextToken = *(Tokens.end() - 1);
281  CSharpInterpolatedString->TokenText =
282  StringRef(CSharpInterpolatedString->TokenText.begin(),
283  NextToken->TokenText.end() -
284  CSharpInterpolatedString->TokenText.begin());
285  CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth;
286  Tokens.erase(Tokens.end() - 1);
287  return true;
288  }
289  }
290 
291  // Look for @"aaaaaa" or $"aaaaaa".
292  auto &String = *(Tokens.end() - 1);
293  if (!String->is(tok::string_literal))
294  return false;
295 
296  auto &At = *(Tokens.end() - 2);
297  if (!(At->is(tok::at) || At->TokenText == "$"))
298  return false;
299 
300  if (Tokens.size() > 2 && At->is(tok::at)) {
301  auto &Dollar = *(Tokens.end() - 3);
302  if (Dollar->TokenText == "$") {
303  // This looks like $@"aaaaa" so we need to combine all 3 tokens.
304  Dollar->Tok.setKind(tok::string_literal);
305  Dollar->TokenText =
306  StringRef(Dollar->TokenText.begin(),
307  String->TokenText.end() - Dollar->TokenText.begin());
308  Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth);
309  Dollar->setType(TT_CSharpStringLiteral);
310  Tokens.erase(Tokens.end() - 2);
311  Tokens.erase(Tokens.end() - 1);
312  return true;
313  }
314  }
315 
316  // Convert back into just a string_literal.
317  At->Tok.setKind(tok::string_literal);
318  At->TokenText = StringRef(At->TokenText.begin(),
319  String->TokenText.end() - At->TokenText.begin());
320  At->ColumnWidth += String->ColumnWidth;
321  At->setType(TT_CSharpStringLiteral);
322  Tokens.erase(Tokens.end() - 1);
323  return true;
324 }
325 
326 // Valid C# attribute targets:
327 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/concepts/attributes/#attribute-targets
328 const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
329  "assembly", "module", "field", "event", "method",
330  "param", "property", "return", "type",
331 };
332 
333 bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
334  if (Tokens.size() < 2)
335  return false;
336  auto &NullishCoalescing = *(Tokens.end() - 2);
337  auto &Equal = *(Tokens.end() - 1);
338  if (NullishCoalescing->getType() != TT_NullCoalescingOperator ||
339  !Equal->is(tok::equal))
340  return false;
341  NullishCoalescing->Tok.setKind(tok::equal); // no '??=' in clang tokens.
342  NullishCoalescing->TokenText =
343  StringRef(NullishCoalescing->TokenText.begin(),
344  Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
345  NullishCoalescing->ColumnWidth += Equal->ColumnWidth;
346  NullishCoalescing->setType(TT_NullCoalescingEqual);
347  Tokens.erase(Tokens.end() - 1);
348  return true;
349 }
350 
351 bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
352  if (Tokens.size() < 2)
353  return false;
354  auto &At = *(Tokens.end() - 2);
355  auto &Keyword = *(Tokens.end() - 1);
356  if (!At->is(tok::at))
357  return false;
358  if (!Keywords.isCSharpKeyword(*Keyword))
359  return false;
360 
361  At->Tok.setKind(tok::identifier);
362  At->TokenText = StringRef(At->TokenText.begin(),
363  Keyword->TokenText.end() - At->TokenText.begin());
364  At->ColumnWidth += Keyword->ColumnWidth;
365  At->setType(Keyword->getType());
366  Tokens.erase(Tokens.end() - 1);
367  return true;
368 }
369 
370 // In C# transform identifier foreach into kw_foreach
371 bool FormatTokenLexer::tryTransformCSharpForEach() {
372  if (Tokens.size() < 1)
373  return false;
374  auto &Identifier = *(Tokens.end() - 1);
375  if (!Identifier->is(tok::identifier))
376  return false;
377  if (Identifier->TokenText != "foreach")
378  return false;
379 
380  Identifier->setType(TT_ForEachMacro);
381  Identifier->Tok.setKind(tok::kw_for);
382  return true;
383 }
384 
385 bool FormatTokenLexer::tryMergeForEach() {
386  if (Tokens.size() < 2)
387  return false;
388  auto &For = *(Tokens.end() - 2);
389  auto &Each = *(Tokens.end() - 1);
390  if (!For->is(tok::kw_for))
391  return false;
392  if (!Each->is(tok::identifier))
393  return false;
394  if (Each->TokenText != "each")
395  return false;
396 
397  For->setType(TT_ForEachMacro);
398  For->Tok.setKind(tok::kw_for);
399 
400  For->TokenText = StringRef(For->TokenText.begin(),
401  Each->TokenText.end() - For->TokenText.begin());
402  For->ColumnWidth += Each->ColumnWidth;
403  Tokens.erase(Tokens.end() - 1);
404  return true;
405 }
406 
407 bool FormatTokenLexer::tryTransformTryUsageForC() {
408  if (Tokens.size() < 2)
409  return false;
410  auto &Try = *(Tokens.end() - 2);
411  if (!Try->is(tok::kw_try))
412  return false;
413  auto &Next = *(Tokens.end() - 1);
414  if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
415  return false;
416 
417  if (Tokens.size() > 2) {
418  auto &At = *(Tokens.end() - 3);
419  if (At->is(tok::at))
420  return false;
421  }
422 
423  Try->Tok.setKind(tok::identifier);
424  return true;
425 }
426 
427 bool FormatTokenLexer::tryMergeLessLess() {
428  // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
429  if (Tokens.size() < 3)
430  return false;
431 
432  auto First = Tokens.end() - 3;
433  if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))
434  return false;
435 
436  // Only merge if there currently is no whitespace between the two "<".
437  if (First[1]->hasWhitespaceBefore())
438  return false;
439 
440  auto X = Tokens.size() > 3 ? First[-1] : nullptr;
441  auto Y = First[2];
442  if ((X && X->is(tok::less)) || Y->is(tok::less))
443  return false;
444 
445  // Do not remove a whitespace between the two "<" e.g. "operator< <>".
446  if (X && X->is(tok::kw_operator) && Y->is(tok::greater))
447  return false;
448 
449  First[0]->Tok.setKind(tok::lessless);
450  First[0]->TokenText = "<<";
451  First[0]->ColumnWidth += 1;
452  Tokens.erase(Tokens.end() - 2);
453  return true;
454 }
455 
456 bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
457  TokenType NewType) {
458  if (Tokens.size() < Kinds.size())
459  return false;
460 
461  SmallVectorImpl<FormatToken *>::const_iterator First =
462  Tokens.end() - Kinds.size();
463  if (!First[0]->is(Kinds[0]))
464  return false;
465  unsigned AddLength = 0;
466  for (unsigned i = 1; i < Kinds.size(); ++i) {
467  if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore())
468  return false;
469  AddLength += First[i]->TokenText.size();
470  }
471  Tokens.resize(Tokens.size() - Kinds.size() + 1);
472  First[0]->TokenText = StringRef(First[0]->TokenText.data(),
473  First[0]->TokenText.size() + AddLength);
474  First[0]->ColumnWidth += AddLength;
475  First[0]->setType(NewType);
476  return true;
477 }
478 
479 // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
480 bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
481  // NB: This is not entirely correct, as an r_paren can introduce an operand
482  // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
483  // corner case to not matter in practice, though.
484  return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
485  tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
486  tok::colon, tok::question, tok::tilde) ||
487  Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
488  tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
489  tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
490  Tok->isBinaryOperator();
491 }
492 
493 bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
494  if (!Prev)
495  return true;
496 
497  // Regex literals can only follow after prefix unary operators, not after
498  // postfix unary operators. If the '++' is followed by a non-operand
499  // introducing token, the slash here is the operand and not the start of a
500  // regex.
501  // `!` is an unary prefix operator, but also a post-fix operator that casts
502  // away nullability, so the same check applies.
503  if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
504  return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
505 
506  // The previous token must introduce an operand location where regex
507  // literals can occur.
508  if (!precedesOperand(Prev))
509  return false;
510 
511  return true;
512 }
513 
514 // Tries to parse a JavaScript Regex literal starting at the current token,
515 // if that begins with a slash and is in a location where JavaScript allows
516 // regex literals. Changes the current token to a regex literal and updates
517 // its text if successful.
518 void FormatTokenLexer::tryParseJSRegexLiteral() {
519  FormatToken *RegexToken = Tokens.back();
520  if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
521  return;
522 
523  FormatToken *Prev = nullptr;
524  for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
525  // NB: Because previous pointers are not initialized yet, this cannot use
526  // Token.getPreviousNonComment.
527  if (FT->isNot(tok::comment)) {
528  Prev = FT;
529  break;
530  }
531  }
532 
533  if (!canPrecedeRegexLiteral(Prev))
534  return;
535 
536  // 'Manually' lex ahead in the current file buffer.
537  const char *Offset = Lex->getBufferLocation();
538  const char *RegexBegin = Offset - RegexToken->TokenText.size();
539  StringRef Buffer = Lex->getBuffer();
540  bool InCharacterClass = false;
541  bool HaveClosingSlash = false;
542  for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
543  // Regular expressions are terminated with a '/', which can only be
544  // escaped using '\' or a character class between '[' and ']'.
545  // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
546  switch (*Offset) {
547  case '\\':
548  // Skip the escaped character.
549  ++Offset;
550  break;
551  case '[':
552  InCharacterClass = true;
553  break;
554  case ']':
555  InCharacterClass = false;
556  break;
557  case '/':
558  if (!InCharacterClass)
559  HaveClosingSlash = true;
560  break;
561  }
562  }
563 
564  RegexToken->setType(TT_RegexLiteral);
565  // Treat regex literals like other string_literals.
566  RegexToken->Tok.setKind(tok::string_literal);
567  RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
568  RegexToken->ColumnWidth = RegexToken->TokenText.size();
569 
570  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
571 }
572 
573 void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
574  FormatToken *CSharpStringLiteral = Tokens.back();
575 
576  if (CSharpStringLiteral->getType() != TT_CSharpStringLiteral)
577  return;
578 
579  // Deal with multiline strings.
580  if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
581  CSharpStringLiteral->TokenText.startswith(R"($@")")))
582  return;
583 
584  const char *StrBegin =
585  Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
586  const char *Offset = StrBegin;
587  if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
588  Offset += 2;
589  else // CSharpStringLiteral->TokenText.startswith(R"($@")")
590  Offset += 3;
591 
592  // Look for a terminating '"' in the current file buffer.
593  // Make no effort to format code within an interpolated or verbatim string.
594  for (; Offset != Lex->getBuffer().end(); ++Offset) {
595  if (Offset[0] == '"') {
596  // "" within a verbatim string is an escaped double quote: skip it.
597  if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
598  ++Offset;
599  else
600  break;
601  }
602  }
603 
604  // Make no attempt to format code properly if a verbatim string is
605  // unterminated.
606  if (Offset == Lex->getBuffer().end())
607  return;
608 
609  StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
610  CSharpStringLiteral->TokenText = LiteralText;
611 
612  // Adjust width for potentially multiline string literals.
613  size_t FirstBreak = LiteralText.find('\n');
614  StringRef FirstLineText = FirstBreak == StringRef::npos
615  ? LiteralText
616  : LiteralText.substr(0, FirstBreak);
617  CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
618  FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
619  Encoding);
620  size_t LastBreak = LiteralText.rfind('\n');
621  if (LastBreak != StringRef::npos) {
622  CSharpStringLiteral->IsMultiline = true;
623  unsigned StartColumn = 0;
624  CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
625  LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
626  Style.TabWidth, Encoding);
627  }
628 
629  SourceLocation loc = Offset < Lex->getBuffer().end()
630  ? Lex->getSourceLocation(Offset + 1)
631  : SourceMgr.getLocForEndOfFile(ID);
632  resetLexer(SourceMgr.getFileOffset(loc));
633 }
634 
635 void FormatTokenLexer::handleTemplateStrings() {
636  FormatToken *BacktickToken = Tokens.back();
637 
638  if (BacktickToken->is(tok::l_brace)) {
639  StateStack.push(LexerState::NORMAL);
640  return;
641  }
642  if (BacktickToken->is(tok::r_brace)) {
643  if (StateStack.size() == 1)
644  return;
645  StateStack.pop();
646  if (StateStack.top() != LexerState::TEMPLATE_STRING)
647  return;
648  // If back in TEMPLATE_STRING, fallthrough and continue parsing the
649  } else if (BacktickToken->is(tok::unknown) &&
650  BacktickToken->TokenText == "`") {
651  StateStack.push(LexerState::TEMPLATE_STRING);
652  } else {
653  return; // Not actually a template
654  }
655 
656  // 'Manually' lex ahead in the current file buffer.
657  const char *Offset = Lex->getBufferLocation();
658  const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
659  for (; Offset != Lex->getBuffer().end(); ++Offset) {
660  if (Offset[0] == '`') {
661  StateStack.pop();
662  break;
663  }
664  if (Offset[0] == '\\') {
665  ++Offset; // Skip the escaped character.
666  } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
667  Offset[1] == '{') {
668  // '${' introduces an expression interpolation in the template string.
669  StateStack.push(LexerState::NORMAL);
670  ++Offset;
671  break;
672  }
673  }
674 
675  StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
676  BacktickToken->setType(TT_TemplateString);
677  BacktickToken->Tok.setKind(tok::string_literal);
678  BacktickToken->TokenText = LiteralText;
679 
680  // Adjust width for potentially multiline string literals.
681  size_t FirstBreak = LiteralText.find('\n');
682  StringRef FirstLineText = FirstBreak == StringRef::npos
683  ? LiteralText
684  : LiteralText.substr(0, FirstBreak);
685  BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
686  FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
687  size_t LastBreak = LiteralText.rfind('\n');
688  if (LastBreak != StringRef::npos) {
689  BacktickToken->IsMultiline = true;
690  unsigned StartColumn = 0; // The template tail spans the entire line.
691  BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
692  LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
693  Style.TabWidth, Encoding);
694  }
695 
696  SourceLocation loc = Offset < Lex->getBuffer().end()
697  ? Lex->getSourceLocation(Offset + 1)
698  : SourceMgr.getLocForEndOfFile(ID);
699  resetLexer(SourceMgr.getFileOffset(loc));
700 }
701 
702 void FormatTokenLexer::tryParsePythonComment() {
703  FormatToken *HashToken = Tokens.back();
704  if (!HashToken->isOneOf(tok::hash, tok::hashhash))
705  return;
706  // Turn the remainder of this line into a comment.
707  const char *CommentBegin =
708  Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
709  size_t From = CommentBegin - Lex->getBuffer().begin();
710  size_t To = Lex->getBuffer().find_first_of('\n', From);
711  if (To == StringRef::npos)
712  To = Lex->getBuffer().size();
713  size_t Len = To - From;
714  HashToken->setType(TT_LineComment);
715  HashToken->Tok.setKind(tok::comment);
716  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
717  SourceLocation Loc = To < Lex->getBuffer().size()
718  ? Lex->getSourceLocation(CommentBegin + Len)
719  : SourceMgr.getLocForEndOfFile(ID);
720  resetLexer(SourceMgr.getFileOffset(Loc));
721 }
722 
723 bool FormatTokenLexer::tryMerge_TMacro() {
724  if (Tokens.size() < 4)
725  return false;
726  FormatToken *Last = Tokens.back();
727  if (!Last->is(tok::r_paren))
728  return false;
729 
730  FormatToken *String = Tokens[Tokens.size() - 2];
731  if (!String->is(tok::string_literal) || String->IsMultiline)
732  return false;
733 
734  if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
735  return false;
736 
737  FormatToken *Macro = Tokens[Tokens.size() - 4];
738  if (Macro->TokenText != "_T")
739  return false;
740 
741  const char *Start = Macro->TokenText.data();
742  const char *End = Last->TokenText.data() + Last->TokenText.size();
743  String->TokenText = StringRef(Start, End - Start);
744  String->IsFirst = Macro->IsFirst;
745  String->LastNewlineOffset = Macro->LastNewlineOffset;
746  String->WhitespaceRange = Macro->WhitespaceRange;
747  String->OriginalColumn = Macro->OriginalColumn;
748  String->ColumnWidth = encoding::columnWidthWithTabs(
749  String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
750  String->NewlinesBefore = Macro->NewlinesBefore;
751  String->HasUnescapedNewline = Macro->HasUnescapedNewline;
752 
753  Tokens.pop_back();
754  Tokens.pop_back();
755  Tokens.pop_back();
756  Tokens.back() = String;
757  if (FirstInLineIndex >= Tokens.size())
758  FirstInLineIndex = Tokens.size() - 1;
759  return true;
760 }
761 
762 bool FormatTokenLexer::tryMergeConflictMarkers() {
763  if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
764  return false;
765 
766  // Conflict lines look like:
767  // <marker> <text from the vcs>
768  // For example:
769  // >>>>>>> /file/in/file/system at revision 1234
770  //
771  // We merge all tokens in a line that starts with a conflict marker
772  // into a single token with a special token type that the unwrapped line
773  // parser will use to correctly rebuild the underlying code.
774 
775  FileID ID;
776  // Get the position of the first token in the line.
777  unsigned FirstInLineOffset;
778  std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
779  Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
780  StringRef Buffer = SourceMgr.getBufferOrFake(ID).getBuffer();
781  // Calculate the offset of the start of the current line.
782  auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
783  if (LineOffset == StringRef::npos) {
784  LineOffset = 0;
785  } else {
786  ++LineOffset;
787  }
788 
789  auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
790  StringRef LineStart;
791  if (FirstSpace == StringRef::npos) {
792  LineStart = Buffer.substr(LineOffset);
793  } else {
794  LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
795  }
796 
797  TokenType Type = TT_Unknown;
798  if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
799  Type = TT_ConflictStart;
800  } else if (LineStart == "|||||||" || LineStart == "=======" ||
801  LineStart == "====") {
802  Type = TT_ConflictAlternative;
803  } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
804  Type = TT_ConflictEnd;
805  }
806 
807  if (Type != TT_Unknown) {
808  FormatToken *Next = Tokens.back();
809 
810  Tokens.resize(FirstInLineIndex + 1);
811  // We do not need to build a complete token here, as we will skip it
812  // during parsing anyway (as we must not touch whitespace around conflict
813  // markers).
814  Tokens.back()->setType(Type);
815  Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
816 
817  Tokens.push_back(Next);
818  return true;
819  }
820 
821  return false;
822 }
823 
824 FormatToken *FormatTokenLexer::getStashedToken() {
825  // Create a synthesized second '>' or '<' token.
826  Token Tok = FormatTok->Tok;
827  StringRef TokenText = FormatTok->TokenText;
828 
829  unsigned OriginalColumn = FormatTok->OriginalColumn;
830  FormatTok = new (Allocator.Allocate()) FormatToken;
831  FormatTok->Tok = Tok;
832  SourceLocation TokLocation =
833  FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
834  FormatTok->Tok.setLocation(TokLocation);
835  FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
836  FormatTok->TokenText = TokenText;
837  FormatTok->ColumnWidth = 1;
838  FormatTok->OriginalColumn = OriginalColumn + 1;
839 
840  return FormatTok;
841 }
842 
843 FormatToken *FormatTokenLexer::getNextToken() {
844  if (StateStack.top() == LexerState::TOKEN_STASHED) {
845  StateStack.pop();
846  return getStashedToken();
847  }
848 
849  FormatTok = new (Allocator.Allocate()) FormatToken;
850  readRawToken(*FormatTok);
851  SourceLocation WhitespaceStart =
852  FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
853  FormatTok->IsFirst = IsFirstToken;
854  IsFirstToken = false;
855 
856  // Consume and record whitespace until we find a significant token.
857  unsigned WhitespaceLength = TrailingWhitespace;
858  while (FormatTok->Tok.is(tok::unknown)) {
859  StringRef Text = FormatTok->TokenText;
860  auto EscapesNewline = [&](int pos) {
861  // A '\r' here is just part of '\r\n'. Skip it.
862  if (pos >= 0 && Text[pos] == '\r')
863  --pos;
864  // See whether there is an odd number of '\' before this.
865  // FIXME: This is wrong. A '\' followed by a newline is always removed,
866  // regardless of whether there is another '\' before it.
867  // FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph.
868  unsigned count = 0;
869  for (; pos >= 0; --pos, ++count)
870  if (Text[pos] != '\\')
871  break;
872  return count & 1;
873  };
874  // FIXME: This miscounts tok:unknown tokens that are not just
875  // whitespace, e.g. a '`' character.
876  for (int i = 0, e = Text.size(); i != e; ++i) {
877  switch (Text[i]) {
878  case '\n':
879  ++FormatTok->NewlinesBefore;
880  FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
881  FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
882  Column = 0;
883  break;
884  case '\r':
885  FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
886  Column = 0;
887  break;
888  case '\f':
889  case '\v':
890  Column = 0;
891  break;
892  case ' ':
893  ++Column;
894  break;
895  case '\t':
896  Column +=
897  Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
898  break;
899  case '\\':
900  if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
901  FormatTok->setType(TT_ImplicitStringLiteral);
902  break;
903  default:
904  FormatTok->setType(TT_ImplicitStringLiteral);
905  break;
906  }
907  if (FormatTok->getType() == TT_ImplicitStringLiteral)
908  break;
909  }
910 
911  if (FormatTok->is(TT_ImplicitStringLiteral))
912  break;
913  WhitespaceLength += FormatTok->Tok.getLength();
914 
915  readRawToken(*FormatTok);
916  }
917 
918  // JavaScript and Java do not allow to escape the end of the line with a
919  // backslash. Backslashes are syntax errors in plain source, but can occur in
920  // comments. When a single line comment ends with a \, it'll cause the next
921  // line of code to be lexed as a comment, breaking formatting. The code below
922  // finds comments that contain a backslash followed by a line break, truncates
923  // the comment token at the backslash, and resets the lexer to restart behind
924  // the backslash.
925  if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
926  FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
927  size_t BackslashPos = FormatTok->TokenText.find('\\');
928  while (BackslashPos != StringRef::npos) {
929  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
930  FormatTok->TokenText[BackslashPos + 1] == '\n') {
931  const char *Offset = Lex->getBufferLocation();
932  Offset -= FormatTok->TokenText.size();
933  Offset += BackslashPos + 1;
934  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
935  FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);
937  FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
938  Encoding);
939  break;
940  }
941  BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
942  }
943  }
944 
945  // In case the token starts with escaped newlines, we want to
946  // take them into account as whitespace - this pattern is quite frequent
947  // in macro definitions.
948  // FIXME: Add a more explicit test.
949  while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {
950  unsigned SkippedWhitespace = 0;
951  if (FormatTok->TokenText.size() > 2 &&
952  (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n'))
953  SkippedWhitespace = 3;
954  else if (FormatTok->TokenText[1] == '\n')
955  SkippedWhitespace = 2;
956  else
957  break;
958 
959  ++FormatTok->NewlinesBefore;
960  WhitespaceLength += SkippedWhitespace;
961  FormatTok->LastNewlineOffset = SkippedWhitespace;
962  Column = 0;
963  FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);
964  }
965 
966  FormatTok->WhitespaceRange = SourceRange(
967  WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
968 
969  FormatTok->OriginalColumn = Column;
970 
971  TrailingWhitespace = 0;
972  if (FormatTok->Tok.is(tok::comment)) {
973  // FIXME: Add the trimmed whitespace to Column.
974  StringRef UntrimmedText = FormatTok->TokenText;
975  FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
976  TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
977  } else if (FormatTok->Tok.is(tok::raw_identifier)) {
978  IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
979  FormatTok->Tok.setIdentifierInfo(&Info);
980  FormatTok->Tok.setKind(Info.getTokenID());
981  if (Style.Language == FormatStyle::LK_Java &&
982  FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
983  tok::kw_operator)) {
984  FormatTok->Tok.setKind(tok::identifier);
985  FormatTok->Tok.setIdentifierInfo(nullptr);
986  } else if (Style.isJavaScript() &&
987  FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
988  tok::kw_operator)) {
989  FormatTok->Tok.setKind(tok::identifier);
990  FormatTok->Tok.setIdentifierInfo(nullptr);
991  }
992  } else if (FormatTok->Tok.is(tok::greatergreater)) {
993  FormatTok->Tok.setKind(tok::greater);
994  FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
995  ++Column;
996  StateStack.push(LexerState::TOKEN_STASHED);
997  } else if (FormatTok->Tok.is(tok::lessless)) {
998  FormatTok->Tok.setKind(tok::less);
999  FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1000  ++Column;
1001  StateStack.push(LexerState::TOKEN_STASHED);
1002  }
1003 
1004  // Now FormatTok is the next non-whitespace token.
1005 
1006  StringRef Text = FormatTok->TokenText;
1007  size_t FirstNewlinePos = Text.find('\n');
1008  if (FirstNewlinePos == StringRef::npos) {
1009  // FIXME: ColumnWidth actually depends on the start column, we need to
1010  // take this into account when the token is moved.
1011  FormatTok->ColumnWidth =
1012  encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1013  Column += FormatTok->ColumnWidth;
1014  } else {
1015  FormatTok->IsMultiline = true;
1016  // FIXME: ColumnWidth actually depends on the start column, we need to
1017  // take this into account when the token is moved.
1019  Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1020 
1021  // The last line of the token always starts in column 0.
1022  // Thus, the length can be precomputed even in the presence of tabs.
1024  Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
1025  Column = FormatTok->LastLineColumnWidth;
1026  }
1027 
1028  if (Style.isCpp()) {
1029  auto it = Macros.find(FormatTok->Tok.getIdentifierInfo());
1030  if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1031  Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1032  tok::pp_define) &&
1033  it != Macros.end()) {
1034  FormatTok->setType(it->second);
1035  if (it->second == TT_IfMacro) {
1036  // The lexer token currently has type tok::kw_unknown. However, for this
1037  // substitution to be treated correctly in the TokenAnnotator, faking
1038  // the tok value seems to be needed. Not sure if there's a more elegant
1039  // way.
1040  FormatTok->Tok.setKind(tok::kw_if);
1041  }
1042  } else if (FormatTok->is(tok::identifier)) {
1043  if (MacroBlockBeginRegex.match(Text)) {
1044  FormatTok->setType(TT_MacroBlockBegin);
1045  } else if (MacroBlockEndRegex.match(Text)) {
1046  FormatTok->setType(TT_MacroBlockEnd);
1047  }
1048  }
1049  }
1050 
1051  return FormatTok;
1052 }
1053 
1054 void FormatTokenLexer::readRawToken(FormatToken &Tok) {
1055  Lex->LexFromRawLexer(Tok.Tok);
1056  Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1057  Tok.Tok.getLength());
1058  // For formatting, treat unterminated string literals like normal string
1059  // literals.
1060  if (Tok.is(tok::unknown)) {
1061  if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1062  Tok.Tok.setKind(tok::string_literal);
1063  Tok.IsUnterminatedLiteral = true;
1064  } else if (Style.isJavaScript() && Tok.TokenText == "''") {
1065  Tok.Tok.setKind(tok::string_literal);
1066  }
1067  }
1068 
1069  if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Proto ||
1070  Style.Language == FormatStyle::LK_TextProto) &&
1071  Tok.is(tok::char_constant)) {
1072  Tok.Tok.setKind(tok::string_literal);
1073  }
1074 
1075  if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1076  Tok.TokenText == "/* clang-format on */")) {
1077  FormattingDisabled = false;
1078  }
1079 
1080  Tok.Finalized = FormattingDisabled;
1081 
1082  if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1083  Tok.TokenText == "/* clang-format off */")) {
1084  FormattingDisabled = true;
1085  }
1086 }
1087 
1088 void FormatTokenLexer::resetLexer(unsigned Offset) {
1089  StringRef Buffer = SourceMgr.getBufferData(ID);
1090  Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1091  getFormattingLangOpts(Style), Buffer.begin(),
1092  Buffer.begin() + Offset, Buffer.end()));
1093  Lex->SetKeepWhitespaceMode(true);
1094  TrailingWhitespace = 0;
1095 }
1096 
1097 } // namespace format
1098 } // namespace clang
clang::format::FormatTokenLexer::lex
ArrayRef< FormatToken * > lex()
Definition: FormatTokenLexer.cpp:76
clang::format::TEMPLATE_STRING
@ TEMPLATE_STRING
Definition: FormatTokenLexer.h:34
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::SourceManager::getFileOffset
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
Definition: SourceManager.h:1287
clang::format::AdditionalKeywords::kw_instanceof
IdentifierInfo * kw_instanceof
Definition: FormatToken.h:1011
clang::IdentifierTable::get
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Definition: IdentifierTable.h:592
clang::SourceLocation::getLocWithOffset
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
Definition: SourceLocation.h:134
AttributeLangSupport::C
@ C
Definition: SemaDeclAttr.cpp:54
clang::Token::getIdentifierInfo
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
clang::format::NORMAL
@ NORMAL
Definition: FormatTokenLexer.h:33
clang::format::FormatStyle::StatementMacros
std::vector< std::string > StatementMacros
A vector of macros that should be interpreted as complete statements.
Definition: Format.h:2317
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
clang::format::FormatToken::TokenText
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:229
clang::format::FormatToken::IsMultiline
unsigned IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:245
clang::ComparisonCategoryType::First
@ First
SourceManager.h
clang::Token::setIdentifierInfo
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:188
clang::format::FormatToken::OriginalColumn
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:373
clang::minimize_source_to_dependency_directives::pp_define
@ pp_define
Definition: DependencyDirectivesSourceMinimizer.h:37
clang::tooling::X
static ToolExecutorPluginRegistry::Add< AllTUsToolExecutorPlugin > X("all-TUs", "Runs FrontendActions on all TUs in the compilation database. " "Tool results are stored in memory.")
clang::index::SymbolKind::Macro
@ Macro
Identifier
StringRef Identifier
Definition: Format.cpp:2437
Format.h
clang::format::FormatToken::setType
void setType(TokenType T)
Definition: FormatToken.h:332
clang::format::FormatToken::IsFirst
unsigned IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:248
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:626
clang::format::FormatToken::NewlinesBefore
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:338
clang::Lexer
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:76
Offset
unsigned Offset
Definition: Format.cpp:2431
clang::format::FormatTokenLexer::FormatTokenLexer
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
Definition: FormatTokenLexer.cpp:25
clang::format::FormatStyle::LK_Proto
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:2670
clang::format::FormatStyle::IfMacros
std::vector< std::string > IfMacros
A vector of macros that should be interpreted as conditionals instead of as function calls.
Definition: Format.h:2289
clang::format::FormatToken::Tok
Token Tok
The Token.
Definition: FormatToken.h:223
clang::format::AdditionalKeywords::isCSharpKeyword
bool isCSharpKeyword(const FormatToken &Tok) const
Returns true if Tok is a C# keyword, returns false if it is anything else.
Definition: FormatToken.h:1140
clang::format::encoding::Encoding
Encoding
Definition: Encoding.h:27
clang::format::FormatStyle::LK_TextProto
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:2675
clang::format::AdditionalKeywords::kw_in
IdentifierInfo * kw_in
Definition: FormatToken.h:973
clang::Token::is
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:97
clang::format::FormatStyle::NamespaceMacros
std::vector< std::string > NamespaceMacros
A vector of macros which are used to open namespace blocks.
Definition: Format.h:2330
clang::format::getFormattingLangOpts
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3222
clang::SourceManager::getLocForEndOfFile
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
Definition: SourceManager.h:1126
clang::format::FormatStyle::MacroBlockEnd
std::string MacroBlockEnd
A regular expression matching macros that end a block.
Definition: Format.h:2750
clang::format::FormatStyle::WhitespaceSensitiveMacros
std::vector< std::string > WhitespaceSensitiveMacros
A vector of macros which are whitespace-sensitive and should not be touched.
Definition: Format.h:2347
clang::format::FormatStyle::TabWidth
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:3796
clang::SourceManager::getDecomposedLoc
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
Definition: SourceManager.h:1236
clang::format::FormatStyle::StatementAttributeLikeMacros
std::vector< std::string > StatementAttributeLikeMacros
Macros which are ignored in front of a statement, as if they were an attribute.
Definition: Format.h:3792
clang::format::FormatStyle::LK_Java
@ LK_Java
Should be used for Java.
Definition: Format.h:2661
clang::format::FormatStyle::isJavaScript
bool isJavaScript() const
Definition: Format.h:2680
clang::Token::getLength
unsigned getLength() const
Definition: Token.h:129
clang::SourceManager::getLocForStartOfFile
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
Definition: SourceManager.h:1118
SourceLocation.h
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
clang::format::FormatStyle::isCSharp
bool isCSharp() const
Definition: Format.h:2678
clang::format::FormatStyle::Language
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:2684
clang::format::FormatToken::getType
TokenType getType() const
Returns the token's type, e.g.
Definition: FormatToken.h:331
llvm::ArrayRef
Definition: LLVM.h:34
clang::SourceManager::getCharacterData
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
Definition: SourceManager.cpp:1154
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:457
clang::format::FormatToken::isOneOf
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:469
clang::ComparisonCategoryResult::Equal
@ Equal
clang::SourceManager::getBufferData
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
Definition: SourceManager.cpp:735
clang::format::FormatToken::LastLineColumnWidth
unsigned LastLineColumnWidth
Contains the width in columns of the last line of a multi-line token.
Definition: FormatToken.h:351
clang::format::FormatStyle::MacroBlockBegin
std::string MacroBlockBegin
A regular expression matching macros that start a block.
Definition: Format.h:2746
clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:126
clang::Token::setLocation
void setLocation(SourceLocation L)
Definition: Token.h:134
FormatTokenLexer.h
clang::Builtin::ID
ID
Definition: Builtins.h:48
clang::format::FormatStyle::AttributeMacros
std::vector< std::string > AttributeMacros
A vector of strings that should be interpreted as attributes/qualifiers instead of identifiers.
Definition: Format.h:942
clang
Definition: CalledOnceCheck.h:17
Text
StringRef Text
Definition: Format.cpp:2430
clang::ComparisonCategoryType::Last
@ Last
clang::FileID
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Definition: SourceLocation.h:38
clang::IdentifierTable
Implements an efficient mapping from strings to IdentifierInfo nodes.
Definition: IdentifierTable.h:559
clang::format::FormatStyle::isCpp
bool isCpp() const
Definition: Format.h:2677
clang::format::FormatToken::LastNewlineOffset
unsigned LastNewlineOffset
The offset just past the last '\n' in this token's leading whitespace (relative to WhiteSpaceStart).
Definition: FormatToken.h:342
clang::Token::setKind
void setKind(tok::TokenKind K)
Definition: Token.h:93
clang::format::FormatToken::ColumnWidth
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:347
clang::format::encoding::columnWidthWithTabs
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:61
FormatToken.h
clang::format::FormatStyle::TypenameMacros
std::vector< std::string > TypenameMacros
A vector of macros that should be interpreted as type declarations instead of as function calls.
Definition: Format.h:2306
clang::format::FormatStyle::ForEachMacros
std::vector< std::string > ForEachMacros
A vector of macros that should be interpreted as foreach loops instead of as function calls.
Definition: Format.h:2268
clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33
true
#define true
Definition: stdbool.h:16
clang::format::FormatToken::HasUnescapedNewline
unsigned HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:242
clang::format::TokenType
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:127
clang::format::FormatToken::WhitespaceRange
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:238
clang::SourceManager::getBufferOrFake
llvm::MemoryBufferRef getBufferOrFake(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
Definition: SourceManager.h:1023
Type
MatchType Type
Definition: ASTMatchFinder.cpp:70
clang::format::TOKEN_STASHED
@ TOKEN_STASHED
Definition: FormatTokenLexer.h:35