clang  11.0.0git
FormatTokenLexer.cpp
Go to the documentation of this file.
1 //===--- FormatTokenLexer.cpp - Lex FormatTokens -------------*- C++ ----*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements FormatTokenLexer, which tokenizes a source file
11 /// into a FormatToken stream suitable for ClangFormat.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatTokenLexer.h"
16 #include "FormatToken.h"
19 #include "clang/Format/Format.h"
20 #include "llvm/Support/Regex.h"
21 
22 namespace clang {
23 namespace format {
24 
26  const SourceManager &SourceMgr, FileID ID, unsigned Column,
28  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
29  IdentifierTable &IdentTable)
30  : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
31  Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
32  Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
33  Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
34  FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
35  MacroBlockEndRegex(Style.MacroBlockEnd) {
// Create the underlying clang Lexer over the buffer of file ID, using the
// language options derived from the format style.
36  Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
37  getFormattingLangOpts(Style)));
// Keep-whitespace mode is switched on; getNextToken() consumes tok::unknown
// tokens and records them as the whitespace preceding a token.
38  Lex->SetKeepWhitespaceMode(true);
39 
// Register every macro name configured in the style together with the token
// type associated with its category. Each name is interned in IdentTable and
// the resulting IdentifierInfo pointer is used as the key in Macros.
40  for (const std::string &ForEachMacro : Style.ForEachMacros)
41  Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro});
42  for (const std::string &StatementMacro : Style.StatementMacros)
43  Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro});
44  for (const std::string &TypenameMacro : Style.TypenameMacros)
45  Macros.insert({&IdentTable.get(TypenameMacro), TT_TypenameMacro});
46  for (const std::string &NamespaceMacro : Style.NamespaceMacros)
47  Macros.insert({&IdentTable.get(NamespaceMacro), TT_NamespaceMacro});
48  for (const std::string &WhitespaceSensitiveMacro :
49  Style.WhitespaceSensitiveMacros) {
50  Macros.insert(
51  {&IdentTable.get(WhitespaceSensitiveMacro), TT_UntouchableMacroFunc});
52  }
53 }
54 
// Lexes the whole input. One token is appended to Tokens per iteration and
// the language-specific post-processing hooks are run on the tail of the list;
// the loop stops once the last token in the list is tok::eof.
// Must only be called while Tokens is still empty.
56  assert(Tokens.empty());
57  assert(FirstInLineIndex == 0);
58  do {
59  Tokens.push_back(getNextToken());
60  if (Style.Language == FormatStyle::LK_JavaScript) {
// These two may extend the token just appended by lexing further ahead in the
// buffer (regex literals and template strings).
61  tryParseJSRegexLiteral();
62  handleTemplateStrings();
63  }
64  if (Style.Language == FormatStyle::LK_TextProto)
65  tryParsePythonComment();
// May replace several trailing tokens by a single merged token.
66  tryMergePreviousTokens();
67  if (Style.isCSharp())
68  // This needs to come after tokens have been merged so that C#
69  // string literals are correctly identified.
70  handleCSharpVerbatimAndInterpolatedStrings();
// Remember the index of the most recent token that starts a line (or spans
// several lines); tryMergeConflictMarkers() reads FirstInLineIndex.
71  if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
72  FirstInLineIndex = Tokens.size() - 1;
73  } while (Tokens.back()->Tok.isNot(tok::eof));
74  return Tokens;
75 }
76 
77 void FormatTokenLexer::tryMergePreviousTokens() {
78  if (tryMerge_TMacro())
79  return;
80  if (tryMergeConflictMarkers())
81  return;
82  if (tryMergeLessLess())
83  return;
84  if (tryMergeForEach())
85  return;
86  if (Style.isCpp() && tryTransformTryUsageForC())
87  return;
88 
89  if (Style.isCSharp()) {
90  if (tryMergeCSharpKeywordVariables())
91  return;
92  if (tryMergeCSharpStringLiteral())
93  return;
94  if (tryMergeCSharpDoubleQuestion())
95  return;
96  if (tryMergeCSharpNullConditional())
97  return;
98  if (tryTransformCSharpForEach())
99  return;
100  static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
101  if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
102  return;
103  }
104 
105  if (tryMergeNSStringLiteral())
106  return;
107 
108  if (Style.Language == FormatStyle::LK_JavaScript) {
109  static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
110  static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
111  tok::equal};
112  static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
113  tok::greaterequal};
114  static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
115  static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
116  static const tok::TokenKind JSExponentiationEqual[] = {tok::star,
117  tok::starequal};
118  static const tok::TokenKind JSNullPropagatingOperator[] = {tok::question,
119  tok::period};
120  static const tok::TokenKind JSNullishOperator[] = {tok::question,
121  tok::question};
122 
123  // FIXME: Investigate what token type gives the correct operator priority.
124  if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
125  return;
126  if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
127  return;
128  if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
129  return;
130  if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
131  return;
132  if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
133  return;
134  if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
135  Tokens.back()->Tok.setKind(tok::starequal);
136  return;
137  }
138  if (tryMergeTokens(JSNullishOperator, TT_JsNullishCoalescingOperator)) {
139  // Treat like the "||" operator (as opposed to the ternary ?).
140  Tokens.back()->Tok.setKind(tok::pipepipe);
141  return;
142  }
143  if (tryMergeTokens(JSNullPropagatingOperator,
144  TT_JsNullPropagatingOperator)) {
145  // Treat like a regular "." access.
146  Tokens.back()->Tok.setKind(tok::period);
147  return;
148  }
149  if (tryMergeJSPrivateIdentifier())
150  return;
151  }
152 
153  if (Style.Language == FormatStyle::LK_Java) {
154  static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
155  tok::greater, tok::greater, tok::greaterequal};
156  if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
157  return;
158  }
159 }
160 
161 bool FormatTokenLexer::tryMergeNSStringLiteral() {
162  if (Tokens.size() < 2)
163  return false;
164  auto &At = *(Tokens.end() - 2);
165  auto &String = *(Tokens.end() - 1);
166  if (!At->is(tok::at) || !String->is(tok::string_literal))
167  return false;
168  At->Tok.setKind(tok::string_literal);
169  At->TokenText = StringRef(At->TokenText.begin(),
170  String->TokenText.end() - At->TokenText.begin());
171  At->ColumnWidth += String->ColumnWidth;
172  At->setType(TT_ObjCStringLiteral);
173  Tokens.erase(Tokens.end() - 1);
174  return true;
175 }
176 
177 bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
178  // Merges #idenfier into a single identifier with the text #identifier
179  // but the token tok::identifier.
180  if (Tokens.size() < 2)
181  return false;
182  auto &Hash = *(Tokens.end() - 2);
183  auto &Identifier = *(Tokens.end() - 1);
184  if (!Hash->is(tok::hash) || !Identifier->is(tok::identifier))
185  return false;
186  Hash->Tok.setKind(tok::identifier);
187  Hash->TokenText =
188  StringRef(Hash->TokenText.begin(),
189  Identifier->TokenText.end() - Hash->TokenText.begin());
190  Hash->ColumnWidth += Identifier->ColumnWidth;
191  Hash->setType(TT_JsPrivateIdentifier);
192  Tokens.erase(Tokens.end() - 1);
193  return true;
194 }
195 
196 // Search for verbatim or interpolated string literals @"ABC" or
197 // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to
198 // prevent splitting of @, $ and ".
199 // Merging of multiline verbatim strings with embedded '"' is handled in
200 // handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
201 bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
202  if (Tokens.size() < 2)
203  return false;
204 
205  // Interpolated strings could contain { } with " characters inside.
206  // $"{x ?? "null"}"
207  // should not be split into $"{x ?? ", null, "}" but should treated as a
208  // single string-literal.
209  //
210  // We opt not to try and format expressions inside {} within a C#
211  // interpolated string. Formatting expressions within an interpolated string
212  // would require similar work as that done for JavaScript template strings
213  // in `handleTemplateStrings()`.
214  auto &CSharpInterpolatedString = *(Tokens.end() - 2);
215  if (CSharpInterpolatedString->getType() == TT_CSharpStringLiteral &&
216  (CSharpInterpolatedString->TokenText.startswith(R"($")") ||
217  CSharpInterpolatedString->TokenText.startswith(R"($@")"))) {
218  int UnmatchedOpeningBraceCount = 0;
219 
220  auto TokenTextSize = CSharpInterpolatedString->TokenText.size();
221  for (size_t Index = 0; Index < TokenTextSize; ++Index) {
222  char C = CSharpInterpolatedString->TokenText[Index];
223  if (C == '{') {
224  // "{{" inside an interpolated string is an escaped '{' so skip it.
225  if (Index + 1 < TokenTextSize &&
226  CSharpInterpolatedString->TokenText[Index + 1] == '{') {
227  ++Index;
228  continue;
229  }
230  ++UnmatchedOpeningBraceCount;
231  } else if (C == '}') {
232  // "}}" inside an interpolated string is an escaped '}' so skip it.
233  if (Index + 1 < TokenTextSize &&
234  CSharpInterpolatedString->TokenText[Index + 1] == '}') {
235  ++Index;
236  continue;
237  }
238  --UnmatchedOpeningBraceCount;
239  }
240  }
241 
242  if (UnmatchedOpeningBraceCount > 0) {
243  auto &NextToken = *(Tokens.end() - 1);
244  CSharpInterpolatedString->TokenText =
245  StringRef(CSharpInterpolatedString->TokenText.begin(),
246  NextToken->TokenText.end() -
247  CSharpInterpolatedString->TokenText.begin());
248  CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth;
249  Tokens.erase(Tokens.end() - 1);
250  return true;
251  }
252  }
253 
254  // Look for @"aaaaaa" or $"aaaaaa".
255  auto &String = *(Tokens.end() - 1);
256  if (!String->is(tok::string_literal))
257  return false;
258 
259  auto &At = *(Tokens.end() - 2);
260  if (!(At->is(tok::at) || At->TokenText == "$"))
261  return false;
262 
263  if (Tokens.size() > 2 && At->is(tok::at)) {
264  auto &Dollar = *(Tokens.end() - 3);
265  if (Dollar->TokenText == "$") {
266  // This looks like $@"aaaaa" so we need to combine all 3 tokens.
267  Dollar->Tok.setKind(tok::string_literal);
268  Dollar->TokenText =
269  StringRef(Dollar->TokenText.begin(),
270  String->TokenText.end() - Dollar->TokenText.begin());
271  Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth);
272  Dollar->setType(TT_CSharpStringLiteral);
273  Tokens.erase(Tokens.end() - 2);
274  Tokens.erase(Tokens.end() - 1);
275  return true;
276  }
277  }
278 
279  // Convert back into just a string_literal.
280  At->Tok.setKind(tok::string_literal);
281  At->TokenText = StringRef(At->TokenText.begin(),
282  String->TokenText.end() - At->TokenText.begin());
283  At->ColumnWidth += String->ColumnWidth;
284  At->setType(TT_CSharpStringLiteral);
285  Tokens.erase(Tokens.end() - 1);
286  return true;
287 }
288 
289 // Valid C# attribute targets:
290 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/concepts/attributes/#attribute-targets
291 const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
292  "assembly", "module", "field", "event", "method",
293  "param", "property", "return", "type",
294 };
295 
296 bool FormatTokenLexer::tryMergeCSharpDoubleQuestion() {
297  if (Tokens.size() < 2)
298  return false;
299  auto &FirstQuestion = *(Tokens.end() - 2);
300  auto &SecondQuestion = *(Tokens.end() - 1);
301  if (!FirstQuestion->is(tok::question) || !SecondQuestion->is(tok::question))
302  return false;
303  FirstQuestion->Tok.setKind(tok::question); // no '??' in clang tokens.
304  FirstQuestion->TokenText = StringRef(FirstQuestion->TokenText.begin(),
305  SecondQuestion->TokenText.end() -
306  FirstQuestion->TokenText.begin());
307  FirstQuestion->ColumnWidth += SecondQuestion->ColumnWidth;
308  FirstQuestion->setType(TT_CSharpNullCoalescing);
309  Tokens.erase(Tokens.end() - 1);
310  return true;
311 }
312 
313 // Merge '?[' and '?.' pairs into single tokens.
314 bool FormatTokenLexer::tryMergeCSharpNullConditional() {
315  if (Tokens.size() < 2)
316  return false;
317  auto &Question = *(Tokens.end() - 2);
318  auto &PeriodOrLSquare = *(Tokens.end() - 1);
319  if (!Question->is(tok::question) ||
320  !PeriodOrLSquare->isOneOf(tok::l_square, tok::period))
321  return false;
322  Question->TokenText =
323  StringRef(Question->TokenText.begin(),
324  PeriodOrLSquare->TokenText.end() - Question->TokenText.begin());
325  Question->ColumnWidth += PeriodOrLSquare->ColumnWidth;
326 
327  if (PeriodOrLSquare->is(tok::l_square)) {
328  Question->Tok.setKind(tok::question); // no '?[' in clang tokens.
329  Question->setType(TT_CSharpNullConditionalLSquare);
330  } else {
331  Question->Tok.setKind(tok::question); // no '?.' in clang tokens.
332  Question->setType(TT_CSharpNullConditional);
333  }
334 
335  Tokens.erase(Tokens.end() - 1);
336  return true;
337 }
338 
339 bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
340  if (Tokens.size() < 2)
341  return false;
342  auto &At = *(Tokens.end() - 2);
343  auto &Keyword = *(Tokens.end() - 1);
344  if (!At->is(tok::at))
345  return false;
346  if (!Keywords.isCSharpKeyword(*Keyword))
347  return false;
348 
349  At->Tok.setKind(tok::identifier);
350  At->TokenText = StringRef(At->TokenText.begin(),
351  Keyword->TokenText.end() - At->TokenText.begin());
352  At->ColumnWidth += Keyword->ColumnWidth;
353  At->setType(Keyword->getType());
354  Tokens.erase(Tokens.end() - 1);
355  return true;
356 }
357 
358 // In C# transform identifier foreach into kw_foreach
359 bool FormatTokenLexer::tryTransformCSharpForEach() {
360  if (Tokens.size() < 1)
361  return false;
362  auto &Identifier = *(Tokens.end() - 1);
363  if (!Identifier->is(tok::identifier))
364  return false;
365  if (Identifier->TokenText != "foreach")
366  return false;
367 
368  Identifier->setType(TT_ForEachMacro);
369  Identifier->Tok.setKind(tok::kw_for);
370  return true;
371 }
372 
373 bool FormatTokenLexer::tryMergeForEach() {
374  if (Tokens.size() < 2)
375  return false;
376  auto &For = *(Tokens.end() - 2);
377  auto &Each = *(Tokens.end() - 1);
378  if (!For->is(tok::kw_for))
379  return false;
380  if (!Each->is(tok::identifier))
381  return false;
382  if (Each->TokenText != "each")
383  return false;
384 
385  For->setType(TT_ForEachMacro);
386  For->Tok.setKind(tok::kw_for);
387 
388  For->TokenText = StringRef(For->TokenText.begin(),
389  Each->TokenText.end() - For->TokenText.begin());
390  For->ColumnWidth += Each->ColumnWidth;
391  Tokens.erase(Tokens.end() - 1);
392  return true;
393 }
394 
395 bool FormatTokenLexer::tryTransformTryUsageForC() {
396  if (Tokens.size() < 2)
397  return false;
398  auto &Try = *(Tokens.end() - 2);
399  if (!Try->is(tok::kw_try))
400  return false;
401  auto &Next = *(Tokens.end() - 1);
402  if (Next->isOneOf(tok::l_brace, tok::colon))
403  return false;
404 
405  if (Tokens.size() > 2) {
406  auto &At = *(Tokens.end() - 3);
407  if (At->is(tok::at))
408  return false;
409  }
410 
411  Try->Tok.setKind(tok::identifier);
412  return true;
413 }
414 
415 bool FormatTokenLexer::tryMergeLessLess() {
416  // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
417  if (Tokens.size() < 3)
418  return false;
419 
420  bool FourthTokenIsLess = false;
421  if (Tokens.size() > 3)
422  FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
423 
424  auto First = Tokens.end() - 3;
425  if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
426  First[0]->isNot(tok::less) || FourthTokenIsLess)
427  return false;
428 
429  // Only merge if there currently is no whitespace between the two "<".
430  if (First[1]->WhitespaceRange.getBegin() !=
431  First[1]->WhitespaceRange.getEnd())
432  return false;
433 
434  First[0]->Tok.setKind(tok::lessless);
435  First[0]->TokenText = "<<";
436  First[0]->ColumnWidth += 1;
437  Tokens.erase(Tokens.end() - 2);
438  return true;
439 }
440 
441 bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
442  TokenType NewType) {
443  if (Tokens.size() < Kinds.size())
444  return false;
445 
447  Tokens.end() - Kinds.size();
448  if (!First[0]->is(Kinds[0]))
449  return false;
450  unsigned AddLength = 0;
451  for (unsigned i = 1; i < Kinds.size(); ++i) {
452  if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
453  First[i]->WhitespaceRange.getEnd())
454  return false;
455  AddLength += First[i]->TokenText.size();
456  }
457  Tokens.resize(Tokens.size() - Kinds.size() + 1);
458  First[0]->TokenText = StringRef(First[0]->TokenText.data(),
459  First[0]->TokenText.size() + AddLength);
460  First[0]->ColumnWidth += AddLength;
461  First[0]->setType(NewType);
462  return true;
463 }
464 
465 // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
466 bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
467  // NB: This is not entirely correct, as an r_paren can introduce an operand
468  // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
469  // corner case to not matter in practice, though.
470  return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
471  tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
472  tok::colon, tok::question, tok::tilde) ||
473  Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
474  tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
475  tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
476  Tok->isBinaryOperator();
477 }
478 
479 bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
480  if (!Prev)
481  return true;
482 
483  // Regex literals can only follow after prefix unary operators, not after
484  // postfix unary operators. If the '++' is followed by a non-operand
485  // introducing token, the slash here is the operand and not the start of a
486  // regex.
487  // `!` is an unary prefix operator, but also a post-fix operator that casts
488  // away nullability, so the same check applies.
489  if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
490  return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
491 
492  // The previous token must introduce an operand location where regex
493  // literals can occur.
494  if (!precedesOperand(Prev))
495  return false;
496 
497  return true;
498 }
499 
500 // Tries to parse a JavaScript Regex literal starting at the current token,
501 // if that begins with a slash and is in a location where JavaScript allows
502 // regex literals. Changes the current token to a regex literal and updates
503 // its text if successful.
504 void FormatTokenLexer::tryParseJSRegexLiteral() {
505  FormatToken *RegexToken = Tokens.back();
506  if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
507  return;
508 
509  FormatToken *Prev = nullptr;
510  for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
511  // NB: Because previous pointers are not initialized yet, this cannot use
512  // Token.getPreviousNonComment.
513  if ((*I)->isNot(tok::comment)) {
514  Prev = *I;
515  break;
516  }
517  }
518 
519  if (!canPrecedeRegexLiteral(Prev))
520  return;
521 
522  // 'Manually' lex ahead in the current file buffer.
523  const char *Offset = Lex->getBufferLocation();
524  const char *RegexBegin = Offset - RegexToken->TokenText.size();
525  StringRef Buffer = Lex->getBuffer();
526  bool InCharacterClass = false;
527  bool HaveClosingSlash = false;
528  for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
529  // Regular expressions are terminated with a '/', which can only be
530  // escaped using '\' or a character class between '[' and ']'.
531  // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
532  switch (*Offset) {
533  case '\\':
534  // Skip the escaped character.
535  ++Offset;
536  break;
537  case '[':
538  InCharacterClass = true;
539  break;
540  case ']':
541  InCharacterClass = false;
542  break;
543  case '/':
544  if (!InCharacterClass)
545  HaveClosingSlash = true;
546  break;
547  }
548  }
549 
550  RegexToken->setType(TT_RegexLiteral);
551  // Treat regex literals like other string_literals.
552  RegexToken->Tok.setKind(tok::string_literal);
553  RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
554  RegexToken->ColumnWidth = RegexToken->TokenText.size();
555 
556  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
557 }
558 
559 void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
560  FormatToken *CSharpStringLiteral = Tokens.back();
561 
562  if (CSharpStringLiteral->getType() != TT_CSharpStringLiteral)
563  return;
564 
565  // Deal with multiline strings.
566  if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
567  CSharpStringLiteral->TokenText.startswith(R"($@")")))
568  return;
569 
570  const char *StrBegin =
571  Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
572  const char *Offset = StrBegin;
573  if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
574  Offset += 2;
575  else // CSharpStringLiteral->TokenText.startswith(R"($@")")
576  Offset += 3;
577 
578  // Look for a terminating '"' in the current file buffer.
579  // Make no effort to format code within an interpolated or verbatim string.
580  for (; Offset != Lex->getBuffer().end(); ++Offset) {
581  if (Offset[0] == '"') {
582  // "" within a verbatim string is an escaped double quote: skip it.
583  if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
584  ++Offset;
585  else
586  break;
587  }
588  }
589 
590  // Make no attempt to format code properly if a verbatim string is
591  // unterminated.
592  if (Offset == Lex->getBuffer().end())
593  return;
594 
595  StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
596  CSharpStringLiteral->TokenText = LiteralText;
597 
598  // Adjust width for potentially multiline string literals.
599  size_t FirstBreak = LiteralText.find('\n');
600  StringRef FirstLineText = FirstBreak == StringRef::npos
601  ? LiteralText
602  : LiteralText.substr(0, FirstBreak);
603  CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
604  FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
605  Encoding);
606  size_t LastBreak = LiteralText.rfind('\n');
607  if (LastBreak != StringRef::npos) {
608  CSharpStringLiteral->IsMultiline = true;
609  unsigned StartColumn = 0;
610  CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
611  LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
612  Style.TabWidth, Encoding);
613  }
614 
615  SourceLocation loc = Offset < Lex->getBuffer().end()
616  ? Lex->getSourceLocation(Offset + 1)
617  : SourceMgr.getLocForEndOfFile(ID);
618  resetLexer(SourceMgr.getFileOffset(loc));
619 }
620 
621 void FormatTokenLexer::handleTemplateStrings() {
622  FormatToken *BacktickToken = Tokens.back();
623 
624  if (BacktickToken->is(tok::l_brace)) {
625  StateStack.push(LexerState::NORMAL);
626  return;
627  }
628  if (BacktickToken->is(tok::r_brace)) {
629  if (StateStack.size() == 1)
630  return;
631  StateStack.pop();
632  if (StateStack.top() != LexerState::TEMPLATE_STRING)
633  return;
634  // If back in TEMPLATE_STRING, fallthrough and continue parsing the
635  } else if (BacktickToken->is(tok::unknown) &&
636  BacktickToken->TokenText == "`") {
637  StateStack.push(LexerState::TEMPLATE_STRING);
638  } else {
639  return; // Not actually a template
640  }
641 
642  // 'Manually' lex ahead in the current file buffer.
643  const char *Offset = Lex->getBufferLocation();
644  const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
645  for (; Offset != Lex->getBuffer().end(); ++Offset) {
646  if (Offset[0] == '`') {
647  StateStack.pop();
648  break;
649  }
650  if (Offset[0] == '\\') {
651  ++Offset; // Skip the escaped character.
652  } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
653  Offset[1] == '{') {
654  // '${' introduces an expression interpolation in the template string.
655  StateStack.push(LexerState::NORMAL);
656  ++Offset;
657  break;
658  }
659  }
660 
661  StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
662  BacktickToken->setType(TT_TemplateString);
663  BacktickToken->Tok.setKind(tok::string_literal);
664  BacktickToken->TokenText = LiteralText;
665 
666  // Adjust width for potentially multiline string literals.
667  size_t FirstBreak = LiteralText.find('\n');
668  StringRef FirstLineText = FirstBreak == StringRef::npos
669  ? LiteralText
670  : LiteralText.substr(0, FirstBreak);
672  FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
673  size_t LastBreak = LiteralText.rfind('\n');
674  if (LastBreak != StringRef::npos) {
675  BacktickToken->IsMultiline = true;
676  unsigned StartColumn = 0; // The template tail spans the entire line.
678  LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
679  Style.TabWidth, Encoding);
680  }
681 
682  SourceLocation loc = Offset < Lex->getBuffer().end()
683  ? Lex->getSourceLocation(Offset + 1)
684  : SourceMgr.getLocForEndOfFile(ID);
685  resetLexer(SourceMgr.getFileOffset(loc));
686 }
687 
688 void FormatTokenLexer::tryParsePythonComment() {
689  FormatToken *HashToken = Tokens.back();
690  if (!HashToken->isOneOf(tok::hash, tok::hashhash))
691  return;
692  // Turn the remainder of this line into a comment.
693  const char *CommentBegin =
694  Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
695  size_t From = CommentBegin - Lex->getBuffer().begin();
696  size_t To = Lex->getBuffer().find_first_of('\n', From);
697  if (To == StringRef::npos)
698  To = Lex->getBuffer().size();
699  size_t Len = To - From;
700  HashToken->setType(TT_LineComment);
701  HashToken->Tok.setKind(tok::comment);
702  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
703  SourceLocation Loc = To < Lex->getBuffer().size()
704  ? Lex->getSourceLocation(CommentBegin + Len)
705  : SourceMgr.getLocForEndOfFile(ID);
706  resetLexer(SourceMgr.getFileOffset(Loc));
707 }
708 
709 bool FormatTokenLexer::tryMerge_TMacro() {
710  if (Tokens.size() < 4)
711  return false;
712  FormatToken *Last = Tokens.back();
713  if (!Last->is(tok::r_paren))
714  return false;
715 
716  FormatToken *String = Tokens[Tokens.size() - 2];
717  if (!String->is(tok::string_literal) || String->IsMultiline)
718  return false;
719 
720  if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
721  return false;
722 
723  FormatToken *Macro = Tokens[Tokens.size() - 4];
724  if (Macro->TokenText != "_T")
725  return false;
726 
727  const char *Start = Macro->TokenText.data();
728  const char *End = Last->TokenText.data() + Last->TokenText.size();
729  String->TokenText = StringRef(Start, End - Start);
730  String->IsFirst = Macro->IsFirst;
731  String->LastNewlineOffset = Macro->LastNewlineOffset;
732  String->WhitespaceRange = Macro->WhitespaceRange;
733  String->OriginalColumn = Macro->OriginalColumn;
735  String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
736  String->NewlinesBefore = Macro->NewlinesBefore;
737  String->HasUnescapedNewline = Macro->HasUnescapedNewline;
738 
739  Tokens.pop_back();
740  Tokens.pop_back();
741  Tokens.pop_back();
742  Tokens.back() = String;
743  return true;
744 }
745 
746 bool FormatTokenLexer::tryMergeConflictMarkers() {
747  if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
748  return false;
749 
750  // Conflict lines look like:
751  // <marker> <text from the vcs>
752  // For example:
753  // >>>>>>> /file/in/file/system at revision 1234
754  //
755  // We merge all tokens in a line that starts with a conflict marker
756  // into a single token with a special token type that the unwrapped line
757  // parser will use to correctly rebuild the underlying code.
758 
759  FileID ID;
760  // Get the position of the first token in the line.
761  unsigned FirstInLineOffset;
762  std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
763  Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
764  StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
765  // Calculate the offset of the start of the current line.
766  auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
767  if (LineOffset == StringRef::npos) {
768  LineOffset = 0;
769  } else {
770  ++LineOffset;
771  }
772 
773  auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
774  StringRef LineStart;
775  if (FirstSpace == StringRef::npos) {
776  LineStart = Buffer.substr(LineOffset);
777  } else {
778  LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
779  }
780 
781  TokenType Type = TT_Unknown;
782  if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
783  Type = TT_ConflictStart;
784  } else if (LineStart == "|||||||" || LineStart == "=======" ||
785  LineStart == "====") {
786  Type = TT_ConflictAlternative;
787  } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
788  Type = TT_ConflictEnd;
789  }
790 
791  if (Type != TT_Unknown) {
792  FormatToken *Next = Tokens.back();
793 
794  Tokens.resize(FirstInLineIndex + 1);
795  // We do not need to build a complete token here, as we will skip it
796  // during parsing anyway (as we must not touch whitespace around conflict
797  // markers).
798  Tokens.back()->setType(Type);
799  Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
800 
801  Tokens.push_back(Next);
802  return true;
803  }
804 
805  return false;
806 }
807 
808 FormatToken *FormatTokenLexer::getStashedToken() {
809  // Create a synthesized second '>' or '<' token.
810  Token Tok = FormatTok->Tok;
811  StringRef TokenText = FormatTok->TokenText;
812 
813  unsigned OriginalColumn = FormatTok->OriginalColumn;
814  FormatTok = new (Allocator.Allocate()) FormatToken;
815  FormatTok->Tok = Tok;
816  SourceLocation TokLocation =
817  FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
818  FormatTok->Tok.setLocation(TokLocation);
819  FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
820  FormatTok->TokenText = TokenText;
821  FormatTok->ColumnWidth = 1;
822  FormatTok->OriginalColumn = OriginalColumn + 1;
823 
824  return FormatTok;
825 }
826 
827 FormatToken *FormatTokenLexer::getNextToken() {
828  if (StateStack.top() == LexerState::TOKEN_STASHED) {
829  StateStack.pop();
830  return getStashedToken();
831  }
832 
833  FormatTok = new (Allocator.Allocate()) FormatToken;
834  readRawToken(*FormatTok);
835  SourceLocation WhitespaceStart =
836  FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
837  FormatTok->IsFirst = IsFirstToken;
838  IsFirstToken = false;
839 
840  // Consume and record whitespace until we find a significant token.
841  unsigned WhitespaceLength = TrailingWhitespace;
842  while (FormatTok->Tok.is(tok::unknown)) {
843  StringRef Text = FormatTok->TokenText;
844  auto EscapesNewline = [&](int pos) {
845  // A '\r' here is just part of '\r\n'. Skip it.
846  if (pos >= 0 && Text[pos] == '\r')
847  --pos;
848  // See whether there is an odd number of '\' before this.
849  // FIXME: This is wrong. A '\' followed by a newline is always removed,
850  // regardless of whether there is another '\' before it.
851  // FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph.
852  unsigned count = 0;
853  for (; pos >= 0; --pos, ++count)
854  if (Text[pos] != '\\')
855  break;
856  return count & 1;
857  };
858  // FIXME: This miscounts tok:unknown tokens that are not just
859  // whitespace, e.g. a '`' character.
860  for (int i = 0, e = Text.size(); i != e; ++i) {
861  switch (Text[i]) {
862  case '\n':
863  ++FormatTok->NewlinesBefore;
864  FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
865  FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
866  Column = 0;
867  break;
868  case '\r':
869  FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
870  Column = 0;
871  break;
872  case '\f':
873  case '\v':
874  Column = 0;
875  break;
876  case ' ':
877  ++Column;
878  break;
879  case '\t':
880  Column +=
881  Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
882  break;
883  case '\\':
884  if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
885  FormatTok->setType(TT_ImplicitStringLiteral);
886  break;
887  default:
888  FormatTok->setType(TT_ImplicitStringLiteral);
889  break;
890  }
891  if (FormatTok->getType() == TT_ImplicitStringLiteral)
892  break;
893  }
894 
895  if (FormatTok->is(TT_ImplicitStringLiteral))
896  break;
897  WhitespaceLength += FormatTok->Tok.getLength();
898 
899  readRawToken(*FormatTok);
900  }
901 
902  // JavaScript and Java do not allow to escape the end of the line with a
903  // backslash. Backslashes are syntax errors in plain source, but can occur in
904  // comments. When a single line comment ends with a \, it'll cause the next
905  // line of code to be lexed as a comment, breaking formatting. The code below
906  // finds comments that contain a backslash followed by a line break, truncates
907  // the comment token at the backslash, and resets the lexer to restart behind
908  // the backslash.
909  if ((Style.Language == FormatStyle::LK_JavaScript ||
910  Style.Language == FormatStyle::LK_Java) &&
911  FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
912  size_t BackslashPos = FormatTok->TokenText.find('\\');
913  while (BackslashPos != StringRef::npos) {
914  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
915  FormatTok->TokenText[BackslashPos + 1] == '\n') {
916  const char *Offset = Lex->getBufferLocation();
917  Offset -= FormatTok->TokenText.size();
918  Offset += BackslashPos + 1;
919  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
920  FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);
922  FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
923  Encoding);
924  break;
925  }
926  BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
927  }
928  }
929 
930  // In case the token starts with escaped newlines, we want to
931  // take them into account as whitespace - this pattern is quite frequent
932  // in macro definitions.
933  // FIXME: Add a more explicit test.
934  while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {
935  unsigned SkippedWhitespace = 0;
936  if (FormatTok->TokenText.size() > 2 &&
937  (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n'))
938  SkippedWhitespace = 3;
939  else if (FormatTok->TokenText[1] == '\n')
940  SkippedWhitespace = 2;
941  else
942  break;
943 
944  ++FormatTok->NewlinesBefore;
945  WhitespaceLength += SkippedWhitespace;
946  FormatTok->LastNewlineOffset = SkippedWhitespace;
947  Column = 0;
948  FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);
949  }
950 
951  FormatTok->WhitespaceRange = SourceRange(
952  WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
953 
954  FormatTok->OriginalColumn = Column;
955 
956  TrailingWhitespace = 0;
957  if (FormatTok->Tok.is(tok::comment)) {
958  // FIXME: Add the trimmed whitespace to Column.
959  StringRef UntrimmedText = FormatTok->TokenText;
960  FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
961  TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
962  } else if (FormatTok->Tok.is(tok::raw_identifier)) {
963  IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
964  FormatTok->Tok.setIdentifierInfo(&Info);
965  FormatTok->Tok.setKind(Info.getTokenID());
966  if (Style.Language == FormatStyle::LK_Java &&
967  FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
968  tok::kw_operator)) {
969  FormatTok->Tok.setKind(tok::identifier);
970  FormatTok->Tok.setIdentifierInfo(nullptr);
971  } else if (Style.Language == FormatStyle::LK_JavaScript &&
972  FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
973  tok::kw_operator)) {
974  FormatTok->Tok.setKind(tok::identifier);
975  FormatTok->Tok.setIdentifierInfo(nullptr);
976  }
977  } else if (FormatTok->Tok.is(tok::greatergreater)) {
978  FormatTok->Tok.setKind(tok::greater);
979  FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
980  ++Column;
981  StateStack.push(LexerState::TOKEN_STASHED);
982  } else if (FormatTok->Tok.is(tok::lessless)) {
983  FormatTok->Tok.setKind(tok::less);
984  FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
985  ++Column;
986  StateStack.push(LexerState::TOKEN_STASHED);
987  }
988 
989  // Now FormatTok is the next non-whitespace token.
990 
991  StringRef Text = FormatTok->TokenText;
992  size_t FirstNewlinePos = Text.find('\n');
993  if (FirstNewlinePos == StringRef::npos) {
994  // FIXME: ColumnWidth actually depends on the start column, we need to
995  // take this into account when the token is moved.
996  FormatTok->ColumnWidth =
997  encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
998  Column += FormatTok->ColumnWidth;
999  } else {
1000  FormatTok->IsMultiline = true;
1001  // FIXME: ColumnWidth actually depends on the start column, we need to
1002  // take this into account when the token is moved.
1004  Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1005 
1006  // The last line of the token always starts in column 0.
1007  // Thus, the length can be precomputed even in the presence of tabs.
1009  Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
1010  Column = FormatTok->LastLineColumnWidth;
1011  }
1012 
1013  if (Style.isCpp()) {
1014  auto it = Macros.find(FormatTok->Tok.getIdentifierInfo());
1015  if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1016  Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1017  tok::pp_define) &&
1018  it != Macros.end()) {
1019  FormatTok->setType(it->second);
1020  } else if (FormatTok->is(tok::identifier)) {
1021  if (MacroBlockBeginRegex.match(Text)) {
1022  FormatTok->setType(TT_MacroBlockBegin);
1023  } else if (MacroBlockEndRegex.match(Text)) {
1024  FormatTok->setType(TT_MacroBlockEnd);
1025  }
1026  }
1027  }
1028 
1029  return FormatTok;
1030 }
1031 
1032 void FormatTokenLexer::readRawToken(FormatToken &Tok) {
1033  Lex->LexFromRawLexer(Tok.Tok);
1034  Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1035  Tok.Tok.getLength());
1036  // For formatting, treat unterminated string literals like normal string
1037  // literals.
1038  if (Tok.is(tok::unknown)) {
1039  if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1040  Tok.Tok.setKind(tok::string_literal);
1041  Tok.IsUnterminatedLiteral = true;
1042  } else if (Style.Language == FormatStyle::LK_JavaScript &&
1043  Tok.TokenText == "''") {
1044  Tok.Tok.setKind(tok::string_literal);
1045  }
1046  }
1047 
1048  if ((Style.Language == FormatStyle::LK_JavaScript ||
1049  Style.Language == FormatStyle::LK_Proto ||
1050  Style.Language == FormatStyle::LK_TextProto) &&
1051  Tok.is(tok::char_constant)) {
1052  Tok.Tok.setKind(tok::string_literal);
1053  }
1054 
1055  if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1056  Tok.TokenText == "/* clang-format on */")) {
1057  FormattingDisabled = false;
1058  }
1059 
1060  Tok.Finalized = FormattingDisabled;
1061 
1062  if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1063  Tok.TokenText == "/* clang-format off */")) {
1064  FormattingDisabled = true;
1065  }
1066 }
1067 
1068 void FormatTokenLexer::resetLexer(unsigned Offset) {
1069  StringRef Buffer = SourceMgr.getBufferData(ID);
1070  Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1071  getFormattingLangOpts(Style), Buffer.begin(),
1072  Buffer.begin() + Offset, Buffer.end()));
1073  Lex->SetKeepWhitespaceMode(true);
1074  TrailingWhitespace = 0;
1075 }
1076 
1077 } // namespace format
1078 } // namespace clang
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
StringRef Identifier
Definition: Format.cpp:2026
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
Definition: Lexer.h:76
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
Token Tok
The Token.
Definition: FormatToken.h:146
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:97
Defines the SourceManager interface.
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:242
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
The base class of the type hierarchy.
Definition: Type.h:1472
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:175
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:178
bool isBinaryOperator() const
Definition: FormatToken.h:449
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:152
One of these records is kept for each identifier that is lexed.
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:61
void setKind(tok::TokenKind K)
Definition: Token.h:93
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
const FormatToken & Tok
bool isCSharpKeyword(const FormatToken &Tok) const
Returns true if Tok is a C# keyword, returns false if it is anything else.
Definition: FormatToken.h:996
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:343
unsigned Offset
Definition: Format.cpp:2020
SourceLocation End
Implements an efficient mapping from strings to IdentifierInfo nodes.
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:2707
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:126
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:142
void setType(TokenType T)
Definition: FormatToken.h:207
unsigned LastNewlineOffset
The offset just past the last '\n' in this token's leading whitespace (relative to WhiteSpaceStart)...
Definition: FormatToken.h:163
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:334
Various functions to configurably format source code.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:188
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:159
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool IsUnterminatedLiteral
Set to true if this token is an unterminated literal.
Definition: FormatToken.h:199
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:196
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
TokenType getType() const
Returns the token's type, e.g.
Definition: FormatToken.h:206
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
const llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Dataflow Directional Tag Classes.
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:168
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:332
unsigned getLength() const
Definition: Token.h:129
Defines the clang::SourceLocation class and associated facilities.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:119
StringRef Text
Definition: Format.cpp:2019
void setLocation(SourceLocation L)
Definition: Token.h:134
#define true
Definition: stdbool.h:16
A trivial tuple used to represent a source range.
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:156
This class handles loading and caching of source files into memory.
ArrayRef< FormatToken * > lex()
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
unsigned LastLineColumnWidth
Contains the width in columns of the last line of a multi-line token.
Definition: FormatToken.h:172
const encoding::Encoding Encoding
const FormatStyle & Style