clang  16.0.0git
LiteralSupport.cpp
Go to the documentation of this file.
1 //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the NumericLiteralParser, CharLiteralParser, and
10 // StringLiteralParser interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "clang/Basic/CharInfo.h"
18 #include "clang/Basic/TargetInfo.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/Support/ConvertUTF.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Unicode.h"
31 #include <algorithm>
32 #include <cassert>
33 #include <cstddef>
34 #include <cstdint>
35 #include <cstring>
36 #include <string>
37 
38 using namespace clang;
39 
40 static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
41  switch (kind) {
42  default: llvm_unreachable("Unknown token type!");
43  case tok::char_constant:
44  case tok::string_literal:
45  case tok::utf8_char_constant:
46  case tok::utf8_string_literal:
47  return Target.getCharWidth();
48  case tok::wide_char_constant:
49  case tok::wide_string_literal:
50  return Target.getWCharWidth();
51  case tok::utf16_char_constant:
52  case tok::utf16_string_literal:
53  return Target.getChar16Width();
54  case tok::utf32_char_constant:
55  case tok::utf32_string_literal:
56  return Target.getChar32Width();
57  }
58 }
59 
// NOTE(review): this listing lost several lines of this helper during
// extraction (gutter lines 60, 65, 68 and 71): the start of the signature and
// the statements that bind and return the two computed locations. Judging by
// the call in Diag() below, this is MakeCharSourceRange(Features, TokLoc,
// TokBegin, TokRangeBegin, TokRangeEnd) -- restore from the original file
// before compiling.
//
// The visible code maps a [TokRangeBegin, TokRangeEnd) sub-range of a token's
// spelling buffer (which starts at TokBegin) back to source locations.
61  FullSourceLoc TokLoc,
62  const char *TokBegin,
63  const char *TokRangeBegin,
64  const char *TokRangeEnd) {
// First location: advance from the token start by the offset of the range
// within the spelling buffer.
66  Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
67  TokLoc.getManager(), Features);
// Second location: advance from that first location (Begin) by the length of
// the range.
69  Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
70  TokLoc.getManager(), Features);
72 }
73 
74 /// Produce a diagnostic highlighting some portion of a literal.
75 ///
76 /// Emits the diagnostic \p DiagID, highlighting the range of characters from
77 /// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
78 /// a substring of a spelling buffer for the token beginning at \p TokBegin.
///
/// \p Diags is dereferenced unconditionally, so callers must check it for
/// null before calling. The returned builder can be streamed into to supply
/// diagnostic arguments.
///
/// NOTE(review): the listing dropped the first line of the signature (gutter
/// line 79) and the declarator that receives the location computed below
/// (gutter line 83, presumably `Begin`) -- restore from the original file.
80  const LangOptions &Features, FullSourceLoc TokLoc,
81  const char *TokBegin, const char *TokRangeBegin,
82  const char *TokRangeEnd, unsigned DiagID) {
// The diagnostic is reported at the start of the highlighted range.
84  Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
85  TokLoc.getManager(), Features);
86  return Diags->Report(Begin, DiagID) <<
87  MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
88 }
89 
90 /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
91 /// either a character or a string literal.
92 static unsigned ProcessCharEscape(const char *ThisTokBegin,
93  const char *&ThisTokBuf,
94  const char *ThisTokEnd, bool &HadError,
95  FullSourceLoc Loc, unsigned CharWidth,
96  DiagnosticsEngine *Diags,
97  const LangOptions &Features) {
98  const char *EscapeBegin = ThisTokBuf;
99  bool Delimited = false;
100  bool EndDelimiterFound = false;
101 
102  // Skip the '\' char.
103  ++ThisTokBuf;
104 
105  // We know that this character can't be off the end of the buffer, because
106  // that would have been \", which would not have been the end of string.
107  unsigned ResultChar = *ThisTokBuf++;
108  switch (ResultChar) {
109  // These map to themselves.
110  case '\\': case '\'': case '"': case '?': break;
111 
112  // These have fixed mappings.
113  case 'a':
114  // TODO: K&R: the meaning of '\\a' is different in traditional C
115  ResultChar = 7;
116  break;
117  case 'b':
118  ResultChar = 8;
119  break;
120  case 'e':
121  if (Diags)
122  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
123  diag::ext_nonstandard_escape) << "e";
124  ResultChar = 27;
125  break;
126  case 'E':
127  if (Diags)
128  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
129  diag::ext_nonstandard_escape) << "E";
130  ResultChar = 27;
131  break;
132  case 'f':
133  ResultChar = 12;
134  break;
135  case 'n':
136  ResultChar = 10;
137  break;
138  case 'r':
139  ResultChar = 13;
140  break;
141  case 't':
142  ResultChar = 9;
143  break;
144  case 'v':
145  ResultChar = 11;
146  break;
147  case 'x': { // Hex escape.
148  ResultChar = 0;
149  if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
150  Delimited = true;
151  ThisTokBuf++;
152  if (*ThisTokBuf == '}') {
153  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
154  diag::err_delimited_escape_empty);
155  return ResultChar;
156  }
157  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
158  if (Diags)
159  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
160  diag::err_hex_escape_no_digits) << "x";
161  return ResultChar;
162  }
163 
164  // Hex escapes are a maximal series of hex digits.
165  bool Overflow = false;
166  for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
167  if (Delimited && *ThisTokBuf == '}') {
168  ThisTokBuf++;
169  EndDelimiterFound = true;
170  break;
171  }
172  int CharVal = llvm::hexDigitValue(*ThisTokBuf);
173  if (CharVal == -1) {
174  // Non delimited hex escape sequences stop at the first non-hex digit.
175  if (!Delimited)
176  break;
177  HadError = true;
178  if (Diags)
179  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
180  diag::err_delimited_escape_invalid)
181  << StringRef(ThisTokBuf, 1);
182  continue;
183  }
184  // About to shift out a digit?
185  if (ResultChar & 0xF0000000)
186  Overflow = true;
187  ResultChar <<= 4;
188  ResultChar |= CharVal;
189  }
190  // See if any bits will be truncated when evaluated as a character.
191  if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
192  Overflow = true;
193  ResultChar &= ~0U >> (32-CharWidth);
194  }
195 
196  // Check for overflow.
197  if (!HadError && Overflow) { // Too many digits to fit in
198  HadError = true;
199  if (Diags)
200  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
201  diag::err_escape_too_large)
202  << 0;
203  }
204  break;
205  }
206  case '0': case '1': case '2': case '3':
207  case '4': case '5': case '6': case '7': {
208  // Octal escapes.
209  --ThisTokBuf;
210  ResultChar = 0;
211 
212  // Octal escapes are a series of octal digits with maximum length 3.
213  // "\0123" is a two digit sequence equal to "\012" "3".
214  unsigned NumDigits = 0;
215  do {
216  ResultChar <<= 3;
217  ResultChar |= *ThisTokBuf++ - '0';
218  ++NumDigits;
219  } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
220  ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
221 
222  // Check for overflow. Reject '\777', but not L'\777'.
223  if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
224  if (Diags)
225  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
226  diag::err_escape_too_large) << 1;
227  ResultChar &= ~0U >> (32-CharWidth);
228  }
229  break;
230  }
231  case 'o': {
232  bool Overflow = false;
233  if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
234  HadError = true;
235  if (Diags)
236  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
237  diag::err_delimited_escape_missing_brace)
238  << "o";
239 
240  break;
241  }
242  ResultChar = 0;
243  Delimited = true;
244  ++ThisTokBuf;
245  if (*ThisTokBuf == '}') {
246  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
247  diag::err_delimited_escape_empty);
248  return ResultChar;
249  }
250 
251  while (ThisTokBuf != ThisTokEnd) {
252  if (*ThisTokBuf == '}') {
253  EndDelimiterFound = true;
254  ThisTokBuf++;
255  break;
256  }
257  if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
258  HadError = true;
259  if (Diags)
260  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
261  diag::err_delimited_escape_invalid)
262  << StringRef(ThisTokBuf, 1);
263  ThisTokBuf++;
264  continue;
265  }
266  if (ResultChar & 0x020000000)
267  Overflow = true;
268 
269  ResultChar <<= 3;
270  ResultChar |= *ThisTokBuf++ - '0';
271  }
272  // Check for overflow. Reject '\777', but not L'\777'.
273  if (!HadError &&
274  (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
275  HadError = true;
276  if (Diags)
277  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
278  diag::err_escape_too_large)
279  << 1;
280  ResultChar &= ~0U >> (32 - CharWidth);
281  }
282  break;
283  }
284  // Otherwise, these are not valid escapes.
285  case '(': case '{': case '[': case '%':
286  // GCC accepts these as extensions. We warn about them as such though.
287  if (Diags)
288  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
289  diag::ext_nonstandard_escape)
290  << std::string(1, ResultChar);
291  break;
292  default:
293  if (!Diags)
294  break;
295 
296  if (isPrintable(ResultChar))
297  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
298  diag::ext_unknown_escape)
299  << std::string(1, ResultChar);
300  else
301  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
302  diag::ext_unknown_escape)
303  << "x" + llvm::utohexstr(ResultChar);
304  break;
305  }
306 
307  if (Delimited && Diags) {
308  if (!EndDelimiterFound)
309  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
310  diag::err_expected)
311  << tok::r_brace;
312  else if (!HadError) {
313  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
314  Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence
315  : diag::ext_delimited_escape_sequence)
316  << /*delimited*/ 0 << (Features.CPlusPlus ? 1 : 0);
317  }
318  }
319 
320  return ResultChar;
321 }
322 
// Append the UTF-8 encoding of Codepoint to Str. Nothing is appended when
// llvm::ConvertCodePointToUTF8 reports failure.
//
// NOTE(review): the listing dropped the second parameter line (gutter line
// 324), which declares the output buffer `Str` -- restore from the original
// file.
323 static void appendCodePoint(unsigned Codepoint,
// Scratch space for a single encoded code point (at most 4 UTF-8 bytes).
325  char ResultBuf[4];
326  char *ResultPtr = ResultBuf;
// On success ResultPtr has been advanced past the bytes that were written.
327  if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr))
328  Str.append(ResultBuf, ResultPtr);
329 }
330 
// Copy Input into Buf, replacing every universal character name (\uXXXX,
// \UXXXXXXXX, \u{X...} or \N{name}) with the UTF-8 encoding of the code point
// it denotes. All other characters are copied through unchanged.
//
// The input is expected to be well-formed already: malformed escapes are only
// caught by assertions.
331 void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
332  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
333  if (*I != '\\') {
334  Buf.push_back(*I);
335  continue;
336  }
337 
// Step over the backslash, remember which kind of UCN follows, and step over
// the introducer as well.
338  ++I;
339  char Kind = *I;
340  ++I;
341 
342  assert(Kind == 'u' || Kind == 'U' || Kind == 'N');
343  uint32_t CodePoint = 0;
344 
// Delimited form: \u{hex-digits}.
345  if (Kind == 'u' && *I == '{') {
346  for (++I; *I != '}'; ++I) {
347  unsigned Value = llvm::hexDigitValue(*I);
348  assert(Value != -1U);
349  CodePoint <<= 4;
350  CodePoint += Value;
351  }
352  appendCodePoint(CodePoint, Buf);
// I is left on the '}'; the loop increment steps past it.
353  continue;
354  }
355 
// Named form: \N{name}, resolved with loose name matching.
356  if (Kind == 'N') {
357  assert(*I == '{');
358  ++I;
359  auto Delim = std::find(I, Input.end(), '}');
360  assert(Delim != Input.end());
// NOTE(review): the listing dropped the declarator line (gutter line 361)
// that receives the lookup result used below as `Res` -- restore from the
// original file.
362  llvm::sys::unicode::nameToCodepointLooseMatching(
363  StringRef(I, std::distance(I, Delim)));
364  assert(Res);
365  CodePoint = Res->CodePoint;
366  assert(CodePoint != 0xFFFFFFFF);
367  appendCodePoint(CodePoint, Buf);
// Leave I on the '}'; the loop increment steps past it.
368  I = Delim;
369  continue;
370  }
371 
// Fixed-width form: exactly 4 hex digits after \u, 8 after \U.
372  unsigned NumHexDigits;
373  if (Kind == 'u')
374  NumHexDigits = 4;
375  else
376  NumHexDigits = 8;
377 
378  assert(I + NumHexDigits <= E);
379 
380  for (; NumHexDigits != 0; ++I, --NumHexDigits) {
381  unsigned Value = llvm::hexDigitValue(*I);
382  assert(Value != -1U);
383 
384  CodePoint <<= 4;
385  CodePoint += Value;
386  }
387 
388  appendCodePoint(CodePoint, Buf);
// The digit loop left I one past the last digit; back up so that the outer
// loop's increment lands on the next unprocessed character.
389  --I;
390  }
391 }
392 
393 static bool ProcessNumericUCNEscape(const char *ThisTokBegin,
394  const char *&ThisTokBuf,
395  const char *ThisTokEnd, uint32_t &UcnVal,
396  unsigned short &UcnLen, bool &Delimited,
397  FullSourceLoc Loc, DiagnosticsEngine *Diags,
398  const LangOptions &Features,
399  bool in_char_string_literal = false) {
400  const char *UcnBegin = ThisTokBuf;
401  bool HasError = false;
402  bool EndDelimiterFound = false;
403 
404  // Skip the '\u' char's.
405  ThisTokBuf += 2;
406  Delimited = false;
407  if (UcnBegin[1] == 'u' && in_char_string_literal &&
408  ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
409  Delimited = true;
410  ThisTokBuf++;
411  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
412  if (Diags)
413  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
414  diag::err_hex_escape_no_digits)
415  << StringRef(&ThisTokBuf[-1], 1);
416  return false;
417  }
418  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
419 
420  bool Overflow = false;
421  unsigned short Count = 0;
422  for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
423  ++ThisTokBuf) {
424  if (Delimited && *ThisTokBuf == '}') {
425  ++ThisTokBuf;
426  EndDelimiterFound = true;
427  break;
428  }
429  int CharVal = llvm::hexDigitValue(*ThisTokBuf);
430  if (CharVal == -1) {
431  HasError = true;
432  if (!Delimited)
433  break;
434  if (Diags) {
435  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
436  diag::err_delimited_escape_invalid)
437  << StringRef(ThisTokBuf, 1);
438  }
439  Count++;
440  continue;
441  }
442  if (UcnVal & 0xF0000000) {
443  Overflow = true;
444  continue;
445  }
446  UcnVal <<= 4;
447  UcnVal |= CharVal;
448  Count++;
449  }
450 
451  if (Overflow) {
452  if (Diags)
453  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
454  diag::err_escape_too_large)
455  << 0;
456  return false;
457  }
458 
459  if (Delimited && !EndDelimiterFound) {
460  if (Diags) {
461  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
462  diag::err_expected)
463  << tok::r_brace;
464  }
465  return false;
466  }
467 
468  // If we didn't consume the proper number of digits, there is a problem.
469  if (Count == 0 || (!Delimited && Count != UcnLen)) {
470  if (Diags)
471  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
472  Delimited ? diag::err_delimited_escape_empty
473  : diag::err_ucn_escape_incomplete);
474  return false;
475  }
476  return !HasError;
477 }
478 
// Report that Name is not a valid Unicode character name and attach notes
// suggesting likely intended names. The call site in ProcessNamedUCNEscape
// names this helper DiagnoseInvalidUnicodeCharacterName.
//
// Diags is used unconditionally, so callers must pass a non-null engine.
//
// NOTE(review): the listing dropped several lines of this function (gutter
// lines 479, 490, 495, 503 and 526): the start of the signature, the
// declarators for `Res` and `Matches`, and the operator/call tokens that
// introduce the MakeCharSourceRange(...) arguments streamed into the two
// notes -- restore from the original file.
480  DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc,
481  const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd,
482  llvm::StringRef Name) {
483 
// The primary error.
484  Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
485  diag::err_invalid_ucn_name)
486  << Name;
487 
488  namespace u = llvm::sys::unicode;
489 
// If the name matches a character under loose matching rules, that single
// character is the only suggestion offered.
491  u::nameToCodepointLooseMatching(Name);
492  if (Res) {
493  Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
494  diag::note_invalid_ucn_name_loose_matching)
496  MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
497  TokRangeEnd),
498  Res->Name);
499  return;
500  }
501 
// Otherwise offer the nearest names. Matches are emitted until the distance
// of a match differs from that of the previous one by more than 3.
502  unsigned Distance = 0;
504  u::nearestMatchesForCodepointName(Name, 5);
505  assert(!Matches.empty() && "No unicode characters found");
506 
507  for (const auto &Match : Matches) {
// Seed the reference distance from the first match.
508  if (Distance == 0)
509  Distance = Match.Distance;
510  if (std::max(Distance, Match.Distance) -
511  std::min(Distance, Match.Distance) >
512  3)
513  break;
514  Distance = Match.Distance;
515 
// Render the candidate character itself as UTF-8 for display in the note.
516  std::string Str;
517  llvm::UTF32 V = Match.Value;
518  LLVM_ATTRIBUTE_UNUSED bool Converted =
519  llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&V, 1), Str);
520  assert(Converted && "Found a match wich is not a unicode character");
521 
522  Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
523  diag::note_invalid_ucn_name_candidate)
524  << Match.Name << llvm::utohexstr(Match.Value)
525  << Str // FIXME: Fix the rendering of non printable characters
527  MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
528  TokRangeEnd),
529  Match.Name);
530  }
531 }
532 
// Parse a named universal character escape of the form \N{name}.
//
// On entry ThisTokBuf points at the backslash. On success UcnVal receives the
// code point, UcnLen is set to 8 for code points above 0xFFFF and to 4
// otherwise, ThisTokBuf points just past the closing brace, and true is
// returned. On failure a diagnostic is emitted (when Diags is non-null) and
// false is returned.
533 static bool ProcessNamedUCNEscape(const char *ThisTokBegin,
534  const char *&ThisTokBuf,
535  const char *ThisTokEnd, uint32_t &UcnVal,
536  unsigned short &UcnLen, FullSourceLoc Loc,
537  DiagnosticsEngine *Diags,
538  const LangOptions &Features) {
539  const char *UcnBegin = ThisTokBuf;
540  assert(UcnBegin[0] == '\\' && UcnBegin[1] == 'N');
// Step over "\N"; an opening brace must follow.
541  ThisTokBuf += 2;
542  if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
543  if (Diags) {
544  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
545  diag::err_delimited_escape_missing_brace)
546  << StringRef(&ThisTokBuf[-1], 1);
547  }
548  return false;
549  }
550  ThisTokBuf++;
// Scan over the characters accepted inside a name (alphanumerics, whitespace,
// '_' and '-'); a well-formed escape stops on the closing brace.
551  const char *ClosingBrace =
552  std::find_if_not(ThisTokBuf, ThisTokEnd, [](char C) {
553  return llvm::isAlnum(C) || llvm::isSpace(C) || C == '_' || C == '-';
554  });
555  bool Incomplete = ClosingBrace == ThisTokEnd || *ClosingBrace != '}';
556  bool Empty = ClosingBrace == ThisTokBuf;
557  if (Incomplete || Empty) {
558  if (Diags) {
559  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
560  Incomplete ? diag::err_ucn_escape_incomplete
561  : diag::err_delimited_escape_empty)
562  << StringRef(&UcnBegin[1], 1);
563  }
// Resume after the character that stopped the scan, unless the scan ran to
// the end of the literal.
564  ThisTokBuf = ClosingBrace == ThisTokEnd ? ClosingBrace : ClosingBrace + 1;
565  return false;
566  }
567  StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf);
568  ThisTokBuf = ClosingBrace + 1;
// NOTE(review): the listing dropped the declarator line (gutter line 569)
// that receives the strict lookup result used below as `Res` -- restore from
// the original file.
570  llvm::sys::unicode::nameToCodepointStrict(Name);
571  if (!Res) {
572  if (Diags)
573  DiagnoseInvalidUnicodeCharacterName(Diags, Features, Loc, ThisTokBegin,
574  &UcnBegin[3], ClosingBrace, Name);
575  return false;
576  }
577  UcnVal = *Res;
578  UcnLen = UcnVal > 0xFFFF ? 8 : 4;
579  return true;
580 }
581 
582 /// ProcessUCNEscape - Read the Universal Character Name, check constraints and
583 /// return the UTF32.
584 static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
585  const char *ThisTokEnd, uint32_t &UcnVal,
586  unsigned short &UcnLen, FullSourceLoc Loc,
587  DiagnosticsEngine *Diags,
588  const LangOptions &Features,
589  bool in_char_string_literal = false) {
590 
591  bool HasError;
592  const char *UcnBegin = ThisTokBuf;
593  bool IsDelimitedEscapeSequence = false;
594  bool IsNamedEscapeSequence = false;
595  if (ThisTokBuf[1] == 'N') {
596  IsNamedEscapeSequence = true;
597  HasError = !ProcessNamedUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
598  UcnVal, UcnLen, Loc, Diags, Features);
599  } else {
600  HasError =
601  !ProcessNumericUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
602  UcnLen, IsDelimitedEscapeSequence, Loc, Diags,
603  Features, in_char_string_literal);
604  }
605  if (HasError)
606  return false;
607 
608  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
609  if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
610  UcnVal > 0x10FFFF) { // maximum legal UTF32 value
611  if (Diags)
612  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
613  diag::err_ucn_escape_invalid);
614  return false;
615  }
616 
617  // C++11 allows UCNs that refer to control characters and basic source
618  // characters inside character and string literals
619  if (UcnVal < 0xa0 &&
620  (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
621  bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
622  if (Diags) {
623  char BasicSCSChar = UcnVal;
624  if (UcnVal >= 0x20 && UcnVal < 0x7f)
625  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
626  IsError ? diag::err_ucn_escape_basic_scs :
627  diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
628  << StringRef(&BasicSCSChar, 1);
629  else
630  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
631  IsError ? diag::err_ucn_control_character :
632  diag::warn_cxx98_compat_literal_ucn_control_character);
633  }
634  if (IsError)
635  return false;
636  }
637 
638  if (!Features.CPlusPlus && !Features.C99 && Diags)
639  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
640  diag::warn_ucn_not_valid_in_c89_literal);
641 
642  if ((IsDelimitedEscapeSequence || IsNamedEscapeSequence) && Diags)
643  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
644  Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence
645  : diag::ext_delimited_escape_sequence)
646  << (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0);
647 
648  return true;
649 }
650 
651 /// MeasureUCNEscape - Determine the number of bytes within the resulting string
652 /// which this UCN will occupy.
653 static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
654  const char *ThisTokEnd, unsigned CharByteWidth,
655  const LangOptions &Features, bool &HadError) {
656  // UTF-32: 4 bytes per escape.
657  if (CharByteWidth == 4)
658  return 4;
659 
660  uint32_t UcnVal = 0;
661  unsigned short UcnLen = 0;
662  FullSourceLoc Loc;
663 
664  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
665  UcnLen, Loc, nullptr, Features, true)) {
666  HadError = true;
667  return 0;
668  }
669 
670  // UTF-16: 2 bytes for BMP, 4 bytes otherwise.
671  if (CharByteWidth == 2)
672  return UcnVal <= 0xFFFF ? 2 : 4;
673 
674  // UTF-8.
675  if (UcnVal < 0x80)
676  return 1;
677  if (UcnVal < 0x800)
678  return 2;
679  if (UcnVal < 0x10000)
680  return 3;
681  return 4;
682 }
683 
684 /// EncodeUCNEscape - Read the Universal Character Name, check constraints and
685 /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
686 /// StringLiteralParser. When we decide to implement UCN's for identifiers,
687 /// we will likely rework our support for UCN's.
688 static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
689  const char *ThisTokEnd,
690  char *&ResultBuf, bool &HadError,
691  FullSourceLoc Loc, unsigned CharByteWidth,
692  DiagnosticsEngine *Diags,
693  const LangOptions &Features) {
694  typedef uint32_t UTF32;
695  UTF32 UcnVal = 0;
696  unsigned short UcnLen = 0;
697  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
698  Loc, Diags, Features, true)) {
699  HadError = true;
700  return;
701  }
702 
703  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
704  "only character widths of 1, 2, or 4 bytes supported");
705 
706  (void)UcnLen;
707  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
708 
709  if (CharByteWidth == 4) {
710  // FIXME: Make the type of the result buffer correct instead of
711  // using reinterpret_cast.
712  llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
713  *ResultPtr = UcnVal;
714  ResultBuf += 4;
715  return;
716  }
717 
718  if (CharByteWidth == 2) {
719  // FIXME: Make the type of the result buffer correct instead of
720  // using reinterpret_cast.
721  llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
722 
723  if (UcnVal <= (UTF32)0xFFFF) {
724  *ResultPtr = UcnVal;
725  ResultBuf += 2;
726  return;
727  }
728 
729  // Convert to UTF16.
730  UcnVal -= 0x10000;
731  *ResultPtr = 0xD800 + (UcnVal >> 10);
732  *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
733  ResultBuf += 4;
734  return;
735  }
736 
737  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
738 
739  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
740  // The conversion below was inspired by:
741  // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
742  // First, we determine how many bytes the result will require.
743  typedef uint8_t UTF8;
744 
745  unsigned short bytesToWrite = 0;
746  if (UcnVal < (UTF32)0x80)
747  bytesToWrite = 1;
748  else if (UcnVal < (UTF32)0x800)
749  bytesToWrite = 2;
750  else if (UcnVal < (UTF32)0x10000)
751  bytesToWrite = 3;
752  else
753  bytesToWrite = 4;
754 
755  const unsigned byteMask = 0xBF;
756  const unsigned byteMark = 0x80;
757 
758  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
759  // into the first byte, depending on how many bytes follow.
760  static const UTF8 firstByteMark[5] = {
761  0x00, 0x00, 0xC0, 0xE0, 0xF0
762  };
763  // Finally, we write the bytes into ResultBuf.
764  ResultBuf += bytesToWrite;
765  switch (bytesToWrite) { // note: everything falls through.
766  case 4:
767  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
768  [[fallthrough]];
769  case 3:
770  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
771  [[fallthrough]];
772  case 2:
773  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
774  [[fallthrough]];
775  case 1:
776  *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
777  }
778  // Update the buffer.
779  ResultBuf += bytesToWrite;
780 }
781 
782 /// integer-constant: [C99 6.4.4.1]
783 /// decimal-constant integer-suffix
784 /// octal-constant integer-suffix
785 /// hexadecimal-constant integer-suffix
786 /// binary-literal integer-suffix [GNU, C++1y]
787 /// user-defined-integer-literal: [C++11 lex.ext]
788 /// decimal-literal ud-suffix
789 /// octal-literal ud-suffix
790 /// hexadecimal-literal ud-suffix
791 /// binary-literal ud-suffix [GNU, C++1y]
792 /// decimal-constant:
793 /// nonzero-digit
794 /// decimal-constant digit
795 /// octal-constant:
796 /// 0
797 /// octal-constant octal-digit
798 /// hexadecimal-constant:
799 /// hexadecimal-prefix hexadecimal-digit
800 /// hexadecimal-constant hexadecimal-digit
801 /// hexadecimal-prefix: one of
802 /// 0x 0X
803 /// binary-literal:
804 /// 0b binary-digit
805 /// 0B binary-digit
806 /// binary-literal binary-digit
807 /// integer-suffix:
808 /// unsigned-suffix [long-suffix]
809 /// unsigned-suffix [long-long-suffix]
810 /// long-suffix [unsigned-suffix]
811 /// long-long-suffix [unsigned-suffix]
812 /// nonzero-digit:
813 /// 1 2 3 4 5 6 7 8 9
814 /// octal-digit:
815 /// 0 1 2 3 4 5 6 7
816 /// hexadecimal-digit:
817 /// 0 1 2 3 4 5 6 7 8 9
818 /// a b c d e f
819 /// A B C D E F
820 /// binary-digit:
821 /// 0
822 /// 1
823 /// unsigned-suffix: one of
824 /// u U
825 /// long-suffix: one of
826 /// l L
827 /// long-long-suffix: one of
828 /// ll LL
829 ///
830 /// floating-constant: [C99 6.4.4.2]
831 /// TODO: add rules...
832 ///
// Constructor of the numeric literal parser: scans the spelling of one
// pp-number token, determines its radix and digit range, and classifies its
// suffix into the is*/saw_* flags. Problems are reported through Diags and
// recorded in hadError.
//
// NOTE(review): the listing dropped the first line of the signature (gutter
// line 833, which declares the `TokSpelling` parameter used in the
// initializer list) and the opening of the Diags.Report(...) call near the
// end (gutter line 1097) -- restore from the original file.
834  SourceLocation TokLoc,
835  const SourceManager &SM,
836  const LangOptions &LangOpts,
837  const TargetInfo &Target,
838  DiagnosticsEngine &Diags)
839  : SM(SM), LangOpts(LangOpts), Diags(Diags),
840  ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
841 
// Start with the cursor at the first character and every classification flag
// cleared.
842  s = DigitsBegin = ThisTokBegin;
843  saw_exponent = false;
844  saw_period = false;
845  saw_ud_suffix = false;
846  saw_fixed_point_suffix = false;
847  isLong = false;
848  isUnsigned = false;
849  isLongLong = false;
850  isSizeT = false;
851  isHalf = false;
852  isFloat = false;
853  isImaginary = false;
854  isFloat16 = false;
855  isFloat128 = false;
856  MicrosoftInteger = 0;
857  isFract = false;
858  isAccum = false;
859  hadError = false;
860  isBitInt = false;
861 
862  // This routine assumes that the range begin/end matches the regex for integer
863  // and FP constants (specifically, the 'pp-number' regex), and assumes that
864  // the byte at "*end" is both valid and not part of the regex. Because of
865  // this, it doesn't have to check for 'overscan' in various places.
866  if (isPreprocessingNumberBody(*ThisTokEnd)) {
867  Diags.Report(TokLoc, diag::err_lexing_numeric);
868  hadError = true;
869  return;
870  }
871 
872  if (*s == '0') { // parse radix
873  ParseNumberStartingWithZero(TokLoc);
874  if (hadError)
875  return;
876  } else { // the first digit is non-zero
877  radix = 10;
878  s = SkipDigits(s);
879  if (s == ThisTokEnd) {
880  // Done.
881  } else {
882  ParseDecimalOrOctalCommon(TokLoc);
883  if (hadError)
884  return;
885  }
886  }
887 
// Everything from here to the end of the token is treated as the suffix.
888  SuffixBegin = s;
889  checkSeparator(TokLoc, s, CSK_AfterDigits);
890 
891  // Initial scan to lookahead for fixed point suffix.
892  if (LangOpts.FixedPoint) {
893  for (const char *c = s; c != ThisTokEnd; ++c) {
894  if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
895  saw_fixed_point_suffix = true;
896  break;
897  }
898  }
899  }
900 
901  // Parse the suffix. At this point we can classify whether we have an FP or
902  // integer constant.
903  bool isFixedPointConstant = isFixedPointLiteral();
904  bool isFPConstant = isFloatingLiteral();
// HasSize records that a size-selecting suffix has already been consumed, so
// that a second one is rejected.
905  bool HasSize = false;
906 
907  // Loop over all of the characters of the suffix. If we see something bad,
908  // we break out of the loop.
// Inside the switch, `continue` accepts the current suffix character(s) and
// moves on; `break` leaves the switch and reaches the loop-exiting `break`
// that follows it.
909  for (; s != ThisTokEnd; ++s) {
910  switch (*s) {
911  case 'R':
912  case 'r':
913  if (!LangOpts.FixedPoint)
914  break;
915  if (isFract || isAccum) break;
916  if (!(saw_period || saw_exponent)) break;
917  isFract = true;
918  continue;
919  case 'K':
920  case 'k':
921  if (!LangOpts.FixedPoint)
922  break;
923  if (isFract || isAccum) break;
924  if (!(saw_period || saw_exponent)) break;
925  isAccum = true;
926  continue;
927  case 'h': // FP Suffix for "half".
928  case 'H':
929  // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
930  if (!(LangOpts.Half || LangOpts.FixedPoint))
931  break;
932  if (isIntegerLiteral()) break; // Error for integer constant.
933  if (HasSize)
934  break;
935  HasSize = true;
936  isHalf = true;
937  continue; // Success.
938  case 'f': // FP Suffix for "float"
939  case 'F':
940  if (!isFPConstant) break; // Error for integer constant.
941  if (HasSize)
942  break;
943  HasSize = true;
944 
945  // CUDA host and device may have different _Float16 support, therefore
946  // allows f16 literals to avoid false alarm.
947  // ToDo: more precise check for CUDA.
948  if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd &&
949  s[1] == '1' && s[2] == '6') {
950  s += 2; // success, eat up 2 characters.
951  isFloat16 = true;
952  continue;
953  }
954 
955  isFloat = true;
956  continue; // Success.
957  case 'q': // FP Suffix for "__float128"
958  case 'Q':
959  if (!isFPConstant) break; // Error for integer constant.
960  if (HasSize)
961  break;
962  HasSize = true;
963  isFloat128 = true;
964  continue; // Success.
965  case 'u':
966  case 'U':
967  if (isFPConstant) break; // Error for floating constant.
968  if (isUnsigned) break; // Cannot be repeated.
969  isUnsigned = true;
970  continue; // Success.
971  case 'l':
972  case 'L':
973  if (HasSize)
974  break;
975  HasSize = true;
976 
977  // Check for long long. The L's need to be adjacent and the same case.
978  if (s[1] == s[0]) {
979  assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
980  if (isFPConstant) break; // long long invalid for floats.
981  isLongLong = true;
982  ++s; // Eat both of them.
983  } else {
984  isLong = true;
985  }
986  continue; // Success.
987  case 'z':
988  case 'Z':
989  if (isFPConstant)
990  break; // Invalid for floats.
991  if (HasSize)
992  break;
993  HasSize = true;
994  isSizeT = true;
995  continue;
996  case 'i':
997  case 'I':
998  if (LangOpts.MicrosoftExt && !isFPConstant) {
999  // Allow i8, i16, i32, and i64. First, look ahead and check if
1000  // suffixes are Microsoft integers and not the imaginary unit.
1001  uint8_t Bits = 0;
1002  size_t ToSkip = 0;
1003  switch (s[1]) {
1004  case '8': // i8 suffix
1005  Bits = 8;
1006  ToSkip = 2;
1007  break;
1008  case '1':
1009  if (s[2] == '6') { // i16 suffix
1010  Bits = 16;
1011  ToSkip = 3;
1012  }
1013  break;
1014  case '3':
1015  if (s[2] == '2') { // i32 suffix
1016  Bits = 32;
1017  ToSkip = 3;
1018  }
1019  break;
1020  case '6':
1021  if (s[2] == '4') { // i64 suffix
1022  Bits = 64;
1023  ToSkip = 3;
1024  }
1025  break;
1026  default:
1027  break;
1028  }
1029  if (Bits) {
1030  if (HasSize)
1031  break;
1032  HasSize = true;
1033  MicrosoftInteger = Bits;
// Skip the whole iNN suffix; the break below then ends the suffix loop with s
// positioned after it.
1034  s += ToSkip;
1035  assert(s <= ThisTokEnd && "didn't maximally munch?");
1036  break;
1037  }
1038  }
1039  [[fallthrough]];
1040  case 'j':
1041  case 'J':
1042  if (isImaginary) break; // Cannot be repeated.
1043  isImaginary = true;
1044  continue; // Success.
1045  case 'w':
1046  case 'W':
1047  if (isFPConstant)
1048  break; // Invalid for floats.
1049  if (HasSize)
1050  break; // Invalid if we already have a size for the literal.
1051 
1052  // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
1053  // explicitly do not support the suffix in C++ as an extension because a
1054  // library-based UDL that resolves to a library type may be more
1055  // appropriate there.
1056  if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
1057  (s[0] == 'W' && s[1] == 'B'))) {
1058  isBitInt = true;
1059  HasSize = true;
1060  ++s; // Skip both characters (2nd char skipped on continue).
1061  continue; // Success.
1062  }
1063  }
1064  // If we reached here, there was an error or a ud-suffix.
1065  break;
1066  }
1067 
1068  // "i", "if", and "il" are user-defined suffixes in C++1y.
1069  if (s != ThisTokEnd || isImaginary) {
1070  // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
1071  expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
1072  if (isValidUDSuffix(LangOpts, UDSuffixBuf)) {
1073  if (!isImaginary) {
1074  // Any suffix pieces we might have parsed are actually part of the
1075  // ud-suffix.
1076  isLong = false;
1077  isUnsigned = false;
1078  isLongLong = false;
1079  isSizeT = false;
1080  isFloat = false;
1081  isFloat16 = false;
1082  isHalf = false;
1083  isImaginary = false;
1084  isBitInt = false;
1085  MicrosoftInteger = 0;
1086  saw_fixed_point_suffix = false;
1087  isFract = false;
1088  isAccum = false;
1089  }
1090 
1091  saw_ud_suffix = true;
1092  return;
1093  }
1094 
1095  if (s != ThisTokEnd) {
1096  // Report an error if there are any.
// NOTE(review): the line opening this Diags.Report(...) call (gutter line
// 1097) is missing from the listing. The visible arguments place the
// diagnostic at the start of the suffix.
1098  TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
1099  diag::err_invalid_suffix_constant)
1100  << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
1101  << (isFixedPointConstant ? 2 : isFPConstant);
1102  hadError = true;
1103  }
1104  }
1105 
// Consistency check: if the lookahead saw a fixed-point suffix letter and no
// error occurred, one of the two fixed-point flags must have been set.
1106  if (!hadError && saw_fixed_point_suffix) {
1107  assert(isFract || isAccum);
1108  }
1109 }
1110 
1111 /// ParseDecimalOrOctalCommon - This method is called for decimal or octal
1112 /// numbers. It issues an error for illegal digits, and handles floating point
1113 /// parsing. If it detects a floating point number, the radix is set to 10.
1114 void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
1115  assert((radix == 8 || radix == 10) && "Unexpected radix");
1116 
1117  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
1118  // the code is using an incorrect base.
1119  if (isHexDigit(*s) && *s != 'e' && *s != 'E' &&
1120  !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1121  Diags.Report(
1122  Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts),
1123  diag::err_invalid_digit)
1124  << StringRef(s, 1) << (radix == 8 ? 1 : 0);
1125  hadError = true;
1126  return;
1127  }
1128 
1129  if (*s == '.') {
1130  checkSeparator(TokLoc, s, CSK_AfterDigits);
1131  s++;
1132  radix = 10;
1133  saw_period = true;
1134  checkSeparator(TokLoc, s, CSK_BeforeDigits);
1135  s = SkipDigits(s); // Skip suffix.
1136  }
1137  if (*s == 'e' || *s == 'E') { // exponent
1138  checkSeparator(TokLoc, s, CSK_AfterDigits);
1139  const char *Exponent = s;
1140  s++;
1141  radix = 10;
1142  saw_exponent = true;
1143  if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
1144  const char *first_non_digit = SkipDigits(s);
1145  if (containsDigits(s, first_non_digit)) {
1146  checkSeparator(TokLoc, s, CSK_BeforeDigits);
1147  s = first_non_digit;
1148  } else {
1149  if (!hadError) {
1151  TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1152  diag::err_exponent_has_no_digits);
1153  hadError = true;
1154  }
1155  return;
1156  }
1157  }
1158 }
1159 
1160 /// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
1161 /// suffixes as ud-suffixes, because the diagnostic experience is better if we
1162 /// treat it as an invalid suffix.
1164  StringRef Suffix) {
1165  if (!LangOpts.CPlusPlus11 || Suffix.empty())
1166  return false;
1167 
1168  // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
1169  if (Suffix[0] == '_')
1170  return true;
1171 
1172  // In C++11, there are no library suffixes.
1173  if (!LangOpts.CPlusPlus14)
1174  return false;
1175 
1176  // In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
1177  // Per tweaked N3660, "il", "i", and "if" are also used in the library.
1178  // In C++2a "d" and "y" are used in the library.
1179  return llvm::StringSwitch<bool>(Suffix)
1180  .Cases("h", "min", "s", true)
1181  .Cases("ms", "us", "ns", true)
1182  .Cases("il", "i", "if", true)
1183  .Cases("d", "y", LangOpts.CPlusPlus20)
1184  .Default(false);
1185 }
1186 
1187 void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
1188  const char *Pos,
1189  CheckSeparatorKind IsAfterDigits) {
1190  if (IsAfterDigits == CSK_AfterDigits) {
1191  if (Pos == ThisTokBegin)
1192  return;
1193  --Pos;
1194  } else if (Pos == ThisTokEnd)
1195  return;
1196 
1197  if (isDigitSeparator(*Pos)) {
1198  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM,
1199  LangOpts),
1200  diag::err_digit_separator_not_between_digits)
1201  << IsAfterDigits;
1202  hadError = true;
1203  }
1204 }
1205 
1206 /// ParseNumberStartingWithZero - This method is called when the first character
1207 /// of the number is found to be a zero. This means it is either an octal
1208 /// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
1209 /// a floating point number (01239.123e4). Eat the prefix, determining the
1210 /// radix etc.
1211 void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
1212  assert(s[0] == '0' && "Invalid method call");
1213  s++;
1214 
1215  int c1 = s[0];
1216 
1217  // Handle a hex number like 0x1234.
1218  if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
1219  s++;
1220  assert(s < ThisTokEnd && "didn't maximally munch?");
1221  radix = 16;
1222  DigitsBegin = s;
1223  s = SkipHexDigits(s);
1224  bool HasSignificandDigits = containsDigits(DigitsBegin, s);
1225  if (s == ThisTokEnd) {
1226  // Done.
1227  } else if (*s == '.') {
1228  s++;
1229  saw_period = true;
1230  const char *floatDigitsBegin = s;
1231  s = SkipHexDigits(s);
1232  if (containsDigits(floatDigitsBegin, s))
1233  HasSignificandDigits = true;
1234  if (HasSignificandDigits)
1235  checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
1236  }
1237 
1238  if (!HasSignificandDigits) {
1239  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1240  LangOpts),
1241  diag::err_hex_constant_requires)
1242  << LangOpts.CPlusPlus << 1;
1243  hadError = true;
1244  return;
1245  }
1246 
1247  // A binary exponent can appear with or with a '.'. If dotted, the
1248  // binary exponent is required.
1249  if (*s == 'p' || *s == 'P') {
1250  checkSeparator(TokLoc, s, CSK_AfterDigits);
1251  const char *Exponent = s;
1252  s++;
1253  saw_exponent = true;
1254  if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
1255  const char *first_non_digit = SkipDigits(s);
1256  if (!containsDigits(s, first_non_digit)) {
1257  if (!hadError) {
1259  TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1260  diag::err_exponent_has_no_digits);
1261  hadError = true;
1262  }
1263  return;
1264  }
1265  checkSeparator(TokLoc, s, CSK_BeforeDigits);
1266  s = first_non_digit;
1267 
1268  if (!LangOpts.HexFloats)
1269  Diags.Report(TokLoc, LangOpts.CPlusPlus
1270  ? diag::ext_hex_literal_invalid
1271  : diag::ext_hex_constant_invalid);
1272  else if (LangOpts.CPlusPlus17)
1273  Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
1274  } else if (saw_period) {
1275  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1276  LangOpts),
1277  diag::err_hex_constant_requires)
1278  << LangOpts.CPlusPlus << 0;
1279  hadError = true;
1280  }
1281  return;
1282  }
1283 
1284  // Handle simple binary numbers 0b01010
1285  if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
1286  // 0b101010 is a C++1y / GCC extension.
1287  Diags.Report(TokLoc, LangOpts.CPlusPlus14
1288  ? diag::warn_cxx11_compat_binary_literal
1289  : LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14
1290  : diag::ext_binary_literal);
1291  ++s;
1292  assert(s < ThisTokEnd && "didn't maximally munch?");
1293  radix = 2;
1294  DigitsBegin = s;
1295  s = SkipBinaryDigits(s);
1296  if (s == ThisTokEnd) {
1297  // Done.
1298  } else if (isHexDigit(*s) &&
1299  !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1300  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1301  LangOpts),
1302  diag::err_invalid_digit)
1303  << StringRef(s, 1) << 2;
1304  hadError = true;
1305  }
1306  // Other suffixes will be diagnosed by the caller.
1307  return;
1308  }
1309 
1310  // For now, the radix is set to 8. If we discover that we have a
1311  // floating point constant, the radix will change to 10. Octal floating
1312  // point constants are not permitted (only decimal and hexadecimal).
1313  radix = 8;
1314  const char *PossibleNewDigitStart = s;
1315  s = SkipOctalDigits(s);
1316  // When the value is 0 followed by a suffix (like 0wb), we want to leave 0
1317  // as the start of the digits. So if skipping octal digits does not skip
1318  // anything, we leave the digit start where it was.
1319  if (s != PossibleNewDigitStart)
1320  DigitsBegin = PossibleNewDigitStart;
1321 
1322  if (s == ThisTokEnd)
1323  return; // Done, simple octal number like 01234
1324 
1325  // If we have some other non-octal digit that *is* a decimal digit, see if
1326  // this is part of a floating point number like 094.123 or 09e1.
1327  if (isDigit(*s)) {
1328  const char *EndDecimal = SkipDigits(s);
1329  if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
1330  s = EndDecimal;
1331  radix = 10;
1332  }
1333  }
1334 
1335  ParseDecimalOrOctalCommon(TokLoc);
1336 }
1337 
1338 static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
1339  switch (Radix) {
1340  case 2:
1341  return NumDigits <= 64;
1342  case 8:
1343  return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
1344  case 10:
1345  return NumDigits <= 19; // floor(log10(2^64))
1346  case 16:
1347  return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
1348  default:
1349  llvm_unreachable("impossible Radix");
1350  }
1351 }
1352 
1353 /// GetIntegerValue - Convert this numeric literal value to an APInt that
1354 /// matches Val's input width. If there is an overflow, set Val to the low bits
1355 /// of the result and return true. Otherwise, return false.
1357  // Fast path: Compute a conservative bound on the maximum number of
1358  // bits per digit in this radix. If we can't possibly overflow a
1359  // uint64 based on that bound then do the simple conversion to
1360  // integer. This avoids the expensive overflow checking below, and
1361  // handles the common cases that matter (small decimal integers and
1362  // hex/octal values which don't overflow).
1363  const unsigned NumDigits = SuffixBegin - DigitsBegin;
1364  if (alwaysFitsInto64Bits(radix, NumDigits)) {
1365  uint64_t N = 0;
1366  for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
1367  if (!isDigitSeparator(*Ptr))
1368  N = N * radix + llvm::hexDigitValue(*Ptr);
1369 
1370  // This will truncate the value to Val's input width. Simply check
1371  // for overflow by comparing.
1372  Val = N;
1373  return Val.getZExtValue() != N;
1374  }
1375 
1376  Val = 0;
1377  const char *Ptr = DigitsBegin;
1378 
1379  llvm::APInt RadixVal(Val.getBitWidth(), radix);
1380  llvm::APInt CharVal(Val.getBitWidth(), 0);
1381  llvm::APInt OldVal = Val;
1382 
1383  bool OverflowOccurred = false;
1384  while (Ptr < SuffixBegin) {
1385  if (isDigitSeparator(*Ptr)) {
1386  ++Ptr;
1387  continue;
1388  }
1389 
1390  unsigned C = llvm::hexDigitValue(*Ptr++);
1391 
1392  // If this letter is out of bound for this radix, reject it.
1393  assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1394 
1395  CharVal = C;
1396 
1397  // Add the digit to the value in the appropriate radix. If adding in digits
1398  // made the value smaller, then this overflowed.
1399  OldVal = Val;
1400 
1401  // Multiply by radix, did overflow occur on the multiply?
1402  Val *= RadixVal;
1403  OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
1404 
1405  // Add value, did overflow occur on the value?
1406  // (a + b) ult b <=> overflow
1407  Val += CharVal;
1408  OverflowOccurred |= Val.ult(CharVal);
1409  }
1410  return OverflowOccurred;
1411 }
1412 
1413 llvm::APFloat::opStatus
1414 NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
1415  using llvm::APFloat;
1416 
1417  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
1418 
1419  llvm::SmallString<16> Buffer;
1420  StringRef Str(ThisTokBegin, n);
1421  if (Str.contains('\'')) {
1422  Buffer.reserve(n);
1423  std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
1424  &isDigitSeparator);
1425  Str = Buffer;
1426  }
1427 
1428  auto StatusOrErr =
1429  Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
1430  assert(StatusOrErr && "Invalid floating point representation");
1431  return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
1432  : APFloat::opInvalidOp;
1433 }
1434 
/// Returns true if \p c introduces an exponent: 'e'/'E' (decimal) or
/// 'p'/'P' (binary, used by hexadecimal literals).
static inline bool IsExponentPart(char c) {
  switch (c) {
  case 'p':
  case 'P':
  case 'e':
  case 'E':
    return true;
  default:
    return false;
  }
}
1438 
1440  assert(radix == 16 || radix == 10);
1441 
1442  // Find how many digits are needed to store the whole literal.
1443  unsigned NumDigits = SuffixBegin - DigitsBegin;
1444  if (saw_period) --NumDigits;
1445 
1446  // Initial scan of the exponent if it exists
1447  bool ExpOverflowOccurred = false;
1448  bool NegativeExponent = false;
1449  const char *ExponentBegin;
1450  uint64_t Exponent = 0;
1451  int64_t BaseShift = 0;
1452  if (saw_exponent) {
1453  const char *Ptr = DigitsBegin;
1454 
1455  while (!IsExponentPart(*Ptr)) ++Ptr;
1456  ExponentBegin = Ptr;
1457  ++Ptr;
1458  NegativeExponent = *Ptr == '-';
1459  if (NegativeExponent) ++Ptr;
1460 
1461  unsigned NumExpDigits = SuffixBegin - Ptr;
1462  if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
1463  llvm::StringRef ExpStr(Ptr, NumExpDigits);
1464  llvm::APInt ExpInt(/*numBits=*/64, ExpStr, /*radix=*/10);
1465  Exponent = ExpInt.getZExtValue();
1466  } else {
1467  ExpOverflowOccurred = true;
1468  }
1469 
1470  if (NegativeExponent) BaseShift -= Exponent;
1471  else BaseShift += Exponent;
1472  }
1473 
1474  // Number of bits needed for decimal literal is
1475  // ceil(NumDigits * log2(10)) Integral part
1476  // + Scale Fractional part
1477  // + ceil(Exponent * log2(10)) Exponent
1478  // --------------------------------------------------
1479  // ceil((NumDigits + Exponent) * log2(10)) + Scale
1480  //
1481  // But for simplicity in handling integers, we can round up log2(10) to 4,
1482  // making:
1483  // 4 * (NumDigits + Exponent) + Scale
1484  //
1485  // Number of digits needed for hexadecimal literal is
1486  // 4 * NumDigits Integral part
1487  // + Scale Fractional part
1488  // + Exponent Exponent
1489  // --------------------------------------------------
1490  // (4 * NumDigits) + Scale + Exponent
1491  uint64_t NumBitsNeeded;
1492  if (radix == 10)
1493  NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
1494  else
1495  NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
1496 
1497  if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
1498  ExpOverflowOccurred = true;
1499  llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /*isSigned=*/false);
1500 
1501  bool FoundDecimal = false;
1502 
1503  int64_t FractBaseShift = 0;
1504  const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
1505  for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
1506  if (*Ptr == '.') {
1507  FoundDecimal = true;
1508  continue;
1509  }
1510 
1511  // Normal reading of an integer
1512  unsigned C = llvm::hexDigitValue(*Ptr);
1513  assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1514 
1515  Val *= radix;
1516  Val += C;
1517 
1518  if (FoundDecimal)
1519  // Keep track of how much we will need to adjust this value by from the
1520  // number of digits past the radix point.
1521  --FractBaseShift;
1522  }
1523 
1524  // For a radix of 16, we will be multiplying by 2 instead of 16.
1525  if (radix == 16) FractBaseShift *= 4;
1526  BaseShift += FractBaseShift;
1527 
1528  Val <<= Scale;
1529 
1530  uint64_t Base = (radix == 16) ? 2 : 10;
1531  if (BaseShift > 0) {
1532  for (int64_t i = 0; i < BaseShift; ++i) {
1533  Val *= Base;
1534  }
1535  } else if (BaseShift < 0) {
1536  for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
1537  Val = Val.udiv(Base);
1538  }
1539 
1540  bool IntOverflowOccurred = false;
1541  auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
1542  if (Val.getBitWidth() > StoreVal.getBitWidth()) {
1543  IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));
1544  StoreVal = Val.trunc(StoreVal.getBitWidth());
1545  } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
1546  IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
1547  StoreVal = Val.zext(StoreVal.getBitWidth());
1548  } else {
1549  StoreVal = Val;
1550  }
1551 
1552  return IntOverflowOccurred || ExpOverflowOccurred;
1553 }
1554 
1555 /// \verbatim
1556 /// user-defined-character-literal: [C++11 lex.ext]
1557 /// character-literal ud-suffix
1558 /// ud-suffix:
1559 /// identifier
1560 /// character-literal: [C++11 lex.ccon]
1561 /// ' c-char-sequence '
1562 /// u' c-char-sequence '
1563 /// U' c-char-sequence '
1564 /// L' c-char-sequence '
1565 /// u8' c-char-sequence ' [C++1z lex.ccon]
1566 /// c-char-sequence:
1567 /// c-char
1568 /// c-char-sequence c-char
1569 /// c-char:
1570 /// any member of the source character set except the single-quote ',
1571 /// backslash \, or new-line character
1572 /// escape-sequence
1573 /// universal-character-name
1574 /// escape-sequence:
1575 /// simple-escape-sequence
1576 /// octal-escape-sequence
1577 /// hexadecimal-escape-sequence
1578 /// simple-escape-sequence:
1579 /// one of \' \" \? \\ \a \b \f \n \r \t \v
1580 /// octal-escape-sequence:
1581 /// \ octal-digit
1582 /// \ octal-digit octal-digit
1583 /// \ octal-digit octal-digit octal-digit
1584 /// hexadecimal-escape-sequence:
1585 /// \x hexadecimal-digit
1586 /// hexadecimal-escape-sequence hexadecimal-digit
1587 /// universal-character-name: [C++11 lex.charset]
1588 /// \u hex-quad
1589 /// \U hex-quad hex-quad
1590 /// hex-quad:
1591 /// hex-digit hex-digit hex-digit hex-digit
1592 /// \endverbatim
1593 ///
1594 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
1595  SourceLocation Loc, Preprocessor &PP,
1596  tok::TokenKind kind) {
1597  // At this point we know that the character matches the regex "(L|u|U)?'.*'".
1598  HadError = false;
1599 
1600  Kind = kind;
1601 
1602  const char *TokBegin = begin;
1603 
1604  // Skip over wide character determinant.
1605  if (Kind != tok::char_constant)
1606  ++begin;
1607  if (Kind == tok::utf8_char_constant)
1608  ++begin;
1609 
1610  // Skip over the entry quote.
1611  if (begin[0] != '\'') {
1612  PP.Diag(Loc, diag::err_lexing_char);
1613  HadError = true;
1614  return;
1615  }
1616 
1617  ++begin;
1618 
1619  // Remove an optional ud-suffix.
1620  if (end[-1] != '\'') {
1621  const char *UDSuffixEnd = end;
1622  do {
1623  --end;
1624  } while (end[-1] != '\'');
1625  // FIXME: Don't bother with this if !tok.hasUCN().
1626  expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
1627  UDSuffixOffset = end - TokBegin;
1628  }
1629 
1630  // Trim the ending quote.
1631  assert(end != begin && "Invalid token lexed");
1632  --end;
1633 
1634  // FIXME: The "Value" is an uint64_t so we can handle char literals of
1635  // up to 64-bits.
1636  // FIXME: This extensively assumes that 'char' is 8-bits.
1637  assert(PP.getTargetInfo().getCharWidth() == 8 &&
1638  "Assumes char is 8 bits");
1639  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
1640  (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
1641  "Assumes sizeof(int) on target is <= 64 and a multiple of char");
1642  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
1643  "Assumes sizeof(wchar) on target is <= 64");
1644 
1645  SmallVector<uint32_t, 4> codepoint_buffer;
1646  codepoint_buffer.resize(end - begin);
1647  uint32_t *buffer_begin = &codepoint_buffer.front();
1648  uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
1649 
1650  // Unicode escapes representing characters that cannot be correctly
1651  // represented in a single code unit are disallowed in character literals
1652  // by this implementation.
1653  uint32_t largest_character_for_kind;
1654  if (tok::wide_char_constant == Kind) {
1655  largest_character_for_kind =
1656  0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
1657  } else if (tok::utf8_char_constant == Kind) {
1658  largest_character_for_kind = 0x7F;
1659  } else if (tok::utf16_char_constant == Kind) {
1660  largest_character_for_kind = 0xFFFF;
1661  } else if (tok::utf32_char_constant == Kind) {
1662  largest_character_for_kind = 0x10FFFF;
1663  } else {
1664  largest_character_for_kind = 0x7Fu;
1665  }
1666 
1667  while (begin != end) {
1668  // Is this a span of non-escape characters?
1669  if (begin[0] != '\\') {
1670  char const *start = begin;
1671  do {
1672  ++begin;
1673  } while (begin != end && *begin != '\\');
1674 
1675  char const *tmp_in_start = start;
1676  uint32_t *tmp_out_start = buffer_begin;
1677  llvm::ConversionResult res =
1678  llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
1679  reinterpret_cast<llvm::UTF8 const *>(begin),
1680  &buffer_begin, buffer_end, llvm::strictConversion);
1681  if (res != llvm::conversionOK) {
1682  // If we see bad encoding for unprefixed character literals, warn and
1683  // simply copy the byte values, for compatibility with gcc and
1684  // older versions of clang.
1685  bool NoErrorOnBadEncoding = isOrdinary();
1686  unsigned Msg = diag::err_bad_character_encoding;
1687  if (NoErrorOnBadEncoding)
1688  Msg = diag::warn_bad_character_encoding;
1689  PP.Diag(Loc, Msg);
1690  if (NoErrorOnBadEncoding) {
1691  start = tmp_in_start;
1692  buffer_begin = tmp_out_start;
1693  for (; start != begin; ++start, ++buffer_begin)
1694  *buffer_begin = static_cast<uint8_t>(*start);
1695  } else {
1696  HadError = true;
1697  }
1698  } else {
1699  for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
1700  if (*tmp_out_start > largest_character_for_kind) {
1701  HadError = true;
1702  PP.Diag(Loc, diag::err_character_too_large);
1703  }
1704  }
1705  }
1706 
1707  continue;
1708  }
1709  // Is this a Universal Character Name escape?
1710  if (begin[1] == 'u' || begin[1] == 'U' || begin[1] == 'N') {
1711  unsigned short UcnLen = 0;
1712  if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
1713  FullSourceLoc(Loc, PP.getSourceManager()),
1714  &PP.getDiagnostics(), PP.getLangOpts(), true)) {
1715  HadError = true;
1716  } else if (*buffer_begin > largest_character_for_kind) {
1717  HadError = true;
1718  PP.Diag(Loc, diag::err_character_too_large);
1719  }
1720 
1721  ++buffer_begin;
1722  continue;
1723  }
1724  unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
1725  uint64_t result =
1726  ProcessCharEscape(TokBegin, begin, end, HadError,
1727  FullSourceLoc(Loc,PP.getSourceManager()),
1728  CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
1729  *buffer_begin++ = result;
1730  }
1731 
1732  unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
1733 
1734  if (NumCharsSoFar > 1) {
1735  if (isOrdinary() && NumCharsSoFar == 4)
1736  PP.Diag(Loc, diag::warn_four_char_character_literal);
1737  else if (isOrdinary())
1738  PP.Diag(Loc, diag::warn_multichar_character_literal);
1739  else {
1740  PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1741  HadError = true;
1742  }
1743  IsMultiChar = true;
1744  } else {
1745  IsMultiChar = false;
1746  }
1747 
1748  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
1749 
1750  // Narrow character literals act as though their value is concatenated
1751  // in this implementation, but warn on overflow.
1752  bool multi_char_too_long = false;
1753  if (isOrdinary() && isMultiChar()) {
1754  LitVal = 0;
1755  for (size_t i = 0; i < NumCharsSoFar; ++i) {
1756  // check for enough leading zeros to shift into
1757  multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
1758  LitVal <<= 8;
1759  LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1760  }
1761  } else if (NumCharsSoFar > 0) {
1762  // otherwise just take the last character
1763  LitVal = buffer_begin[-1];
1764  }
1765 
1766  if (!HadError && multi_char_too_long) {
1767  PP.Diag(Loc, diag::warn_char_constant_too_large);
1768  }
1769 
1770  // Transfer the value from APInt to uint64_t
1771  Value = LitVal.getZExtValue();
1772 
1773  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
1774  // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
1775  // character constants are not sign extended in the this implementation:
1776  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
1777  if (isOrdinary() && NumCharsSoFar == 1 && (Value & 128) &&
1778  PP.getLangOpts().CharIsSigned)
1779  Value = (signed char)Value;
1780 }
1781 
1782 /// \verbatim
1783 /// string-literal: [C++0x lex.string]
1784 /// encoding-prefix " [s-char-sequence] "
1785 /// encoding-prefix R raw-string
1786 /// encoding-prefix:
1787 /// u8
1788 /// u
1789 /// U
1790 /// L
1791 /// s-char-sequence:
1792 /// s-char
1793 /// s-char-sequence s-char
1794 /// s-char:
1795 /// any member of the source character set except the double-quote ",
1796 /// backslash \, or new-line character
1797 /// escape-sequence
1798 /// universal-character-name
1799 /// raw-string:
1800 /// " d-char-sequence ( r-char-sequence ) d-char-sequence "
1801 /// r-char-sequence:
1802 /// r-char
1803 /// r-char-sequence r-char
1804 /// r-char:
1805 /// any member of the source character set, except a right parenthesis )
1806 /// followed by the initial d-char-sequence (which may be empty)
1807 /// followed by a double quote ".
1808 /// d-char-sequence:
1809 /// d-char
1810 /// d-char-sequence d-char
1811 /// d-char:
1812 /// any member of the basic source character set except:
1813 /// space, the left parenthesis (, the right parenthesis ),
1814 /// the backslash \, and the control characters representing horizontal
1815 /// tab, vertical tab, form feed, and newline.
1816 /// escape-sequence: [C++0x lex.ccon]
1817 /// simple-escape-sequence
1818 /// octal-escape-sequence
1819 /// hexadecimal-escape-sequence
1820 /// simple-escape-sequence:
1821 /// one of \' \" \? \\ \a \b \f \n \r \t \v
1822 /// octal-escape-sequence:
1823 /// \ octal-digit
1824 /// \ octal-digit octal-digit
1825 /// \ octal-digit octal-digit octal-digit
1826 /// hexadecimal-escape-sequence:
1827 /// \x hexadecimal-digit
1828 /// hexadecimal-escape-sequence hexadecimal-digit
1829 /// universal-character-name:
1830 /// \u hex-quad
1831 /// \U hex-quad hex-quad
1832 /// hex-quad:
1833 /// hex-digit hex-digit hex-digit hex-digit
1834 /// \endverbatim
1835 ///
1838  Preprocessor &PP)
1839  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1840  Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
1841  MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
1842  ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
1843  init(StringToks);
1844 }
1845 
1846 void StringLiteralParser::init(ArrayRef<Token> StringToks){
1847  // The literal token may have come from an invalid source location (e.g. due
1848  // to a PCH error), in which case the token length will be 0.
1849  if (StringToks.empty() || StringToks[0].getLength() < 2)
1850  return DiagnoseLexingError(SourceLocation());
1851 
1852  // Scan all of the string portions, remember the max individual token length,
1853  // computing a bound on the concatenated string length, and see whether any
1854  // piece is a wide-string. If any of the string portions is a wide-string
1855  // literal, the result is a wide-string literal [C99 6.4.5p4].
1856  assert(!StringToks.empty() && "expected at least one token");
1857  MaxTokenLength = StringToks[0].getLength();
1858  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
1859  SizeBound = StringToks[0].getLength()-2; // -2 for "".
1860  Kind = StringToks[0].getKind();
1861 
1862  hadError = false;
1863 
1864  // Implement Translation Phase #6: concatenation of string literals
1865  /// (C99 5.1.1.2p1). The common case is only one string fragment.
1866  for (unsigned i = 1; i != StringToks.size(); ++i) {
1867  if (StringToks[i].getLength() < 2)
1868  return DiagnoseLexingError(StringToks[i].getLocation());
1869 
1870  // The string could be shorter than this if it needs cleaning, but this is a
1871  // reasonable bound, which is all we need.
1872  assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
1873  SizeBound += StringToks[i].getLength()-2; // -2 for "".
1874 
1875  // Remember maximum string piece length.
1876  if (StringToks[i].getLength() > MaxTokenLength)
1877  MaxTokenLength = StringToks[i].getLength();
1878 
1879  // Remember if we see any wide or utf-8/16/32 strings.
1880  // Also check for illegal concatenations.
1881  if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
1882  if (isOrdinary()) {
1883  Kind = StringToks[i].getKind();
1884  } else {
1885  if (Diags)
1886  Diags->Report(StringToks[i].getLocation(),
1887  diag::err_unsupported_string_concat);
1888  hadError = true;
1889  }
1890  }
1891  }
1892 
1893  // Include space for the null terminator.
1894  ++SizeBound;
1895 
1896  // TODO: K&R warning: "traditional C rejects string constant concatenation"
1897 
1898  // Get the width in bytes of char/wchar_t/char16_t/char32_t
1899  CharByteWidth = getCharWidth(Kind, Target);
1900  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
1901  CharByteWidth /= 8;
1902 
1903  // The output buffer size needs to be large enough to hold wide characters.
1904  // This is a worst-case assumption which basically corresponds to L"" "long".
1905  SizeBound *= CharByteWidth;
1906 
1907  // Size the temporary buffer to hold the result string data.
1908  ResultBuf.resize(SizeBound);
1909 
1910  // Likewise, but for each string piece.
1911  SmallString<512> TokenBuf;
1912  TokenBuf.resize(MaxTokenLength);
1913 
1914  // Loop over all the strings, getting their spelling, and expanding them to
1915  // wide strings as appropriate.
1916  ResultPtr = &ResultBuf[0]; // Next byte to fill in.
1917 
1918  Pascal = false;
1919 
1920  SourceLocation UDSuffixTokLoc;
1921 
1922  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
1923  const char *ThisTokBuf = &TokenBuf[0];
1924  // Get the spelling of the token, which eliminates trigraphs, etc. We know
1925  // that ThisTokBuf points to a buffer that is big enough for the whole token
1926  // and 'spelled' tokens can only shrink.
1927  bool StringInvalid = false;
1928  unsigned ThisTokLen =
1929  Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1930  &StringInvalid);
1931  if (StringInvalid)
1932  return DiagnoseLexingError(StringToks[i].getLocation());
1933 
1934  const char *ThisTokBegin = ThisTokBuf;
1935  const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
1936 
1937  // Remove an optional ud-suffix.
1938  if (ThisTokEnd[-1] != '"') {
1939  const char *UDSuffixEnd = ThisTokEnd;
1940  do {
1941  --ThisTokEnd;
1942  } while (ThisTokEnd[-1] != '"');
1943 
1944  StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
1945 
1946  if (UDSuffixBuf.empty()) {
1947  if (StringToks[i].hasUCN())
1948  expandUCNs(UDSuffixBuf, UDSuffix);
1949  else
1950  UDSuffixBuf.assign(UDSuffix);
1951  UDSuffixToken = i;
1952  UDSuffixOffset = ThisTokEnd - ThisTokBuf;
1953  UDSuffixTokLoc = StringToks[i].getLocation();
1954  } else {
1955  SmallString<32> ExpandedUDSuffix;
1956  if (StringToks[i].hasUCN()) {
1957  expandUCNs(ExpandedUDSuffix, UDSuffix);
1958  UDSuffix = ExpandedUDSuffix;
1959  }
1960 
1961  // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
1962  // result of a concatenation involving at least one user-defined-string-
1963  // literal, all the participating user-defined-string-literals shall
1964  // have the same ud-suffix.
1965  if (UDSuffixBuf != UDSuffix) {
1966  if (Diags) {
1967  SourceLocation TokLoc = StringToks[i].getLocation();
1968  Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
1969  << UDSuffixBuf << UDSuffix
1970  << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
1971  << SourceRange(TokLoc, TokLoc);
1972  }
1973  hadError = true;
1974  }
1975  }
1976  }
1977 
1978  // Strip the end quote.
1979  --ThisTokEnd;
1980 
1981  // TODO: Input character set mapping support.
1982 
1983  // Skip marker for wide or unicode strings.
1984  if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
1985  ++ThisTokBuf;
1986  // Skip 8 of u8 marker for utf8 strings.
1987  if (ThisTokBuf[0] == '8')
1988  ++ThisTokBuf;
1989  }
1990 
1991  // Check for raw string
1992  if (ThisTokBuf[0] == 'R') {
1993  if (ThisTokBuf[1] != '"') {
1994  // The file may have come from PCH and then changed after loading the
1995  // PCH; Fail gracefully.
1996  return DiagnoseLexingError(StringToks[i].getLocation());
1997  }
1998  ThisTokBuf += 2; // skip R"
1999 
2000  // C++11 [lex.string]p2: A `d-char-sequence` shall consist of at most 16
2001  // characters.
2002  constexpr unsigned MaxRawStrDelimLen = 16;
2003 
2004  const char *Prefix = ThisTokBuf;
2005  while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&
2006  ThisTokBuf[0] != '(')
2007  ++ThisTokBuf;
2008  if (ThisTokBuf[0] != '(')
2009  return DiagnoseLexingError(StringToks[i].getLocation());
2010  ++ThisTokBuf; // skip '('
2011 
2012  // Remove same number of characters from the end
2013  ThisTokEnd -= ThisTokBuf - Prefix;
2014  if (ThisTokEnd < ThisTokBuf)
2015  return DiagnoseLexingError(StringToks[i].getLocation());
2016 
2017  // C++14 [lex.string]p4: A source-file new-line in a raw string literal
2018  // results in a new-line in the resulting execution string-literal.
2019  StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
2020  while (!RemainingTokenSpan.empty()) {
2021  // Split the string literal on \r\n boundaries.
2022  size_t CRLFPos = RemainingTokenSpan.find("\r\n");
2023  StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
2024  StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
2025 
2026  // Copy everything before the \r\n sequence into the string literal.
2027  if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
2028  hadError = true;
2029 
2030  // Point into the \n inside the \r\n sequence and operate on the
2031  // remaining portion of the literal.
2032  RemainingTokenSpan = AfterCRLF.substr(1);
2033  }
2034  } else {
2035  if (ThisTokBuf[0] != '"') {
2036  // The file may have come from PCH and then changed after loading the
2037  // PCH; Fail gracefully.
2038  return DiagnoseLexingError(StringToks[i].getLocation());
2039  }
2040  ++ThisTokBuf; // skip "
2041 
2042  // Check if this is a pascal string
2043  if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
2044  ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
2045 
2046  // If the \p sequence is found in the first token, we have a pascal string
2047  // Otherwise, if we already have a pascal string, ignore the first \p
2048  if (i == 0) {
2049  ++ThisTokBuf;
2050  Pascal = true;
2051  } else if (Pascal)
2052  ThisTokBuf += 2;
2053  }
2054 
2055  while (ThisTokBuf != ThisTokEnd) {
2056  // Is this a span of non-escape characters?
2057  if (ThisTokBuf[0] != '\\') {
2058  const char *InStart = ThisTokBuf;
2059  do {
2060  ++ThisTokBuf;
2061  } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
2062 
2063  // Copy the character span over.
2064  if (CopyStringFragment(StringToks[i], ThisTokBegin,
2065  StringRef(InStart, ThisTokBuf - InStart)))
2066  hadError = true;
2067  continue;
2068  }
2069  // Is this a Universal Character Name escape?
2070  if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U' ||
2071  ThisTokBuf[1] == 'N') {
2072  EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
2073  ResultPtr, hadError,
2074  FullSourceLoc(StringToks[i].getLocation(), SM),
2075  CharByteWidth, Diags, Features);
2076  continue;
2077  }
2078  // Otherwise, this is a non-UCN escape character. Process it.
2079  unsigned ResultChar =
2080  ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
2081  FullSourceLoc(StringToks[i].getLocation(), SM),
2082  CharByteWidth*8, Diags, Features);
2083 
2084  if (CharByteWidth == 4) {
2085  // FIXME: Make the type of the result buffer correct instead of
2086  // using reinterpret_cast.
2087  llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
2088  *ResultWidePtr = ResultChar;
2089  ResultPtr += 4;
2090  } else if (CharByteWidth == 2) {
2091  // FIXME: Make the type of the result buffer correct instead of
2092  // using reinterpret_cast.
2093  llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
2094  *ResultWidePtr = ResultChar & 0xFFFF;
2095  ResultPtr += 2;
2096  } else {
2097  assert(CharByteWidth == 1 && "Unexpected char width");
2098  *ResultPtr++ = ResultChar & 0xFF;
2099  }
2100  }
2101  }
2102  }
2103 
2104  if (Pascal) {
2105  if (CharByteWidth == 4) {
2106  // FIXME: Make the type of the result buffer correct instead of
2107  // using reinterpret_cast.
2108  llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
2109  ResultWidePtr[0] = GetNumStringChars() - 1;
2110  } else if (CharByteWidth == 2) {
2111  // FIXME: Make the type of the result buffer correct instead of
2112  // using reinterpret_cast.
2113  llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
2114  ResultWidePtr[0] = GetNumStringChars() - 1;
2115  } else {
2116  assert(CharByteWidth == 1 && "Unexpected char width");
2117  ResultBuf[0] = GetNumStringChars() - 1;
2118  }
2119 
2120  // Verify that pascal strings aren't too large.
2121  if (GetStringLength() > 256) {
2122  if (Diags)
2123  Diags->Report(StringToks.front().getLocation(),
2124  diag::err_pascal_string_too_long)
2125  << SourceRange(StringToks.front().getLocation(),
2126  StringToks.back().getLocation());
2127  hadError = true;
2128  return;
2129  }
2130  } else if (Diags) {
2131  // Complain if this string literal has too many characters.
2132  unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
2133 
2134  if (GetNumStringChars() > MaxChars)
2135  Diags->Report(StringToks.front().getLocation(),
2136  diag::ext_string_too_long)
2137  << GetNumStringChars() << MaxChars
2138  << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
2139  << SourceRange(StringToks.front().getLocation(),
2140  StringToks.back().getLocation());
2141  }
2142 }
2143 
2144 static const char *resyncUTF8(const char *Err, const char *End) {
2145  if (Err == End)
2146  return End;
2147  End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
2148  while (++Err != End && (*Err & 0xC0) == 0x80)
2149  ;
2150  return Err;
2151 }
2152 
2153 /// This function copies from Fragment, which is a sequence of bytes
2154 /// within Tok's contents (which begin at TokBegin) into ResultPtr.
2155 /// Performs widening for multi-byte characters.
2156 bool StringLiteralParser::CopyStringFragment(const Token &Tok,
2157  const char *TokBegin,
2158  StringRef Fragment) {
2159  const llvm::UTF8 *ErrorPtrTmp;
2160  if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
2161  return false;
2162 
2163  // If we see bad encoding for unprefixed string literals, warn and
2164  // simply copy the byte values, for compatibility with gcc and older
2165  // versions of clang.
2166  bool NoErrorOnBadEncoding = isOrdinary();
2167  if (NoErrorOnBadEncoding) {
2168  memcpy(ResultPtr, Fragment.data(), Fragment.size());
2169  ResultPtr += Fragment.size();
2170  }
2171 
2172  if (Diags) {
2173  const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2174 
2175  FullSourceLoc SourceLoc(Tok.getLocation(), SM);
2176  const DiagnosticBuilder &Builder =
2177  Diag(Diags, Features, SourceLoc, TokBegin,
2178  ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
2179  NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
2180  : diag::err_bad_string_encoding);
2181 
2182  const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2183  StringRef NextFragment(NextStart, Fragment.end()-NextStart);
2184 
2185  // Decode into a dummy buffer.
2186  SmallString<512> Dummy;
2187  Dummy.reserve(Fragment.size() * CharByteWidth);
2188  char *Ptr = Dummy.data();
2189 
2190  while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
2191  const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2192  NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2193  Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
2194  ErrorPtr, NextStart);
2195  NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
2196  }
2197  }
2198  return !NoErrorOnBadEncoding;
2199 }
2200 
2201 void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
2202  hadError = true;
2203  if (Diags)
2204  Diags->Report(Loc, diag::err_lexing_string);
2205 }
2206 
2207 /// getOffsetOfStringByte - This function returns the offset of the
2208 /// specified byte of the string data represented by Token. This handles
2209 /// advancing over escape sequences in the string.
2211  unsigned ByteNo) const {
2212  // Get the spelling of the token.
2213  SmallString<32> SpellingBuffer;
2214  SpellingBuffer.resize(Tok.getLength());
2215 
2216  bool StringInvalid = false;
2217  const char *SpellingPtr = &SpellingBuffer[0];
2218  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
2219  &StringInvalid);
2220  if (StringInvalid)
2221  return 0;
2222 
2223  const char *SpellingStart = SpellingPtr;
2224  const char *SpellingEnd = SpellingPtr+TokLen;
2225 
2226  // Handle UTF-8 strings just like narrow strings.
2227  if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
2228  SpellingPtr += 2;
2229 
2230  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
2231  SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
2232 
2233  // For raw string literals, this is easy.
2234  if (SpellingPtr[0] == 'R') {
2235  assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
2236  // Skip 'R"'.
2237  SpellingPtr += 2;
2238  while (*SpellingPtr != '(') {
2239  ++SpellingPtr;
2240  assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
2241  }
2242  // Skip '('.
2243  ++SpellingPtr;
2244  return SpellingPtr - SpellingStart + ByteNo;
2245  }
2246 
2247  // Skip over the leading quote
2248  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
2249  ++SpellingPtr;
2250 
2251  // Skip over bytes until we find the offset we're looking for.
2252  while (ByteNo) {
2253  assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
2254 
2255  // Step over non-escapes simply.
2256  if (*SpellingPtr != '\\') {
2257  ++SpellingPtr;
2258  --ByteNo;
2259  continue;
2260  }
2261 
2262  // Otherwise, this is an escape character. Advance over it.
2263  bool HadError = false;
2264  if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U' ||
2265  SpellingPtr[1] == 'N') {
2266  const char *EscapePtr = SpellingPtr;
2267  unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
2268  1, Features, HadError);
2269  if (Len > ByteNo) {
2270  // ByteNo is somewhere within the escape sequence.
2271  SpellingPtr = EscapePtr;
2272  break;
2273  }
2274  ByteNo -= Len;
2275  } else {
2276  ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
2277  FullSourceLoc(Tok.getLocation(), SM),
2278  CharByteWidth*8, Diags, Features);
2279  --ByteNo;
2280  }
2281  assert(!HadError && "This method isn't valid on erroneous strings");
2282  }
2283 
2284  return SpellingPtr-SpellingStart;
2285 }
2286 
2287 /// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
2288 /// suffixes as ud-suffixes, because the diagnostic experience is better if we
2289 /// treat it as an invalid suffix.
2291  StringRef Suffix) {
2292  return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) ||
2293  Suffix == "sv";
2294 }
clang::NumericLiteralParser::isLong
bool isLong
Definition: LiteralSupport.h:64
max
__DEVICE__ int max(int __a, int __b)
Definition: __clang_cuda_math.h:196
ProcessNamedUCNEscape
static bool ProcessNamedUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features)
Definition: LiteralSupport.cpp:533
clang::FullSourceLoc::getManager
const SourceManager & getManager() const
Definition: SourceLocation.h:382
clang::isPrintable
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:145
clang::CharLiteralParser::CharLiteralParser
CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, tok::TokenKind kind)
Definition: LiteralSupport.cpp:1594
clang::interp::APInt
llvm::APInt APInt
Definition: Integral.h:27
clang::DiagnosticBuilder
A little helper class used to produce diagnostics.
Definition: Diagnostic.h:1266
clang::CharLiteralParser::isOrdinary
bool isOrdinary() const
Definition: LiteralSupport.h:201
MakeCharSourceRange
static CharSourceRange MakeCharSourceRange(const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd)
Definition: LiteralSupport.cpp:60
clang::FullSourceLoc
A SourceLocation and its associated SourceManager.
Definition: SourceLocation.h:368
clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:210
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
Diag
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Definition: LiteralSupport.cpp:79
clang::Lexer::getSpelling
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition: Lexer.cpp:404
llvm::SmallVector
Definition: LLVM.h:38
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:86
clang::NumericLiteralParser::isImaginary
bool isImaginary
Definition: LiteralSupport.h:69
TargetInfo.h
clang::MultiVersionKind::Target
@ Target
AttributeLangSupport::C
@ C
Definition: SemaDeclAttr.cpp:56
clang::DiagnosticsEngine
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
getCharWidth
static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target)
Definition: LiteralSupport.cpp:40
clang::TargetInfo
Exposes information about the current target.
Definition: TargetInfo.h:205
memcpy
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Definition: __clang_cuda_device_functions.h:1549
clang::TargetInfo::getCharWidth
unsigned getCharWidth() const
Definition: TargetInfo.h:465
llvm::Optional
Definition: LLVM.h:40
clang::StringLiteralParser::hadError
bool hadError
Definition: LiteralSupport.h:247
clang::NumericLiteralParser::isValidUDSuffix
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Definition: LiteralSupport.cpp:1163
MeasureUCNEscape
static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, unsigned CharByteWidth, const LangOptions &Features, bool &HadError)
MeasureUCNEscape - Determine the number of bytes within the resulting string which this UCN will occu...
Definition: LiteralSupport.cpp:653
clang::NumericLiteralParser::NumericLiteralParser
NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, const SourceManager &SM, const LangOptions &LangOpts, const TargetInfo &Target, DiagnosticsEngine &Diags)
integer-constant: [C99 6.4.4.1] decimal-constant integer-suffix octal-constant integer-suffix hexadec...
Definition: LiteralSupport.cpp:833
clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
LiteralSupport.h
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::StringLiteralParser::isValidUDSuffix
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Definition: LiteralSupport.cpp:2290
appendCodePoint
static void appendCodePoint(unsigned Codepoint, llvm::SmallVectorImpl< char > &Str)
Definition: LiteralSupport.cpp:323
clang::isPreprocessingNumberBody
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.
Definition: CharInfo.h:153
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:636
clang::Preprocessor::getLangOpts
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:1065
Preprocessor.h
alwaysFitsInto64Bits
static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits)
Definition: LiteralSupport.cpp:1338
clang::NumericLiteralParser::isSizeT
bool isSizeT
Definition: LiteralSupport.h:66
U
clang::StringLiteralParser::getOffsetOfStringByte
unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const
getOffsetOfStringByte - This function returns the offset of the specified byte of the string data rep...
Definition: LiteralSupport.cpp:2210
clang::CharSourceRange::getCharRange
static CharSourceRange getCharRange(SourceRange R)
Definition: SourceLocation.h:265
clang::StringLiteralParser::isOrdinary
bool isOrdinary() const
Definition: LiteralSupport.h:266
V
#define V(N, I)
Definition: ASTContext.h:3235
min
__DEVICE__ int min(int __a, int __b)
Definition: __clang_cuda_math.h:197
hlsl::uint64_t
unsigned long uint64_t
Definition: hlsl_basic_types.h:25
ProcessCharEscape
static unsigned ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
ProcessCharEscape - Parse a standard C escape sequence, which can occur in either a character or a st...
Definition: LiteralSupport.cpp:92
LangOptions.h
clang::NumericLiteralParser::isFloat
bool isFloat
Definition: LiteralSupport.h:68
LexDiagnostic.h
clang::Preprocessor::Diag
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Definition: Preprocessor.h:1899
clang::TargetInfo::getWCharWidth
unsigned getWCharWidth() const
getWCharWidth/Align - Return the size of 'wchar_t' for this target, in bits.
Definition: TargetInfo.h:693
clang::Preprocessor::getTargetInfo
const TargetInfo & getTargetInfo() const
Definition: Preprocessor.h:1066
clang::NumericLiteralParser::isLongLong
bool isLongLong
Definition: LiteralSupport.h:65
llvm::SmallString< 16 >
clang::StringLiteralParser::Pascal
bool Pascal
Definition: LiteralSupport.h:248
clang::StringLiteralParser::GetStringLength
unsigned GetStringLength() const
Definition: LiteralSupport.h:253
DiagnoseInvalidUnicodeCharacterName
static void DiagnoseInvalidUnicodeCharacterName(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, llvm::StringRef Name)
Definition: LiteralSupport.cpp:479
Base
clang::StringLiteralParser::StringLiteralParser
StringLiteralParser(ArrayRef< Token > StringToks, Preprocessor &PP)
Definition: LiteralSupport.cpp:1837
ProcessUCNEscape
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)
ProcessUCNEscape - Read the Universal Character Name, check constraints and return the UTF32.
Definition: LiteralSupport.cpp:584
clang::NumericLiteralParser::isUnsigned
bool isUnsigned
Definition: LiteralSupport.h:63
EncodeUCNEscape
static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, FullSourceLoc Loc, unsigned CharByteWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
EncodeUCNEscape - Read the Universal Character Name, check constraints and convert the UTF32 to UTF8 ...
Definition: LiteralSupport.cpp:688
clang::Token::getLength
unsigned getLength() const
Definition: Token.h:129
clang::NumericLiteralParser::GetFixedPointValue
bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale)
GetFixedPointValue - Convert this numeric literal value into a scaled integer that represents this va...
Definition: LiteralSupport.cpp:1439
clang::NumericLiteralParser::isAccum
bool isAccum
Definition: LiteralSupport.h:73
IsExponentPart
static bool IsExponentPart(char c)
Definition: LiteralSupport.cpp:1435
clang::isHexDigit
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
Definition: CharInfo.h:129
clang::Preprocessor::getSourceManager
SourceManager & getSourceManager() const
Definition: Preprocessor.h:1069
clang::NumericLiteralParser::hadError
bool hadError
Definition: LiteralSupport.h:62
resyncUTF8
static const char * resyncUTF8(const char *Err, const char *End)
Definition: LiteralSupport.cpp:2144
clang::expandUCNs
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
Definition: LiteralSupport.cpp:331
SourceLocation.h
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
CharInfo.h
false
#define false
Definition: stdbool.h:22
clang::NumericLiteralParser::MicrosoftInteger
uint8_t MicrosoftInteger
Definition: LiteralSupport.h:75
clang::NumericLiteralParser::isFloat16
bool isFloat16
Definition: LiteralSupport.h:70
clang::NumericLiteralParser::isFract
bool isFract
Definition: LiteralSupport.h:72
Begin
SourceLocation Begin
Definition: USRLocFinder.cpp:165
llvm::ArrayRef
Definition: LLVM.h:34
Lexer.h
Value
Value
Definition: UninitializedValues.cpp:103
clang::CharSourceRange
Represents a character-granular source range.
Definition: SourceLocation.h:253
clang::NumericLiteralParser::GetIntegerValue
bool GetIntegerValue(llvm::APInt &Val)
GetIntegerValue - Convert this numeric literal value to an APInt that matches Val's input width.
Definition: LiteralSupport.cpp:1356
clang::NumericLiteralParser::isIntegerLiteral
bool isIntegerLiteral() const
Definition: LiteralSupport.h:82
clang::NumericLiteralParser::isFloatingLiteral
bool isFloatingLiteral() const
Definition: LiteralSupport.h:85
clang::CharLiteralParser::isWide
bool isWide() const
Definition: LiteralSupport.h:202
clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:126
clang::LangOptions
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:81
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::NumericLiteralParser::GetFloatValue
llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result)
GetFloatValue - Convert this numeric literal to a floating value, using the specified APFloat fltSema...
Definition: LiteralSupport.cpp:1414
clang
Definition: CalledOnceCheck.h:17
clang::NumericLiteralParser::isFloat128
bool isFloat128
Definition: LiteralSupport.h:71
hlsl::int64_t
long int64_t
Definition: hlsl_basic_types.h:26
distance
float __ovld __cnfn distance(float, float)
Returns the distance between p0 and p1.
clang::NumericLiteralParser::isHalf
bool isHalf
Definition: LiteralSupport.h:67
clang::CharLiteralParser::isMultiChar
bool isMultiChar() const
Definition: LiteralSupport.h:206
clang::Preprocessor::getDiagnostics
DiagnosticsEngine & getDiagnostics() const
Definition: Preprocessor.h:1062
clang::isDigit
LLVM_READONLY bool isDigit(unsigned char c)
Return true if this character is an ASCII digit: [0-9].
Definition: CharInfo.h:99
s
__device__ __2f16 float bool s
Definition: __clang_hip_libdevice_declares.h:320
clang::NumericLiteralParser::isBitInt
bool isBitInt
Definition: LiteralSupport.h:74
c
__device__ __2f16 float c
Definition: __clang_hip_libdevice_declares.h:320
clang::Lexer::AdvanceToTokenCharacter
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition: Lexer.h:399
llvm::SmallVectorImpl< char >
clang::Preprocessor
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:129
SM
#define SM(sm)
Definition: Cuda.cpp:79
Token.h
clang::NumericLiteralParser::isFixedPointLiteral
bool isFixedPointLiteral() const
Definition: LiteralSupport.h:78
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:62
clang::TargetInfo::getIntWidth
unsigned getIntWidth() const
getIntWidth/Align - Return the size of 'signed int' and 'unsigned int' for this target,...
Definition: TargetInfo.h:478
clang::FixItHint::CreateReplacement
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
Definition: Diagnostic.h:134
clang::DiagnosticsEngine::Report
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1537
clang::StringLiteralParser::GetNumStringChars
unsigned GetNumStringChars() const
Definition: LiteralSupport.h:255
ProcessNumericUCNEscape
static bool ProcessNumericUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, bool &Delimited, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)
Definition: LiteralSupport.cpp:393