clang  14.0.0git
LiteralSupport.cpp
Go to the documentation of this file.
1 //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the NumericLiteralParser, CharLiteralParser, and
10 // StringLiteralParser interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "clang/Basic/CharInfo.h"
18 #include "clang/Basic/TargetInfo.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/Support/ConvertUTF.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstdint>
34 #include <cstring>
35 #include <string>
36 
37 using namespace clang;
38 
39 static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
40  switch (kind) {
41  default: llvm_unreachable("Unknown token type!");
42  case tok::char_constant:
43  case tok::string_literal:
44  case tok::utf8_char_constant:
45  case tok::utf8_string_literal:
46  return Target.getCharWidth();
47  case tok::wide_char_constant:
48  case tok::wide_string_literal:
49  return Target.getWCharWidth();
50  case tok::utf16_char_constant:
51  case tok::utf16_string_literal:
52  return Target.getChar16Width();
53  case tok::utf32_char_constant:
54  case tok::utf32_string_literal:
55  return Target.getChar32Width();
56  }
57 }
58 
60  FullSourceLoc TokLoc,
61  const char *TokBegin,
62  const char *TokRangeBegin,
63  const char *TokRangeEnd) {
65  Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
66  TokLoc.getManager(), Features);
68  Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
69  TokLoc.getManager(), Features);
71 }
72 
73 /// Produce a diagnostic highlighting some portion of a literal.
74 ///
75 /// Emits the diagnostic \p DiagID, highlighting the range of characters from
76 /// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
77 /// a substring of a spelling buffer for the token beginning at \p TokBegin.
79  const LangOptions &Features, FullSourceLoc TokLoc,
80  const char *TokBegin, const char *TokRangeBegin,
81  const char *TokRangeEnd, unsigned DiagID) {
83  Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
84  TokLoc.getManager(), Features);
85  return Diags->Report(Begin, DiagID) <<
86  MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
87 }
88 
89 /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
90 /// either a character or a string literal.
91 static unsigned ProcessCharEscape(const char *ThisTokBegin,
92  const char *&ThisTokBuf,
93  const char *ThisTokEnd, bool &HadError,
94  FullSourceLoc Loc, unsigned CharWidth,
95  DiagnosticsEngine *Diags,
96  const LangOptions &Features) {
97  const char *EscapeBegin = ThisTokBuf;
98  bool Delimited = false;
99  bool EndDelimiterFound = false;
100 
101  // Skip the '\' char.
102  ++ThisTokBuf;
103 
104  // We know that this character can't be off the end of the buffer, because
105  // that would have been \", which would not have been the end of string.
106  unsigned ResultChar = *ThisTokBuf++;
107  switch (ResultChar) {
108  // These map to themselves.
109  case '\\': case '\'': case '"': case '?': break;
110 
111  // These have fixed mappings.
112  case 'a':
113  // TODO: K&R: the meaning of '\\a' is different in traditional C
114  ResultChar = 7;
115  break;
116  case 'b':
117  ResultChar = 8;
118  break;
119  case 'e':
120  if (Diags)
121  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
122  diag::ext_nonstandard_escape) << "e";
123  ResultChar = 27;
124  break;
125  case 'E':
126  if (Diags)
127  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
128  diag::ext_nonstandard_escape) << "E";
129  ResultChar = 27;
130  break;
131  case 'f':
132  ResultChar = 12;
133  break;
134  case 'n':
135  ResultChar = 10;
136  break;
137  case 'r':
138  ResultChar = 13;
139  break;
140  case 't':
141  ResultChar = 9;
142  break;
143  case 'v':
144  ResultChar = 11;
145  break;
146  case 'x': { // Hex escape.
147  ResultChar = 0;
148  if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
149  Delimited = true;
150  ThisTokBuf++;
151  if (*ThisTokBuf == '}') {
152  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
153  diag::err_delimited_escape_empty);
154  return ResultChar;
155  }
156  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
157  if (Diags)
158  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
159  diag::err_hex_escape_no_digits) << "x";
160  return ResultChar;
161  }
162 
163  // Hex escapes are a maximal series of hex digits.
164  bool Overflow = false;
165  for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
166  if (Delimited && *ThisTokBuf == '}') {
167  ThisTokBuf++;
168  EndDelimiterFound = true;
169  break;
170  }
171  int CharVal = llvm::hexDigitValue(*ThisTokBuf);
172  if (CharVal == -1) {
173  // Non delimited hex escape sequences stop at the first non-hex digit.
174  if (!Delimited)
175  break;
176  HadError = true;
177  if (Diags)
178  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
179  diag::err_delimited_escape_invalid)
180  << StringRef(ThisTokBuf, 1);
181  continue;
182  }
183  // About to shift out a digit?
184  if (ResultChar & 0xF0000000)
185  Overflow = true;
186  ResultChar <<= 4;
187  ResultChar |= CharVal;
188  }
189  // See if any bits will be truncated when evaluated as a character.
190  if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
191  Overflow = true;
192  ResultChar &= ~0U >> (32-CharWidth);
193  }
194 
195  // Check for overflow.
196  if (!HadError && Overflow) { // Too many digits to fit in
197  HadError = true;
198  if (Diags)
199  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
200  diag::err_escape_too_large)
201  << 0;
202  }
203  break;
204  }
205  case '0': case '1': case '2': case '3':
206  case '4': case '5': case '6': case '7': {
207  // Octal escapes.
208  --ThisTokBuf;
209  ResultChar = 0;
210 
211  // Octal escapes are a series of octal digits with maximum length 3.
212  // "\0123" is a two digit sequence equal to "\012" "3".
213  unsigned NumDigits = 0;
214  do {
215  ResultChar <<= 3;
216  ResultChar |= *ThisTokBuf++ - '0';
217  ++NumDigits;
218  } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
219  ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
220 
221  // Check for overflow. Reject '\777', but not L'\777'.
222  if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
223  if (Diags)
224  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
225  diag::err_escape_too_large) << 1;
226  ResultChar &= ~0U >> (32-CharWidth);
227  }
228  break;
229  }
230  case 'o': {
231  bool Overflow = false;
232  if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
233  HadError = true;
234  if (Diags)
235  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
236  diag::err_delimited_escape_missing_brace);
237 
238  break;
239  }
240  ResultChar = 0;
241  Delimited = true;
242  ++ThisTokBuf;
243  if (*ThisTokBuf == '}') {
244  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
245  diag::err_delimited_escape_empty);
246  return ResultChar;
247  }
248 
249  while (ThisTokBuf != ThisTokEnd) {
250  if (*ThisTokBuf == '}') {
251  EndDelimiterFound = true;
252  ThisTokBuf++;
253  break;
254  }
255  if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
256  HadError = true;
257  if (Diags)
258  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
259  diag::err_delimited_escape_invalid)
260  << StringRef(ThisTokBuf, 1);
261  ThisTokBuf++;
262  continue;
263  }
264  if (ResultChar & 0x020000000)
265  Overflow = true;
266 
267  ResultChar <<= 3;
268  ResultChar |= *ThisTokBuf++ - '0';
269  }
270  // Check for overflow. Reject '\777', but not L'\777'.
271  if (!HadError &&
272  (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
273  HadError = true;
274  if (Diags)
275  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
276  diag::err_escape_too_large)
277  << 1;
278  ResultChar &= ~0U >> (32 - CharWidth);
279  }
280  break;
281  }
282  // Otherwise, these are not valid escapes.
283  case '(': case '{': case '[': case '%':
284  // GCC accepts these as extensions. We warn about them as such though.
285  if (Diags)
286  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
287  diag::ext_nonstandard_escape)
288  << std::string(1, ResultChar);
289  break;
290  default:
291  if (!Diags)
292  break;
293 
294  if (isPrintable(ResultChar))
295  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
296  diag::ext_unknown_escape)
297  << std::string(1, ResultChar);
298  else
299  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
300  diag::ext_unknown_escape)
301  << "x" + llvm::utohexstr(ResultChar);
302  break;
303  }
304 
305  if (Delimited && Diags) {
306  if (!EndDelimiterFound)
307  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
308  diag::err_expected)
309  << tok::r_brace;
310  else if (!HadError) {
311  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
312  diag::ext_delimited_escape_sequence);
313  }
314  }
315 
316  return ResultChar;
317 }
318 
319 static void appendCodePoint(unsigned Codepoint,
321  char ResultBuf[4];
322  char *ResultPtr = ResultBuf;
323  bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
324  (void)Res;
325  assert(Res && "Unexpected conversion failure");
326  Str.append(ResultBuf, ResultPtr);
327 }
328 
329 void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
330  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
331  if (*I != '\\') {
332  Buf.push_back(*I);
333  continue;
334  }
335 
336  ++I;
337  char Kind = *I;
338  ++I;
339 
340  assert(Kind == 'u' || Kind == 'U');
341  uint32_t CodePoint = 0;
342 
343  if (Kind == 'u' && *I == '{') {
344  for (++I; *I != '}'; ++I) {
345  unsigned Value = llvm::hexDigitValue(*I);
346  assert(Value != -1U);
347  CodePoint <<= 4;
348  CodePoint += Value;
349  }
350  appendCodePoint(CodePoint, Buf);
351  continue;
352  }
353 
354  unsigned NumHexDigits;
355  if (Kind == 'u')
356  NumHexDigits = 4;
357  else
358  NumHexDigits = 8;
359 
360  assert(I + NumHexDigits <= E);
361 
362  for (; NumHexDigits != 0; ++I, --NumHexDigits) {
363  unsigned Value = llvm::hexDigitValue(*I);
364  assert(Value != -1U);
365 
366  CodePoint <<= 4;
367  CodePoint += Value;
368  }
369 
370  appendCodePoint(CodePoint, Buf);
371  --I;
372  }
373 }
374 
375 /// ProcessUCNEscape - Read the Universal Character Name, check constraints and
376 /// return the UTF32.
377 static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
378  const char *ThisTokEnd,
379  uint32_t &UcnVal, unsigned short &UcnLen,
380  FullSourceLoc Loc, DiagnosticsEngine *Diags,
381  const LangOptions &Features,
382  bool in_char_string_literal = false) {
383  const char *UcnBegin = ThisTokBuf;
384 
385  // Skip the '\u' char's.
386  ThisTokBuf += 2;
387 
388  bool Delimited = false;
389  bool EndDelimiterFound = false;
390  bool HasError = false;
391 
392  if (UcnBegin[1] == 'u' && in_char_string_literal &&
393  ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
394  Delimited = true;
395  ThisTokBuf++;
396  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
397  if (Diags)
398  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
399  diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
400  return false;
401  }
402  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
403 
404  bool Overflow = false;
405  unsigned short Count = 0;
406  for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
407  ++ThisTokBuf) {
408  if (Delimited && *ThisTokBuf == '}') {
409  ++ThisTokBuf;
410  EndDelimiterFound = true;
411  break;
412  }
413  int CharVal = llvm::hexDigitValue(*ThisTokBuf);
414  if (CharVal == -1) {
415  HasError = true;
416  if (!Delimited)
417  break;
418  if (Diags) {
419  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
420  diag::err_delimited_escape_invalid)
421  << StringRef(ThisTokBuf, 1);
422  }
423  Count++;
424  continue;
425  }
426  if (UcnVal & 0xF0000000) {
427  Overflow = true;
428  continue;
429  }
430  UcnVal <<= 4;
431  UcnVal |= CharVal;
432  Count++;
433  }
434 
435  if (Overflow) {
436  if (Diags)
437  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
438  diag::err_escape_too_large)
439  << 0;
440  return false;
441  }
442 
443  if (Delimited && !EndDelimiterFound) {
444  if (Diags) {
445  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
446  diag::err_expected)
447  << tok::r_brace;
448  }
449  return false;
450  }
451 
452  // If we didn't consume the proper number of digits, there is a problem.
453  if (Count == 0 || (!Delimited && Count != UcnLen)) {
454  if (Diags)
455  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
456  Delimited ? diag::err_delimited_escape_empty
457  : diag::err_ucn_escape_incomplete);
458  return false;
459  }
460 
461  if (HasError)
462  return false;
463 
464  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
465  if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
466  UcnVal > 0x10FFFF) { // maximum legal UTF32 value
467  if (Diags)
468  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
469  diag::err_ucn_escape_invalid);
470  return false;
471  }
472 
473  // C++11 allows UCNs that refer to control characters and basic source
474  // characters inside character and string literals
475  if (UcnVal < 0xa0 &&
476  (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
477  bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
478  if (Diags) {
479  char BasicSCSChar = UcnVal;
480  if (UcnVal >= 0x20 && UcnVal < 0x7f)
481  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
482  IsError ? diag::err_ucn_escape_basic_scs :
483  diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
484  << StringRef(&BasicSCSChar, 1);
485  else
486  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
487  IsError ? diag::err_ucn_control_character :
488  diag::warn_cxx98_compat_literal_ucn_control_character);
489  }
490  if (IsError)
491  return false;
492  }
493 
494  if (!Features.CPlusPlus && !Features.C99 && Diags)
495  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
496  diag::warn_ucn_not_valid_in_c89_literal);
497 
498  if (Delimited && Diags)
499  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
500  diag::ext_delimited_escape_sequence);
501 
502  return true;
503 }
504 
505 /// MeasureUCNEscape - Determine the number of bytes within the resulting string
506 /// which this UCN will occupy.
507 static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
508  const char *ThisTokEnd, unsigned CharByteWidth,
509  const LangOptions &Features, bool &HadError) {
510  // UTF-32: 4 bytes per escape.
511  if (CharByteWidth == 4)
512  return 4;
513 
514  uint32_t UcnVal = 0;
515  unsigned short UcnLen = 0;
516  FullSourceLoc Loc;
517 
518  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
519  UcnLen, Loc, nullptr, Features, true)) {
520  HadError = true;
521  return 0;
522  }
523 
524  // UTF-16: 2 bytes for BMP, 4 bytes otherwise.
525  if (CharByteWidth == 2)
526  return UcnVal <= 0xFFFF ? 2 : 4;
527 
528  // UTF-8.
529  if (UcnVal < 0x80)
530  return 1;
531  if (UcnVal < 0x800)
532  return 2;
533  if (UcnVal < 0x10000)
534  return 3;
535  return 4;
536 }
537 
538 /// EncodeUCNEscape - Read the Universal Character Name, check constraints and
539 /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
540 /// StringLiteralParser. When we decide to implement UCN's for identifiers,
541 /// we will likely rework our support for UCN's.
542 static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
543  const char *ThisTokEnd,
544  char *&ResultBuf, bool &HadError,
545  FullSourceLoc Loc, unsigned CharByteWidth,
546  DiagnosticsEngine *Diags,
547  const LangOptions &Features) {
548  typedef uint32_t UTF32;
549  UTF32 UcnVal = 0;
550  unsigned short UcnLen = 0;
551  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
552  Loc, Diags, Features, true)) {
553  HadError = true;
554  return;
555  }
556 
557  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
558  "only character widths of 1, 2, or 4 bytes supported");
559 
560  (void)UcnLen;
561  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
562 
563  if (CharByteWidth == 4) {
564  // FIXME: Make the type of the result buffer correct instead of
565  // using reinterpret_cast.
566  llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
567  *ResultPtr = UcnVal;
568  ResultBuf += 4;
569  return;
570  }
571 
572  if (CharByteWidth == 2) {
573  // FIXME: Make the type of the result buffer correct instead of
574  // using reinterpret_cast.
575  llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
576 
577  if (UcnVal <= (UTF32)0xFFFF) {
578  *ResultPtr = UcnVal;
579  ResultBuf += 2;
580  return;
581  }
582 
583  // Convert to UTF16.
584  UcnVal -= 0x10000;
585  *ResultPtr = 0xD800 + (UcnVal >> 10);
586  *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
587  ResultBuf += 4;
588  return;
589  }
590 
591  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
592 
593  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
594  // The conversion below was inspired by:
595  // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
596  // First, we determine how many bytes the result will require.
597  typedef uint8_t UTF8;
598 
599  unsigned short bytesToWrite = 0;
600  if (UcnVal < (UTF32)0x80)
601  bytesToWrite = 1;
602  else if (UcnVal < (UTF32)0x800)
603  bytesToWrite = 2;
604  else if (UcnVal < (UTF32)0x10000)
605  bytesToWrite = 3;
606  else
607  bytesToWrite = 4;
608 
609  const unsigned byteMask = 0xBF;
610  const unsigned byteMark = 0x80;
611 
612  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
613  // into the first byte, depending on how many bytes follow.
614  static const UTF8 firstByteMark[5] = {
615  0x00, 0x00, 0xC0, 0xE0, 0xF0
616  };
617  // Finally, we write the bytes into ResultBuf.
618  ResultBuf += bytesToWrite;
619  switch (bytesToWrite) { // note: everything falls through.
620  case 4:
621  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
622  LLVM_FALLTHROUGH;
623  case 3:
624  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
625  LLVM_FALLTHROUGH;
626  case 2:
627  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
628  LLVM_FALLTHROUGH;
629  case 1:
630  *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
631  }
632  // Update the buffer.
633  ResultBuf += bytesToWrite;
634 }
635 
636 /// integer-constant: [C99 6.4.4.1]
637 /// decimal-constant integer-suffix
638 /// octal-constant integer-suffix
639 /// hexadecimal-constant integer-suffix
640 /// binary-literal integer-suffix [GNU, C++1y]
641 /// user-defined-integer-literal: [C++11 lex.ext]
642 /// decimal-literal ud-suffix
643 /// octal-literal ud-suffix
644 /// hexadecimal-literal ud-suffix
645 /// binary-literal ud-suffix [GNU, C++1y]
646 /// decimal-constant:
647 /// nonzero-digit
648 /// decimal-constant digit
649 /// octal-constant:
650 /// 0
651 /// octal-constant octal-digit
652 /// hexadecimal-constant:
653 /// hexadecimal-prefix hexadecimal-digit
654 /// hexadecimal-constant hexadecimal-digit
655 /// hexadecimal-prefix: one of
656 /// 0x 0X
657 /// binary-literal:
658 /// 0b binary-digit
659 /// 0B binary-digit
660 /// binary-literal binary-digit
661 /// integer-suffix:
662 /// unsigned-suffix [long-suffix]
663 /// unsigned-suffix [long-long-suffix]
664 /// long-suffix [unsigned-suffix]
665 /// long-long-suffix [unsigned-suffix]
666 /// nonzero-digit:
667 /// 1 2 3 4 5 6 7 8 9
668 /// octal-digit:
669 /// 0 1 2 3 4 5 6 7
670 /// hexadecimal-digit:
671 /// 0 1 2 3 4 5 6 7 8 9
672 /// a b c d e f
673 /// A B C D E F
674 /// binary-digit:
675 /// 0
676 /// 1
677 /// unsigned-suffix: one of
678 /// u U
679 /// long-suffix: one of
680 /// l L
681 /// long-long-suffix: one of
682 /// ll LL
683 ///
684 /// floating-constant: [C99 6.4.4.2]
685 /// TODO: add rules...
686 ///
688  SourceLocation TokLoc,
689  const SourceManager &SM,
690  const LangOptions &LangOpts,
691  const TargetInfo &Target,
692  DiagnosticsEngine &Diags)
693  : SM(SM), LangOpts(LangOpts), Diags(Diags),
694  ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
695 
696  // This routine assumes that the range begin/end matches the regex for integer
697  // and FP constants (specifically, the 'pp-number' regex), and assumes that
698  // the byte at "*end" is both valid and not part of the regex. Because of
699  // this, it doesn't have to check for 'overscan' in various places.
700  assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
701 
702  s = DigitsBegin = ThisTokBegin;
703  saw_exponent = false;
704  saw_period = false;
705  saw_ud_suffix = false;
706  saw_fixed_point_suffix = false;
707  isLong = false;
708  isUnsigned = false;
709  isLongLong = false;
710  isSizeT = false;
711  isHalf = false;
712  isFloat = false;
713  isImaginary = false;
714  isFloat16 = false;
715  isFloat128 = false;
716  MicrosoftInteger = 0;
717  isFract = false;
718  isAccum = false;
719  hadError = false;
720 
721  if (*s == '0') { // parse radix
722  ParseNumberStartingWithZero(TokLoc);
723  if (hadError)
724  return;
725  } else { // the first digit is non-zero
726  radix = 10;
727  s = SkipDigits(s);
728  if (s == ThisTokEnd) {
729  // Done.
730  } else {
731  ParseDecimalOrOctalCommon(TokLoc);
732  if (hadError)
733  return;
734  }
735  }
736 
737  SuffixBegin = s;
738  checkSeparator(TokLoc, s, CSK_AfterDigits);
739 
740  // Initial scan to lookahead for fixed point suffix.
741  if (LangOpts.FixedPoint) {
742  for (const char *c = s; c != ThisTokEnd; ++c) {
743  if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
744  saw_fixed_point_suffix = true;
745  break;
746  }
747  }
748  }
749 
750  // Parse the suffix. At this point we can classify whether we have an FP or
751  // integer constant.
752  bool isFixedPointConstant = isFixedPointLiteral();
753  bool isFPConstant = isFloatingLiteral();
754  bool HasSize = false;
755 
756  // Loop over all of the characters of the suffix. If we see something bad,
757  // we break out of the loop.
758  for (; s != ThisTokEnd; ++s) {
759  switch (*s) {
760  case 'R':
761  case 'r':
762  if (!LangOpts.FixedPoint)
763  break;
764  if (isFract || isAccum) break;
765  if (!(saw_period || saw_exponent)) break;
766  isFract = true;
767  continue;
768  case 'K':
769  case 'k':
770  if (!LangOpts.FixedPoint)
771  break;
772  if (isFract || isAccum) break;
773  if (!(saw_period || saw_exponent)) break;
774  isAccum = true;
775  continue;
776  case 'h': // FP Suffix for "half".
777  case 'H':
778  // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
779  if (!(LangOpts.Half || LangOpts.FixedPoint))
780  break;
781  if (isIntegerLiteral()) break; // Error for integer constant.
782  if (HasSize)
783  break;
784  HasSize = true;
785  isHalf = true;
786  continue; // Success.
787  case 'f': // FP Suffix for "float"
788  case 'F':
789  if (!isFPConstant) break; // Error for integer constant.
790  if (HasSize)
791  break;
792  HasSize = true;
793 
794  // CUDA host and device may have different _Float16 support, therefore
795  // allows f16 literals to avoid false alarm.
796  // ToDo: more precise check for CUDA.
797  if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd &&
798  s[1] == '1' && s[2] == '6') {
799  s += 2; // success, eat up 2 characters.
800  isFloat16 = true;
801  continue;
802  }
803 
804  isFloat = true;
805  continue; // Success.
806  case 'q': // FP Suffix for "__float128"
807  case 'Q':
808  if (!isFPConstant) break; // Error for integer constant.
809  if (HasSize)
810  break;
811  HasSize = true;
812  isFloat128 = true;
813  continue; // Success.
814  case 'u':
815  case 'U':
816  if (isFPConstant) break; // Error for floating constant.
817  if (isUnsigned) break; // Cannot be repeated.
818  isUnsigned = true;
819  continue; // Success.
820  case 'l':
821  case 'L':
822  if (HasSize)
823  break;
824  HasSize = true;
825 
826  // Check for long long. The L's need to be adjacent and the same case.
827  if (s[1] == s[0]) {
828  assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
829  if (isFPConstant) break; // long long invalid for floats.
830  isLongLong = true;
831  ++s; // Eat both of them.
832  } else {
833  isLong = true;
834  }
835  continue; // Success.
836  case 'z':
837  case 'Z':
838  if (isFPConstant)
839  break; // Invalid for floats.
840  if (HasSize)
841  break;
842  HasSize = true;
843  isSizeT = true;
844  continue;
845  case 'i':
846  case 'I':
847  if (LangOpts.MicrosoftExt && !isFPConstant) {
848  // Allow i8, i16, i32, and i64. First, look ahead and check if
849  // suffixes are Microsoft integers and not the imaginary unit.
850  uint8_t Bits = 0;
851  size_t ToSkip = 0;
852  switch (s[1]) {
853  case '8': // i8 suffix
854  Bits = 8;
855  ToSkip = 2;
856  break;
857  case '1':
858  if (s[2] == '6') { // i16 suffix
859  Bits = 16;
860  ToSkip = 3;
861  }
862  break;
863  case '3':
864  if (s[2] == '2') { // i32 suffix
865  Bits = 32;
866  ToSkip = 3;
867  }
868  break;
869  case '6':
870  if (s[2] == '4') { // i64 suffix
871  Bits = 64;
872  ToSkip = 3;
873  }
874  break;
875  default:
876  break;
877  }
878  if (Bits) {
879  if (HasSize)
880  break;
881  HasSize = true;
882  MicrosoftInteger = Bits;
883  s += ToSkip;
884  assert(s <= ThisTokEnd && "didn't maximally munch?");
885  break;
886  }
887  }
888  LLVM_FALLTHROUGH;
889  case 'j':
890  case 'J':
891  if (isImaginary) break; // Cannot be repeated.
892  isImaginary = true;
893  continue; // Success.
894  }
895  // If we reached here, there was an error or a ud-suffix.
896  break;
897  }
898 
899  // "i", "if", and "il" are user-defined suffixes in C++1y.
900  if (s != ThisTokEnd || isImaginary) {
901  // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
902  expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
903  if (isValidUDSuffix(LangOpts, UDSuffixBuf)) {
904  if (!isImaginary) {
905  // Any suffix pieces we might have parsed are actually part of the
906  // ud-suffix.
907  isLong = false;
908  isUnsigned = false;
909  isLongLong = false;
910  isSizeT = false;
911  isFloat = false;
912  isFloat16 = false;
913  isHalf = false;
914  isImaginary = false;
915  MicrosoftInteger = 0;
916  saw_fixed_point_suffix = false;
917  isFract = false;
918  isAccum = false;
919  }
920 
921  saw_ud_suffix = true;
922  return;
923  }
924 
925  if (s != ThisTokEnd) {
926  // Report an error if there are any.
928  TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
929  diag::err_invalid_suffix_constant)
930  << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
931  << (isFixedPointConstant ? 2 : isFPConstant);
932  hadError = true;
933  }
934  }
935 
936  if (!hadError && saw_fixed_point_suffix) {
937  assert(isFract || isAccum);
938  }
939 }
940 
941 /// ParseDecimalOrOctalCommon - This method is called for decimal or octal
942 /// numbers. It issues an error for illegal digits, and handles floating point
943 /// parsing. If it detects a floating point number, the radix is set to 10.
944 void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
945  assert((radix == 8 || radix == 10) && "Unexpected radix");
946 
947  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
948  // the code is using an incorrect base.
949  if (isHexDigit(*s) && *s != 'e' && *s != 'E' &&
950  !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
951  Diags.Report(
952  Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts),
953  diag::err_invalid_digit)
954  << StringRef(s, 1) << (radix == 8 ? 1 : 0);
955  hadError = true;
956  return;
957  }
958 
959  if (*s == '.') {
960  checkSeparator(TokLoc, s, CSK_AfterDigits);
961  s++;
962  radix = 10;
963  saw_period = true;
964  checkSeparator(TokLoc, s, CSK_BeforeDigits);
965  s = SkipDigits(s); // Skip suffix.
966  }
967  if (*s == 'e' || *s == 'E') { // exponent
968  checkSeparator(TokLoc, s, CSK_AfterDigits);
969  const char *Exponent = s;
970  s++;
971  radix = 10;
972  saw_exponent = true;
973  if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
974  const char *first_non_digit = SkipDigits(s);
975  if (containsDigits(s, first_non_digit)) {
976  checkSeparator(TokLoc, s, CSK_BeforeDigits);
977  s = first_non_digit;
978  } else {
979  if (!hadError) {
981  TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
982  diag::err_exponent_has_no_digits);
983  hadError = true;
984  }
985  return;
986  }
987  }
988 }
989 
990 /// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
991 /// suffixes as ud-suffixes, because the diagnostic experience is better if we
992 /// treat it as an invalid suffix.
994  StringRef Suffix) {
995  if (!LangOpts.CPlusPlus11 || Suffix.empty())
996  return false;
997 
998  // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
999  if (Suffix[0] == '_')
1000  return true;
1001 
1002  // In C++11, there are no library suffixes.
1003  if (!LangOpts.CPlusPlus14)
1004  return false;
1005 
1006  // In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
1007  // Per tweaked N3660, "il", "i", and "if" are also used in the library.
1008  // In C++2a "d" and "y" are used in the library.
1009  return llvm::StringSwitch<bool>(Suffix)
1010  .Cases("h", "min", "s", true)
1011  .Cases("ms", "us", "ns", true)
1012  .Cases("il", "i", "if", true)
1013  .Cases("d", "y", LangOpts.CPlusPlus20)
1014  .Default(false);
1015 }
1016 
1017 void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
1018  const char *Pos,
1019  CheckSeparatorKind IsAfterDigits) {
1020  if (IsAfterDigits == CSK_AfterDigits) {
1021  if (Pos == ThisTokBegin)
1022  return;
1023  --Pos;
1024  } else if (Pos == ThisTokEnd)
1025  return;
1026 
1027  if (isDigitSeparator(*Pos)) {
1028  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM,
1029  LangOpts),
1030  diag::err_digit_separator_not_between_digits)
1031  << IsAfterDigits;
1032  hadError = true;
1033  }
1034 }
1035 
1036 /// ParseNumberStartingWithZero - This method is called when the first character
1037 /// of the number is found to be a zero. This means it is either an octal
1038 /// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
1039 /// a floating point number (01239.123e4). Eat the prefix, determining the
1040 /// radix etc.
1041 void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
1042  assert(s[0] == '0' && "Invalid method call");
1043  s++;
1044 
1045  int c1 = s[0];
1046 
1047  // Handle a hex number like 0x1234.
1048  if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
1049  s++;
1050  assert(s < ThisTokEnd && "didn't maximally munch?");
1051  radix = 16;
1052  DigitsBegin = s;
1053  s = SkipHexDigits(s);
1054  bool HasSignificandDigits = containsDigits(DigitsBegin, s);
1055  if (s == ThisTokEnd) {
1056  // Done.
1057  } else if (*s == '.') {
1058  s++;
1059  saw_period = true;
1060  const char *floatDigitsBegin = s;
1061  s = SkipHexDigits(s);
1062  if (containsDigits(floatDigitsBegin, s))
1063  HasSignificandDigits = true;
1064  if (HasSignificandDigits)
1065  checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
1066  }
1067 
1068  if (!HasSignificandDigits) {
1069  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1070  LangOpts),
1071  diag::err_hex_constant_requires)
1072  << LangOpts.CPlusPlus << 1;
1073  hadError = true;
1074  return;
1075  }
1076 
1077  // A binary exponent can appear with or with a '.'. If dotted, the
1078  // binary exponent is required.
1079  if (*s == 'p' || *s == 'P') {
1080  checkSeparator(TokLoc, s, CSK_AfterDigits);
1081  const char *Exponent = s;
1082  s++;
1083  saw_exponent = true;
1084  if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
1085  const char *first_non_digit = SkipDigits(s);
1086  if (!containsDigits(s, first_non_digit)) {
1087  if (!hadError) {
1089  TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1090  diag::err_exponent_has_no_digits);
1091  hadError = true;
1092  }
1093  return;
1094  }
1095  checkSeparator(TokLoc, s, CSK_BeforeDigits);
1096  s = first_non_digit;
1097 
1098  if (!LangOpts.HexFloats)
1099  Diags.Report(TokLoc, LangOpts.CPlusPlus
1100  ? diag::ext_hex_literal_invalid
1101  : diag::ext_hex_constant_invalid);
1102  else if (LangOpts.CPlusPlus17)
1103  Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
1104  } else if (saw_period) {
1105  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1106  LangOpts),
1107  diag::err_hex_constant_requires)
1108  << LangOpts.CPlusPlus << 0;
1109  hadError = true;
1110  }
1111  return;
1112  }
1113 
1114  // Handle simple binary numbers 0b01010
1115  if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
1116  // 0b101010 is a C++1y / GCC extension.
1117  Diags.Report(TokLoc, LangOpts.CPlusPlus14
1118  ? diag::warn_cxx11_compat_binary_literal
1119  : LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14
1120  : diag::ext_binary_literal);
1121  ++s;
1122  assert(s < ThisTokEnd && "didn't maximally munch?");
1123  radix = 2;
1124  DigitsBegin = s;
1125  s = SkipBinaryDigits(s);
1126  if (s == ThisTokEnd) {
1127  // Done.
1128  } else if (isHexDigit(*s) &&
1129  !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1130  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1131  LangOpts),
1132  diag::err_invalid_digit)
1133  << StringRef(s, 1) << 2;
1134  hadError = true;
1135  }
1136  // Other suffixes will be diagnosed by the caller.
1137  return;
1138  }
1139 
1140  // For now, the radix is set to 8. If we discover that we have a
1141  // floating point constant, the radix will change to 10. Octal floating
1142  // point constants are not permitted (only decimal and hexadecimal).
1143  radix = 8;
1144  DigitsBegin = s;
1145  s = SkipOctalDigits(s);
1146  if (s == ThisTokEnd)
1147  return; // Done, simple octal number like 01234
1148 
1149  // If we have some other non-octal digit that *is* a decimal digit, see if
1150  // this is part of a floating point number like 094.123 or 09e1.
1151  if (isDigit(*s)) {
1152  const char *EndDecimal = SkipDigits(s);
1153  if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
1154  s = EndDecimal;
1155  radix = 10;
1156  }
1157  }
1158 
1159  ParseDecimalOrOctalCommon(TokLoc);
1160 }
1161 
1162 static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
1163  switch (Radix) {
1164  case 2:
1165  return NumDigits <= 64;
1166  case 8:
1167  return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
1168  case 10:
1169  return NumDigits <= 19; // floor(log10(2^64))
1170  case 16:
1171  return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
1172  default:
1173  llvm_unreachable("impossible Radix");
1174  }
1175 }
1176 
1177 /// GetIntegerValue - Convert this numeric literal value to an APInt that
1178 /// matches Val's input width. If there is an overflow, set Val to the low bits
1179 /// of the result and return true. Otherwise, return false.
1181  // Fast path: Compute a conservative bound on the maximum number of
1182  // bits per digit in this radix. If we can't possibly overflow a
1183  // uint64 based on that bound then do the simple conversion to
1184  // integer. This avoids the expensive overflow checking below, and
1185  // handles the common cases that matter (small decimal integers and
1186  // hex/octal values which don't overflow).
1187  const unsigned NumDigits = SuffixBegin - DigitsBegin;
1188  if (alwaysFitsInto64Bits(radix, NumDigits)) {
1189  uint64_t N = 0;
1190  for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
1191  if (!isDigitSeparator(*Ptr))
1192  N = N * radix + llvm::hexDigitValue(*Ptr);
1193 
1194  // This will truncate the value to Val's input width. Simply check
1195  // for overflow by comparing.
1196  Val = N;
1197  return Val.getZExtValue() != N;
1198  }
1199 
1200  Val = 0;
1201  const char *Ptr = DigitsBegin;
1202 
1203  llvm::APInt RadixVal(Val.getBitWidth(), radix);
1204  llvm::APInt CharVal(Val.getBitWidth(), 0);
1205  llvm::APInt OldVal = Val;
1206 
1207  bool OverflowOccurred = false;
1208  while (Ptr < SuffixBegin) {
1209  if (isDigitSeparator(*Ptr)) {
1210  ++Ptr;
1211  continue;
1212  }
1213 
1214  unsigned C = llvm::hexDigitValue(*Ptr++);
1215 
1216  // If this letter is out of bound for this radix, reject it.
1217  assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1218 
1219  CharVal = C;
1220 
1221  // Add the digit to the value in the appropriate radix. If adding in digits
1222  // made the value smaller, then this overflowed.
1223  OldVal = Val;
1224 
1225  // Multiply by radix, did overflow occur on the multiply?
1226  Val *= RadixVal;
1227  OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
1228 
1229  // Add value, did overflow occur on the value?
1230  // (a + b) ult b <=> overflow
1231  Val += CharVal;
1232  OverflowOccurred |= Val.ult(CharVal);
1233  }
1234  return OverflowOccurred;
1235 }
1236 
1237 llvm::APFloat::opStatus
1238 NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
1239  using llvm::APFloat;
1240 
1241  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
1242 
1243  llvm::SmallString<16> Buffer;
1244  StringRef Str(ThisTokBegin, n);
1245  if (Str.contains('\'')) {
1246  Buffer.reserve(n);
1247  std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
1248  &isDigitSeparator);
1249  Str = Buffer;
1250  }
1251 
1252  auto StatusOrErr =
1253  Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
1254  assert(StatusOrErr && "Invalid floating point representation");
1255  return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
1256  : APFloat::opInvalidOp;
1257 }
1258 
/// Return true if \p c introduces an exponent: 'e'/'E' or 'p'/'P'.
static inline bool IsExponentPart(char c) {
  switch (c) {
  case 'p':
  case 'P':
  case 'e':
  case 'E':
    return true;
  default:
    return false;
  }
}
1262 
1264  assert(radix == 16 || radix == 10);
1265 
1266  // Find how many digits are needed to store the whole literal.
1267  unsigned NumDigits = SuffixBegin - DigitsBegin;
1268  if (saw_period) --NumDigits;
1269 
1270  // Initial scan of the exponent if it exists
1271  bool ExpOverflowOccurred = false;
1272  bool NegativeExponent = false;
1273  const char *ExponentBegin;
1274  uint64_t Exponent = 0;
1275  int64_t BaseShift = 0;
1276  if (saw_exponent) {
1277  const char *Ptr = DigitsBegin;
1278 
1279  while (!IsExponentPart(*Ptr)) ++Ptr;
1280  ExponentBegin = Ptr;
1281  ++Ptr;
1282  NegativeExponent = *Ptr == '-';
1283  if (NegativeExponent) ++Ptr;
1284 
1285  unsigned NumExpDigits = SuffixBegin - Ptr;
1286  if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
1287  llvm::StringRef ExpStr(Ptr, NumExpDigits);
1288  llvm::APInt ExpInt(/*numBits=*/64, ExpStr, /*radix=*/10);
1289  Exponent = ExpInt.getZExtValue();
1290  } else {
1291  ExpOverflowOccurred = true;
1292  }
1293 
1294  if (NegativeExponent) BaseShift -= Exponent;
1295  else BaseShift += Exponent;
1296  }
1297 
1298  // Number of bits needed for decimal literal is
1299  // ceil(NumDigits * log2(10)) Integral part
1300  // + Scale Fractional part
1301  // + ceil(Exponent * log2(10)) Exponent
1302  // --------------------------------------------------
1303  // ceil((NumDigits + Exponent) * log2(10)) + Scale
1304  //
1305  // But for simplicity in handling integers, we can round up log2(10) to 4,
1306  // making:
1307  // 4 * (NumDigits + Exponent) + Scale
1308  //
1309  // Number of digits needed for hexadecimal literal is
1310  // 4 * NumDigits Integral part
1311  // + Scale Fractional part
1312  // + Exponent Exponent
1313  // --------------------------------------------------
1314  // (4 * NumDigits) + Scale + Exponent
1315  uint64_t NumBitsNeeded;
1316  if (radix == 10)
1317  NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
1318  else
1319  NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
1320 
1321  if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
1322  ExpOverflowOccurred = true;
1323  llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /*isSigned=*/false);
1324 
1325  bool FoundDecimal = false;
1326 
1327  int64_t FractBaseShift = 0;
1328  const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
1329  for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
1330  if (*Ptr == '.') {
1331  FoundDecimal = true;
1332  continue;
1333  }
1334 
1335  // Normal reading of an integer
1336  unsigned C = llvm::hexDigitValue(*Ptr);
1337  assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1338 
1339  Val *= radix;
1340  Val += C;
1341 
1342  if (FoundDecimal)
1343  // Keep track of how much we will need to adjust this value by from the
1344  // number of digits past the radix point.
1345  --FractBaseShift;
1346  }
1347 
1348  // For a radix of 16, we will be multiplying by 2 instead of 16.
1349  if (radix == 16) FractBaseShift *= 4;
1350  BaseShift += FractBaseShift;
1351 
1352  Val <<= Scale;
1353 
1354  uint64_t Base = (radix == 16) ? 2 : 10;
1355  if (BaseShift > 0) {
1356  for (int64_t i = 0; i < BaseShift; ++i) {
1357  Val *= Base;
1358  }
1359  } else if (BaseShift < 0) {
1360  for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
1361  Val = Val.udiv(Base);
1362  }
1363 
1364  bool IntOverflowOccurred = false;
1365  auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
1366  if (Val.getBitWidth() > StoreVal.getBitWidth()) {
1367  IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));
1368  StoreVal = Val.trunc(StoreVal.getBitWidth());
1369  } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
1370  IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
1371  StoreVal = Val.zext(StoreVal.getBitWidth());
1372  } else {
1373  StoreVal = Val;
1374  }
1375 
1376  return IntOverflowOccurred || ExpOverflowOccurred;
1377 }
1378 
1379 /// \verbatim
1380 /// user-defined-character-literal: [C++11 lex.ext]
1381 /// character-literal ud-suffix
1382 /// ud-suffix:
1383 /// identifier
1384 /// character-literal: [C++11 lex.ccon]
1385 /// ' c-char-sequence '
1386 /// u' c-char-sequence '
1387 /// U' c-char-sequence '
1388 /// L' c-char-sequence '
1389 /// u8' c-char-sequence ' [C++1z lex.ccon]
1390 /// c-char-sequence:
1391 /// c-char
1392 /// c-char-sequence c-char
1393 /// c-char:
1394 /// any member of the source character set except the single-quote ',
1395 /// backslash \, or new-line character
1396 /// escape-sequence
1397 /// universal-character-name
1398 /// escape-sequence:
1399 /// simple-escape-sequence
1400 /// octal-escape-sequence
1401 /// hexadecimal-escape-sequence
1402 /// simple-escape-sequence:
1403 /// one of \' \" \? \\ \a \b \f \n \r \t \v
1404 /// octal-escape-sequence:
1405 /// \ octal-digit
1406 /// \ octal-digit octal-digit
1407 /// \ octal-digit octal-digit octal-digit
1408 /// hexadecimal-escape-sequence:
1409 /// \x hexadecimal-digit
1410 /// hexadecimal-escape-sequence hexadecimal-digit
1411 /// universal-character-name: [C++11 lex.charset]
1412 /// \u hex-quad
1413 /// \U hex-quad hex-quad
1414 /// hex-quad:
1415 /// hex-digit hex-digit hex-digit hex-digit
1416 /// \endverbatim
1417 ///
1418 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
1419  SourceLocation Loc, Preprocessor &PP,
1420  tok::TokenKind kind) {
1421  // At this point we know that the character matches the regex "(L|u|U)?'.*'".
1422  HadError = false;
1423 
1424  Kind = kind;
1425 
1426  const char *TokBegin = begin;
1427 
1428  // Skip over wide character determinant.
1429  if (Kind != tok::char_constant)
1430  ++begin;
1431  if (Kind == tok::utf8_char_constant)
1432  ++begin;
1433 
1434  // Skip over the entry quote.
1435  assert(begin[0] == '\'' && "Invalid token lexed");
1436  ++begin;
1437 
1438  // Remove an optional ud-suffix.
1439  if (end[-1] != '\'') {
1440  const char *UDSuffixEnd = end;
1441  do {
1442  --end;
1443  } while (end[-1] != '\'');
1444  // FIXME: Don't bother with this if !tok.hasUCN().
1445  expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
1446  UDSuffixOffset = end - TokBegin;
1447  }
1448 
1449  // Trim the ending quote.
1450  assert(end != begin && "Invalid token lexed");
1451  --end;
1452 
1453  // FIXME: The "Value" is an uint64_t so we can handle char literals of
1454  // up to 64-bits.
1455  // FIXME: This extensively assumes that 'char' is 8-bits.
1456  assert(PP.getTargetInfo().getCharWidth() == 8 &&
1457  "Assumes char is 8 bits");
1458  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
1459  (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
1460  "Assumes sizeof(int) on target is <= 64 and a multiple of char");
1461  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
1462  "Assumes sizeof(wchar) on target is <= 64");
1463 
1464  SmallVector<uint32_t, 4> codepoint_buffer;
1465  codepoint_buffer.resize(end - begin);
1466  uint32_t *buffer_begin = &codepoint_buffer.front();
1467  uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
1468 
1469  // Unicode escapes representing characters that cannot be correctly
1470  // represented in a single code unit are disallowed in character literals
1471  // by this implementation.
1472  uint32_t largest_character_for_kind;
1473  if (tok::wide_char_constant == Kind) {
1474  largest_character_for_kind =
1475  0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
1476  } else if (tok::utf8_char_constant == Kind) {
1477  largest_character_for_kind = 0x7F;
1478  } else if (tok::utf16_char_constant == Kind) {
1479  largest_character_for_kind = 0xFFFF;
1480  } else if (tok::utf32_char_constant == Kind) {
1481  largest_character_for_kind = 0x10FFFF;
1482  } else {
1483  largest_character_for_kind = 0x7Fu;
1484  }
1485 
1486  while (begin != end) {
1487  // Is this a span of non-escape characters?
1488  if (begin[0] != '\\') {
1489  char const *start = begin;
1490  do {
1491  ++begin;
1492  } while (begin != end && *begin != '\\');
1493 
1494  char const *tmp_in_start = start;
1495  uint32_t *tmp_out_start = buffer_begin;
1496  llvm::ConversionResult res =
1497  llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
1498  reinterpret_cast<llvm::UTF8 const *>(begin),
1499  &buffer_begin, buffer_end, llvm::strictConversion);
1500  if (res != llvm::conversionOK) {
1501  // If we see bad encoding for unprefixed character literals, warn and
1502  // simply copy the byte values, for compatibility with gcc and
1503  // older versions of clang.
1504  bool NoErrorOnBadEncoding = isAscii();
1505  unsigned Msg = diag::err_bad_character_encoding;
1506  if (NoErrorOnBadEncoding)
1507  Msg = diag::warn_bad_character_encoding;
1508  PP.Diag(Loc, Msg);
1509  if (NoErrorOnBadEncoding) {
1510  start = tmp_in_start;
1511  buffer_begin = tmp_out_start;
1512  for (; start != begin; ++start, ++buffer_begin)
1513  *buffer_begin = static_cast<uint8_t>(*start);
1514  } else {
1515  HadError = true;
1516  }
1517  } else {
1518  for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
1519  if (*tmp_out_start > largest_character_for_kind) {
1520  HadError = true;
1521  PP.Diag(Loc, diag::err_character_too_large);
1522  }
1523  }
1524  }
1525 
1526  continue;
1527  }
1528  // Is this a Universal Character Name escape?
1529  if (begin[1] == 'u' || begin[1] == 'U') {
1530  unsigned short UcnLen = 0;
1531  if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
1532  FullSourceLoc(Loc, PP.getSourceManager()),
1533  &PP.getDiagnostics(), PP.getLangOpts(), true)) {
1534  HadError = true;
1535  } else if (*buffer_begin > largest_character_for_kind) {
1536  HadError = true;
1537  PP.Diag(Loc, diag::err_character_too_large);
1538  }
1539 
1540  ++buffer_begin;
1541  continue;
1542  }
1543  unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
1544  uint64_t result =
1545  ProcessCharEscape(TokBegin, begin, end, HadError,
1546  FullSourceLoc(Loc,PP.getSourceManager()),
1547  CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
1548  *buffer_begin++ = result;
1549  }
1550 
1551  unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
1552 
1553  if (NumCharsSoFar > 1) {
1554  if (isAscii() && NumCharsSoFar == 4)
1555  PP.Diag(Loc, diag::warn_four_char_character_literal);
1556  else if (isAscii())
1557  PP.Diag(Loc, diag::warn_multichar_character_literal);
1558  else {
1559  PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1560  HadError = true;
1561  }
1562  IsMultiChar = true;
1563  } else {
1564  IsMultiChar = false;
1565  }
1566 
1567  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
1568 
1569  // Narrow character literals act as though their value is concatenated
1570  // in this implementation, but warn on overflow.
1571  bool multi_char_too_long = false;
1572  if (isAscii() && isMultiChar()) {
1573  LitVal = 0;
1574  for (size_t i = 0; i < NumCharsSoFar; ++i) {
1575  // check for enough leading zeros to shift into
1576  multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
1577  LitVal <<= 8;
1578  LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1579  }
1580  } else if (NumCharsSoFar > 0) {
1581  // otherwise just take the last character
1582  LitVal = buffer_begin[-1];
1583  }
1584 
1585  if (!HadError && multi_char_too_long) {
1586  PP.Diag(Loc, diag::warn_char_constant_too_large);
1587  }
1588 
1589  // Transfer the value from APInt to uint64_t
1590  Value = LitVal.getZExtValue();
1591 
1592  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
1593  // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
1594  // character constants are not sign extended in the this implementation:
1595  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
1596  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
1597  PP.getLangOpts().CharIsSigned)
1598  Value = (signed char)Value;
1599 }
1600 
1601 /// \verbatim
1602 /// string-literal: [C++0x lex.string]
1603 /// encoding-prefix " [s-char-sequence] "
1604 /// encoding-prefix R raw-string
1605 /// encoding-prefix:
1606 /// u8
1607 /// u
1608 /// U
1609 /// L
1610 /// s-char-sequence:
1611 /// s-char
1612 /// s-char-sequence s-char
1613 /// s-char:
1614 /// any member of the source character set except the double-quote ",
1615 /// backslash \, or new-line character
1616 /// escape-sequence
1617 /// universal-character-name
1618 /// raw-string:
1619 /// " d-char-sequence ( r-char-sequence ) d-char-sequence "
1620 /// r-char-sequence:
1621 /// r-char
1622 /// r-char-sequence r-char
1623 /// r-char:
1624 /// any member of the source character set, except a right parenthesis )
1625 /// followed by the initial d-char-sequence (which may be empty)
1626 /// followed by a double quote ".
1627 /// d-char-sequence:
1628 /// d-char
1629 /// d-char-sequence d-char
1630 /// d-char:
1631 /// any member of the basic source character set except:
1632 /// space, the left parenthesis (, the right parenthesis ),
1633 /// the backslash \, and the control characters representing horizontal
1634 /// tab, vertical tab, form feed, and newline.
1635 /// escape-sequence: [C++0x lex.ccon]
1636 /// simple-escape-sequence
1637 /// octal-escape-sequence
1638 /// hexadecimal-escape-sequence
1639 /// simple-escape-sequence:
1640 /// one of \' \" \? \\ \a \b \f \n \r \t \v
1641 /// octal-escape-sequence:
1642 /// \ octal-digit
1643 /// \ octal-digit octal-digit
1644 /// \ octal-digit octal-digit octal-digit
1645 /// hexadecimal-escape-sequence:
1646 /// \x hexadecimal-digit
1647 /// hexadecimal-escape-sequence hexadecimal-digit
1648 /// universal-character-name:
1649 /// \u hex-quad
1650 /// \U hex-quad hex-quad
1651 /// hex-quad:
1652 /// hex-digit hex-digit hex-digit hex-digit
1653 /// \endverbatim
1654 ///
1657  Preprocessor &PP)
1658  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1659  Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
1660  MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
1661  ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
1662  init(StringToks);
1663 }
1664 
1665 void StringLiteralParser::init(ArrayRef<Token> StringToks){
1666  // The literal token may have come from an invalid source location (e.g. due
1667  // to a PCH error), in which case the token length will be 0.
1668  if (StringToks.empty() || StringToks[0].getLength() < 2)
1669  return DiagnoseLexingError(SourceLocation());
1670 
1671  // Scan all of the string portions, remember the max individual token length,
1672  // computing a bound on the concatenated string length, and see whether any
1673  // piece is a wide-string. If any of the string portions is a wide-string
1674  // literal, the result is a wide-string literal [C99 6.4.5p4].
1675  assert(!StringToks.empty() && "expected at least one token");
1676  MaxTokenLength = StringToks[0].getLength();
1677  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
1678  SizeBound = StringToks[0].getLength()-2; // -2 for "".
1679  Kind = StringToks[0].getKind();
1680 
1681  hadError = false;
1682 
1683  // Implement Translation Phase #6: concatenation of string literals
1684  /// (C99 5.1.1.2p1). The common case is only one string fragment.
1685  for (unsigned i = 1; i != StringToks.size(); ++i) {
1686  if (StringToks[i].getLength() < 2)
1687  return DiagnoseLexingError(StringToks[i].getLocation());
1688 
1689  // The string could be shorter than this if it needs cleaning, but this is a
1690  // reasonable bound, which is all we need.
1691  assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
1692  SizeBound += StringToks[i].getLength()-2; // -2 for "".
1693 
1694  // Remember maximum string piece length.
1695  if (StringToks[i].getLength() > MaxTokenLength)
1696  MaxTokenLength = StringToks[i].getLength();
1697 
1698  // Remember if we see any wide or utf-8/16/32 strings.
1699  // Also check for illegal concatenations.
1700  if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
1701  if (isAscii()) {
1702  Kind = StringToks[i].getKind();
1703  } else {
1704  if (Diags)
1705  Diags->Report(StringToks[i].getLocation(),
1706  diag::err_unsupported_string_concat);
1707  hadError = true;
1708  }
1709  }
1710  }
1711 
1712  // Include space for the null terminator.
1713  ++SizeBound;
1714 
1715  // TODO: K&R warning: "traditional C rejects string constant concatenation"
1716 
1717  // Get the width in bytes of char/wchar_t/char16_t/char32_t
1718  CharByteWidth = getCharWidth(Kind, Target);
1719  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
1720  CharByteWidth /= 8;
1721 
1722  // The output buffer size needs to be large enough to hold wide characters.
1723  // This is a worst-case assumption which basically corresponds to L"" "long".
1724  SizeBound *= CharByteWidth;
1725 
1726  // Size the temporary buffer to hold the result string data.
1727  ResultBuf.resize(SizeBound);
1728 
1729  // Likewise, but for each string piece.
1730  SmallString<512> TokenBuf;
1731  TokenBuf.resize(MaxTokenLength);
1732 
1733  // Loop over all the strings, getting their spelling, and expanding them to
1734  // wide strings as appropriate.
1735  ResultPtr = &ResultBuf[0]; // Next byte to fill in.
1736 
1737  Pascal = false;
1738 
1739  SourceLocation UDSuffixTokLoc;
1740 
1741  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
1742  const char *ThisTokBuf = &TokenBuf[0];
1743  // Get the spelling of the token, which eliminates trigraphs, etc. We know
1744  // that ThisTokBuf points to a buffer that is big enough for the whole token
1745  // and 'spelled' tokens can only shrink.
1746  bool StringInvalid = false;
1747  unsigned ThisTokLen =
1748  Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1749  &StringInvalid);
1750  if (StringInvalid)
1751  return DiagnoseLexingError(StringToks[i].getLocation());
1752 
1753  const char *ThisTokBegin = ThisTokBuf;
1754  const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
1755 
1756  // Remove an optional ud-suffix.
1757  if (ThisTokEnd[-1] != '"') {
1758  const char *UDSuffixEnd = ThisTokEnd;
1759  do {
1760  --ThisTokEnd;
1761  } while (ThisTokEnd[-1] != '"');
1762 
1763  StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
1764 
1765  if (UDSuffixBuf.empty()) {
1766  if (StringToks[i].hasUCN())
1767  expandUCNs(UDSuffixBuf, UDSuffix);
1768  else
1769  UDSuffixBuf.assign(UDSuffix);
1770  UDSuffixToken = i;
1771  UDSuffixOffset = ThisTokEnd - ThisTokBuf;
1772  UDSuffixTokLoc = StringToks[i].getLocation();
1773  } else {
1774  SmallString<32> ExpandedUDSuffix;
1775  if (StringToks[i].hasUCN()) {
1776  expandUCNs(ExpandedUDSuffix, UDSuffix);
1777  UDSuffix = ExpandedUDSuffix;
1778  }
1779 
1780  // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
1781  // result of a concatenation involving at least one user-defined-string-
1782  // literal, all the participating user-defined-string-literals shall
1783  // have the same ud-suffix.
1784  if (UDSuffixBuf != UDSuffix) {
1785  if (Diags) {
1786  SourceLocation TokLoc = StringToks[i].getLocation();
1787  Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
1788  << UDSuffixBuf << UDSuffix
1789  << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
1790  << SourceRange(TokLoc, TokLoc);
1791  }
1792  hadError = true;
1793  }
1794  }
1795  }
1796 
1797  // Strip the end quote.
1798  --ThisTokEnd;
1799 
1800  // TODO: Input character set mapping support.
1801 
1802  // Skip marker for wide or unicode strings.
1803  if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
1804  ++ThisTokBuf;
1805  // Skip 8 of u8 marker for utf8 strings.
1806  if (ThisTokBuf[0] == '8')
1807  ++ThisTokBuf;
1808  }
1809 
1810  // Check for raw string
1811  if (ThisTokBuf[0] == 'R') {
1812  if (ThisTokBuf[1] != '"') {
1813  // The file may have come from PCH and then changed after loading the
1814  // PCH; Fail gracefully.
1815  return DiagnoseLexingError(StringToks[i].getLocation());
1816  }
1817  ThisTokBuf += 2; // skip R"
1818 
1819  // C++11 [lex.string]p2: A `d-char-sequence` shall consist of at most 16
1820  // characters.
1821  constexpr unsigned MaxRawStrDelimLen = 16;
1822 
1823  const char *Prefix = ThisTokBuf;
1824  while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&
1825  ThisTokBuf[0] != '(')
1826  ++ThisTokBuf;
1827  if (ThisTokBuf[0] != '(')
1828  return DiagnoseLexingError(StringToks[i].getLocation());
1829  ++ThisTokBuf; // skip '('
1830 
1831  // Remove same number of characters from the end
1832  ThisTokEnd -= ThisTokBuf - Prefix;
1833  if (ThisTokEnd < ThisTokBuf)
1834  return DiagnoseLexingError(StringToks[i].getLocation());
1835 
1836  // C++14 [lex.string]p4: A source-file new-line in a raw string literal
1837  // results in a new-line in the resulting execution string-literal.
1838  StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
1839  while (!RemainingTokenSpan.empty()) {
1840  // Split the string literal on \r\n boundaries.
1841  size_t CRLFPos = RemainingTokenSpan.find("\r\n");
1842  StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
1843  StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
1844 
1845  // Copy everything before the \r\n sequence into the string literal.
1846  if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
1847  hadError = true;
1848 
1849  // Point into the \n inside the \r\n sequence and operate on the
1850  // remaining portion of the literal.
1851  RemainingTokenSpan = AfterCRLF.substr(1);
1852  }
1853  } else {
1854  if (ThisTokBuf[0] != '"') {
1855  // The file may have come from PCH and then changed after loading the
1856  // PCH; Fail gracefully.
1857  return DiagnoseLexingError(StringToks[i].getLocation());
1858  }
1859  ++ThisTokBuf; // skip "
1860 
1861  // Check if this is a pascal string
1862  if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
1863  ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
1864 
1865  // If the \p sequence is found in the first token, we have a pascal string
1866  // Otherwise, if we already have a pascal string, ignore the first \p
1867  if (i == 0) {
1868  ++ThisTokBuf;
1869  Pascal = true;
1870  } else if (Pascal)
1871  ThisTokBuf += 2;
1872  }
1873 
1874  while (ThisTokBuf != ThisTokEnd) {
1875  // Is this a span of non-escape characters?
1876  if (ThisTokBuf[0] != '\\') {
1877  const char *InStart = ThisTokBuf;
1878  do {
1879  ++ThisTokBuf;
1880  } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
1881 
1882  // Copy the character span over.
1883  if (CopyStringFragment(StringToks[i], ThisTokBegin,
1884  StringRef(InStart, ThisTokBuf - InStart)))
1885  hadError = true;
1886  continue;
1887  }
1888  // Is this a Universal Character Name escape?
1889  if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
1890  EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
1891  ResultPtr, hadError,
1892  FullSourceLoc(StringToks[i].getLocation(), SM),
1893  CharByteWidth, Diags, Features);
1894  continue;
1895  }
1896  // Otherwise, this is a non-UCN escape character. Process it.
1897  unsigned ResultChar =
1898  ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
1899  FullSourceLoc(StringToks[i].getLocation(), SM),
1900  CharByteWidth*8, Diags, Features);
1901 
1902  if (CharByteWidth == 4) {
1903  // FIXME: Make the type of the result buffer correct instead of
1904  // using reinterpret_cast.
1905  llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
1906  *ResultWidePtr = ResultChar;
1907  ResultPtr += 4;
1908  } else if (CharByteWidth == 2) {
1909  // FIXME: Make the type of the result buffer correct instead of
1910  // using reinterpret_cast.
1911  llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
1912  *ResultWidePtr = ResultChar & 0xFFFF;
1913  ResultPtr += 2;
1914  } else {
1915  assert(CharByteWidth == 1 && "Unexpected char width");
1916  *ResultPtr++ = ResultChar & 0xFF;
1917  }
1918  }
1919  }
1920  }
1921 
1922  if (Pascal) {
1923  if (CharByteWidth == 4) {
1924  // FIXME: Make the type of the result buffer correct instead of
1925  // using reinterpret_cast.
1926  llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
1927  ResultWidePtr[0] = GetNumStringChars() - 1;
1928  } else if (CharByteWidth == 2) {
1929  // FIXME: Make the type of the result buffer correct instead of
1930  // using reinterpret_cast.
1931  llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
1932  ResultWidePtr[0] = GetNumStringChars() - 1;
1933  } else {
1934  assert(CharByteWidth == 1 && "Unexpected char width");
1935  ResultBuf[0] = GetNumStringChars() - 1;
1936  }
1937 
1938  // Verify that pascal strings aren't too large.
1939  if (GetStringLength() > 256) {
1940  if (Diags)
1941  Diags->Report(StringToks.front().getLocation(),
1942  diag::err_pascal_string_too_long)
1943  << SourceRange(StringToks.front().getLocation(),
1944  StringToks.back().getLocation());
1945  hadError = true;
1946  return;
1947  }
1948  } else if (Diags) {
1949  // Complain if this string literal has too many characters.
1950  unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
1951 
1952  if (GetNumStringChars() > MaxChars)
1953  Diags->Report(StringToks.front().getLocation(),
1954  diag::ext_string_too_long)
1955  << GetNumStringChars() << MaxChars
1956  << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
1957  << SourceRange(StringToks.front().getLocation(),
1958  StringToks.back().getLocation());
1959  }
1960 }
1961 
1962 static const char *resyncUTF8(const char *Err, const char *End) {
1963  if (Err == End)
1964  return End;
1965  End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
1966  while (++Err != End && (*Err & 0xC0) == 0x80)
1967  ;
1968  return Err;
1969 }
1970 
1971 /// This function copies from Fragment, which is a sequence of bytes
1972 /// within Tok's contents (which begin at TokBegin) into ResultPtr.
1973 /// Performs widening for multi-byte characters.
1974 bool StringLiteralParser::CopyStringFragment(const Token &Tok,
1975  const char *TokBegin,
1976  StringRef Fragment) {
1977  const llvm::UTF8 *ErrorPtrTmp;
1978  if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
1979  return false;
1980 
1981  // If we see bad encoding for unprefixed string literals, warn and
1982  // simply copy the byte values, for compatibility with gcc and older
1983  // versions of clang.
1984  bool NoErrorOnBadEncoding = isAscii();
1985  if (NoErrorOnBadEncoding) {
1986  memcpy(ResultPtr, Fragment.data(), Fragment.size());
1987  ResultPtr += Fragment.size();
1988  }
1989 
1990  if (Diags) {
1991  const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
1992 
1993  FullSourceLoc SourceLoc(Tok.getLocation(), SM);
1994  const DiagnosticBuilder &Builder =
1995  Diag(Diags, Features, SourceLoc, TokBegin,
1996  ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
1997  NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
1998  : diag::err_bad_string_encoding);
1999 
2000  const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2001  StringRef NextFragment(NextStart, Fragment.end()-NextStart);
2002 
2003  // Decode into a dummy buffer.
2004  SmallString<512> Dummy;
2005  Dummy.reserve(Fragment.size() * CharByteWidth);
2006  char *Ptr = Dummy.data();
2007 
2008  while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
2009  const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2010  NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2011  Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
2012  ErrorPtr, NextStart);
2013  NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
2014  }
2015  }
2016  return !NoErrorOnBadEncoding;
2017 }
2018 
2019 void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
2020  hadError = true;
2021  if (Diags)
2022  Diags->Report(Loc, diag::err_lexing_string);
2023 }
2024 
2025 /// getOffsetOfStringByte - This function returns the offset of the
2026 /// specified byte of the string data represented by Token. This handles
2027 /// advancing over escape sequences in the string.
2029  unsigned ByteNo) const {
2030  // Get the spelling of the token.
2031  SmallString<32> SpellingBuffer;
2032  SpellingBuffer.resize(Tok.getLength());
2033 
2034  bool StringInvalid = false;
2035  const char *SpellingPtr = &SpellingBuffer[0];
2036  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
2037  &StringInvalid);
2038  if (StringInvalid)
2039  return 0;
2040 
2041  const char *SpellingStart = SpellingPtr;
2042  const char *SpellingEnd = SpellingPtr+TokLen;
2043 
2044  // Handle UTF-8 strings just like narrow strings.
2045  if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
2046  SpellingPtr += 2;
2047 
2048  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
2049  SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
2050 
2051  // For raw string literals, this is easy.
2052  if (SpellingPtr[0] == 'R') {
2053  assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
2054  // Skip 'R"'.
2055  SpellingPtr += 2;
2056  while (*SpellingPtr != '(') {
2057  ++SpellingPtr;
2058  assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
2059  }
2060  // Skip '('.
2061  ++SpellingPtr;
2062  return SpellingPtr - SpellingStart + ByteNo;
2063  }
2064 
2065  // Skip over the leading quote
2066  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
2067  ++SpellingPtr;
2068 
2069  // Skip over bytes until we find the offset we're looking for.
2070  while (ByteNo) {
2071  assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
2072 
2073  // Step over non-escapes simply.
2074  if (*SpellingPtr != '\\') {
2075  ++SpellingPtr;
2076  --ByteNo;
2077  continue;
2078  }
2079 
2080  // Otherwise, this is an escape character. Advance over it.
2081  bool HadError = false;
2082  if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
2083  const char *EscapePtr = SpellingPtr;
2084  unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
2085  1, Features, HadError);
2086  if (Len > ByteNo) {
2087  // ByteNo is somewhere within the escape sequence.
2088  SpellingPtr = EscapePtr;
2089  break;
2090  }
2091  ByteNo -= Len;
2092  } else {
2093  ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
2094  FullSourceLoc(Tok.getLocation(), SM),
2095  CharByteWidth*8, Diags, Features);
2096  --ByteNo;
2097  }
2098  assert(!HadError && "This method isn't valid on erroneous strings");
2099  }
2100 
2101  return SpellingPtr-SpellingStart;
2102 }
2103 
2104 /// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
2105 /// suffixes as ud-suffixes, because the diagnostic experience is better if we
2106 /// treat it as an invalid suffix.
2108  StringRef Suffix) {
2109  return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) ||
2110  Suffix == "sv";
2111 }
clang::NumericLiteralParser::isLong
bool isLong
Definition: LiteralSupport.h:64
max
__DEVICE__ int max(int __a, int __b)
Definition: __clang_cuda_math.h:196
clang::FullSourceLoc::getManager
const SourceManager & getManager() const
Definition: SourceLocation.h:384
clang::isPrintable
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:144
clang::CharLiteralParser::CharLiteralParser
CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, tok::TokenKind kind)
Definition: LiteralSupport.cpp:1418
clang::interp::APInt
llvm::APInt APInt
Definition: Integral.h:27
clang::DiagnosticBuilder
A little helper class used to produce diagnostics.
Definition: Diagnostic.h:1264
MakeCharSourceRange
static CharSourceRange MakeCharSourceRange(const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd)
Definition: LiteralSupport.cpp:59
clang::FullSourceLoc
A SourceLocation and its associated SourceManager.
Definition: SourceLocation.h:370
clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:212
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
Diag
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Definition: LiteralSupport.cpp:78
clang::Lexer::getSpelling
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition: Lexer.cpp:400
llvm::SmallVector< uint32_t, 4 >
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:88
clang::NumericLiteralParser::isImaginary
bool isImaginary
Definition: LiteralSupport.h:69
TargetInfo.h
clang::MultiVersionKind::Target
@ Target
clang::DiagnosticsEngine
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:191
getCharWidth
static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target)
Definition: LiteralSupport.cpp:39
clang::TargetInfo
Exposes information about the current target.
Definition: TargetInfo.h:189
memcpy
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Definition: __clang_cuda_device_functions.h:1549
clang::TargetInfo::getCharWidth
unsigned getCharWidth() const
Definition: TargetInfo.h:439
clang::StringLiteralParser::hadError
bool hadError
Definition: LiteralSupport.h:239
clang::NumericLiteralParser::isValidUDSuffix
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Definition: LiteralSupport.cpp:993
MeasureUCNEscape
static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, unsigned CharByteWidth, const LangOptions &Features, bool &HadError)
MeasureUCNEscape - Determine the number of bytes within the resulting string which this UCN will occu...
Definition: LiteralSupport.cpp:507
clang::NumericLiteralParser::NumericLiteralParser
NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, const SourceManager &SM, const LangOptions &LangOpts, const TargetInfo &Target, DiagnosticsEngine &Diags)
integer-constant: [C99 6.4.4.1] decimal-constant integer-suffix octal-constant integer-suffix hexadec...
Definition: LiteralSupport.cpp:687
clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
LiteralSupport.h
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::StringLiteralParser::isValidUDSuffix
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Definition: LiteralSupport.cpp:2107
appendCodePoint
static void appendCodePoint(unsigned Codepoint, llvm::SmallVectorImpl< char > &Str)
Definition: LiteralSupport.cpp:319
clang::isPreprocessingNumberBody
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.
Definition: CharInfo.h:152
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:626
clang::Preprocessor::getLangOpts
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:960
Preprocessor.h
alwaysFitsInto64Bits
static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits)
Definition: LiteralSupport.cpp:1162
clang::StringLiteralParser::isAscii
bool isAscii() const
Definition: LiteralSupport.h:258
clang::NumericLiteralParser::isSizeT
bool isSizeT
Definition: LiteralSupport.h:66
U
clang::StringLiteralParser::getOffsetOfStringByte
unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const
getOffsetOfStringByte - This function returns the offset of the specified byte of the string data rep...
Definition: LiteralSupport.cpp:2028
clang::CharSourceRange::getCharRange
static CharSourceRange getCharRange(SourceRange R)
Definition: SourceLocation.h:267
min
__DEVICE__ int min(int __a, int __b)
Definition: __clang_cuda_math.h:197
clang::CharLiteralParser::isAscii
bool isAscii() const
Definition: LiteralSupport.h:193
ProcessCharEscape
static unsigned ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
ProcessCharEscape - Parse a standard C escape sequence, which can occur in either a character or a st...
Definition: LiteralSupport.cpp:91
LangOptions.h
clang::NumericLiteralParser::isFloat
bool isFloat
Definition: LiteralSupport.h:68
LexDiagnostic.h
clang::Preprocessor::Diag
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Definition: Preprocessor.h:1744
clang::TargetInfo::getWCharWidth
unsigned getWCharWidth() const
getWCharWidth/Align - Return the size of 'wchar_t' for this target, in bits.
Definition: TargetInfo.h:633
clang::Preprocessor::getTargetInfo
const TargetInfo & getTargetInfo() const
Definition: Preprocessor.h:961
clang::NumericLiteralParser::isLongLong
bool isLongLong
Definition: LiteralSupport.h:65
llvm::SmallString< 16 >
clang::StringLiteralParser::Pascal
bool Pascal
Definition: LiteralSupport.h:240
clang::StringLiteralParser::GetStringLength
unsigned GetStringLength() const
Definition: LiteralSupport.h:245
Base
clang::StringLiteralParser::StringLiteralParser
StringLiteralParser(ArrayRef< Token > StringToks, Preprocessor &PP)
Definition: LiteralSupport.cpp:1656
ProcessUCNEscape
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)
ProcessUCNEscape - Read the Universal Character Name, check constraints and return the UTF32.
Definition: LiteralSupport.cpp:377
clang::NumericLiteralParser::isUnsigned
bool isUnsigned
Definition: LiteralSupport.h:63
EncodeUCNEscape
static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, FullSourceLoc Loc, unsigned CharByteWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
EncodeUCNEscape - Read the Universal Character Name, check constraints and convert the UTF32 to UTF8 ...
Definition: LiteralSupport.cpp:542
clang::Token::getLength
unsigned getLength() const
Definition: Token.h:129
clang::NumericLiteralParser::GetFixedPointValue
bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale)
GetFixedPointValue - Convert this numeric literal value into a scaled integer that represents this va...
Definition: LiteralSupport.cpp:1263
clang::NumericLiteralParser::isAccum
bool isAccum
Definition: LiteralSupport.h:75
IsExponentPart
static bool IsExponentPart(char c)
Definition: LiteralSupport.cpp:1259
clang::isHexDigit
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
Definition: CharInfo.h:128
clang::Preprocessor::getSourceManager
SourceManager & getSourceManager() const
Definition: Preprocessor.h:964
clang::NumericLiteralParser::hadError
bool hadError
Definition: LiteralSupport.h:62
resyncUTF8
static const char * resyncUTF8(const char *Err, const char *End)
Definition: LiteralSupport.cpp:1962
clang::expandUCNs
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
Definition: LiteralSupport.cpp:329
SourceLocation.h
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
CharInfo.h
false
#define false
Definition: stdbool.h:17
clang::NumericLiteralParser::MicrosoftInteger
uint8_t MicrosoftInteger
Definition: LiteralSupport.h:72
clang::NumericLiteralParser::isFloat16
bool isFloat16
Definition: LiteralSupport.h:70
clang::NumericLiteralParser::isFract
bool isFract
Definition: LiteralSupport.h:74
Begin
SourceLocation Begin
Definition: USRLocFinder.cpp:165
llvm::ArrayRef
Definition: LLVM.h:34
Lexer.h
Value
Value
Definition: UninitializedValues.cpp:102
clang::CharSourceRange
Represents a character-granular source range.
Definition: SourceLocation.h:255
clang::NumericLiteralParser::GetIntegerValue
bool GetIntegerValue(llvm::APInt &Val)
GetIntegerValue - Convert this numeric literal value to an APInt that matches Val's input width.
Definition: LiteralSupport.cpp:1180
clang::NumericLiteralParser::isIntegerLiteral
bool isIntegerLiteral() const
Definition: LiteralSupport.h:81
clang::NumericLiteralParser::isFloatingLiteral
bool isFloatingLiteral() const
Definition: LiteralSupport.h:84
clang::CharLiteralParser::isWide
bool isWide() const
Definition: LiteralSupport.h:194
clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:126
clang::LangOptions
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:58
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::NumericLiteralParser::GetFloatValue
llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result)
GetFloatValue - Convert this numeric literal to a floating value, using the specified APFloat fltSema...
Definition: LiteralSupport.cpp:1238
clang
Definition: CalledOnceCheck.h:17
clang::NumericLiteralParser::isFloat128
bool isFloat128
Definition: LiteralSupport.h:71
clang::NumericLiteralParser::isHalf
bool isHalf
Definition: LiteralSupport.h:67
clang::CharLiteralParser::isMultiChar
bool isMultiChar() const
Definition: LiteralSupport.h:198
clang::Preprocessor::getDiagnostics
DiagnosticsEngine & getDiagnostics() const
Definition: Preprocessor.h:957
clang::isDigit
LLVM_READONLY bool isDigit(unsigned char c)
Return true if this character is an ASCII digit: [0-9].
Definition: CharInfo.h:98
c
__device__ __2f16 float c
Definition: __clang_hip_libdevice_declares.h:315
clang::Lexer::AdvanceToTokenCharacter
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition: Lexer.h:367
llvm::SmallVectorImpl< char >
clang::Preprocessor
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:130
SM
#define SM(sm)
Definition: Cuda.cpp:78
Token.h
clang::NumericLiteralParser::isFixedPointLiteral
bool isFixedPointLiteral() const
Definition: LiteralSupport.h:77
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:62
clang::TargetInfo::getIntWidth
unsigned getIntWidth() const
getIntWidth/Align - Return the size of 'signed int' and 'unsigned int' for this target,...
Definition: TargetInfo.h:452
clang::DiagnosticsEngine::Report
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1523
clang::StringLiteralParser::GetNumStringChars
unsigned GetNumStringChars() const
Definition: LiteralSupport.h:247