clang  15.0.0git
LiteralSupport.cpp
Go to the documentation of this file.
1 //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the NumericLiteralParser, CharLiteralParser, and
10 // StringLiteralParser interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "clang/Basic/CharInfo.h"
18 #include "clang/Basic/TargetInfo.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/Support/ConvertUTF.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstdint>
34 #include <cstring>
35 #include <string>
36 
37 using namespace clang;
38 
39 static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
40  switch (kind) {
41  default: llvm_unreachable("Unknown token type!");
42  case tok::char_constant:
43  case tok::string_literal:
44  case tok::utf8_char_constant:
45  case tok::utf8_string_literal:
46  return Target.getCharWidth();
47  case tok::wide_char_constant:
48  case tok::wide_string_literal:
49  return Target.getWCharWidth();
50  case tok::utf16_char_constant:
51  case tok::utf16_string_literal:
52  return Target.getChar16Width();
53  case tok::utf32_char_constant:
54  case tok::utf32_string_literal:
55  return Target.getChar32Width();
56  }
57 }
58 
60  FullSourceLoc TokLoc,
61  const char *TokBegin,
62  const char *TokRangeBegin,
63  const char *TokRangeEnd) {
65  Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
66  TokLoc.getManager(), Features);
68  Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
69  TokLoc.getManager(), Features);
71 }
72 
73 /// Produce a diagnostic highlighting some portion of a literal.
74 ///
75 /// Emits the diagnostic \p DiagID, highlighting the range of characters from
76 /// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
77 /// a substring of a spelling buffer for the token beginning at \p TokBegin.
79  const LangOptions &Features, FullSourceLoc TokLoc,
80  const char *TokBegin, const char *TokRangeBegin,
81  const char *TokRangeEnd, unsigned DiagID) {
83  Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
84  TokLoc.getManager(), Features);
85  return Diags->Report(Begin, DiagID) <<
86  MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
87 }
88 
89 /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
90 /// either a character or a string literal.
91 static unsigned ProcessCharEscape(const char *ThisTokBegin,
92  const char *&ThisTokBuf,
93  const char *ThisTokEnd, bool &HadError,
94  FullSourceLoc Loc, unsigned CharWidth,
95  DiagnosticsEngine *Diags,
96  const LangOptions &Features) {
97  const char *EscapeBegin = ThisTokBuf;
98  bool Delimited = false;
99  bool EndDelimiterFound = false;
100 
101  // Skip the '\' char.
102  ++ThisTokBuf;
103 
104  // We know that this character can't be off the end of the buffer, because
105  // that would have been \", which would not have been the end of string.
106  unsigned ResultChar = *ThisTokBuf++;
107  switch (ResultChar) {
108  // These map to themselves.
109  case '\\': case '\'': case '"': case '?': break;
110 
111  // These have fixed mappings.
112  case 'a':
113  // TODO: K&R: the meaning of '\\a' is different in traditional C
114  ResultChar = 7;
115  break;
116  case 'b':
117  ResultChar = 8;
118  break;
119  case 'e':
120  if (Diags)
121  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
122  diag::ext_nonstandard_escape) << "e";
123  ResultChar = 27;
124  break;
125  case 'E':
126  if (Diags)
127  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
128  diag::ext_nonstandard_escape) << "E";
129  ResultChar = 27;
130  break;
131  case 'f':
132  ResultChar = 12;
133  break;
134  case 'n':
135  ResultChar = 10;
136  break;
137  case 'r':
138  ResultChar = 13;
139  break;
140  case 't':
141  ResultChar = 9;
142  break;
143  case 'v':
144  ResultChar = 11;
145  break;
146  case 'x': { // Hex escape.
147  ResultChar = 0;
148  if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
149  Delimited = true;
150  ThisTokBuf++;
151  if (*ThisTokBuf == '}') {
152  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
153  diag::err_delimited_escape_empty);
154  return ResultChar;
155  }
156  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
157  if (Diags)
158  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
159  diag::err_hex_escape_no_digits) << "x";
160  return ResultChar;
161  }
162 
163  // Hex escapes are a maximal series of hex digits.
164  bool Overflow = false;
165  for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
166  if (Delimited && *ThisTokBuf == '}') {
167  ThisTokBuf++;
168  EndDelimiterFound = true;
169  break;
170  }
171  int CharVal = llvm::hexDigitValue(*ThisTokBuf);
172  if (CharVal == -1) {
173  // Non delimited hex escape sequences stop at the first non-hex digit.
174  if (!Delimited)
175  break;
176  HadError = true;
177  if (Diags)
178  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
179  diag::err_delimited_escape_invalid)
180  << StringRef(ThisTokBuf, 1);
181  continue;
182  }
183  // About to shift out a digit?
184  if (ResultChar & 0xF0000000)
185  Overflow = true;
186  ResultChar <<= 4;
187  ResultChar |= CharVal;
188  }
189  // See if any bits will be truncated when evaluated as a character.
190  if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
191  Overflow = true;
192  ResultChar &= ~0U >> (32-CharWidth);
193  }
194 
195  // Check for overflow.
196  if (!HadError && Overflow) { // Too many digits to fit in
197  HadError = true;
198  if (Diags)
199  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
200  diag::err_escape_too_large)
201  << 0;
202  }
203  break;
204  }
205  case '0': case '1': case '2': case '3':
206  case '4': case '5': case '6': case '7': {
207  // Octal escapes.
208  --ThisTokBuf;
209  ResultChar = 0;
210 
211  // Octal escapes are a series of octal digits with maximum length 3.
212  // "\0123" is a two digit sequence equal to "\012" "3".
213  unsigned NumDigits = 0;
214  do {
215  ResultChar <<= 3;
216  ResultChar |= *ThisTokBuf++ - '0';
217  ++NumDigits;
218  } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
219  ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
220 
221  // Check for overflow. Reject '\777', but not L'\777'.
222  if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
223  if (Diags)
224  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
225  diag::err_escape_too_large) << 1;
226  ResultChar &= ~0U >> (32-CharWidth);
227  }
228  break;
229  }
230  case 'o': {
231  bool Overflow = false;
232  if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
233  HadError = true;
234  if (Diags)
235  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
236  diag::err_delimited_escape_missing_brace);
237 
238  break;
239  }
240  ResultChar = 0;
241  Delimited = true;
242  ++ThisTokBuf;
243  if (*ThisTokBuf == '}') {
244  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
245  diag::err_delimited_escape_empty);
246  return ResultChar;
247  }
248 
249  while (ThisTokBuf != ThisTokEnd) {
250  if (*ThisTokBuf == '}') {
251  EndDelimiterFound = true;
252  ThisTokBuf++;
253  break;
254  }
255  if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
256  HadError = true;
257  if (Diags)
258  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
259  diag::err_delimited_escape_invalid)
260  << StringRef(ThisTokBuf, 1);
261  ThisTokBuf++;
262  continue;
263  }
264  if (ResultChar & 0x020000000)
265  Overflow = true;
266 
267  ResultChar <<= 3;
268  ResultChar |= *ThisTokBuf++ - '0';
269  }
270  // Check for overflow. Reject '\777', but not L'\777'.
271  if (!HadError &&
272  (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
273  HadError = true;
274  if (Diags)
275  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
276  diag::err_escape_too_large)
277  << 1;
278  ResultChar &= ~0U >> (32 - CharWidth);
279  }
280  break;
281  }
282  // Otherwise, these are not valid escapes.
283  case '(': case '{': case '[': case '%':
284  // GCC accepts these as extensions. We warn about them as such though.
285  if (Diags)
286  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
287  diag::ext_nonstandard_escape)
288  << std::string(1, ResultChar);
289  break;
290  default:
291  if (!Diags)
292  break;
293 
294  if (isPrintable(ResultChar))
295  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
296  diag::ext_unknown_escape)
297  << std::string(1, ResultChar);
298  else
299  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
300  diag::ext_unknown_escape)
301  << "x" + llvm::utohexstr(ResultChar);
302  break;
303  }
304 
305  if (Delimited && Diags) {
306  if (!EndDelimiterFound)
307  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
308  diag::err_expected)
309  << tok::r_brace;
310  else if (!HadError) {
311  Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
312  diag::ext_delimited_escape_sequence);
313  }
314  }
315 
316  return ResultChar;
317 }
318 
319 static void appendCodePoint(unsigned Codepoint,
321  char ResultBuf[4];
322  char *ResultPtr = ResultBuf;
323  if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr))
324  Str.append(ResultBuf, ResultPtr);
325 }
326 
327 void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
328  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
329  if (*I != '\\') {
330  Buf.push_back(*I);
331  continue;
332  }
333 
334  ++I;
335  char Kind = *I;
336  ++I;
337 
338  assert(Kind == 'u' || Kind == 'U');
339  uint32_t CodePoint = 0;
340 
341  if (Kind == 'u' && *I == '{') {
342  for (++I; *I != '}'; ++I) {
343  unsigned Value = llvm::hexDigitValue(*I);
344  assert(Value != -1U);
345  CodePoint <<= 4;
346  CodePoint += Value;
347  }
348  appendCodePoint(CodePoint, Buf);
349  continue;
350  }
351 
352  unsigned NumHexDigits;
353  if (Kind == 'u')
354  NumHexDigits = 4;
355  else
356  NumHexDigits = 8;
357 
358  assert(I + NumHexDigits <= E);
359 
360  for (; NumHexDigits != 0; ++I, --NumHexDigits) {
361  unsigned Value = llvm::hexDigitValue(*I);
362  assert(Value != -1U);
363 
364  CodePoint <<= 4;
365  CodePoint += Value;
366  }
367 
368  appendCodePoint(CodePoint, Buf);
369  --I;
370  }
371 }
372 
373 /// ProcessUCNEscape - Read the Universal Character Name, check constraints and
374 /// return the UTF32.
375 static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
376  const char *ThisTokEnd,
377  uint32_t &UcnVal, unsigned short &UcnLen,
378  FullSourceLoc Loc, DiagnosticsEngine *Diags,
379  const LangOptions &Features,
380  bool in_char_string_literal = false) {
381  const char *UcnBegin = ThisTokBuf;
382 
383  // Skip the '\u' char's.
384  ThisTokBuf += 2;
385 
386  bool Delimited = false;
387  bool EndDelimiterFound = false;
388  bool HasError = false;
389 
390  if (UcnBegin[1] == 'u' && in_char_string_literal &&
391  ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
392  Delimited = true;
393  ThisTokBuf++;
394  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
395  if (Diags)
396  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
397  diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
398  return false;
399  }
400  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
401 
402  bool Overflow = false;
403  unsigned short Count = 0;
404  for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
405  ++ThisTokBuf) {
406  if (Delimited && *ThisTokBuf == '}') {
407  ++ThisTokBuf;
408  EndDelimiterFound = true;
409  break;
410  }
411  int CharVal = llvm::hexDigitValue(*ThisTokBuf);
412  if (CharVal == -1) {
413  HasError = true;
414  if (!Delimited)
415  break;
416  if (Diags) {
417  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
418  diag::err_delimited_escape_invalid)
419  << StringRef(ThisTokBuf, 1);
420  }
421  Count++;
422  continue;
423  }
424  if (UcnVal & 0xF0000000) {
425  Overflow = true;
426  continue;
427  }
428  UcnVal <<= 4;
429  UcnVal |= CharVal;
430  Count++;
431  }
432 
433  if (Overflow) {
434  if (Diags)
435  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
436  diag::err_escape_too_large)
437  << 0;
438  return false;
439  }
440 
441  if (Delimited && !EndDelimiterFound) {
442  if (Diags) {
443  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
444  diag::err_expected)
445  << tok::r_brace;
446  }
447  return false;
448  }
449 
450  // If we didn't consume the proper number of digits, there is a problem.
451  if (Count == 0 || (!Delimited && Count != UcnLen)) {
452  if (Diags)
453  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
454  Delimited ? diag::err_delimited_escape_empty
455  : diag::err_ucn_escape_incomplete);
456  return false;
457  }
458 
459  if (HasError)
460  return false;
461 
462  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
463  if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
464  UcnVal > 0x10FFFF) { // maximum legal UTF32 value
465  if (Diags)
466  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
467  diag::err_ucn_escape_invalid);
468  return false;
469  }
470 
471  // C++11 allows UCNs that refer to control characters and basic source
472  // characters inside character and string literals
473  if (UcnVal < 0xa0 &&
474  (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
475  bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
476  if (Diags) {
477  char BasicSCSChar = UcnVal;
478  if (UcnVal >= 0x20 && UcnVal < 0x7f)
479  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
480  IsError ? diag::err_ucn_escape_basic_scs :
481  diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
482  << StringRef(&BasicSCSChar, 1);
483  else
484  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
485  IsError ? diag::err_ucn_control_character :
486  diag::warn_cxx98_compat_literal_ucn_control_character);
487  }
488  if (IsError)
489  return false;
490  }
491 
492  if (!Features.CPlusPlus && !Features.C99 && Diags)
493  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
494  diag::warn_ucn_not_valid_in_c89_literal);
495 
496  if (Delimited && Diags)
497  Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
498  diag::ext_delimited_escape_sequence);
499 
500  return true;
501 }
502 
503 /// MeasureUCNEscape - Determine the number of bytes within the resulting string
504 /// which this UCN will occupy.
505 static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
506  const char *ThisTokEnd, unsigned CharByteWidth,
507  const LangOptions &Features, bool &HadError) {
508  // UTF-32: 4 bytes per escape.
509  if (CharByteWidth == 4)
510  return 4;
511 
512  uint32_t UcnVal = 0;
513  unsigned short UcnLen = 0;
514  FullSourceLoc Loc;
515 
516  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
517  UcnLen, Loc, nullptr, Features, true)) {
518  HadError = true;
519  return 0;
520  }
521 
522  // UTF-16: 2 bytes for BMP, 4 bytes otherwise.
523  if (CharByteWidth == 2)
524  return UcnVal <= 0xFFFF ? 2 : 4;
525 
526  // UTF-8.
527  if (UcnVal < 0x80)
528  return 1;
529  if (UcnVal < 0x800)
530  return 2;
531  if (UcnVal < 0x10000)
532  return 3;
533  return 4;
534 }
535 
536 /// EncodeUCNEscape - Read the Universal Character Name, check constraints and
537 /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
538 /// StringLiteralParser. When we decide to implement UCN's for identifiers,
539 /// we will likely rework our support for UCN's.
540 static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
541  const char *ThisTokEnd,
542  char *&ResultBuf, bool &HadError,
543  FullSourceLoc Loc, unsigned CharByteWidth,
544  DiagnosticsEngine *Diags,
545  const LangOptions &Features) {
546  typedef uint32_t UTF32;
547  UTF32 UcnVal = 0;
548  unsigned short UcnLen = 0;
549  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
550  Loc, Diags, Features, true)) {
551  HadError = true;
552  return;
553  }
554 
555  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
556  "only character widths of 1, 2, or 4 bytes supported");
557 
558  (void)UcnLen;
559  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
560 
561  if (CharByteWidth == 4) {
562  // FIXME: Make the type of the result buffer correct instead of
563  // using reinterpret_cast.
564  llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
565  *ResultPtr = UcnVal;
566  ResultBuf += 4;
567  return;
568  }
569 
570  if (CharByteWidth == 2) {
571  // FIXME: Make the type of the result buffer correct instead of
572  // using reinterpret_cast.
573  llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
574 
575  if (UcnVal <= (UTF32)0xFFFF) {
576  *ResultPtr = UcnVal;
577  ResultBuf += 2;
578  return;
579  }
580 
581  // Convert to UTF16.
582  UcnVal -= 0x10000;
583  *ResultPtr = 0xD800 + (UcnVal >> 10);
584  *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
585  ResultBuf += 4;
586  return;
587  }
588 
589  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
590 
591  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
592  // The conversion below was inspired by:
593  // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
594  // First, we determine how many bytes the result will require.
595  typedef uint8_t UTF8;
596 
597  unsigned short bytesToWrite = 0;
598  if (UcnVal < (UTF32)0x80)
599  bytesToWrite = 1;
600  else if (UcnVal < (UTF32)0x800)
601  bytesToWrite = 2;
602  else if (UcnVal < (UTF32)0x10000)
603  bytesToWrite = 3;
604  else
605  bytesToWrite = 4;
606 
607  const unsigned byteMask = 0xBF;
608  const unsigned byteMark = 0x80;
609 
610  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
611  // into the first byte, depending on how many bytes follow.
612  static const UTF8 firstByteMark[5] = {
613  0x00, 0x00, 0xC0, 0xE0, 0xF0
614  };
615  // Finally, we write the bytes into ResultBuf.
616  ResultBuf += bytesToWrite;
617  switch (bytesToWrite) { // note: everything falls through.
618  case 4:
619  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
620  LLVM_FALLTHROUGH;
621  case 3:
622  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
623  LLVM_FALLTHROUGH;
624  case 2:
625  *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
626  LLVM_FALLTHROUGH;
627  case 1:
628  *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
629  }
630  // Update the buffer.
631  ResultBuf += bytesToWrite;
632 }
633 
634 /// integer-constant: [C99 6.4.4.1]
635 /// decimal-constant integer-suffix
636 /// octal-constant integer-suffix
637 /// hexadecimal-constant integer-suffix
638 /// binary-literal integer-suffix [GNU, C++1y]
639 /// user-defined-integer-literal: [C++11 lex.ext]
640 /// decimal-literal ud-suffix
641 /// octal-literal ud-suffix
642 /// hexadecimal-literal ud-suffix
643 /// binary-literal ud-suffix [GNU, C++1y]
644 /// decimal-constant:
645 /// nonzero-digit
646 /// decimal-constant digit
647 /// octal-constant:
648 /// 0
649 /// octal-constant octal-digit
650 /// hexadecimal-constant:
651 /// hexadecimal-prefix hexadecimal-digit
652 /// hexadecimal-constant hexadecimal-digit
653 /// hexadecimal-prefix: one of
654 /// 0x 0X
655 /// binary-literal:
656 /// 0b binary-digit
657 /// 0B binary-digit
658 /// binary-literal binary-digit
659 /// integer-suffix:
660 /// unsigned-suffix [long-suffix]
661 /// unsigned-suffix [long-long-suffix]
662 /// long-suffix [unsigned-suffix]
663 /// long-long-suffix [unsigned-suffix]
664 /// nonzero-digit:
665 /// 1 2 3 4 5 6 7 8 9
666 /// octal-digit:
667 /// 0 1 2 3 4 5 6 7
668 /// hexadecimal-digit:
669 /// 0 1 2 3 4 5 6 7 8 9
670 /// a b c d e f
671 /// A B C D E F
672 /// binary-digit:
673 /// 0
674 /// 1
675 /// unsigned-suffix: one of
676 /// u U
677 /// long-suffix: one of
678 /// l L
679 /// long-long-suffix: one of
680 /// ll LL
681 ///
682 /// floating-constant: [C99 6.4.4.2]
683 /// TODO: add rules...
684 ///
686  SourceLocation TokLoc,
687  const SourceManager &SM,
688  const LangOptions &LangOpts,
689  const TargetInfo &Target,
690  DiagnosticsEngine &Diags)
691  : SM(SM), LangOpts(LangOpts), Diags(Diags),
692  ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
693 
694  s = DigitsBegin = ThisTokBegin;
695  saw_exponent = false;
696  saw_period = false;
697  saw_ud_suffix = false;
698  saw_fixed_point_suffix = false;
699  isLong = false;
700  isUnsigned = false;
701  isLongLong = false;
702  isSizeT = false;
703  isHalf = false;
704  isFloat = false;
705  isImaginary = false;
706  isFloat16 = false;
707  isFloat128 = false;
708  MicrosoftInteger = 0;
709  isFract = false;
710  isAccum = false;
711  hadError = false;
712  isBitInt = false;
713 
714  // This routine assumes that the range begin/end matches the regex for integer
715  // and FP constants (specifically, the 'pp-number' regex), and assumes that
716  // the byte at "*end" is both valid and not part of the regex. Because of
717  // this, it doesn't have to check for 'overscan' in various places.
718  if (isPreprocessingNumberBody(*ThisTokEnd)) {
719  Diags.Report(TokLoc, diag::err_lexing_numeric);
720  hadError = true;
721  return;
722  }
723 
724  if (*s == '0') { // parse radix
725  ParseNumberStartingWithZero(TokLoc);
726  if (hadError)
727  return;
728  } else { // the first digit is non-zero
729  radix = 10;
730  s = SkipDigits(s);
731  if (s == ThisTokEnd) {
732  // Done.
733  } else {
734  ParseDecimalOrOctalCommon(TokLoc);
735  if (hadError)
736  return;
737  }
738  }
739 
740  SuffixBegin = s;
741  checkSeparator(TokLoc, s, CSK_AfterDigits);
742 
743  // Initial scan to lookahead for fixed point suffix.
744  if (LangOpts.FixedPoint) {
745  for (const char *c = s; c != ThisTokEnd; ++c) {
746  if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
747  saw_fixed_point_suffix = true;
748  break;
749  }
750  }
751  }
752 
753  // Parse the suffix. At this point we can classify whether we have an FP or
754  // integer constant.
755  bool isFixedPointConstant = isFixedPointLiteral();
756  bool isFPConstant = isFloatingLiteral();
757  bool HasSize = false;
758 
759  // Loop over all of the characters of the suffix. If we see something bad,
760  // we break out of the loop.
761  for (; s != ThisTokEnd; ++s) {
762  switch (*s) {
763  case 'R':
764  case 'r':
765  if (!LangOpts.FixedPoint)
766  break;
767  if (isFract || isAccum) break;
768  if (!(saw_period || saw_exponent)) break;
769  isFract = true;
770  continue;
771  case 'K':
772  case 'k':
773  if (!LangOpts.FixedPoint)
774  break;
775  if (isFract || isAccum) break;
776  if (!(saw_period || saw_exponent)) break;
777  isAccum = true;
778  continue;
779  case 'h': // FP Suffix for "half".
780  case 'H':
781  // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
782  if (!(LangOpts.Half || LangOpts.FixedPoint))
783  break;
784  if (isIntegerLiteral()) break; // Error for integer constant.
785  if (HasSize)
786  break;
787  HasSize = true;
788  isHalf = true;
789  continue; // Success.
790  case 'f': // FP Suffix for "float"
791  case 'F':
792  if (!isFPConstant) break; // Error for integer constant.
793  if (HasSize)
794  break;
795  HasSize = true;
796 
797  // CUDA host and device may have different _Float16 support, therefore
798  // allows f16 literals to avoid false alarm.
799  // ToDo: more precise check for CUDA.
800  if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd &&
801  s[1] == '1' && s[2] == '6') {
802  s += 2; // success, eat up 2 characters.
803  isFloat16 = true;
804  continue;
805  }
806 
807  isFloat = true;
808  continue; // Success.
809  case 'q': // FP Suffix for "__float128"
810  case 'Q':
811  if (!isFPConstant) break; // Error for integer constant.
812  if (HasSize)
813  break;
814  HasSize = true;
815  isFloat128 = true;
816  continue; // Success.
817  case 'u':
818  case 'U':
819  if (isFPConstant) break; // Error for floating constant.
820  if (isUnsigned) break; // Cannot be repeated.
821  isUnsigned = true;
822  continue; // Success.
823  case 'l':
824  case 'L':
825  if (HasSize)
826  break;
827  HasSize = true;
828 
829  // Check for long long. The L's need to be adjacent and the same case.
830  if (s[1] == s[0]) {
831  assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
832  if (isFPConstant) break; // long long invalid for floats.
833  isLongLong = true;
834  ++s; // Eat both of them.
835  } else {
836  isLong = true;
837  }
838  continue; // Success.
839  case 'z':
840  case 'Z':
841  if (isFPConstant)
842  break; // Invalid for floats.
843  if (HasSize)
844  break;
845  HasSize = true;
846  isSizeT = true;
847  continue;
848  case 'i':
849  case 'I':
850  if (LangOpts.MicrosoftExt && !isFPConstant) {
851  // Allow i8, i16, i32, and i64. First, look ahead and check if
852  // suffixes are Microsoft integers and not the imaginary unit.
853  uint8_t Bits = 0;
854  size_t ToSkip = 0;
855  switch (s[1]) {
856  case '8': // i8 suffix
857  Bits = 8;
858  ToSkip = 2;
859  break;
860  case '1':
861  if (s[2] == '6') { // i16 suffix
862  Bits = 16;
863  ToSkip = 3;
864  }
865  break;
866  case '3':
867  if (s[2] == '2') { // i32 suffix
868  Bits = 32;
869  ToSkip = 3;
870  }
871  break;
872  case '6':
873  if (s[2] == '4') { // i64 suffix
874  Bits = 64;
875  ToSkip = 3;
876  }
877  break;
878  default:
879  break;
880  }
881  if (Bits) {
882  if (HasSize)
883  break;
884  HasSize = true;
885  MicrosoftInteger = Bits;
886  s += ToSkip;
887  assert(s <= ThisTokEnd && "didn't maximally munch?");
888  break;
889  }
890  }
891  LLVM_FALLTHROUGH;
892  case 'j':
893  case 'J':
894  if (isImaginary) break; // Cannot be repeated.
895  isImaginary = true;
896  continue; // Success.
897  case 'w':
898  case 'W':
899  if (isFPConstant)
900  break; // Invalid for floats.
901  if (HasSize)
902  break; // Invalid if we already have a size for the literal.
903 
904  // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
905  // explicitly do not support the suffix in C++ as an extension because a
906  // library-based UDL that resolves to a library type may be more
907  // appropriate there.
908  if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
909  (s[0] == 'W' && s[1] == 'B'))) {
910  isBitInt = true;
911  HasSize = true;
912  ++s; // Skip both characters (2nd char skipped on continue).
913  continue; // Success.
914  }
915  }
916  // If we reached here, there was an error or a ud-suffix.
917  break;
918  }
919 
920  // "i", "if", and "il" are user-defined suffixes in C++1y.
921  if (s != ThisTokEnd || isImaginary) {
922  // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
923  expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
924  if (isValidUDSuffix(LangOpts, UDSuffixBuf)) {
925  if (!isImaginary) {
926  // Any suffix pieces we might have parsed are actually part of the
927  // ud-suffix.
928  isLong = false;
929  isUnsigned = false;
930  isLongLong = false;
931  isSizeT = false;
932  isFloat = false;
933  isFloat16 = false;
934  isHalf = false;
935  isImaginary = false;
936  isBitInt = false;
937  MicrosoftInteger = 0;
938  saw_fixed_point_suffix = false;
939  isFract = false;
940  isAccum = false;
941  }
942 
943  saw_ud_suffix = true;
944  return;
945  }
946 
947  if (s != ThisTokEnd) {
948  // Report an error if there are any.
950  TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
951  diag::err_invalid_suffix_constant)
952  << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
953  << (isFixedPointConstant ? 2 : isFPConstant);
954  hadError = true;
955  }
956  }
957 
958  if (!hadError && saw_fixed_point_suffix) {
959  assert(isFract || isAccum);
960  }
961 }
962 
963 /// ParseDecimalOrOctalCommon - This method is called for decimal or octal
964 /// numbers. It issues an error for illegal digits, and handles floating point
965 /// parsing. If it detects a floating point number, the radix is set to 10.
966 void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
967  assert((radix == 8 || radix == 10) && "Unexpected radix");
968 
969  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
970  // the code is using an incorrect base.
971  if (isHexDigit(*s) && *s != 'e' && *s != 'E' &&
972  !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
973  Diags.Report(
974  Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts),
975  diag::err_invalid_digit)
976  << StringRef(s, 1) << (radix == 8 ? 1 : 0);
977  hadError = true;
978  return;
979  }
980 
981  if (*s == '.') {
982  checkSeparator(TokLoc, s, CSK_AfterDigits);
983  s++;
984  radix = 10;
985  saw_period = true;
986  checkSeparator(TokLoc, s, CSK_BeforeDigits);
987  s = SkipDigits(s); // Skip suffix.
988  }
989  if (*s == 'e' || *s == 'E') { // exponent
990  checkSeparator(TokLoc, s, CSK_AfterDigits);
991  const char *Exponent = s;
992  s++;
993  radix = 10;
994  saw_exponent = true;
995  if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
996  const char *first_non_digit = SkipDigits(s);
997  if (containsDigits(s, first_non_digit)) {
998  checkSeparator(TokLoc, s, CSK_BeforeDigits);
999  s = first_non_digit;
1000  } else {
1001  if (!hadError) {
1003  TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1004  diag::err_exponent_has_no_digits);
1005  hadError = true;
1006  }
1007  return;
1008  }
1009  }
1010 }
1011 
1012 /// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
1013 /// suffixes as ud-suffixes, because the diagnostic experience is better if we
1014 /// treat it as an invalid suffix.
1016  StringRef Suffix) {
1017  if (!LangOpts.CPlusPlus11 || Suffix.empty())
1018  return false;
1019 
1020  // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
1021  if (Suffix[0] == '_')
1022  return true;
1023 
1024  // In C++11, there are no library suffixes.
1025  if (!LangOpts.CPlusPlus14)
1026  return false;
1027 
1028  // In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
1029  // Per tweaked N3660, "il", "i", and "if" are also used in the library.
1030  // In C++2a "d" and "y" are used in the library.
1031  return llvm::StringSwitch<bool>(Suffix)
1032  .Cases("h", "min", "s", true)
1033  .Cases("ms", "us", "ns", true)
1034  .Cases("il", "i", "if", true)
1035  .Cases("d", "y", LangOpts.CPlusPlus20)
1036  .Default(false);
1037 }
1038 
1039 void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
1040  const char *Pos,
1041  CheckSeparatorKind IsAfterDigits) {
1042  if (IsAfterDigits == CSK_AfterDigits) {
1043  if (Pos == ThisTokBegin)
1044  return;
1045  --Pos;
1046  } else if (Pos == ThisTokEnd)
1047  return;
1048 
1049  if (isDigitSeparator(*Pos)) {
1050  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM,
1051  LangOpts),
1052  diag::err_digit_separator_not_between_digits)
1053  << IsAfterDigits;
1054  hadError = true;
1055  }
1056 }
1057 
1058 /// ParseNumberStartingWithZero - This method is called when the first character
1059 /// of the number is found to be a zero. This means it is either an octal
1060 /// number (like '04'), a hex number ('0x123a'), a binary number ('0b1010'), or
1061 /// a floating point number (01239.123e4). Eat the prefix, determining the
1062 /// radix etc.
///
/// Sets `radix` (and, when digits follow the prefix, `DigitsBegin`) and
/// advances `s` past the part of the literal handled here. Malformed input is
/// reported through `Diags` and recorded in `hadError`.
///
/// \param TokLoc Location of the start of the token, used to position
/// diagnostics.
1063 void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
1064  assert(s[0] == '0' && "Invalid method call");
1065  s++;
1066 
      // c1 is the character immediately following the leading '0'.
1067  int c1 = s[0];
1068 
1069  // Handle a hex number like 0x1234.
      // The 'x' only counts as a hex prefix when a hex digit or a '.' follows
      // it; otherwise control falls through to the binary/octal handling below.
1070  if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
1071  s++;
1072  assert(s < ThisTokEnd && "didn't maximally munch?");
1073  radix = 16;
1074  DigitsBegin = s;
1075  s = SkipHexDigits(s);
      // Tracks whether any digit was seen on either side of the '.'.
1076  bool HasSignificandDigits = containsDigits(DigitsBegin, s);
1077  if (s == ThisTokEnd) {
1078  // Done.
1079  } else if (*s == '.') {
1080  s++;
1081  saw_period = true;
1082  const char *floatDigitsBegin = s;
1083  s = SkipHexDigits(s);
1084  if (containsDigits(floatDigitsBegin, s))
1085  HasSignificandDigits = true;
1086  if (HasSignificandDigits)
1087  checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
1088  }
1089 
      // "0x." with no digits at all is an error; stop parsing here.
1090  if (!HasSignificandDigits) {
1091  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1092  LangOpts),
1093  diag::err_hex_constant_requires)
1094  << LangOpts.CPlusPlus << 1;
1095  hadError = true;
1096  return;
1097  }
1098 
1099  // A binary exponent can appear with or without a '.'. If dotted, the
1100  // binary exponent is required.
1101  if (*s == 'p' || *s == 'P') {
1102  checkSeparator(TokLoc, s, CSK_AfterDigits);
1103  const char *Exponent = s;
1104  s++;
1105  saw_exponent = true;
1106  if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
1107  const char *first_non_digit = SkipDigits(s);
      // The exponent marker must be followed by at least one decimal digit.
      // The error is only reported if no earlier error was recorded.
1108  if (!containsDigits(s, first_non_digit)) {
1109  if (!hadError) {
1111  TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1112  diag::err_exponent_has_no_digits);
1113  hadError = true;
1114  }
1115  return;
1116  }
1117  checkSeparator(TokLoc, s, CSK_BeforeDigits);
1118  s = first_non_digit;
1119 
      // Hex floats are diagnosed as an extension when LangOpts.HexFloats is
      // off, and get a compatibility warning when compiling as C++17.
1120  if (!LangOpts.HexFloats)
1121  Diags.Report(TokLoc, LangOpts.CPlusPlus
1122  ? diag::ext_hex_literal_invalid
1123  : diag::ext_hex_constant_invalid);
1124  else if (LangOpts.CPlusPlus17)
1125  Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
1126  } else if (saw_period) {
      // A '.' was seen but no 'p' exponent follows: report the missing
      // exponent.
1127  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1128  LangOpts),
1129  diag::err_hex_constant_requires)
1130  << LangOpts.CPlusPlus << 0;
1131  hadError = true;
1132  }
1133  return;
1134  }
1135 
1136  // Handle simple binary numbers 0b01010
      // As with hex, the 'b' only counts as a prefix when a binary digit
      // follows it.
1137  if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
1138  // 0b101010 is a C++14 / GCC extension.
1139  Diags.Report(TokLoc, LangOpts.CPlusPlus14
1140  ? diag::warn_cxx11_compat_binary_literal
1141  : LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14
1142  : diag::ext_binary_literal);
1143  ++s;
1144  assert(s < ThisTokEnd && "didn't maximally munch?");
1145  radix = 2;
1146  DigitsBegin = s;
1147  s = SkipBinaryDigits(s);
1148  if (s == ThisTokEnd) {
1149  // Done.
1150  } else if (isHexDigit(*s) &&
1151  !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
      // A hex digit that is not part of a valid ud-suffix is reported as an
      // invalid digit for a binary literal.
1152  Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1153  LangOpts),
1154  diag::err_invalid_digit)
1155  << StringRef(s, 1) << 2;
1156  hadError = true;
1157  }
1158  // Other suffixes will be diagnosed by the caller.
1159  return;
1160  }
1161 
1162  // For now, the radix is set to 8. If we discover that we have a
1163  // floating point constant, the radix will change to 10. Octal floating
1164  // point constants are not permitted (only decimal and hexadecimal).
1165  radix = 8;
1166  const char *PossibleNewDigitStart = s;
1167  s = SkipOctalDigits(s);
1168  // When the value is 0 followed by a suffix (like 0wb), we want to leave 0
1169  // as the start of the digits. So if skipping octal digits does not skip
1170  // anything, we leave the digit start where it was.
1171  if (s != PossibleNewDigitStart)
1172  DigitsBegin = PossibleNewDigitStart;
1173 
1174  if (s == ThisTokEnd)
1175  return; // Done, simple octal number like 01234
1176 
1177  // If we have some other non-octal digit that *is* a decimal digit, see if
1178  // this is part of a floating point number like 094.123 or 09e1.
1179  if (isDigit(*s)) {
1180  const char *EndDecimal = SkipDigits(s);
      // Only commit to decimal when the digit run is followed by '.' or an
      // exponent marker; otherwise `s` stays on the non-octal digit.
1181  if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
1182  s = EndDecimal;
1183  radix = 10;
1184  }
1185  }
1186 
      // The rest of the literal is handled by the shared decimal/octal code.
1187  ParseDecimalOrOctalCommon(TokLoc);
1188 }
1189 
/// Conservatively decide whether a literal made of \p NumDigits digits in base
/// \p Radix is guaranteed to be representable in 64 bits.
///
/// Only radixes 2, 8, 10 and 16 are supported; any other value is treated as
/// unreachable.
static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
  unsigned MaxSafeDigits;
  switch (Radix) {
  case 2:
    MaxSafeDigits = 64; // One bit per digit.
    break;
  case 8:
    MaxSafeDigits = 64 / 3; // Digits are groups of 3 bits.
    break;
  case 10:
    MaxSafeDigits = 19; // floor(log10(2^64))
    break;
  case 16:
    MaxSafeDigits = 64 / 4; // Digits are groups of 4 bits.
    break;
  default:
    llvm_unreachable("impossible Radix");
  }
  return NumDigits <= MaxSafeDigits;
}
1204 
1205 /// GetIntegerValue - Convert this numeric literal value to an APInt that
1206 /// matches Val's input width. If there is an overflow, set Val to the low bits
1207 /// of the result and return true. Otherwise, return false.
///
/// The digits converted are those in [DigitsBegin, SuffixBegin); digit
/// separators inside that range are skipped.
1209  // Fast path: Compute a conservative bound on the maximum number of
1210  // bits per digit in this radix. If we can't possibly overflow a
1211  // uint64 based on that bound then do the simple conversion to
1212  // integer. This avoids the expensive overflow checking below, and
1213  // handles the common cases that matter (small decimal integers and
1214  // hex/octal values which don't overflow).
      // NumDigits counts every character in the range, digit separators
      // included, so it can only over-estimate the real number of digits.
1215  const unsigned NumDigits = SuffixBegin - DigitsBegin;
1216  if (alwaysFitsInto64Bits(radix, NumDigits)) {
1217  uint64_t N = 0;
1218  for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
1219  if (!isDigitSeparator(*Ptr))
1220  N = N * radix + llvm::hexDigitValue(*Ptr);
1221 
1222  // This will truncate the value to Val's input width. Simply check
1223  // for overflow by comparing.
1224  Val = N;
1225  return Val.getZExtValue() != N;
1226  }
1227 
      // Slow path: accumulate directly in Val's width, checking every multiply
      // and add for overflow.
1228  Val = 0;
1229  const char *Ptr = DigitsBegin;
1230 
1231  llvm::APInt RadixVal(Val.getBitWidth(), radix);
1232  llvm::APInt CharVal(Val.getBitWidth(), 0);
1233  llvm::APInt OldVal = Val;
1234 
1235  bool OverflowOccurred = false;
1236  while (Ptr < SuffixBegin) {
1237  if (isDigitSeparator(*Ptr)) {
1238  ++Ptr;
1239  continue;
1240  }
1241 
1242  unsigned C = llvm::hexDigitValue(*Ptr++);
1243 
1244  // If this letter is out of bound for this radix, reject it.
1245  assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1246 
1247  CharVal = C;
1248 
1249  // Add the digit to the value in the appropriate radix. If adding in digits
1250  // made the value smaller, then this overflowed.
1251  OldVal = Val;
1252 
1253  // Multiply by radix, did overflow occur on the multiply?
      // Dividing the product back by the radix must give the previous value
      // unless the multiplication wrapped.
1254  Val *= RadixVal;
1255  OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
1256 
1257  // Add value, did overflow occur on the value?
1258  // (a + b) ult b <=> overflow
1259  Val += CharVal;
1260  OverflowOccurred |= Val.ult(CharVal);
1261  }
      // Val keeps the wrapped (low-bit) result even when overflow is reported.
1262  return OverflowOccurred;
1263 }
1264 
1265 llvm::APFloat::opStatus
1266 NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
1267  using llvm::APFloat;
1268 
1269  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
1270 
1271  llvm::SmallString<16> Buffer;
1272  StringRef Str(ThisTokBegin, n);
1273  if (Str.contains('\'')) {
1274  Buffer.reserve(n);
1275  std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
1276  &isDigitSeparator);
1277  Str = Buffer;
1278  }
1279 
1280  auto StatusOrErr =
1281  Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
1282  assert(StatusOrErr && "Invalid floating point representation");
1283  return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
1284  : APFloat::opInvalidOp;
1285 }
1286 
/// Return true if \p c is one of the exponent markers 'e', 'E', 'p' or 'P'.
static inline bool IsExponentPart(char c) {
  switch (c) {
  case 'e':
  case 'E':
  case 'p':
  case 'P':
    return true;
  default:
    return false;
  }
}
1290 
1292  assert(radix == 16 || radix == 10);
1293 
1294  // Find how many digits are needed to store the whole literal.
1295  unsigned NumDigits = SuffixBegin - DigitsBegin;
1296  if (saw_period) --NumDigits;
1297 
1298  // Initial scan of the exponent if it exists
1299  bool ExpOverflowOccurred = false;
1300  bool NegativeExponent = false;
1301  const char *ExponentBegin;
1302  uint64_t Exponent = 0;
1303  int64_t BaseShift = 0;
1304  if (saw_exponent) {
1305  const char *Ptr = DigitsBegin;
1306 
1307  while (!IsExponentPart(*Ptr)) ++Ptr;
1308  ExponentBegin = Ptr;
1309  ++Ptr;
1310  NegativeExponent = *Ptr == '-';
1311  if (NegativeExponent) ++Ptr;
1312 
1313  unsigned NumExpDigits = SuffixBegin - Ptr;
1314  if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
1315  llvm::StringRef ExpStr(Ptr, NumExpDigits);
1316  llvm::APInt ExpInt(/*numBits=*/64, ExpStr, /*radix=*/10);
1317  Exponent = ExpInt.getZExtValue();
1318  } else {
1319  ExpOverflowOccurred = true;
1320  }
1321 
1322  if (NegativeExponent) BaseShift -= Exponent;
1323  else BaseShift += Exponent;
1324  }
1325 
1326  // Number of bits needed for decimal literal is
1327  // ceil(NumDigits * log2(10)) Integral part
1328  // + Scale Fractional part
1329  // + ceil(Exponent * log2(10)) Exponent
1330  // --------------------------------------------------
1331  // ceil((NumDigits + Exponent) * log2(10)) + Scale
1332  //
1333  // But for simplicity in handling integers, we can round up log2(10) to 4,
1334  // making:
1335  // 4 * (NumDigits + Exponent) + Scale
1336  //
1337  // Number of digits needed for hexadecimal literal is
1338  // 4 * NumDigits Integral part
1339  // + Scale Fractional part
1340  // + Exponent Exponent
1341  // --------------------------------------------------
1342  // (4 * NumDigits) + Scale + Exponent
1343  uint64_t NumBitsNeeded;
1344  if (radix == 10)
1345  NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
1346  else
1347  NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
1348 
1349  if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
1350  ExpOverflowOccurred = true;
1351  llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /*isSigned=*/false);
1352 
1353  bool FoundDecimal = false;
1354 
1355  int64_t FractBaseShift = 0;
1356  const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
1357  for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
1358  if (*Ptr == '.') {
1359  FoundDecimal = true;
1360  continue;
1361  }
1362 
1363  // Normal reading of an integer
1364  unsigned C = llvm::hexDigitValue(*Ptr);
1365  assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1366 
1367  Val *= radix;
1368  Val += C;
1369 
1370  if (FoundDecimal)
1371  // Keep track of how much we will need to adjust this value by from the
1372  // number of digits past the radix point.
1373  --FractBaseShift;
1374  }
1375 
1376  // For a radix of 16, we will be multiplying by 2 instead of 16.
1377  if (radix == 16) FractBaseShift *= 4;
1378  BaseShift += FractBaseShift;
1379 
1380  Val <<= Scale;
1381 
1382  uint64_t Base = (radix == 16) ? 2 : 10;
1383  if (BaseShift > 0) {
1384  for (int64_t i = 0; i < BaseShift; ++i) {
1385  Val *= Base;
1386  }
1387  } else if (BaseShift < 0) {
1388  for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
1389  Val = Val.udiv(Base);
1390  }
1391 
1392  bool IntOverflowOccurred = false;
1393  auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
1394  if (Val.getBitWidth() > StoreVal.getBitWidth()) {
1395  IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));
1396  StoreVal = Val.trunc(StoreVal.getBitWidth());
1397  } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
1398  IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
1399  StoreVal = Val.zext(StoreVal.getBitWidth());
1400  } else {
1401  StoreVal = Val;
1402  }
1403 
1404  return IntOverflowOccurred || ExpOverflowOccurred;
1405 }
1406 
1407 /// \verbatim
1408 /// user-defined-character-literal: [C++11 lex.ext]
1409 /// character-literal ud-suffix
1410 /// ud-suffix:
1411 /// identifier
1412 /// character-literal: [C++11 lex.ccon]
1413 /// ' c-char-sequence '
1414 /// u' c-char-sequence '
1415 /// U' c-char-sequence '
1416 /// L' c-char-sequence '
1417 /// u8' c-char-sequence ' [C++1z lex.ccon]
1418 /// c-char-sequence:
1419 /// c-char
1420 /// c-char-sequence c-char
1421 /// c-char:
1422 /// any member of the source character set except the single-quote ',
1423 /// backslash \, or new-line character
1424 /// escape-sequence
1425 /// universal-character-name
1426 /// escape-sequence:
1427 /// simple-escape-sequence
1428 /// octal-escape-sequence
1429 /// hexadecimal-escape-sequence
1430 /// simple-escape-sequence:
1431 /// one of \' \" \? \\ \a \b \f \n \r \t \v
1432 /// octal-escape-sequence:
1433 /// \ octal-digit
1434 /// \ octal-digit octal-digit
1435 /// \ octal-digit octal-digit octal-digit
1436 /// hexadecimal-escape-sequence:
1437 /// \x hexadecimal-digit
1438 /// hexadecimal-escape-sequence hexadecimal-digit
1439 /// universal-character-name: [C++11 lex.charset]
1440 /// \u hex-quad
1441 /// \U hex-quad hex-quad
1442 /// hex-quad:
1443 /// hex-digit hex-digit hex-digit hex-digit
1444 /// \endverbatim
1445 ///
/// Parse the spelling of a character literal and compute its value.
///
/// \param begin Start of the token's spelling (including any encoding prefix).
/// \param end One past the last character of the spelling (including any
///        ud-suffix).
/// \param Loc Location used for every diagnostic emitted here.
/// \param PP Supplies target information, language options and diagnostics.
/// \param kind Token kind of the literal; selects the character width and the
///        largest permitted character value.
///
/// On return HadError, Value and IsMultiChar describe the literal. When a
/// ud-suffix is present, UDSuffixBuf and UDSuffixOffset are filled in as well.
1446 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
1447  SourceLocation Loc, Preprocessor &PP,
1448  tok::TokenKind kind) {
1449  // At this point we know that the character matches the regex "(L|u|U|u8)?'.*'".
1450  HadError = false;
1451 
1452  Kind = kind;
1453 
1454  const char *TokBegin = begin;
1455 
1456  // Skip over wide character determinant.
      // Every prefixed kind has at least one prefix character; u8 has two.
1457  if (Kind != tok::char_constant)
1458  ++begin;
1459  if (Kind == tok::utf8_char_constant)
1460  ++begin;
1461 
1462  // Skip over the entry quote.
1463  if (begin[0] != '\'') {
1464  PP.Diag(Loc, diag::err_lexing_char);
1465  HadError = true;
1466  return;
1467  }
1468 
1469  ++begin;
1470 
1471  // Remove an optional ud-suffix.
      // Walk `end` backwards until it sits just past the closing quote; the
      // characters after that point are the suffix.
1472  if (end[-1] != '\'') {
1473  const char *UDSuffixEnd = end;
1474  do {
1475  --end;
1476  } while (end[-1] != '\'');
1477  // FIXME: Don't bother with this if !tok.hasUCN().
1478  expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
1479  UDSuffixOffset = end - TokBegin;
1480  }
1481 
1482  // Trim the ending quote.
1483  assert(end != begin && "Invalid token lexed");
1484  --end;
1485 
1486  // FIXME: The "Value" is an uint64_t so we can handle char literals of
1487  // up to 64-bits.
1488  // FIXME: This extensively assumes that 'char' is 8-bits.
1489  assert(PP.getTargetInfo().getCharWidth() == 8 &&
1490  "Assumes char is 8 bits");
1491  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
1492  (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
1493  "Assumes sizeof(int) on target is <= 64 and a multiple of char");
1494  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
1495  "Assumes sizeof(wchar) on target is <= 64");
1496 
      // One slot per remaining spelling byte; buffer_begin is the write cursor
      // and buffer_end the limit handed to the UTF-8 converter.
      // NOTE(review): presumably every escape consumes at least as many bytes
      // as the values it produces, so this bound is sufficient - confirm.
1497  SmallVector<uint32_t, 4> codepoint_buffer;
1498  codepoint_buffer.resize(end - begin);
1499  uint32_t *buffer_begin = &codepoint_buffer.front();
1500  uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
1501 
1502  // Unicode escapes representing characters that cannot be correctly
1503  // represented in a single code unit are disallowed in character literals
1504  // by this implementation.
1505  uint32_t largest_character_for_kind;
1506  if (tok::wide_char_constant == Kind) {
      // NOTE(review): the shift amount assumes getWCharWidth() <= 32, while the
      // assertion above allows up to 64 - confirm no target exceeds 32.
1507  largest_character_for_kind =
1508  0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
1509  } else if (tok::utf8_char_constant == Kind) {
1510  largest_character_for_kind = 0x7F;
1511  } else if (tok::utf16_char_constant == Kind) {
1512  largest_character_for_kind = 0xFFFF;
1513  } else if (tok::utf32_char_constant == Kind) {
1514  largest_character_for_kind = 0x10FFFF;
1515  } else {
1516  largest_character_for_kind = 0x7Fu;
1517  }
1518 
      // Decode the body: runs of ordinary characters, UCN escapes, and other
      // escapes are each handled by one of the three sections below.
1519  while (begin != end) {
1520  // Is this a span of non-escape characters?
1521  if (begin[0] != '\\') {
1522  char const *start = begin;
1523  do {
1524  ++begin;
1525  } while (begin != end && *begin != '\\');
1526 
      // Remember where the span and its output start so that the bad-encoding
      // fallback can redo the copy and the success path can range-check it.
1527  char const *tmp_in_start = start;
1528  uint32_t *tmp_out_start = buffer_begin;
1529  llvm::ConversionResult res =
1530  llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
1531  reinterpret_cast<llvm::UTF8 const *>(begin),
1532  &buffer_begin, buffer_end, llvm::strictConversion);
1533  if (res != llvm::conversionOK) {
1534  // If we see bad encoding for unprefixed character literals, warn and
1535  // simply copy the byte values, for compatibility with gcc and
1536  // older versions of clang.
1537  bool NoErrorOnBadEncoding = isAscii();
1538  unsigned Msg = diag::err_bad_character_encoding;
1539  if (NoErrorOnBadEncoding)
1540  Msg = diag::warn_bad_character_encoding;
1541  PP.Diag(Loc, Msg);
1542  if (NoErrorOnBadEncoding) {
1543  start = tmp_in_start;
1544  buffer_begin = tmp_out_start;
1545  for (; start != begin; ++start, ++buffer_begin)
1546  *buffer_begin = static_cast<uint8_t>(*start);
1547  } else {
1548  HadError = true;
1549  }
1550  } else {
      // Each decoded code point must fit in one code unit of this kind.
1551  for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
1552  if (*tmp_out_start > largest_character_for_kind) {
1553  HadError = true;
1554  PP.Diag(Loc, diag::err_character_too_large);
1555  }
1556  }
1557  }
1558 
1559  continue;
1560  }
1561  // Is this a Universal Character Name escape?
1562  if (begin[1] == 'u' || begin[1] == 'U') {
1563  unsigned short UcnLen = 0;
1564  if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
1565  FullSourceLoc(Loc, PP.getSourceManager()),
1566  &PP.getDiagnostics(), PP.getLangOpts(), true)) {
1567  HadError = true;
1568  } else if (*buffer_begin > largest_character_for_kind) {
1569  HadError = true;
1570  PP.Diag(Loc, diag::err_character_too_large);
1571  }
1572 
      // The slot is consumed whether or not the escape was valid.
1573  ++buffer_begin;
1574  continue;
1575  }
      // Any other escape sequence.
1576  unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
1577  uint64_t result =
1578  ProcessCharEscape(TokBegin, begin, end, HadError,
1579  FullSourceLoc(Loc,PP.getSourceManager()),
1580  CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
1581  *buffer_begin++ = result;
1582  }
1583 
1584  unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
1585 
      // More than one character: a warning for unprefixed literals, an error
      // for every other kind.
1586  if (NumCharsSoFar > 1) {
1587  if (isAscii() && NumCharsSoFar == 4)
1588  PP.Diag(Loc, diag::warn_four_char_character_literal);
1589  else if (isAscii())
1590  PP.Diag(Loc, diag::warn_multichar_character_literal);
1591  else {
1592  PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1593  HadError = true;
1594  }
1595  IsMultiChar = true;
1596  } else {
1597  IsMultiChar = false;
1598  }
1599 
1600  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
1601 
1602  // Narrow character literals act as though their value is concatenated
1603  // in this implementation, but warn on overflow.
1604  bool multi_char_too_long = false;
1605  if (isAscii() && isMultiChar()) {
1606  LitVal = 0;
1607  for (size_t i = 0; i < NumCharsSoFar; ++i) {
1608  // check for enough leading zeros to shift into
1609  multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
1610  LitVal <<= 8;
1611  LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1612  }
1613  } else if (NumCharsSoFar > 0) {
1614  // otherwise just take the last character
1615  LitVal = buffer_begin[-1];
1616  }
1617 
      // The overflow warning is suppressed when an error was already reported.
1618  if (!HadError && multi_char_too_long) {
1619  PP.Diag(Loc, diag::warn_char_constant_too_large);
1620  }
1621 
1622  // Transfer the value from APInt to uint64_t
1623  Value = LitVal.getZExtValue();
1624 
1625  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
1626  // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
1627  // character constants are not sign extended in this implementation:
1628  // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
1629  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
1630  PP.getLangOpts().CharIsSigned)
1631  Value = (signed char)Value;
1632 }
1633 
1634 /// \verbatim
1635 /// string-literal: [C++0x lex.string]
1636 /// encoding-prefix " [s-char-sequence] "
1637 /// encoding-prefix R raw-string
1638 /// encoding-prefix:
1639 /// u8
1640 /// u
1641 /// U
1642 /// L
1643 /// s-char-sequence:
1644 /// s-char
1645 /// s-char-sequence s-char
1646 /// s-char:
1647 /// any member of the source character set except the double-quote ",
1648 /// backslash \, or new-line character
1649 /// escape-sequence
1650 /// universal-character-name
1651 /// raw-string:
1652 /// " d-char-sequence ( r-char-sequence ) d-char-sequence "
1653 /// r-char-sequence:
1654 /// r-char
1655 /// r-char-sequence r-char
1656 /// r-char:
1657 /// any member of the source character set, except a right parenthesis )
1658 /// followed by the initial d-char-sequence (which may be empty)
1659 /// followed by a double quote ".
1660 /// d-char-sequence:
1661 /// d-char
1662 /// d-char-sequence d-char
1663 /// d-char:
1664 /// any member of the basic source character set except:
1665 /// space, the left parenthesis (, the right parenthesis ),
1666 /// the backslash \, and the control characters representing horizontal
1667 /// tab, vertical tab, form feed, and newline.
1668 /// escape-sequence: [C++0x lex.ccon]
1669 /// simple-escape-sequence
1670 /// octal-escape-sequence
1671 /// hexadecimal-escape-sequence
1672 /// simple-escape-sequence:
1673 /// one of \' \" \? \\ \a \b \f \n \r \t \v
1674 /// octal-escape-sequence:
1675 /// \ octal-digit
1676 /// \ octal-digit octal-digit
1677 /// \ octal-digit octal-digit octal-digit
1678 /// hexadecimal-escape-sequence:
1679 /// \x hexadecimal-digit
1680 /// hexadecimal-escape-sequence hexadecimal-digit
1681 /// universal-character-name:
1682 /// \u hex-quad
1683 /// \U hex-quad hex-quad
1684 /// hex-quad:
1685 /// hex-digit hex-digit hex-digit hex-digit
1686 /// \endverbatim
1687 ///
1690  Preprocessor &PP)
      // Source manager, language options, target and diagnostics all come from
      // the preprocessor; the remaining members start out zeroed/false.
1691  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1692  Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
1693  MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
1694  ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
      // All of the actual work on the token sequence happens in init().
1695  init(StringToks);
1696 }
1697 
1698 void StringLiteralParser::init(ArrayRef<Token> StringToks){
1699  // The literal token may have come from an invalid source location (e.g. due
1700  // to a PCH error), in which case the token length will be 0.
1701  if (StringToks.empty() || StringToks[0].getLength() < 2)
1702  return DiagnoseLexingError(SourceLocation());
1703 
1704  // Scan all of the string portions, remember the max individual token length,
1705  // computing a bound on the concatenated string length, and see whether any
1706  // piece is a wide-string. If any of the string portions is a wide-string
1707  // literal, the result is a wide-string literal [C99 6.4.5p4].
1708  assert(!StringToks.empty() && "expected at least one token");
1709  MaxTokenLength = StringToks[0].getLength();
1710  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
1711  SizeBound = StringToks[0].getLength()-2; // -2 for "".
1712  Kind = StringToks[0].getKind();
1713 
1714  hadError = false;
1715 
1716  // Implement Translation Phase #6: concatenation of string literals
1717  /// (C99 5.1.1.2p1). The common case is only one string fragment.
1718  for (unsigned i = 1; i != StringToks.size(); ++i) {
1719  if (StringToks[i].getLength() < 2)
1720  return DiagnoseLexingError(StringToks[i].getLocation());
1721 
1722  // The string could be shorter than this if it needs cleaning, but this is a
1723  // reasonable bound, which is all we need.
1724  assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
1725  SizeBound += StringToks[i].getLength()-2; // -2 for "".
1726 
1727  // Remember maximum string piece length.
1728  if (StringToks[i].getLength() > MaxTokenLength)
1729  MaxTokenLength = StringToks[i].getLength();
1730 
1731  // Remember if we see any wide or utf-8/16/32 strings.
1732  // Also check for illegal concatenations.
1733  if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
1734  if (isAscii()) {
1735  Kind = StringToks[i].getKind();
1736  } else {
1737  if (Diags)
1738  Diags->Report(StringToks[i].getLocation(),
1739  diag::err_unsupported_string_concat);
1740  hadError = true;
1741  }
1742  }
1743  }
1744 
1745  // Include space for the null terminator.
1746  ++SizeBound;
1747 
1748  // TODO: K&R warning: "traditional C rejects string constant concatenation"
1749 
1750  // Get the width in bytes of char/wchar_t/char16_t/char32_t
1751  CharByteWidth = getCharWidth(Kind, Target);
1752  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
1753  CharByteWidth /= 8;
1754 
1755  // The output buffer size needs to be large enough to hold wide characters.
1756  // This is a worst-case assumption which basically corresponds to L"" "long".
1757  SizeBound *= CharByteWidth;
1758 
1759  // Size the temporary buffer to hold the result string data.
1760  ResultBuf.resize(SizeBound);
1761 
1762  // Likewise, but for each string piece.
1763  SmallString<512> TokenBuf;
1764  TokenBuf.resize(MaxTokenLength);
1765 
1766  // Loop over all the strings, getting their spelling, and expanding them to
1767  // wide strings as appropriate.
1768  ResultPtr = &ResultBuf[0]; // Next byte to fill in.
1769 
1770  Pascal = false;
1771 
1772  SourceLocation UDSuffixTokLoc;
1773 
1774  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
1775  const char *ThisTokBuf = &TokenBuf[0];
1776  // Get the spelling of the token, which eliminates trigraphs, etc. We know
1777  // that ThisTokBuf points to a buffer that is big enough for the whole token
1778  // and 'spelled' tokens can only shrink.
1779  bool StringInvalid = false;
1780  unsigned ThisTokLen =
1781  Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1782  &StringInvalid);
1783  if (StringInvalid)
1784  return DiagnoseLexingError(StringToks[i].getLocation());
1785 
1786  const char *ThisTokBegin = ThisTokBuf;
1787  const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
1788 
1789  // Remove an optional ud-suffix.
1790  if (ThisTokEnd[-1] != '"') {
1791  const char *UDSuffixEnd = ThisTokEnd;
1792  do {
1793  --ThisTokEnd;
1794  } while (ThisTokEnd[-1] != '"');
1795 
1796  StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
1797 
1798  if (UDSuffixBuf.empty()) {
1799  if (StringToks[i].hasUCN())
1800  expandUCNs(UDSuffixBuf, UDSuffix);
1801  else
1802  UDSuffixBuf.assign(UDSuffix);
1803  UDSuffixToken = i;
1804  UDSuffixOffset = ThisTokEnd - ThisTokBuf;
1805  UDSuffixTokLoc = StringToks[i].getLocation();
1806  } else {
1807  SmallString<32> ExpandedUDSuffix;
1808  if (StringToks[i].hasUCN()) {
1809  expandUCNs(ExpandedUDSuffix, UDSuffix);
1810  UDSuffix = ExpandedUDSuffix;
1811  }
1812 
1813  // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
1814  // result of a concatenation involving at least one user-defined-string-
1815  // literal, all the participating user-defined-string-literals shall
1816  // have the same ud-suffix.
1817  if (UDSuffixBuf != UDSuffix) {
1818  if (Diags) {
1819  SourceLocation TokLoc = StringToks[i].getLocation();
1820  Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
1821  << UDSuffixBuf << UDSuffix
1822  << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
1823  << SourceRange(TokLoc, TokLoc);
1824  }
1825  hadError = true;
1826  }
1827  }
1828  }
1829 
1830  // Strip the end quote.
1831  --ThisTokEnd;
1832 
1833  // TODO: Input character set mapping support.
1834 
1835  // Skip marker for wide or unicode strings.
1836  if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
1837  ++ThisTokBuf;
1838  // Skip 8 of u8 marker for utf8 strings.
1839  if (ThisTokBuf[0] == '8')
1840  ++ThisTokBuf;
1841  }
1842 
1843  // Check for raw string
1844  if (ThisTokBuf[0] == 'R') {
1845  if (ThisTokBuf[1] != '"') {
1846  // The file may have come from PCH and then changed after loading the
1847  // PCH; Fail gracefully.
1848  return DiagnoseLexingError(StringToks[i].getLocation());
1849  }
1850  ThisTokBuf += 2; // skip R"
1851 
1852  // C++11 [lex.string]p2: A `d-char-sequence` shall consist of at most 16
1853  // characters.
1854  constexpr unsigned MaxRawStrDelimLen = 16;
1855 
1856  const char *Prefix = ThisTokBuf;
1857  while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&
1858  ThisTokBuf[0] != '(')
1859  ++ThisTokBuf;
1860  if (ThisTokBuf[0] != '(')
1861  return DiagnoseLexingError(StringToks[i].getLocation());
1862  ++ThisTokBuf; // skip '('
1863 
1864  // Remove same number of characters from the end
1865  ThisTokEnd -= ThisTokBuf - Prefix;
1866  if (ThisTokEnd < ThisTokBuf)
1867  return DiagnoseLexingError(StringToks[i].getLocation());
1868 
1869  // C++14 [lex.string]p4: A source-file new-line in a raw string literal
1870  // results in a new-line in the resulting execution string-literal.
1871  StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
1872  while (!RemainingTokenSpan.empty()) {
1873  // Split the string literal on \r\n boundaries.
1874  size_t CRLFPos = RemainingTokenSpan.find("\r\n");
1875  StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
1876  StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
1877 
1878  // Copy everything before the \r\n sequence into the string literal.
1879  if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
1880  hadError = true;
1881 
1882  // Point into the \n inside the \r\n sequence and operate on the
1883  // remaining portion of the literal.
1884  RemainingTokenSpan = AfterCRLF.substr(1);
1885  }
1886  } else {
1887  if (ThisTokBuf[0] != '"') {
1888  // The file may have come from PCH and then changed after loading the
1889  // PCH; Fail gracefully.
1890  return DiagnoseLexingError(StringToks[i].getLocation());
1891  }
1892  ++ThisTokBuf; // skip "
1893 
1894  // Check if this is a pascal string
1895  if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
1896  ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
1897 
1898  // If the \p sequence is found in the first token, we have a pascal string
1899  // Otherwise, if we already have a pascal string, ignore the first \p
1900  if (i == 0) {
1901  ++ThisTokBuf;
1902  Pascal = true;
1903  } else if (Pascal)
1904  ThisTokBuf += 2;
1905  }
1906 
1907  while (ThisTokBuf != ThisTokEnd) {
1908  // Is this a span of non-escape characters?
1909  if (ThisTokBuf[0] != '\\') {
1910  const char *InStart = ThisTokBuf;
1911  do {
1912  ++ThisTokBuf;
1913  } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
1914 
1915  // Copy the character span over.
1916  if (CopyStringFragment(StringToks[i], ThisTokBegin,
1917  StringRef(InStart, ThisTokBuf - InStart)))
1918  hadError = true;
1919  continue;
1920  }
1921  // Is this a Universal Character Name escape?
1922  if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
1923  EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
1924  ResultPtr, hadError,
1925  FullSourceLoc(StringToks[i].getLocation(), SM),
1926  CharByteWidth, Diags, Features);
1927  continue;
1928  }
1929  // Otherwise, this is a non-UCN escape character. Process it.
1930  unsigned ResultChar =
1931  ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
1932  FullSourceLoc(StringToks[i].getLocation(), SM),
1933  CharByteWidth*8, Diags, Features);
1934 
1935  if (CharByteWidth == 4) {
1936  // FIXME: Make the type of the result buffer correct instead of
1937  // using reinterpret_cast.
1938  llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
1939  *ResultWidePtr = ResultChar;
1940  ResultPtr += 4;
1941  } else if (CharByteWidth == 2) {
1942  // FIXME: Make the type of the result buffer correct instead of
1943  // using reinterpret_cast.
1944  llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
1945  *ResultWidePtr = ResultChar & 0xFFFF;
1946  ResultPtr += 2;
1947  } else {
1948  assert(CharByteWidth == 1 && "Unexpected char width");
1949  *ResultPtr++ = ResultChar & 0xFF;
1950  }
1951  }
1952  }
1953  }
1954 
1955  if (Pascal) {
1956  if (CharByteWidth == 4) {
1957  // FIXME: Make the type of the result buffer correct instead of
1958  // using reinterpret_cast.
1959  llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
1960  ResultWidePtr[0] = GetNumStringChars() - 1;
1961  } else if (CharByteWidth == 2) {
1962  // FIXME: Make the type of the result buffer correct instead of
1963  // using reinterpret_cast.
1964  llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
1965  ResultWidePtr[0] = GetNumStringChars() - 1;
1966  } else {
1967  assert(CharByteWidth == 1 && "Unexpected char width");
1968  ResultBuf[0] = GetNumStringChars() - 1;
1969  }
1970 
1971  // Verify that pascal strings aren't too large.
1972  if (GetStringLength() > 256) {
1973  if (Diags)
1974  Diags->Report(StringToks.front().getLocation(),
1975  diag::err_pascal_string_too_long)
1976  << SourceRange(StringToks.front().getLocation(),
1977  StringToks.back().getLocation());
1978  hadError = true;
1979  return;
1980  }
1981  } else if (Diags) {
1982  // Complain if this string literal has too many characters.
1983  unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
1984 
1985  if (GetNumStringChars() > MaxChars)
1986  Diags->Report(StringToks.front().getLocation(),
1987  diag::ext_string_too_long)
1988  << GetNumStringChars() << MaxChars
1989  << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
1990  << SourceRange(StringToks.front().getLocation(),
1991  StringToks.back().getLocation());
1992  }
1993 }
1994 
1995 static const char *resyncUTF8(const char *Err, const char *End) {
1996  if (Err == End)
1997  return End;
1998  End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
1999  while (++Err != End && (*Err & 0xC0) == 0x80)
2000  ;
2001  return Err;
2002 }
2003 
2004 /// This function copies from Fragment, which is a sequence of bytes
2005 /// within Tok's contents (which begin at TokBegin) into ResultPtr.
2006 /// Performs widening for multi-byte characters.
2007 bool StringLiteralParser::CopyStringFragment(const Token &Tok,
2008  const char *TokBegin,
2009  StringRef Fragment) {
2010  const llvm::UTF8 *ErrorPtrTmp;
2011  if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
2012  return false;
2013 
2014  // If we see bad encoding for unprefixed string literals, warn and
2015  // simply copy the byte values, for compatibility with gcc and older
2016  // versions of clang.
2017  bool NoErrorOnBadEncoding = isAscii();
2018  if (NoErrorOnBadEncoding) {
2019  memcpy(ResultPtr, Fragment.data(), Fragment.size());
2020  ResultPtr += Fragment.size();
2021  }
2022 
2023  if (Diags) {
2024  const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2025 
2026  FullSourceLoc SourceLoc(Tok.getLocation(), SM);
2027  const DiagnosticBuilder &Builder =
2028  Diag(Diags, Features, SourceLoc, TokBegin,
2029  ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
2030  NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
2031  : diag::err_bad_string_encoding);
2032 
2033  const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2034  StringRef NextFragment(NextStart, Fragment.end()-NextStart);
2035 
2036  // Decode into a dummy buffer.
2037  SmallString<512> Dummy;
2038  Dummy.reserve(Fragment.size() * CharByteWidth);
2039  char *Ptr = Dummy.data();
2040 
2041  while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
2042  const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2043  NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2044  Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
2045  ErrorPtr, NextStart);
2046  NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
2047  }
2048  }
2049  return !NoErrorOnBadEncoding;
2050 }
2051 
2052 void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
2053  hadError = true;
2054  if (Diags)
2055  Diags->Report(Loc, diag::err_lexing_string);
2056 }
2057 
2058 /// getOffsetOfStringByte - This function returns the offset of the
2059 /// specified byte of the string data represented by Token. This handles
2060 /// advancing over escape sequences in the string.
2062  unsigned ByteNo) const {
2063  // Get the spelling of the token.
2064  SmallString<32> SpellingBuffer;
2065  SpellingBuffer.resize(Tok.getLength());
2066 
2067  bool StringInvalid = false;
2068  const char *SpellingPtr = &SpellingBuffer[0];
2069  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
2070  &StringInvalid);
2071  if (StringInvalid)
2072  return 0;
2073 
2074  const char *SpellingStart = SpellingPtr;
2075  const char *SpellingEnd = SpellingPtr+TokLen;
2076 
2077  // Handle UTF-8 strings just like narrow strings.
2078  if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
2079  SpellingPtr += 2;
2080 
2081  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
2082  SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
2083 
2084  // For raw string literals, this is easy.
2085  if (SpellingPtr[0] == 'R') {
2086  assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
2087  // Skip 'R"'.
2088  SpellingPtr += 2;
2089  while (*SpellingPtr != '(') {
2090  ++SpellingPtr;
2091  assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
2092  }
2093  // Skip '('.
2094  ++SpellingPtr;
2095  return SpellingPtr - SpellingStart + ByteNo;
2096  }
2097 
2098  // Skip over the leading quote
2099  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
2100  ++SpellingPtr;
2101 
2102  // Skip over bytes until we find the offset we're looking for.
2103  while (ByteNo) {
2104  assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
2105 
2106  // Step over non-escapes simply.
2107  if (*SpellingPtr != '\\') {
2108  ++SpellingPtr;
2109  --ByteNo;
2110  continue;
2111  }
2112 
2113  // Otherwise, this is an escape character. Advance over it.
2114  bool HadError = false;
2115  if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
2116  const char *EscapePtr = SpellingPtr;
2117  unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
2118  1, Features, HadError);
2119  if (Len > ByteNo) {
2120  // ByteNo is somewhere within the escape sequence.
2121  SpellingPtr = EscapePtr;
2122  break;
2123  }
2124  ByteNo -= Len;
2125  } else {
2126  ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
2127  FullSourceLoc(Tok.getLocation(), SM),
2128  CharByteWidth*8, Diags, Features);
2129  --ByteNo;
2130  }
2131  assert(!HadError && "This method isn't valid on erroneous strings");
2132  }
2133 
2134  return SpellingPtr-SpellingStart;
2135 }
2136 
2137 /// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
2138 /// suffixes as ud-suffixes, because the diagnostic experience is better if we
2139 /// treat it as an invalid suffix.
2141  StringRef Suffix) {
2142  return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) ||
2143  Suffix == "sv";
2144 }
clang::NumericLiteralParser::isLong
bool isLong
Definition: LiteralSupport.h:64
max
__DEVICE__ int max(int __a, int __b)
Definition: __clang_cuda_math.h:196
clang::FullSourceLoc::getManager
const SourceManager & getManager() const
Definition: SourceLocation.h:382
clang::isPrintable
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
Definition: CharInfo.h:145
clang::CharLiteralParser::CharLiteralParser
CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, tok::TokenKind kind)
Definition: LiteralSupport.cpp:1446
clang::interp::APInt
llvm::APInt APInt
Definition: Integral.h:27
clang::DiagnosticBuilder
A little helper class used to produce diagnostics.
Definition: Diagnostic.h:1265
MakeCharSourceRange
static CharSourceRange MakeCharSourceRange(const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd)
Definition: LiteralSupport.cpp:59
clang::FullSourceLoc
A SourceLocation and its associated SourceManager.
Definition: SourceLocation.h:368
clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:210
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
Diag
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Definition: LiteralSupport.cpp:78
clang::Lexer::getSpelling
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition: Lexer.cpp:405
llvm::SmallVector< uint32_t, 4 >
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:86
clang::NumericLiteralParser::isImaginary
bool isImaginary
Definition: LiteralSupport.h:69
TargetInfo.h
clang::MultiVersionKind::Target
@ Target
clang::DiagnosticsEngine
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
getCharWidth
static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target)
Definition: LiteralSupport.cpp:39
clang::TargetInfo
Exposes information about the current target.
Definition: TargetInfo.h:186
memcpy
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Definition: __clang_cuda_device_functions.h:1549
clang::TargetInfo::getCharWidth
unsigned getCharWidth() const
Definition: TargetInfo.h:441
clang::StringLiteralParser::hadError
bool hadError
Definition: LiteralSupport.h:247
clang::NumericLiteralParser::isValidUDSuffix
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Definition: LiteralSupport.cpp:1015
MeasureUCNEscape
static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, unsigned CharByteWidth, const LangOptions &Features, bool &HadError)
MeasureUCNEscape - Determine the number of bytes within the resulting string which this UCN will occu...
Definition: LiteralSupport.cpp:505
clang::NumericLiteralParser::NumericLiteralParser
NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, const SourceManager &SM, const LangOptions &LangOpts, const TargetInfo &Target, DiagnosticsEngine &Diags)
integer-constant: [C99 6.4.4.1] decimal-constant integer-suffix octal-constant integer-suffix hexadec...
Definition: LiteralSupport.cpp:685
clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
LiteralSupport.h
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::StringLiteralParser::isValidUDSuffix
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Definition: LiteralSupport.cpp:2140
appendCodePoint
static void appendCodePoint(unsigned Codepoint, llvm::SmallVectorImpl< char > &Str)
Definition: LiteralSupport.cpp:319
clang::isPreprocessingNumberBody
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
Definition: CharInfo.h:153
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:627
clang::Preprocessor::getLangOpts
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:994
Preprocessor.h
alwaysFitsInto64Bits
static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits)
Definition: LiteralSupport.cpp:1190
clang::StringLiteralParser::isAscii
bool isAscii() const
Definition: LiteralSupport.h:266
clang::NumericLiteralParser::isSizeT
bool isSizeT
Definition: LiteralSupport.h:66
U
clang::StringLiteralParser::getOffsetOfStringByte
unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const
getOffsetOfStringByte - This function returns the offset of the specified byte of the string data rep...
Definition: LiteralSupport.cpp:2061
clang::CharSourceRange::getCharRange
static CharSourceRange getCharRange(SourceRange R)
Definition: SourceLocation.h:265
min
__DEVICE__ int min(int __a, int __b)
Definition: __clang_cuda_math.h:197
clang::CharLiteralParser::isAscii
bool isAscii() const
Definition: LiteralSupport.h:201
ProcessCharEscape
static unsigned ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
ProcessCharEscape - Parse a standard C escape sequence, which can occur in either a character or a st...
Definition: LiteralSupport.cpp:91
LangOptions.h
clang::NumericLiteralParser::isFloat
bool isFloat
Definition: LiteralSupport.h:68
LexDiagnostic.h
clang::Preprocessor::Diag
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Definition: Preprocessor.h:1806
clang::TargetInfo::getWCharWidth
unsigned getWCharWidth() const
getWCharWidth/Align - Return the size of 'wchar_t' for this target, in bits.
Definition: TargetInfo.h:657
clang::Preprocessor::getTargetInfo
const TargetInfo & getTargetInfo() const
Definition: Preprocessor.h:995
clang::NumericLiteralParser::isLongLong
bool isLongLong
Definition: LiteralSupport.h:65
llvm::SmallString< 16 >
clang::StringLiteralParser::Pascal
bool Pascal
Definition: LiteralSupport.h:248
clang::StringLiteralParser::GetStringLength
unsigned GetStringLength() const
Definition: LiteralSupport.h:253
Base
clang::StringLiteralParser::StringLiteralParser
StringLiteralParser(ArrayRef< Token > StringToks, Preprocessor &PP)
Definition: LiteralSupport.cpp:1689
ProcessUCNEscape
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)
ProcessUCNEscape - Read the Universal Character Name, check constraints and return the UTF32.
Definition: LiteralSupport.cpp:375
clang::NumericLiteralParser::isUnsigned
bool isUnsigned
Definition: LiteralSupport.h:63
EncodeUCNEscape
static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, FullSourceLoc Loc, unsigned CharByteWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
EncodeUCNEscape - Read the Universal Character Name, check constraints and convert the UTF32 to UTF8 ...
Definition: LiteralSupport.cpp:540
clang::Token::getLength
unsigned getLength() const
Definition: Token.h:128
clang::NumericLiteralParser::GetFixedPointValue
bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale)
GetFixedPointValue - Convert this numeric literal value into a scaled integer that represents this va...
Definition: LiteralSupport.cpp:1291
clang::NumericLiteralParser::isAccum
bool isAccum
Definition: LiteralSupport.h:73
IsExponentPart
static bool IsExponentPart(char c)
Definition: LiteralSupport.cpp:1287
clang::isHexDigit
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
Definition: CharInfo.h:129
clang::Preprocessor::getSourceManager
SourceManager & getSourceManager() const
Definition: Preprocessor.h:998
clang::NumericLiteralParser::hadError
bool hadError
Definition: LiteralSupport.h:62
resyncUTF8
static const char * resyncUTF8(const char *Err, const char *End)
Definition: LiteralSupport.cpp:1995
clang::expandUCNs
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
Definition: LiteralSupport.cpp:327
SourceLocation.h
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
CharInfo.h
false
#define false
Definition: stdbool.h:22
clang::NumericLiteralParser::MicrosoftInteger
uint8_t MicrosoftInteger
Definition: LiteralSupport.h:75
clang::NumericLiteralParser::isFloat16
bool isFloat16
Definition: LiteralSupport.h:70
clang::NumericLiteralParser::isFract
bool isFract
Definition: LiteralSupport.h:72
Begin
SourceLocation Begin
Definition: USRLocFinder.cpp:165
llvm::ArrayRef
Definition: LLVM.h:34
Lexer.h
Value
Value
Definition: UninitializedValues.cpp:102
clang::CharSourceRange
Represents a character-granular source range.
Definition: SourceLocation.h:253
clang::NumericLiteralParser::GetIntegerValue
bool GetIntegerValue(llvm::APInt &Val)
GetIntegerValue - Convert this numeric literal value to an APInt that matches Val's input width.
Definition: LiteralSupport.cpp:1208
clang::NumericLiteralParser::isIntegerLiteral
bool isIntegerLiteral() const
Definition: LiteralSupport.h:82
clang::NumericLiteralParser::isFloatingLiteral
bool isFloatingLiteral() const
Definition: LiteralSupport.h:85
clang::CharLiteralParser::isWide
bool isWide() const
Definition: LiteralSupport.h:202
clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:125
clang::LangOptions
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:78
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::NumericLiteralParser::GetFloatValue
llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result)
GetFloatValue - Convert this numeric literal to a floating value, using the specified APFloat fltSema...
Definition: LiteralSupport.cpp:1266
clang
Definition: CalledOnceCheck.h:17
clang::NumericLiteralParser::isFloat128
bool isFloat128
Definition: LiteralSupport.h:71
clang::NumericLiteralParser::isHalf
bool isHalf
Definition: LiteralSupport.h:67
clang::CharLiteralParser::isMultiChar
bool isMultiChar() const
Definition: LiteralSupport.h:206
clang::Preprocessor::getDiagnostics
DiagnosticsEngine & getDiagnostics() const
Definition: Preprocessor.h:991
clang::isDigit
LLVM_READONLY bool isDigit(unsigned char c)
Return true if this character is an ASCII digit: [0-9].
Definition: CharInfo.h:99
clang::NumericLiteralParser::isBitInt
bool isBitInt
Definition: LiteralSupport.h:74
c
__device__ __2f16 float c
Definition: __clang_hip_libdevice_declares.h:315
clang::Lexer::AdvanceToTokenCharacter
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition: Lexer.h:380
llvm::SmallVectorImpl< char >
clang::Preprocessor
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:131
SM
#define SM(sm)
Definition: Cuda.cpp:81
Token.h
clang::NumericLiteralParser::isFixedPointLiteral
bool isFixedPointLiteral() const
Definition: LiteralSupport.h:78
clang::diag::kind
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:62
clang::TargetInfo::getIntWidth
unsigned getIntWidth() const
getIntWidth/Align - Return the size of 'signed int' and 'unsigned int' for this target,...
Definition: TargetInfo.h:454
clang::DiagnosticsEngine::Report
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1536
clang::StringLiteralParser::GetNumStringChars
unsigned GetNumStringChars() const
Definition: LiteralSupport.h:255