clang 23.0.0git
TokenLexer.cpp
Go to the documentation of this file.
1//===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the TokenLexer interface.
10//
11//===----------------------------------------------------------------------===//
12
21#include "clang/Lex/Lexer.h"
22#include "clang/Lex/MacroArgs.h"
23#include "clang/Lex/MacroInfo.h"
25#include "clang/Lex/Token.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/iterator_range.h"
31#include <cassert>
32#include <cstring>
33#include <optional>
34
35using namespace clang;
36
37/// Create a TokenLexer for the specified macro with the specified actual
38/// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
40 MacroArgs *Actuals) {
41 // If the client is reusing a TokenLexer, make sure to free any memory
42 // associated with it.
43 destroy();
44
45 Macro = MI;
46 ActualArgs = Actuals;
47 CurTokenIdx = 0;
48
49 ExpandLocStart = Tok.getLocation();
50 ExpandLocEnd = ELEnd;
51 AtStartOfLine = Tok.isAtStartOfLine();
52 HasLeadingSpace = Tok.hasLeadingSpace();
53 NextTokGetsSpace = false;
54 Tokens = &*Macro->tokens_begin();
55 OwnsTokens = false;
56 DisableMacroExpansion = false;
57 IsReinject = false;
58 NumTokens = Macro->tokens_end()-Macro->tokens_begin();
59 MacroExpansionStart = SourceLocation();
60 LexingCXXModuleDirective = false;
61
62 SourceManager &SM = PP.getSourceManager();
63 MacroStartSLocOffset = SM.getNextLocalOffset();
64
65 if (NumTokens > 0) {
66 assert(Tokens[0].getLocation().isValid());
67 assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
68 "Macro defined in macro?");
69 assert(ExpandLocStart.isValid());
70
71 // Reserve a source location entry chunk for the length of the macro
72 // definition. Tokens that get lexed directly from the definition will
73 // have their locations pointing inside this chunk. This is to avoid
74 // creating separate source location entries for each token.
75 MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
76 MacroDefLength = Macro->getDefinitionLength(SM);
77 MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
78 ExpandLocStart,
79 ExpandLocEnd,
80 MacroDefLength);
81 }
82
83 // If this is a function-like macro, expand the arguments and change
84 // Tokens to point to the expanded tokens.
85 if (Macro->isFunctionLike() && Macro->getNumParams())
86 ExpandFunctionArguments();
87
88 // Mark the macro as currently disabled, so that it is not recursively
89 // expanded. The macro must be disabled only after argument pre-expansion of
90 // function-like macro arguments occurs.
91 Macro->DisableMacro();
92}
93
94/// Create a TokenLexer for the specified token stream. This does not
95/// take ownership of the specified token vector.
96void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
97 bool disableMacroExpansion, bool ownsTokens,
98 bool isReinject) {
99 assert(!isReinject || disableMacroExpansion);
100 // If the client is reusing a TokenLexer, make sure to free any memory
101 // associated with it.
102 destroy();
103
104 Macro = nullptr;
105 ActualArgs = nullptr;
106 Tokens = TokArray;
107 OwnsTokens = ownsTokens;
108 DisableMacroExpansion = disableMacroExpansion;
109 IsReinject = isReinject;
110 NumTokens = NumToks;
111 CurTokenIdx = 0;
112 ExpandLocStart = ExpandLocEnd = SourceLocation();
113 AtStartOfLine = false;
114 HasLeadingSpace = false;
115 NextTokGetsSpace = false;
116 MacroExpansionStart = SourceLocation();
117 LexingCXXModuleDirective = false;
118
119 // Set HasLeadingSpace/AtStartOfLine so that the first token will be
120 // returned unmodified.
121 if (NumToks != 0) {
122 AtStartOfLine = TokArray[0].isAtStartOfLine();
123 HasLeadingSpace = TokArray[0].hasLeadingSpace();
124 }
125}
126
127void TokenLexer::destroy() {
128 // If this was a function-like macro that actually uses its arguments, delete
129 // the expanded tokens.
130 if (OwnsTokens) {
131 delete [] Tokens;
132 Tokens = nullptr;
133 OwnsTokens = false;
134 }
135
136 // TokenLexer owns its formal arguments.
137 if (ActualArgs) ActualArgs->destroy(PP);
138}
139
140bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
141 SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
142 unsigned MacroArgNo, Preprocessor &PP) {
143 // Is the macro argument __VA_ARGS__?
144 if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
145 return false;
146
147 // In Microsoft-compatibility mode, a comma is removed in the expansion
148 // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
149 // not supported by gcc.
150 if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
151 return false;
152
153 // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
154 // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
155 // named arguments, where it remains. In all other modes, including C99
156 // with GNU extensions, it is removed regardless of named arguments.
157 // Microsoft also appears to support this extension, unofficially.
158 if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
159 && Macro->getNumParams() < 2)
160 return false;
161
162 // Is a comma available to be removed?
163 if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
164 return false;
165
166 // Issue an extension diagnostic for the paste operator.
167 if (HasPasteOperator)
168 PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
169
170 // Remove the comma.
171 ResultToks.pop_back();
172
173 if (!ResultToks.empty()) {
174 // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
175 // then removal of the comma should produce a placemarker token (in C99
176 // terms) which we model by popping off the previous ##, giving us a plain
177 // "X" when __VA_ARGS__ is empty.
178 if (ResultToks.back().is(tok::hashhash))
179 ResultToks.pop_back();
180
181 // Remember that this comma was elided.
182 ResultToks.back().setFlag(Token::CommaAfterElided);
183 }
184
185 // Never add a space, even if the comma, ##, or arg had a space.
186 NextTokGetsSpace = false;
187 return true;
188}
189
190void TokenLexer::stringifyVAOPTContents(
191 SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
192 const SourceLocation VAOPTClosingParenLoc) {
193 const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
194 const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
195 Token *const VAOPTTokens =
196 NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr;
197
198 SmallVector<Token, 64> ConcatenatedVAOPTResultToks;
199 // FIXME: Should we keep track within VCtx of whether we did or did not
200 // encounter pasting - and only then perform this loop?
201
202 // Perform token pasting (concatenation) prior to stringization.
203 for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
204 ++CurTokenIdx) {
205 if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
206 assert(CurTokenIdx != 0 &&
207 "Can not have __VAOPT__ contents begin with a ##");
208 Token &LHS = VAOPTTokens[CurTokenIdx - 1];
209 pasteTokens(LHS, llvm::ArrayRef(VAOPTTokens, NumVAOptTokens),
210 CurTokenIdx);
211 // Replace the token prior to the first ## in this iteration.
212 ConcatenatedVAOPTResultToks.back() = LHS;
213 if (CurTokenIdx == NumVAOptTokens)
214 break;
215 }
216 ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
217 }
218
219 ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok());
220 // Get the SourceLocation that represents the start location within
221 // the macro definition that marks where this string is substituted
222 // into: i.e. the __VA_OPT__ and the ')' within the spelling of the
223 // macro definition, and use it to indicate that the stringified token
224 // was generated from that location.
225 const SourceLocation ExpansionLocStartWithinMacro =
226 getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc());
227 const SourceLocation ExpansionLocEndWithinMacro =
228 getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);
229
230 Token StringifiedVAOPT = MacroArgs::StringifyArgument(
231 &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/,
232 ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);
233
235 StringifiedVAOPT.setFlag(Token::LeadingSpace);
236
237 StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
238 // Resize (shrink) the token stream to just capture this stringified token.
239 ResultToks.resize(NumToksPriorToVAOpt + 1);
240 ResultToks.back() = StringifiedVAOPT;
241}
242
243/// Expand the arguments of a function-like macro so that we can quickly
244/// return preexpanded tokens from Tokens.
245void TokenLexer::ExpandFunctionArguments() {
246 SmallVector<Token, 128> ResultToks;
247
248 // Loop through 'Tokens', expanding them into ResultToks. Keep
249 // track of whether we change anything. If not, no need to keep them. If so,
250 // we install the newly expanded sequence as the new 'Tokens' list.
251 bool MadeChange = false;
252
253 std::optional<bool> CalledWithVariadicArguments;
254
255 VAOptExpansionContext VCtx(PP);
256
257 for (unsigned I = 0, E = NumTokens; I != E; ++I) {
258 const Token &CurTok = Tokens[I];
259 // We don't want a space for the next token after a paste
260 // operator. In valid code, the token will get smooshed onto the
261 // preceding one anyway. In assembler-with-cpp mode, invalid
262 // pastes are allowed through: in this case, we do not want the
263 // extra whitespace to be added. For example, we want ". ## foo"
264 // -> ".foo" not ". foo".
265 if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
266 NextTokGetsSpace = true;
267
268 if (VCtx.isVAOptToken(CurTok)) {
269 MadeChange = true;
270 assert(Tokens[I + 1].is(tok::l_paren) &&
271 "__VA_OPT__ must be followed by '('");
272
273 ++I; // Skip the l_paren
275 ResultToks.size());
276
277 continue;
278 }
279
280 // We have entered into the __VA_OPT__ context, so handle tokens
281 // appropriately.
282 if (VCtx.isInVAOpt()) {
283 // If we are about to process a token that is either an argument to
284 // __VA_OPT__ or its closing rparen, then:
285 // 1) If the token is the closing rparen that exits us out of __VA_OPT__,
286 // perform any necessary stringification or placemarker processing,
287 // and/or skip to the next token.
288 // 2) else if macro was invoked without variadic arguments skip this
289 // token.
290 // 3) else (macro was invoked with variadic arguments) process the token
291 // normally.
292
293 if (Tokens[I].is(tok::l_paren))
294 VCtx.sawOpeningParen(Tokens[I].getLocation());
295 // Continue skipping tokens within __VA_OPT__ if the macro was not
296 // called with variadic arguments, else let the rest of the loop handle
297 // this token. Note sawClosingParen() returns true only if the r_paren matches
298 // the closing r_paren of the __VA_OPT__.
299 if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
300 // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__.
301 if (!CalledWithVariadicArguments) {
302 CalledWithVariadicArguments =
303 ActualArgs->invokedWithVariadicArgument(Macro, PP);
304 }
305 if (!*CalledWithVariadicArguments) {
306 // Skip this token.
307 continue;
308 }
309 // ... else the macro was called with variadic arguments, and we do not
310 // have a closing rparen - so process this token normally.
311 } else {
312 // Current token is the closing r_paren which marks the end of the
313 // __VA_OPT__ invocation, so handle any place-marker pasting (if
314 // empty) by removing hashhash either before (if exists) or after. And
315 // also stringify the entire contents if VAOPT was preceded by a hash,
316 // but do so only after any token concatenation that needs to occur
317 // within the contents of VAOPT.
318
319 if (VCtx.hasStringifyOrCharifyBefore()) {
320 // Replace all the tokens just added from within VAOPT into a single
321 // stringified token. This requires token-pasting to eagerly occur
322 // within these tokens. If either the contents of VAOPT were empty
323 // or the macro wasn't called with any variadic arguments, the result
324 // is a token that represents an empty string.
325 stringifyVAOPTContents(ResultToks, VCtx,
326 /*ClosingParenLoc*/ Tokens[I].getLocation());
327
328 } else if (/*No tokens within VAOPT*/
329 ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) {
330 // Treat VAOPT as a placemarker token. Eat either the '##' before the
331 // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
332 // hashhash was not a placemarker) or the '##'
333 // after VAOPT, but not both.
334
335 if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
336 ResultToks.pop_back();
337 } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
338 ++I; // Skip the following hashhash.
339 }
340 } else {
341 // If there's a ## before the __VA_OPT__, we might have discovered
342 // that the __VA_OPT__ begins with a placeholder. We delay action on
343 // that to now to avoid messing up our stashed count of tokens before
344 // __VA_OPT__.
345 if (VCtx.beginsWithPlaceholder()) {
346 assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 &&
347 ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() &&
348 ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is(
349 tok::hashhash) &&
350 "no token paste before __VA_OPT__");
351 ResultToks.erase(ResultToks.begin() +
353 }
354 // If the expansion of __VA_OPT__ ends with a placeholder, eat any
355 // following '##' token.
356 if (VCtx.endsWithPlaceholder() && I + 1 != E &&
357 Tokens[I + 1].is(tok::hashhash)) {
358 ++I;
359 }
360 }
361 VCtx.reset();
362 // We processed __VA_OPT__'s closing paren (and the exit out of
363 // __VA_OPT__), so skip to the next token.
364 continue;
365 }
366 }
367
368 // If we found the stringify operator, get the argument stringified. The
369 // preprocessor already verified that the following token is a macro
370 // parameter or __VA_OPT__ when the #define was lexed.
371
372 if (CurTok.isOneOf(tok::hash, tok::hashat)) {
373 int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
374 assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
375 "Token following # is not an argument or __VA_OPT__!");
376
377 if (ArgNo == -1) {
378 // Handle the __VA_OPT__ case.
379 VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
380 CurTok.is(tok::hashat));
381 continue;
382 }
383 // Else handle the simple argument case.
384 SourceLocation ExpansionLocStart =
385 getExpansionLocForMacroDefLoc(CurTok.getLocation());
386 SourceLocation ExpansionLocEnd =
387 getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());
388
389 bool Charify = CurTok.is(tok::hashat);
390 const Token *UnexpArg = ActualArgs->getUnexpArgument(ArgNo);
392 UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd);
394
395 // The stringified/charified string leading space flag gets set to match
396 // the #/#@ operator.
397 if (NextTokGetsSpace)
399
400 ResultToks.push_back(Res);
401 MadeChange = true;
402 ++I; // Skip arg name.
403 NextTokGetsSpace = false;
404 continue;
405 }
406
407 // Find out if there is a paste (##) operator before or after the token.
408 bool NonEmptyPasteBefore =
409 !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
410 bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
411 bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);
412 bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren);
413
414 assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
415 "unexpected ## in ResultToks");
416
417 // Otherwise, if this is not an argument token, just add the token to the
418 // output buffer.
419 IdentifierInfo *II = CurTok.getIdentifierInfo();
420 int ArgNo = II ? Macro->getParameterNum(II) : -1;
421 if (ArgNo == -1) {
422 // This isn't an argument, just add it.
423 ResultToks.push_back(CurTok);
424
425 if (NextTokGetsSpace) {
426 ResultToks.back().setFlag(Token::LeadingSpace);
427 NextTokGetsSpace = false;
428 } else if (PasteBefore && !NonEmptyPasteBefore)
429 ResultToks.back().clearFlag(Token::LeadingSpace);
430
431 continue;
432 }
433
434 // An argument is expanded somehow, the result is different than the
435 // input.
436 MadeChange = true;
437
438 // Otherwise, this is a use of the argument.
439
440 // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
441 // are no trailing commas if __VA_ARGS__ is empty.
442 if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
443 MaybeRemoveCommaBeforeVaArgs(ResultToks,
444 /*HasPasteOperator=*/false,
445 Macro, ArgNo, PP))
446 continue;
447
448 // If it is not the LHS/RHS of a ## operator, we must pre-expand the
449 // argument and substitute the expanded tokens into the result. This is
450 // C99 6.10.3.1p1.
451 if (!PasteBefore && !PasteAfter) {
452 const Token *ResultArgToks;
453
454 // Only preexpand the argument if it could possibly need it. This
455 // avoids some work in common cases.
456 const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
457 if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
458 ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
459 else
460 ResultArgToks = ArgTok; // Use non-preexpanded tokens.
461
462 // If the arg token expanded into anything, append it.
463 if (ResultArgToks->isNot(tok::eof)) {
464 size_t FirstResult = ResultToks.size();
465 unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
466 ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
467
468 // In Microsoft-compatibility mode, we follow MSVC's preprocessing
469 // behavior by not considering single commas from nested macro
470 // expansions as argument separators. Set a flag on the token so we can
471 // test for this later when the macro expansion is processed.
472 if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
473 ResultToks.back().is(tok::comma))
474 ResultToks.back().setFlag(Token::IgnoredComma);
475
476 // If the '##' came from expanding an argument, turn it into 'unknown'
477 // to avoid pasting.
478 for (Token &Tok : llvm::drop_begin(ResultToks, FirstResult))
479 if (Tok.is(tok::hashhash))
480 Tok.setKind(tok::unknown);
481
482 if(ExpandLocStart.isValid()) {
483 updateLocForMacroArgTokens(CurTok.getLocation(),
484 ResultToks.begin()+FirstResult,
485 ResultToks.end());
486 }
487
488 // If any tokens were substituted from the argument, the whitespace
489 // before the first token should match the whitespace of the arg
490 // identifier.
491 ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
492 NextTokGetsSpace);
493 ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
494 NextTokGetsSpace = false;
495 } else {
496 // We're creating a placeholder token. Usually this doesn't matter,
497 // but it can affect paste behavior when at the start or end of a
498 // __VA_OPT__.
499 if (NonEmptyPasteBefore) {
500 // We're imagining a placeholder token is inserted here. If this is
501 // the first token in a __VA_OPT__ after a ##, delete the ##.
502 assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__");
504 } else if (RParenAfter)
506 }
507 continue;
508 }
509
510 // Okay, we have a token that is either the LHS or RHS of a paste (##)
511 // argument. It gets substituted as its non-pre-expanded tokens.
512 const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
513 unsigned NumToks = MacroArgs::getArgLength(ArgToks);
514 if (NumToks) { // Not an empty argument?
515 bool VaArgsPseudoPaste = false;
516 // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
517 // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
518 // the expander tries to paste ',' with the first token of the __VA_ARGS__
519 // expansion.
520 if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
521 ResultToks[ResultToks.size()-2].is(tok::comma) &&
522 (unsigned)ArgNo == Macro->getNumParams()-1 &&
523 Macro->isVariadic()) {
524 VaArgsPseudoPaste = true;
525 // Remove the paste operator, report use of the extension.
526 PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
527 }
528
529 ResultToks.append(ArgToks, ArgToks+NumToks);
530
531 // If the '##' came from expanding an argument, turn it into 'unknown'
532 // to avoid pasting.
533 for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
534 ResultToks.end())) {
535 if (Tok.is(tok::hashhash))
536 Tok.setKind(tok::unknown);
537 }
538
539 if (ExpandLocStart.isValid()) {
540 updateLocForMacroArgTokens(CurTok.getLocation(),
541 ResultToks.end()-NumToks, ResultToks.end());
542 }
543
544 // Transfer the leading whitespace information from the token
545 // (the macro argument) onto the first token of the
546 // expansion. Note that we don't do this for the GNU
547 // pseudo-paste extension ", ## __VA_ARGS__".
548 if (!VaArgsPseudoPaste) {
549 ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
550 false);
551 ResultToks[ResultToks.size() - NumToks].setFlagValue(
552 Token::LeadingSpace, NextTokGetsSpace);
553 }
554
555 NextTokGetsSpace = false;
556 continue;
557 }
558
559 // If an empty argument is on the LHS or RHS of a paste, the standard (C99
560 // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
561 // implement this by eating ## operators when a LHS or RHS expands to
562 // empty.
563 if (PasteAfter) {
564 // Discard the argument token and skip (don't copy to the expansion
565 // buffer) the paste operator after it.
566 ++I;
567 continue;
568 }
569
570 if (RParenAfter && !NonEmptyPasteBefore)
572
573 // If this is on the RHS of a paste operator, we've already copied the
574 // paste operator to the ResultToks list, unless the LHS was empty too.
575 // Remove it.
576 assert(PasteBefore);
577 if (NonEmptyPasteBefore) {
578 assert(ResultToks.back().is(tok::hashhash));
579 // Do not remove the paste operator if it is the one before __VA_OPT__
580 // (and we are still processing tokens within VA_OPT). We handle the case
581 // of removing the paste operator if __VA_OPT__ reduces to the notional
582 // placemarker above when we encounter the closing paren of VA_OPT.
583 if (!VCtx.isInVAOpt() ||
584 ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
585 ResultToks.pop_back();
586 else
588 }
589
590 // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
591 // and if the macro had at least one real argument, and if the token before
592 // the ## was a comma, remove the comma. This is a GCC extension which is
593 // disabled when using -std=c99.
594 if (ActualArgs->isVarargsElidedUse())
595 MaybeRemoveCommaBeforeVaArgs(ResultToks,
596 /*HasPasteOperator=*/true,
597 Macro, ArgNo, PP);
598 }
599
600 // If anything changed, install this as the new Tokens list.
601 if (MadeChange) {
602 assert(!OwnsTokens && "This would leak if we already own the token list");
603 // This is deleted in the dtor.
604 NumTokens = ResultToks.size();
605 // The tokens will be added to Preprocessor's cache and will be removed
606 // when this TokenLexer finishes lexing them.
607 Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
608
609 // The preprocessor cache of macro expanded tokens owns these tokens,not us.
610 OwnsTokens = false;
611 }
612}
613
614/// Checks if two tokens form wide string literal.
615static bool isWideStringLiteralFromMacro(const Token &FirstTok,
616 const Token &SecondTok) {
617 return FirstTok.is(tok::identifier) &&
618 FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
619 SecondTok.stringifiedInMacro();
620}
621
622/// Lex - Lex and return a token from this macro stream.
624 // Lexing off the end of the macro, pop this macro off the expansion stack.
625 if (isAtEnd()) {
626 // If this is a macro (not a token stream), mark the macro enabled now
627 // that it is no longer being expanded.
628 if (Macro) Macro->EnableMacro();
629
630 // CWG2947: Allow the following code:
631 //
632 // export module m; int x;
633 // extern "C++" int *y = &x;
634 //
635 // The 'extern' token should have the 'StartOfLine' flag when the current
636 // TokenLexer exits, so propagate the line start/leading space info.
637 if (!Macro && isLexingCXXModuleDirective()) {
638 AtStartOfLine = true;
640 }
641
642 Tok.startToken();
643 Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
644 Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
645 if (CurTokenIdx == 0)
647 return PP.HandleEndOfTokenLexer(Tok);
648 }
649
650 SourceManager &SM = PP.getSourceManager();
651
652 // If this is the first token of the expanded result, we inherit spacing
653 // properties later.
654 bool isFirstToken = CurTokenIdx == 0;
655
656 // Get the next token to return.
657 Tok = Tokens[CurTokenIdx++];
658 if (IsReinject)
659 Tok.setFlag(Token::IsReinjected);
660
661 bool TokenIsFromPaste = false;
662
663 // If this token is followed by a token paste (##) operator, paste the tokens!
664 // Note that ## is a normal token when not expanding a macro.
665 if (!isAtEnd() && Macro &&
666 (Tokens[CurTokenIdx].is(tok::hashhash) ||
667 // Special processing of L#x macros in -fms-compatibility mode.
668 // Microsoft compiler is able to form a wide string literal from
669 // 'L#macro_arg' construct in a function-like macro.
670 (PP.getLangOpts().MSVCCompat &&
671 isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
672 // When handling the microsoft /##/ extension, the final token is
673 // returned by pasteTokens, not the pasted token.
674 if (pasteTokens(Tok))
675 return true;
676
677 TokenIsFromPaste = true;
678 }
679
680 // The token's current location indicate where the token was lexed from. We
681 // need this information to compute the spelling of the token, but any
682 // diagnostics for the expanded token should appear as if they came from
683 // ExpansionLoc. Pull this information together into a new SourceLocation
684 // that captures all of this.
685 if (ExpandLocStart.isValid() && // Don't do this for token streams.
686 // Check that the token's location was not already set properly.
687 SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
688 SourceLocation instLoc;
689 if (Tok.is(tok::comment)) {
690 instLoc = SM.createExpansionLoc(Tok.getLocation(),
691 ExpandLocStart,
692 ExpandLocEnd,
693 Tok.getLength());
694 } else {
695 instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
696 }
697
698 Tok.setLocation(instLoc);
699 }
700
701 // If this is the first token, set the lexical properties of the token to
702 // match the lexical properties of the macro identifier.
703 if (isFirstToken) {
704 Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
705 Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
706 } else {
707 // If this is not the first token, we may still need to pass through
708 // leading whitespace if we've expanded a macro.
709 if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
710 if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
711 }
712 AtStartOfLine = false;
713 HasLeadingSpace = false;
714
715 // Handle recursive expansion!
716 if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr &&
717 (!PP.getLangOpts().CPlusPlusModules ||
718 !Tok.isModuleContextualKeyword())) {
719 // Change the kind of this identifier to the appropriate token kind, e.g.
720 // turning "for" into a keyword.
721 IdentifierInfo *II = Tok.getIdentifierInfo();
722 Tok.setKind(II->getTokenID());
723
724 // If this identifier was poisoned and from a paste, emit an error. This
725 // won't be handled by Preprocessor::HandleIdentifier because this is coming
726 // from a macro expansion.
727 if (II->isPoisoned() && TokenIsFromPaste) {
728 PP.HandlePoisonedIdentifier(Tok);
729 }
730
731 if (!DisableMacroExpansion && II->isHandleIdentifierCase())
732 return PP.HandleIdentifier(Tok);
733 }
734
735 // Otherwise, return a normal token.
736 return true;
737}
738
739bool TokenLexer::pasteTokens(Token &Tok) {
740 return pasteTokens(Tok, llvm::ArrayRef(Tokens, NumTokens), CurTokenIdx);
741}
742
743/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
744/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
745/// are more ## after it, chomp them iteratively. Return the result as LHSTok.
746/// If this returns true, the caller should immediately return the token.
747bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
748 unsigned int &CurIdx) {
749 assert(CurIdx > 0 && "## can not be the first token within tokens");
750 assert((TokenStream[CurIdx].is(tok::hashhash) ||
751 (PP.getLangOpts().MSVCCompat &&
752 isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
753 "Token at this Index must be ## or part of the MSVC 'L "
754 "#macro-arg' pasting pair");
755
756 // MSVC: If previous token was pasted, this must be a recovery from an invalid
757 // paste operation. Ignore spaces before this token to mimic MSVC output.
758 // Required for generating valid UUID strings in some MS headers.
759 if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
760 TokenStream[CurIdx - 2].is(tok::hashhash))
762
763 SmallString<128> Buffer;
764 const char *ResultTokStrPtr = nullptr;
765 SourceLocation StartLoc = LHSTok.getLocation();
766 SourceLocation PasteOpLoc;
767 bool HasUCNs = false;
768
769 auto IsAtEnd = [&TokenStream, &CurIdx] {
770 return TokenStream.size() == CurIdx;
771 };
772
773 do {
774 // Consume the ## operator if any.
775 PasteOpLoc = TokenStream[CurIdx].getLocation();
776 if (TokenStream[CurIdx].is(tok::hashhash))
777 ++CurIdx;
778 assert(!IsAtEnd() && "No token on the RHS of a paste operator!");
779
780 // Get the RHS token.
781 const Token &RHS = TokenStream[CurIdx];
782
783 // Allocate space for the result token. This is guaranteed to be enough for
784 // the two tokens.
785 Buffer.resize(LHSTok.getLength() + RHS.getLength());
786
787 // Get the spelling of the LHS token in Buffer.
788 const char *BufPtr = &Buffer[0];
789 bool Invalid = false;
790 unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
791 if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
792 memcpy(&Buffer[0], BufPtr, LHSLen);
793 if (Invalid)
794 return true;
795
796 BufPtr = Buffer.data() + LHSLen;
797 unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
798 if (Invalid)
799 return true;
800 if (RHSLen && BufPtr != &Buffer[LHSLen])
801 // Really, we want the chars in Buffer!
802 memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
803
804 // Trim excess space.
805 Buffer.resize(LHSLen+RHSLen);
806
807 // Plop the pasted result (including the trailing newline and null) into a
808 // scratch buffer where we can lex it.
809 Token ResultTokTmp;
810 ResultTokTmp.startToken();
811
812 // Claim that the tmp token is a string_literal so that we can get the
813 // character pointer back from CreateString in getLiteralData().
814 ResultTokTmp.setKind(tok::string_literal);
815 PP.CreateString(Buffer, ResultTokTmp);
816 SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
817 ResultTokStrPtr = ResultTokTmp.getLiteralData();
818
819 // Lex the resultant pasted token into Result.
820 Token Result;
821
822 if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
823 // Common paste case: identifier+identifier = identifier. Avoid creating
824 // a lexer and other overhead.
825 PP.IncrementPasteCounter(true);
826 Result.startToken();
827 Result.setKind(tok::raw_identifier);
828 Result.setRawIdentifierData(ResultTokStrPtr);
829 Result.setLocation(ResultTokLoc);
830 Result.setLength(LHSLen+RHSLen);
831 } else {
832 PP.IncrementPasteCounter(false);
833
834 assert(ResultTokLoc.isFileID() &&
835 "Should be a raw location into scratch buffer");
836 SourceManager &SourceMgr = PP.getSourceManager();
837 FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
838
839 bool Invalid = false;
840 const char *ScratchBufStart
841 = SourceMgr.getBufferData(LocFileID, &Invalid).data();
842 if (Invalid)
843 return false;
844
845 // Make a lexer to lex this string from. Lex just this one token.
846 // Make a lexer object so that we lex and expand the paste result.
847 Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
848 PP.getLangOpts(), ScratchBufStart,
849 ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
850
851 // Lex a token in raw mode. This way it won't look up identifiers
852 // automatically, lexing off the end will return an eof token, and
853 // warnings are disabled. This returns true if the result token is the
854 // entire buffer.
855 bool isInvalid = !TL.LexFromRawLexer(Result);
856
857 // If we got an EOF token, we didn't form even ONE token. For example, we
858 // did "/ ## /" to get "//".
859 isInvalid |= Result.is(tok::eof);
860
861 // If pasting the two tokens didn't form a full new token, this is an
862 // error. This occurs with "x ## +" and other stuff. Return with LHSTok
863 // unmodified and with RHS as the next token to lex.
864 if (isInvalid) {
865 // Explicitly convert the token location to have proper expansion
866 // information so that the user knows where it came from.
867 SourceManager &SM = PP.getSourceManager();
868 SourceLocation Loc =
869 SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
870
871 // Test for the Microsoft extension of /##/ turning into // here on the
872 // error path.
873 if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
874 RHS.is(tok::slash)) {
875 HandleMicrosoftCommentPaste(LHSTok, Loc);
876 return true;
877 }
878
879 // Do not emit the error when preprocessing assembler code.
880 if (!PP.getLangOpts().AsmPreprocessor) {
881 // If we're in microsoft extensions mode, downgrade this from a hard
882 // error to an extension that defaults to an error. This allows
883 // disabling it.
884 PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
885 : diag::err_pp_bad_paste)
886 << Buffer;
887 }
888
889 // An error has occurred so exit loop.
890 break;
891 }
892
893 // Turn ## into 'unknown' to avoid # ## # from looking like a paste
894 // operator.
895 if (Result.is(tok::hashhash))
896 Result.setKind(tok::unknown);
897 }
898
899 // Transfer properties of the LHS over the Result.
900 Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
901 Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());
902
903 // Finally, replace LHS with the result, consume the RHS, and iterate.
904 ++CurIdx;
905
906 // Set Token::HasUCN flag if LHS or RHS contains any UCNs.
907 HasUCNs = LHSTok.hasUCN() || RHS.hasUCN() || HasUCNs;
908 LHSTok = Result;
909 } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));
910
911 SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();
912
913 // The token's current location indicate where the token was lexed from. We
914 // need this information to compute the spelling of the token, but any
915 // diagnostics for the expanded token should appear as if the token was
916 // expanded from the full ## expression. Pull this information together into
917 // a new SourceLocation that captures all of this.
918 SourceManager &SM = PP.getSourceManager();
919 if (StartLoc.isFileID())
920 StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
921 if (EndLoc.isFileID())
922 EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
923 FileID MacroFID = SM.getFileID(MacroExpansionStart);
924 while (SM.getFileID(StartLoc) != MacroFID)
925 StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
926 while (SM.getFileID(EndLoc) != MacroFID)
927 EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
928
929 LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
930 LHSTok.getLength()));
931
932 // Now that we got the result token, it will be subject to expansion. Since
933 // token pasting re-lexes the result token in raw mode, identifier information
934 // isn't looked up. As such, if the result is an identifier, look up id info.
935 if (LHSTok.is(tok::raw_identifier)) {
936
937 // If there has any UNCs in concated token, we should mark this token
938 // with Token::HasUCN flag, then LookUpIdentifierInfo will expand UCNs in
939 // token.
940 if (HasUCNs)
941 LHSTok.setFlag(Token::HasUCN);
942
943 // Look up the identifier info for the token. We disabled identifier lookup
944 // by saying we're skipping contents, so we need to do this manually.
945 PP.LookUpIdentifierInfo(LHSTok);
946 }
947 return false;
948}
949
950/// isNextTokenLParen - If the next token lexed will pop this macro off the
951/// expansion stack, return std::nullopt, otherwise return the next unexpanded
952/// token.
953std::optional<Token> TokenLexer::peekNextPPToken() const {
954 // Out of tokens?
955 if (isAtEnd())
956 return std::nullopt;
957 return Tokens[CurTokenIdx];
958}
959
960/// isParsingPreprocessorDirective - Return true if we are in the middle of a
961/// preprocessor directive.
963 return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
964}
965
966/// setLexingCXXModuleDirective - This is set to true if this TokenLexer is
967/// created when handling C++ module directive.
969 LexingCXXModuleDirective = Val;
970}
971
972/// isLexingCXXModuleDirective - Return true if we are lexing a C++ module or
973/// import directive.
975 return LexingCXXModuleDirective;
976}
977
978/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
979/// together to form a comment that comments out everything in the current
980/// macro, other active macros, and anything left on the current physical
981/// source line of the expanded buffer. Handle this by returning the
982/// first token on the next line.
983void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
984 PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);
985
986 // We 'comment out' the rest of this macro by just ignoring the rest of the
987 // tokens that have not been lexed yet, if any.
988
989 // Since this must be a macro, mark the macro enabled now that it is no longer
990 // being expanded.
991 assert(Macro && "Token streams can't paste comments");
992 Macro->EnableMacro();
993
995}
996
997/// If \arg loc is a file ID and points inside the current macro
998/// definition, returns the appropriate source location pointing at the
999/// macro expansion source location entry, otherwise it returns an invalid
1000/// SourceLocation.
1002TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
1003 assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
1004 "Not appropriate for token streams");
1005 assert(loc.isValid() && loc.isFileID());
1006
1008 assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
1009 "Expected loc to come from the macro definition");
1010
1011 SourceLocation::UIntTy relativeOffset = 0;
1012 SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
1013 return MacroExpansionStart.getLocWithOffset(relativeOffset);
1014}
1015
1016/// Finds the tokens that are consecutive (from the same FileID)
1017/// creates a single SLocEntry, and assigns SourceLocations to each token that
1018/// point to that SLocEntry. e.g for
1019/// assert(foo == bar);
1020/// There will be a single SLocEntry for the "foo == bar" chunk and locations
1021/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
1022///
1023/// \arg begin_tokens will be updated to a position past all the found
1024/// consecutive tokens.
1026 SourceLocation ExpandLoc,
1027 Token *&begin_tokens,
1028 Token * end_tokens) {
1029 assert(begin_tokens + 1 < end_tokens);
1030 SourceLocation BeginLoc = begin_tokens->getLocation();
1031 llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
1033
1034 auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
1035 // The maximum distance between two consecutive tokens in a partition.
1036 // This is an important trick to avoid using too much SourceLocation address
1037 // space!
1038 static constexpr SourceLocation::IntTy MaxDistance = 50;
1039 auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
1040 Last = Loc;
1041 return Distance <= MaxDistance;
1042 };
1043
1044 // Partition the tokens by their FileID.
1045 // This is a hot function, and calling getFileID can be expensive, the
1046 // implementation is optimized by reducing the number of getFileID.
1047 if (BeginLoc.isFileID()) {
1048 // Consecutive tokens not written in macros must be from the same file.
1049 // (Neither #include nor eof can occur inside a macro argument.)
1050 Partition = All.take_while([&](const Token &T) {
1051 return T.getLocation().isFileID() && NearLast(T.getLocation());
1052 });
1053 } else {
1054 // Call getFileID once to calculate the bounds, and use the cheaper
1055 // sourcelocation-against-bounds comparison.
1056 FileID BeginFID = SM.getFileID(BeginLoc);
1057 SourceLocation Limit =
1058 SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
1059 Partition = All.take_while([&](const Token &T) {
1060 // NOTE: the Limit is included! The lexer recovery only ever inserts a
1061 // single token past the end of the FileID, specifically the ) when a
1062 // macro-arg containing a comma should be guarded by parentheses.
1063 //
1064 // It is safe to include the Limit here because SourceManager allocates
1065 // FileSize + 1 for each SLocEntry.
1066 //
1067 // See https://github.com/llvm/llvm-project/issues/60722.
1068 return T.getLocation() >= BeginLoc && T.getLocation() <= Limit
1069 && NearLast(T.getLocation());
1070 });
1071 }
1072 assert(!Partition.empty());
1073
1074 // For the consecutive tokens, find the length of the SLocEntry to contain
1075 // all of them.
1076 SourceLocation::UIntTy FullLength =
1077 Partition.back().getEndLoc().getRawEncoding() -
1078 Partition.front().getLocation().getRawEncoding();
1079 // Create a macro expansion SLocEntry that will "contain" all of the tokens.
1080 SourceLocation Expansion =
1081 SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
1082
1083#ifdef EXPENSIVE_CHECKS
1084 assert(llvm::all_of(Partition.drop_front(),
1085 [&SM, ID = SM.getFileID(Partition.front().getLocation())](
1086 const Token &T) {
1087 return ID == SM.getFileID(T.getLocation());
1088 }) &&
1089 "Must have the same FIleID!");
1090#endif
1091 // Change the location of the tokens from the spelling location to the new
1092 // expanded location.
1093 for (Token& T : Partition) {
1094 SourceLocation::IntTy RelativeOffset =
1095 T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
1096 T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
1097 }
1098 begin_tokens = &Partition.back() + 1;
1099}
1100
1101/// Creates SLocEntries and updates the locations of macro argument
1102/// tokens to their new expanded locations.
1103///
1104/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
1105/// definition.
1106void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
1107 Token *begin_tokens,
1108 Token *end_tokens) {
1109 SourceManager &SM = PP.getSourceManager();
1110
1111 SourceLocation ExpandLoc =
1112 getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
1113
1114 while (begin_tokens < end_tokens) {
1115 // If there's only one token just create a SLocEntry for it.
1116 if (end_tokens - begin_tokens == 1) {
1117 Token &Tok = *begin_tokens;
1118 Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
1119 ExpandLoc,
1120 Tok.getLength()));
1121 return;
1122 }
1123
1124 updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
1125 }
1126}
1127
1128void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
1129 AtStartOfLine = Result.isAtStartOfLine();
1130 HasLeadingSpace = Result.hasLeadingSpace();
1131}
Defines the Diagnostic-related interfaces.
Token Tok
The Token.
bool is(tok::TokenKind Kind) const
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
#define SM(sm)
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
static bool isInvalid(LocType Loc, bool *Invalid)
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok)
Checks if two tokens form wide string literal.
static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation ExpandLoc, Token *&begin_tokens, Token *end_tokens)
Finds the tokens that are consecutive (from the same FileID) creates a single SLocEntry,...
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens,...
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition MacroArgs.cpp:77
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:39
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
const LangOptions & getLangOpts() const
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
bool isParsingPreprocessorDirective() const
isParsingPreprocessorDirective - Return true if we are in the middle of a preprocessor directive.
bool isLexingCXXModuleDirective() const
isLexingCXXModuleDirective - Return true if we are lexing a C++ module or import directive.
bool Lex(Token &Tok)
Lex and return a token from this macro stream.
std::optional< Token > peekNextPPToken() const
If TokenLexer::isAtEnd returns true(the next token lexed will pop thismacro off the expansion stack),...
void Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, MacroArgs *Actuals)
Initialize this TokenLexer to expand from the specified macro with the specified argument information...
void setLexingCXXModuleDirective(bool Val=true)
setLexingCXXModuleDirective - This is set to true if this TokenLexer is created when handling a C++ m...
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:195
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition Token.h:118
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition Token.h:318
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition Token.h:262
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition Token.h:124
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:140
unsigned getLength() const
Definition Token.h:143
void setKind(tok::TokenKind K)
Definition Token.h:98
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:102
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition Token.h:284
bool isOneOf(Ts... Ks) const
Definition Token.h:103
@ IgnoredComma
Definition Token.h:84
@ IsReinjected
Definition Token.h:89
@ LeadingEmptyMacro
Definition Token.h:81
@ LeadingSpace
Definition Token.h:77
@ StartOfLine
Definition Token.h:75
@ StringifiedInMacro
Definition Token.h:85
@ CommaAfterElided
Definition Token.h:87
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition Token.h:288
void setLocation(SourceLocation L)
Definition Token.h:148
bool isNot(tok::TokenKind K) const
Definition Token.h:109
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition Token.h:322
void startToken()
Reset all flags to cleared.
Definition Token.h:185
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition Token.h:233
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition Token.h:252
A class for tracking whether we're inside a VA_OPT during a traversal of the tokens of a macro during...
bool isInVAOpt() const
Returns true if we have seen the VA_OPT and '(' but before having seen the matching ')'.
bool isVAOptToken(const Token &T) const
void sawHashOrHashAtBefore(const bool HasLeadingSpace, const bool IsHashAt)
SourceLocation getVAOptLoc() const
unsigned int getNumberOfTokensPriorToVAOpt() const
void sawOpeningParen(SourceLocation LParenLoc)
Call this function each time an lparen is seen.
bool sawClosingParen()
Call this function each time an rparen is seen.
void sawVAOptFollowedByOpeningParens(const SourceLocation VAOptLoc, const unsigned int NumPriorTokens)
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
const FunctionProtoType * T