clang API Documentation
00001 //===--- TokenLexer.cpp - Lex from a token stream -------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the TokenLexer interface. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "clang/Lex/TokenLexer.h" 00015 #include "MacroArgs.h" 00016 #include "clang/Lex/MacroInfo.h" 00017 #include "clang/Lex/Preprocessor.h" 00018 #include "clang/Basic/SourceManager.h" 00019 #include "clang/Lex/LexDiagnostic.h" 00020 #include "llvm/ADT/SmallString.h" 00021 using namespace clang; 00022 00023 00024 /// Create a TokenLexer for the specified macro with the specified actual 00025 /// arguments. Note that this ctor takes ownership of the ActualArgs pointer. 00026 void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) { 00027 // If the client is reusing a TokenLexer, make sure to free any memory 00028 // associated with it. 00029 destroy(); 00030 00031 Macro = PP.getMacroInfo(Tok.getIdentifierInfo()); 00032 ActualArgs = Actuals; 00033 CurToken = 0; 00034 00035 ExpandLocStart = Tok.getLocation(); 00036 ExpandLocEnd = ELEnd; 00037 AtStartOfLine = Tok.isAtStartOfLine(); 00038 HasLeadingSpace = Tok.hasLeadingSpace(); 00039 Tokens = &*Macro->tokens_begin(); 00040 OwnsTokens = false; 00041 DisableMacroExpansion = false; 00042 NumTokens = Macro->tokens_end()-Macro->tokens_begin(); 00043 MacroExpansionStart = SourceLocation(); 00044 00045 SourceManager &SM = PP.getSourceManager(); 00046 MacroStartSLocOffset = SM.getNextLocalOffset(); 00047 00048 if (NumTokens > 0) { 00049 assert(Tokens[0].getLocation().isValid()); 00050 assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) && 00051 "Macro defined in macro?"); 00052 assert(ExpandLocStart.isValid()); 00053 00054 // Reserve a source location entry chunk for the length of the macro 00055 // definition. Tokens that get lexed directly from the definition will 00056 // have their locations pointing inside this chunk. This is to avoid 00057 // creating separate source location entries for each token. 00058 MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation()); 00059 MacroDefLength = Macro->getDefinitionLength(SM); 00060 MacroExpansionStart = SM.createExpansionLoc(MacroDefStart, 00061 ExpandLocStart, 00062 ExpandLocEnd, 00063 MacroDefLength); 00064 } 00065 00066 // If this is a function-like macro, expand the arguments and change 00067 // Tokens to point to the expanded tokens. 00068 if (Macro->isFunctionLike() && Macro->getNumArgs()) 00069 ExpandFunctionArguments(); 00070 00071 // Mark the macro as currently disabled, so that it is not recursively 00072 // expanded. The macro must be disabled only after argument pre-expansion of 00073 // function-like macro arguments occurs. 00074 Macro->DisableMacro(); 00075 } 00076 00077 00078 00079 /// Create a TokenLexer for the specified token stream. This does not 00080 /// take ownership of the specified token vector. 00081 void TokenLexer::Init(const Token *TokArray, unsigned NumToks, 00082 bool disableMacroExpansion, bool ownsTokens) { 00083 // If the client is reusing a TokenLexer, make sure to free any memory 00084 // associated with it. 00085 destroy(); 00086 00087 Macro = 0; 00088 ActualArgs = 0; 00089 Tokens = TokArray; 00090 OwnsTokens = ownsTokens; 00091 DisableMacroExpansion = disableMacroExpansion; 00092 NumTokens = NumToks; 00093 CurToken = 0; 00094 ExpandLocStart = ExpandLocEnd = SourceLocation(); 00095 AtStartOfLine = false; 00096 HasLeadingSpace = false; 00097 MacroExpansionStart = SourceLocation(); 00098 00099 // Set HasLeadingSpace/AtStartOfLine so that the first token will be 00100 // returned unmodified. 00101 if (NumToks != 0) { 00102 AtStartOfLine = TokArray[0].isAtStartOfLine(); 00103 HasLeadingSpace = TokArray[0].hasLeadingSpace(); 00104 } 00105 } 00106 00107 00108 void TokenLexer::destroy() { 00109 // If this was a function-like macro that actually uses its arguments, delete 00110 // the expanded tokens. 00111 if (OwnsTokens) { 00112 delete [] Tokens; 00113 Tokens = 0; 00114 OwnsTokens = false; 00115 } 00116 00117 // TokenLexer owns its formal arguments. 00118 if (ActualArgs) ActualArgs->destroy(PP); 00119 } 00120 00121 /// Expand the arguments of a function-like macro so that we can quickly 00122 /// return preexpanded tokens from Tokens. 00123 void TokenLexer::ExpandFunctionArguments() { 00124 00125 SmallVector<Token, 128> ResultToks; 00126 00127 // Loop through 'Tokens', expanding them into ResultToks. Keep 00128 // track of whether we change anything. If not, no need to keep them. If so, 00129 // we install the newly expanded sequence as the new 'Tokens' list. 00130 bool MadeChange = false; 00131 00132 // NextTokGetsSpace - When this is true, the next token appended to the 00133 // output list will get a leading space, regardless of whether it had one to 00134 // begin with or not. This is used for placemarker support. 00135 bool NextTokGetsSpace = false; 00136 00137 for (unsigned i = 0, e = NumTokens; i != e; ++i) { 00138 // If we found the stringify operator, get the argument stringified. The 00139 // preprocessor already verified that the following token is a macro name 00140 // when the #define was parsed. 00141 const Token &CurTok = Tokens[i]; 00142 if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) { 00143 int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo()); 00144 assert(ArgNo != -1 && "Token following # is not an argument?"); 00145 00146 SourceLocation ExpansionLocStart = 00147 getExpansionLocForMacroDefLoc(CurTok.getLocation()); 00148 SourceLocation ExpansionLocEnd = 00149 getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation()); 00150 00151 Token Res; 00152 if (CurTok.is(tok::hash)) // Stringify 00153 Res = ActualArgs->getStringifiedArgument(ArgNo, PP, 00154 ExpansionLocStart, 00155 ExpansionLocEnd); 00156 else { 00157 // 'charify': don't bother caching these. 00158 Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo), 00159 PP, true, 00160 ExpansionLocStart, 00161 ExpansionLocEnd); 00162 } 00163 00164 // The stringified/charified string leading space flag gets set to match 00165 // the #/#@ operator. 00166 if (CurTok.hasLeadingSpace() || NextTokGetsSpace) 00167 Res.setFlag(Token::LeadingSpace); 00168 00169 ResultToks.push_back(Res); 00170 MadeChange = true; 00171 ++i; // Skip arg name. 00172 NextTokGetsSpace = false; 00173 continue; 00174 } 00175 00176 // Otherwise, if this is not an argument token, just add the token to the 00177 // output buffer. 00178 IdentifierInfo *II = CurTok.getIdentifierInfo(); 00179 int ArgNo = II ? Macro->getArgumentNum(II) : -1; 00180 if (ArgNo == -1) { 00181 // This isn't an argument, just add it. 00182 ResultToks.push_back(CurTok); 00183 00184 if (NextTokGetsSpace) { 00185 ResultToks.back().setFlag(Token::LeadingSpace); 00186 NextTokGetsSpace = false; 00187 } 00188 continue; 00189 } 00190 00191 // An argument is expanded somehow, the result is different than the 00192 // input. 00193 MadeChange = true; 00194 00195 // Otherwise, this is a use of the argument. Find out if there is a paste 00196 // (##) operator before or after the argument. 00197 bool PasteBefore = 00198 !ResultToks.empty() && ResultToks.back().is(tok::hashhash); 00199 bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash); 00200 00201 // If it is not the LHS/RHS of a ## operator, we must pre-expand the 00202 // argument and substitute the expanded tokens into the result. This is 00203 // C99 6.10.3.1p1. 00204 if (!PasteBefore && !PasteAfter) { 00205 const Token *ResultArgToks; 00206 00207 // Only preexpand the argument if it could possibly need it. This 00208 // avoids some work in common cases. 00209 const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo); 00210 if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP)) 00211 ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0]; 00212 else 00213 ResultArgToks = ArgTok; // Use non-preexpanded tokens. 00214 00215 // If the arg token expanded into anything, append it. 00216 if (ResultArgToks->isNot(tok::eof)) { 00217 unsigned FirstResult = ResultToks.size(); 00218 unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); 00219 ResultToks.append(ResultArgToks, ResultArgToks+NumToks); 00220 00221 // If the '##' came from expanding an argument, turn it into 'unknown' 00222 // to avoid pasting. 00223 for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) { 00224 Token &Tok = ResultToks[i]; 00225 if (Tok.is(tok::hashhash)) 00226 Tok.setKind(tok::unknown); 00227 } 00228 00229 if(ExpandLocStart.isValid()) { 00230 updateLocForMacroArgTokens(CurTok.getLocation(), 00231 ResultToks.begin()+FirstResult, 00232 ResultToks.end()); 00233 } 00234 00235 // If any tokens were substituted from the argument, the whitespace 00236 // before the first token should match the whitespace of the arg 00237 // identifier. 00238 ResultToks[FirstResult].setFlagValue(Token::LeadingSpace, 00239 CurTok.hasLeadingSpace() || 00240 NextTokGetsSpace); 00241 NextTokGetsSpace = false; 00242 } else { 00243 // If this is an empty argument, and if there was whitespace before the 00244 // formal token, make sure the next token gets whitespace before it. 00245 NextTokGetsSpace = CurTok.hasLeadingSpace(); 00246 } 00247 continue; 00248 } 00249 00250 // Okay, we have a token that is either the LHS or RHS of a paste (##) 00251 // argument. It gets substituted as its non-pre-expanded tokens. 00252 const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo); 00253 unsigned NumToks = MacroArgs::getArgLength(ArgToks); 00254 if (NumToks) { // Not an empty argument? 00255 // If this is the GNU ", ## __VA_ARG__" extension, and we just learned 00256 // that __VA_ARG__ expands to multiple tokens, avoid a pasting error when 00257 // the expander trys to paste ',' with the first token of the __VA_ARG__ 00258 // expansion. 00259 if (PasteBefore && ResultToks.size() >= 2 && 00260 ResultToks[ResultToks.size()-2].is(tok::comma) && 00261 (unsigned)ArgNo == Macro->getNumArgs()-1 && 00262 Macro->isVariadic()) { 00263 // Remove the paste operator, report use of the extension. 00264 PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); 00265 ResultToks.pop_back(); 00266 } 00267 00268 ResultToks.append(ArgToks, ArgToks+NumToks); 00269 00270 // If the '##' came from expanding an argument, turn it into 'unknown' 00271 // to avoid pasting. 00272 for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size(); 00273 i != e; ++i) { 00274 Token &Tok = ResultToks[i]; 00275 if (Tok.is(tok::hashhash)) 00276 Tok.setKind(tok::unknown); 00277 } 00278 00279 if (ExpandLocStart.isValid()) { 00280 updateLocForMacroArgTokens(CurTok.getLocation(), 00281 ResultToks.end()-NumToks, ResultToks.end()); 00282 } 00283 00284 // If this token (the macro argument) was supposed to get leading 00285 // whitespace, transfer this information onto the first token of the 00286 // expansion. 00287 // 00288 // Do not do this if the paste operator occurs before the macro argument, 00289 // as in "A ## MACROARG". In valid code, the first token will get 00290 // smooshed onto the preceding one anyway (forming AMACROARG). In 00291 // assembler-with-cpp mode, invalid pastes are allowed through: in this 00292 // case, we do not want the extra whitespace to be added. For example, 00293 // we want ". ## foo" -> ".foo" not ". foo". 00294 if ((CurTok.hasLeadingSpace() || NextTokGetsSpace) && 00295 !PasteBefore) 00296 ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace); 00297 00298 NextTokGetsSpace = false; 00299 continue; 00300 } 00301 00302 // If an empty argument is on the LHS or RHS of a paste, the standard (C99 00303 // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We 00304 // implement this by eating ## operators when a LHS or RHS expands to 00305 // empty. 00306 NextTokGetsSpace |= CurTok.hasLeadingSpace(); 00307 if (PasteAfter) { 00308 // Discard the argument token and skip (don't copy to the expansion 00309 // buffer) the paste operator after it. 00310 NextTokGetsSpace |= Tokens[i+1].hasLeadingSpace(); 00311 ++i; 00312 continue; 00313 } 00314 00315 // If this is on the RHS of a paste operator, we've already copied the 00316 // paste operator to the ResultToks list. Remove it. 00317 assert(PasteBefore && ResultToks.back().is(tok::hashhash)); 00318 NextTokGetsSpace |= ResultToks.back().hasLeadingSpace(); 00319 ResultToks.pop_back(); 00320 00321 // If this is the __VA_ARGS__ token, and if the argument wasn't provided, 00322 // and if the macro had at least one real argument, and if the token before 00323 // the ## was a comma, remove the comma. 00324 if ((unsigned)ArgNo == Macro->getNumArgs()-1 && // is __VA_ARGS__ 00325 ActualArgs->isVarargsElidedUse() && // Argument elided. 00326 !ResultToks.empty() && ResultToks.back().is(tok::comma)) { 00327 // Never add a space, even if the comma, ##, or arg had a space. 00328 NextTokGetsSpace = false; 00329 // Remove the paste operator, report use of the extension. 00330 PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); 00331 ResultToks.pop_back(); 00332 00333 // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"), 00334 // then removal of the comma should produce a placemarker token (in C99 00335 // terms) which we model by popping off the previous ##, giving us a plain 00336 // "X" when __VA_ARGS__ is empty. 00337 if (!ResultToks.empty() && ResultToks.back().is(tok::hashhash)) 00338 ResultToks.pop_back(); 00339 } 00340 continue; 00341 } 00342 00343 // If anything changed, install this as the new Tokens list. 00344 if (MadeChange) { 00345 assert(!OwnsTokens && "This would leak if we already own the token list"); 00346 // This is deleted in the dtor. 00347 NumTokens = ResultToks.size(); 00348 // The tokens will be added to Preprocessor's cache and will be removed 00349 // when this TokenLexer finishes lexing them. 00350 Tokens = PP.cacheMacroExpandedTokens(this, ResultToks); 00351 00352 // The preprocessor cache of macro expanded tokens owns these tokens,not us. 00353 OwnsTokens = false; 00354 } 00355 } 00356 00357 /// Lex - Lex and return a token from this macro stream. 00358 /// 00359 void TokenLexer::Lex(Token &Tok) { 00360 // Lexing off the end of the macro, pop this macro off the expansion stack. 00361 if (isAtEnd()) { 00362 // If this is a macro (not a token stream), mark the macro enabled now 00363 // that it is no longer being expanded. 00364 if (Macro) Macro->EnableMacro(); 00365 00366 // Pop this context off the preprocessors lexer stack and get the next 00367 // token. This will delete "this" so remember the PP instance var. 00368 Preprocessor &PPCache = PP; 00369 if (PP.HandleEndOfTokenLexer(Tok)) 00370 return; 00371 00372 // HandleEndOfTokenLexer may not return a token. If it doesn't, lex 00373 // whatever is next. 00374 return PPCache.Lex(Tok); 00375 } 00376 00377 SourceManager &SM = PP.getSourceManager(); 00378 00379 // If this is the first token of the expanded result, we inherit spacing 00380 // properties later. 00381 bool isFirstToken = CurToken == 0; 00382 00383 // Get the next token to return. 00384 Tok = Tokens[CurToken++]; 00385 00386 bool TokenIsFromPaste = false; 00387 00388 // If this token is followed by a token paste (##) operator, paste the tokens! 00389 // Note that ## is a normal token when not expanding a macro. 00390 if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash) && Macro) { 00391 // When handling the microsoft /##/ extension, the final token is 00392 // returned by PasteTokens, not the pasted token. 00393 if (PasteTokens(Tok)) 00394 return; 00395 00396 TokenIsFromPaste = true; 00397 } 00398 00399 // The token's current location indicate where the token was lexed from. We 00400 // need this information to compute the spelling of the token, but any 00401 // diagnostics for the expanded token should appear as if they came from 00402 // ExpansionLoc. Pull this information together into a new SourceLocation 00403 // that captures all of this. 00404 if (ExpandLocStart.isValid() && // Don't do this for token streams. 00405 // Check that the token's location was not already set properly. 00406 SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) { 00407 SourceLocation instLoc; 00408 if (Tok.is(tok::comment)) { 00409 instLoc = SM.createExpansionLoc(Tok.getLocation(), 00410 ExpandLocStart, 00411 ExpandLocEnd, 00412 Tok.getLength()); 00413 } else { 00414 instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation()); 00415 } 00416 00417 Tok.setLocation(instLoc); 00418 } 00419 00420 // If this is the first token, set the lexical properties of the token to 00421 // match the lexical properties of the macro identifier. 00422 if (isFirstToken) { 00423 Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); 00424 Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); 00425 } 00426 00427 // Handle recursive expansion! 00428 if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != 0) { 00429 // Change the kind of this identifier to the appropriate token kind, e.g. 00430 // turning "for" into a keyword. 00431 IdentifierInfo *II = Tok.getIdentifierInfo(); 00432 Tok.setKind(II->getTokenID()); 00433 00434 // If this identifier was poisoned and from a paste, emit an error. This 00435 // won't be handled by Preprocessor::HandleIdentifier because this is coming 00436 // from a macro expansion. 00437 if (II->isPoisoned() && TokenIsFromPaste) { 00438 PP.HandlePoisonedIdentifier(Tok); 00439 } 00440 00441 if (!DisableMacroExpansion && II->isHandleIdentifierCase()) 00442 PP.HandleIdentifier(Tok); 00443 } 00444 00445 // Otherwise, return a normal token. 00446 } 00447 00448 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## 00449 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there 00450 /// are more ## after it, chomp them iteratively. Return the result as Tok. 00451 /// If this returns true, the caller should immediately return the token. 00452 bool TokenLexer::PasteTokens(Token &Tok) { 00453 SmallString<128> Buffer; 00454 const char *ResultTokStrPtr = 0; 00455 SourceLocation StartLoc = Tok.getLocation(); 00456 SourceLocation PasteOpLoc; 00457 do { 00458 // Consume the ## operator. 00459 PasteOpLoc = Tokens[CurToken].getLocation(); 00460 ++CurToken; 00461 assert(!isAtEnd() && "No token on the RHS of a paste operator!"); 00462 00463 // Get the RHS token. 00464 const Token &RHS = Tokens[CurToken]; 00465 00466 // Allocate space for the result token. This is guaranteed to be enough for 00467 // the two tokens. 00468 Buffer.resize(Tok.getLength() + RHS.getLength()); 00469 00470 // Get the spelling of the LHS token in Buffer. 00471 const char *BufPtr = &Buffer[0]; 00472 bool Invalid = false; 00473 unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid); 00474 if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer! 00475 memcpy(&Buffer[0], BufPtr, LHSLen); 00476 if (Invalid) 00477 return true; 00478 00479 BufPtr = &Buffer[LHSLen]; 00480 unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid); 00481 if (Invalid) 00482 return true; 00483 if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer! 00484 memcpy(&Buffer[LHSLen], BufPtr, RHSLen); 00485 00486 // Trim excess space. 00487 Buffer.resize(LHSLen+RHSLen); 00488 00489 // Plop the pasted result (including the trailing newline and null) into a 00490 // scratch buffer where we can lex it. 00491 Token ResultTokTmp; 00492 ResultTokTmp.startToken(); 00493 00494 // Claim that the tmp token is a string_literal so that we can get the 00495 // character pointer back from CreateString in getLiteralData(). 00496 ResultTokTmp.setKind(tok::string_literal); 00497 PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp); 00498 SourceLocation ResultTokLoc = ResultTokTmp.getLocation(); 00499 ResultTokStrPtr = ResultTokTmp.getLiteralData(); 00500 00501 // Lex the resultant pasted token into Result. 00502 Token Result; 00503 00504 if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) { 00505 // Common paste case: identifier+identifier = identifier. Avoid creating 00506 // a lexer and other overhead. 00507 PP.IncrementPasteCounter(true); 00508 Result.startToken(); 00509 Result.setKind(tok::raw_identifier); 00510 Result.setRawIdentifierData(ResultTokStrPtr); 00511 Result.setLocation(ResultTokLoc); 00512 Result.setLength(LHSLen+RHSLen); 00513 } else { 00514 PP.IncrementPasteCounter(false); 00515 00516 assert(ResultTokLoc.isFileID() && 00517 "Should be a raw location into scratch buffer"); 00518 SourceManager &SourceMgr = PP.getSourceManager(); 00519 FileID LocFileID = SourceMgr.getFileID(ResultTokLoc); 00520 00521 bool Invalid = false; 00522 const char *ScratchBufStart 00523 = SourceMgr.getBufferData(LocFileID, &Invalid).data(); 00524 if (Invalid) 00525 return false; 00526 00527 // Make a lexer to lex this string from. Lex just this one token. 00528 // Make a lexer object so that we lex and expand the paste result. 00529 Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), 00530 PP.getLangOpts(), ScratchBufStart, 00531 ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen); 00532 00533 // Lex a token in raw mode. This way it won't look up identifiers 00534 // automatically, lexing off the end will return an eof token, and 00535 // warnings are disabled. This returns true if the result token is the 00536 // entire buffer. 00537 bool isInvalid = !TL.LexFromRawLexer(Result); 00538 00539 // If we got an EOF token, we didn't form even ONE token. For example, we 00540 // did "/ ## /" to get "//". 00541 isInvalid |= Result.is(tok::eof); 00542 00543 // If pasting the two tokens didn't form a full new token, this is an 00544 // error. This occurs with "x ## +" and other stuff. Return with Tok 00545 // unmodified and with RHS as the next token to lex. 00546 if (isInvalid) { 00547 // Test for the Microsoft extension of /##/ turning into // here on the 00548 // error path. 00549 if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) && 00550 RHS.is(tok::slash)) { 00551 HandleMicrosoftCommentPaste(Tok); 00552 return true; 00553 } 00554 00555 // Do not emit the error when preprocessing assembler code. 00556 if (!PP.getLangOpts().AsmPreprocessor) { 00557 // Explicitly convert the token location to have proper expansion 00558 // information so that the user knows where it came from. 00559 SourceManager &SM = PP.getSourceManager(); 00560 SourceLocation Loc = 00561 SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2); 00562 // If we're in microsoft extensions mode, downgrade this from a hard 00563 // error to a warning that defaults to an error. This allows 00564 // disabling it. 00565 PP.Diag(Loc, 00566 PP.getLangOpts().MicrosoftExt ? diag::err_pp_bad_paste_ms 00567 : diag::err_pp_bad_paste) 00568 << Buffer.str(); 00569 } 00570 00571 // Do not consume the RHS. 00572 --CurToken; 00573 } 00574 00575 // Turn ## into 'unknown' to avoid # ## # from looking like a paste 00576 // operator. 00577 if (Result.is(tok::hashhash)) 00578 Result.setKind(tok::unknown); 00579 } 00580 00581 // Transfer properties of the LHS over the the Result. 00582 Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine()); 00583 Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace()); 00584 00585 // Finally, replace LHS with the result, consume the RHS, and iterate. 00586 ++CurToken; 00587 Tok = Result; 00588 } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)); 00589 00590 SourceLocation EndLoc = Tokens[CurToken - 1].getLocation(); 00591 00592 // The token's current location indicate where the token was lexed from. We 00593 // need this information to compute the spelling of the token, but any 00594 // diagnostics for the expanded token should appear as if the token was 00595 // expanded from the full ## expression. Pull this information together into 00596 // a new SourceLocation that captures all of this. 00597 SourceManager &SM = PP.getSourceManager(); 00598 if (StartLoc.isFileID()) 00599 StartLoc = getExpansionLocForMacroDefLoc(StartLoc); 00600 if (EndLoc.isFileID()) 00601 EndLoc = getExpansionLocForMacroDefLoc(EndLoc); 00602 Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc, 00603 Tok.getLength())); 00604 00605 // Now that we got the result token, it will be subject to expansion. Since 00606 // token pasting re-lexes the result token in raw mode, identifier information 00607 // isn't looked up. As such, if the result is an identifier, look up id info. 00608 if (Tok.is(tok::raw_identifier)) { 00609 // Look up the identifier info for the token. We disabled identifier lookup 00610 // by saying we're skipping contents, so we need to do this manually. 00611 PP.LookUpIdentifierInfo(Tok); 00612 } 00613 return false; 00614 } 00615 00616 /// isNextTokenLParen - If the next token lexed will pop this macro off the 00617 /// expansion stack, return 2. If the next unexpanded token is a '(', return 00618 /// 1, otherwise return 0. 00619 unsigned TokenLexer::isNextTokenLParen() const { 00620 // Out of tokens? 00621 if (isAtEnd()) 00622 return 2; 00623 return Tokens[CurToken].is(tok::l_paren); 00624 } 00625 00626 /// isParsingPreprocessorDirective - Return true if we are in the middle of a 00627 /// preprocessor directive. 00628 bool TokenLexer::isParsingPreprocessorDirective() const { 00629 return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd(); 00630 } 00631 00632 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes 00633 /// together to form a comment that comments out everything in the current 00634 /// macro, other active macros, and anything left on the current physical 00635 /// source line of the expanded buffer. Handle this by returning the 00636 /// first token on the next line. 00637 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) { 00638 // We 'comment out' the rest of this macro by just ignoring the rest of the 00639 // tokens that have not been lexed yet, if any. 00640 00641 // Since this must be a macro, mark the macro enabled now that it is no longer 00642 // being expanded. 00643 assert(Macro && "Token streams can't paste comments"); 00644 Macro->EnableMacro(); 00645 00646 PP.HandleMicrosoftCommentPaste(Tok); 00647 } 00648 00649 /// \brief If \arg loc is a file ID and points inside the current macro 00650 /// definition, returns the appropriate source location pointing at the 00651 /// macro expansion source location entry, otherwise it returns an invalid 00652 /// SourceLocation. 00653 SourceLocation 00654 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const { 00655 assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() && 00656 "Not appropriate for token streams"); 00657 assert(loc.isValid() && loc.isFileID()); 00658 00659 SourceManager &SM = PP.getSourceManager(); 00660 assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) && 00661 "Expected loc to come from the macro definition"); 00662 00663 unsigned relativeOffset = 0; 00664 SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset); 00665 return MacroExpansionStart.getLocWithOffset(relativeOffset); 00666 } 00667 00668 /// \brief Finds the tokens that are consecutive (from the same FileID) 00669 /// creates a single SLocEntry, and assigns SourceLocations to each token that 00670 /// point to that SLocEntry. e.g for 00671 /// assert(foo == bar); 00672 /// There will be a single SLocEntry for the "foo == bar" chunk and locations 00673 /// for the 'foo', '==', 'bar' tokens will point inside that chunk. 00674 /// 00675 /// \arg begin_tokens will be updated to a position past all the found 00676 /// consecutive tokens. 00677 static void updateConsecutiveMacroArgTokens(SourceManager &SM, 00678 SourceLocation InstLoc, 00679 Token *&begin_tokens, 00680 Token * end_tokens) { 00681 assert(begin_tokens < end_tokens); 00682 00683 SourceLocation FirstLoc = begin_tokens->getLocation(); 00684 SourceLocation CurLoc = FirstLoc; 00685 00686 // Compare the source location offset of tokens and group together tokens that 00687 // are close, even if their locations point to different FileIDs. e.g. 00688 // 00689 // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) 00690 // ^ ^ 00691 // |bar foo cake| (one SLocEntry chunk for all tokens) 00692 // 00693 // we can perform this "merge" since the token's spelling location depends 00694 // on the relative offset. 00695 00696 Token *NextTok = begin_tokens + 1; 00697 for (; NextTok < end_tokens; ++NextTok) { 00698 int RelOffs; 00699 if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs)) 00700 break; // Token from different local/loaded location. 00701 // Check that token is not before the previous token or more than 50 00702 // "characters" away. 00703 if (RelOffs < 0 || RelOffs > 50) 00704 break; 00705 CurLoc = NextTok->getLocation(); 00706 } 00707 00708 // For the consecutive tokens, find the length of the SLocEntry to contain 00709 // all of them. 00710 Token &LastConsecutiveTok = *(NextTok-1); 00711 int LastRelOffs = 0; 00712 SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), 00713 &LastRelOffs); 00714 unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength(); 00715 00716 // Create a macro expansion SLocEntry that will "contain" all of the tokens. 00717 SourceLocation Expansion = 00718 SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); 00719 00720 // Change the location of the tokens from the spelling location to the new 00721 // expanded location. 00722 for (; begin_tokens < NextTok; ++begin_tokens) { 00723 Token &Tok = *begin_tokens; 00724 int RelOffs = 0; 00725 SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); 00726 Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); 00727 } 00728 } 00729 00730 /// \brief Creates SLocEntries and updates the locations of macro argument 00731 /// tokens to their new expanded locations. 00732 /// 00733 /// \param ArgIdDefLoc the location of the macro argument id inside the macro 00734 /// definition. 00735 /// \param Tokens the macro argument tokens to update. 00736 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, 00737 Token *begin_tokens, 00738 Token *end_tokens) { 00739 SourceManager &SM = PP.getSourceManager(); 00740 00741 SourceLocation InstLoc = 00742 getExpansionLocForMacroDefLoc(ArgIdSpellLoc); 00743 00744 while (begin_tokens < end_tokens) { 00745 // If there's only one token just create a SLocEntry for it. 00746 if (end_tokens - begin_tokens == 1) { 00747 Token &Tok = *begin_tokens; 00748 Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(), 00749 InstLoc, 00750 Tok.getLength())); 00751 return; 00752 } 00753 00754 updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); 00755 } 00756 }