clang 20.0.0git
|
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens. More...
#include "clang/Lex/Lexer.h"
Classes | |
struct | SizedChar |
Represents a char and the number of bytes parsed to produce it. More... | |
Public Member Functions | |
Lexer (FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true) | |
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocessor managing the lexing process. | |
Lexer (SourceLocation FileLoc, const LangOptions &LangOpts, const char *BufStart, const char *BufPtr, const char *BufEnd, bool IsFirstIncludeOfFile=true) | |
Lexer constructor - Create a new raw lexer object. | |
Lexer (FileID FID, const llvm::MemoryBufferRef &FromFile, const SourceManager &SM, const LangOptions &LangOpts, bool IsFirstIncludeOfFile=true) | |
Lexer constructor - Create a new raw lexer object. | |
Lexer (const Lexer &)=delete | |
Lexer & | operator= (const Lexer &)=delete |
SourceLocation | getFileLoc () const |
getFileLoc - Return the File Location for the file we are lexing out of. | |
bool | Lex (Token &Result) |
Lex - Return the next token in the file. | |
bool | isPragmaLexer () const |
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. | |
bool | LexFromRawLexer (Token &Result) |
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object). | |
bool | isKeepWhitespaceMode () const |
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file, including whitespace and comments. | |
void | SetKeepWhitespaceMode (bool Val) |
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode. | |
bool | inKeepCommentMode () const |
inKeepCommentMode - Return true if the lexer should return comments as tokens. | |
void | SetCommentRetentionState (bool Mode) |
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode. | |
void | resetExtendedTokenMode () |
Sets the extended token mode back to its initial value, according to the language options and preprocessor. | |
StringRef | getBuffer () const |
Gets source code buffer. | |
void | ReadToEndOfLine (SmallVectorImpl< char > *Result=nullptr) |
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string. | |
DiagnosticBuilder | Diag (const char *Loc, unsigned DiagID) const |
Diag - Forwarding function for diagnostics. | |
SourceLocation | getSourceLocation (const char *Loc, unsigned TokLen=1) const |
getSourceLocation - Return a source location identifier for the specified offset in the current file. | |
SourceLocation | getSourceLocation () override |
getSourceLocation - Return a source location for the next character in the current file. | |
const char * | getBufferLocation () const |
Return the current location in the buffer. | |
unsigned | getCurrentBufferOffset () |
Returns the current lexing offset. | |
void | seek (unsigned Offset, bool IsAtStartOfLine) |
Set the lexer's buffer pointer to Offset. | |
bool | isFirstTimeLexingFile () const |
Check if this is the first time we're lexing the input file. | |
Public Member Functions inherited from clang::PreprocessorLexer | |
PreprocessorLexer (const PreprocessorLexer &)=delete | |
PreprocessorLexer & | operator= (const PreprocessorLexer &)=delete |
void | LexIncludeFilename (Token &FilenameTok) |
Lex a token, producing a header-name token if possible. | |
void | setParsingPreprocessorDirective (bool f) |
Inform the lexer whether or not we are currently lexing a preprocessor directive. | |
bool | isLexingRawMode () const |
Return true if this lexer is in raw mode or not. | |
Preprocessor * | getPP () const |
Return the preprocessor object for this lexer. | |
FileID | getFileID () const |
unsigned | getInitialNumSLocEntries () const |
Number of SLocEntries before lexing the file. | |
OptionalFileEntryRef | getFileEntry () const |
getFileEntry - Return the FileEntry corresponding to this FileID. | |
conditional_iterator | conditional_begin () const |
conditional_iterator | conditional_end () const |
void | setConditionalLevels (ArrayRef< PPConditionalInfo > CL) |
Static Public Member Functions | |
static Lexer * | Create_PragmaLexer (SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP) |
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion. | |
static std::string | Stringify (StringRef Str, bool Charify=false) |
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) replacing newline character(s) with "\n". | |
static void | Stringify (SmallVectorImpl< char > &Str) |
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) replacing newline character(s) with "\n". | |
static unsigned | getSpelling (const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr) |
getSpelling - This method is used to get the spelling of a token into a preallocated buffer, instead of as an std::string. | |
static std::string | getSpelling (const Token &Tok, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr) |
getSpelling() - Return the 'spelling' of the Tok token. | |
static StringRef | getSpelling (SourceLocation loc, SmallVectorImpl< char > &buffer, const SourceManager &SM, const LangOptions &options, bool *invalid=nullptr) |
getSpelling - This method is used to get the spelling of the token at the given source location. | |
static unsigned | MeasureTokenLength (SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) |
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the input file. | |
static bool | getRawToken (SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false) |
Relex the token at the specified location. | |
static SourceLocation | GetBeginningOfToken (SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) |
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of the token in which the original source location lands. | |
static unsigned | getTokenPrefixLength (SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts) |
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters of the token starting at TokStart. | |
static SourceLocation | AdvanceToTokenCharacter (SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts) |
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token, return a new location that specifies a character within the token. | |
static SourceLocation | getLocForEndOfToken (SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts) |
Computes the source location just past the end of the token at this source location. | |
static CharSourceRange | getAsCharRange (SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) |
Given a token range, produce a corresponding CharSourceRange that is not a token range. | |
static CharSourceRange | getAsCharRange (CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) |
static bool | isAtStartOfMacroExpansion (SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr) |
Returns true if the given MacroID location points at the first token of the macro expansion. | |
static bool | isAtEndOfMacroExpansion (SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr) |
Returns true if the given MacroID location points at the last token of the macro expansion. | |
static CharSourceRange | makeFileCharRange (CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) |
Accepts a range and returns a character range with file locations. | |
static StringRef | getSourceText (CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr) |
Returns a string for the source that the range encompasses. | |
static StringRef | getImmediateMacroName (SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) |
Retrieve the name of the immediate macro expansion. | |
static StringRef | getImmediateMacroNameForDiagnostics (SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) |
Retrieve the name of the immediate macro expansion. | |
static PreambleBounds | ComputePreamble (StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0) |
Compute the preamble of the given file. | |
static std::optional< Token > | findNextToken (SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) |
Finds the token that comes right after the given location. | |
static SourceLocation | findLocationAfterToken (SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) |
Checks that the given token is the first token that occurs after the given location (this excludes comments and whitespace). | |
static bool | isAsciiIdentifierContinueChar (char c, const LangOptions &LangOpts) |
Returns true if the given character could appear in an identifier. | |
static bool | isNewLineEscaped (const char *BufferStart, const char *Str) |
Checks whether the newline pointed to by Str is preceded by an escape sequence. | |
static SizedChar | getCharAndSizeNoWarn (const char *Ptr, const LangOptions &LangOpts) |
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning. | |
static StringRef | getIndentationForLine (SourceLocation Loc, const SourceManager &SM) |
Returns the leading whitespace for the line that corresponds to the given location Loc. | |
Friends | |
class | Preprocessor |
Additional Inherited Members | |
Public Types inherited from clang::PreprocessorLexer | |
using | conditional_iterator = SmallVectorImpl< PPConditionalInfo >::const_iterator |
Iterator that traverses the current stack of preprocessor conditional directives (#if/#ifdef/#ifndef). | |
Protected Member Functions inherited from clang::PreprocessorLexer | |
PreprocessorLexer () | |
PreprocessorLexer (Preprocessor *pp, FileID fid) | |
virtual | ~PreprocessorLexer ()=default |
virtual void | IndirectLex (Token &Result)=0 |
virtual SourceLocation | getSourceLocation ()=0 |
Return the source location for the next observable location. | |
void | pushConditionalLevel (SourceLocation DirectiveStart, bool WasSkipping, bool FoundNonSkip, bool FoundElse) |
pushConditionalLevel - When we enter a #if directive, this keeps track of what we are currently in for diagnostic emission (e.g. #if with missing #endif). | |
void | pushConditionalLevel (const PPConditionalInfo &CI) |
bool | popConditionalLevel (PPConditionalInfo &CI) |
popConditionalLevel - Remove an entry off the top of the conditional stack, returning information about it. | |
PPConditionalInfo & | peekConditionalLevel () |
Return the top of the conditional stack. | |
unsigned | getConditionalStackDepth () const |
Protected Attributes inherited from clang::PreprocessorLexer | |
Preprocessor * | PP = nullptr |
const FileID | FID |
The SourceManager FileID corresponding to the file being lexed. | |
unsigned | InitialNumSLocEntries = 0 |
Number of SLocEntries before lexing the file. | |
bool | ParsingPreprocessorDirective = false |
True when parsing #XXX; turns '\n' into a tok::eod token. | |
bool | ParsingFilename = false |
True after #include; turns <xx> or "xxx" into a tok::header_name token. | |
bool | LexingRawMode = false |
True if in raw mode. | |
MultipleIncludeOpt | MIOpt |
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization. | |
SmallVector< PPConditionalInfo, 4 > | ConditionalStack |
Information about the set of #if/#ifdef/#ifndef blocks we are currently in. | |
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
This provides no support for file reading or buffering, or buffering/seeking of tokens; only forward lexing is supported. It relies on the specified Preprocessor object to handle preprocessor directives, etc.
Lexer::Lexer | ( | FileID | FID, |
const llvm::MemoryBufferRef & | InputFile, | ||
Preprocessor & | PP, | ||
bool | IsFirstIncludeOfFile = true |
||
) |
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocessor managing the lexing process.
This lexer assumes that the associated file buffer and Preprocessor objects will outlive it, so it doesn't take ownership of either of them.
Definition at line 183 of file Lexer.cpp.
References resetExtendedTokenMode().
Lexer::Lexer | ( | SourceLocation | fileloc, |
const LangOptions & | langOpts, | ||
const char * | BufStart, | ||
const char * | BufPtr, | ||
const char * | BufEnd, | ||
bool | IsFirstIncludeOfFile = true |
||
) |
Lexer constructor - Create a new raw lexer object.
This object is only suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text range will outlive it, so it doesn't take ownership of it.
Definition at line 198 of file Lexer.cpp.
References clang::PreprocessorLexer::LexingRawMode.
Lexer::Lexer | ( | FileID | FID, |
const llvm::MemoryBufferRef & | FromFile, | ||
const SourceManager & | SM, | ||
const LangOptions & | langOpts, | ||
bool | IsFirstIncludeOfFile = true |
||
) |
|
delete |
|
inlinestatic |
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token, return a new location that specifies a character within the token.
This handles trigraphs and escaped newlines.
Definition at line 399 of file Lexer.h.
References clang::SourceLocation::getLocWithOffset(), getTokenPrefixLength(), and SM.
Referenced by clang::Sema::ActOnStringLiteral(), clang::Preprocessor::AdvanceToTokenCharacter(), Diag(), clang::StringLiteral::getLocationOfByte(), getUDSuffixLoc(), MakeCharSourceRange(), and clang::NumericLiteralParser::NumericLiteralParser().
|
static |
Compute the preamble of the given file.
The preamble of a file contains the initial comments, include directives, and other preprocessor directives that occur before the code in this particular file actually begins. The preamble of the main source file is a potential prefix header.
Buffer | The memory buffer containing the file's contents. |
MaxLines | If non-zero, restrict the length of the preamble to fewer than this number of lines. |
Definition at line 636 of file Lexer.cpp.
References clang::SourceLocation::getFromRawEncoding(), clang::Token::getKind(), clang::Token::getLocation(), clang::SourceLocation::getRawEncoding(), clang::Token::getRawIdentifier(), clang::Token::isAtStartOfLine(), clang::SourceLocation::isInvalid(), clang::SourceLocation::isValid(), LexFromRawLexer(), clang::Token::needsCleaning(), and SetCommentRetentionState().
Referenced by clang::ComputePreambleBounds(), and clang::PrintPreambleAction::ExecuteAction().
|
static |
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
This has a variety of magic semantics that this method sets up. It returns a new'd Lexer that must be delete'd when done.
On entrance to this routine, TokStartLoc is a macro location which has a spelling loc that indicates the bytes to be lexed for the token and an expansion location that indicates where all lexed tokens should be "expanded from".
TODO: It would really be nice to make _Pragma just be a wrapper around a normal lexer that remaps tokens as they fly by. This would require making Preprocessor::Lex virtual. Given that, we could just dump in a magic lexer interface that could handle this stuff. This would pull GetMappedTokenLoc out of the critical path of the lexer!
Definition at line 242 of file Lexer.cpp.
References clang::Preprocessor::getSourceManager(), clang::PreprocessorLexer::ParsingPreprocessorDirective, clang::PreprocessorLexer::PP, and SM.
DiagnosticBuilder Lexer::Diag | ( | const char * | Loc, |
unsigned | DiagID | ||
) | const |
Diag - Forwarding function for diagnostics.
This translates a source position in the current buffer into a SourceLocation object for rendering.
Definition at line 1231 of file Lexer.cpp.
References clang::Preprocessor::Diag(), getSourceLocation(), Loc, and clang::PreprocessorLexer::PP.
Referenced by DecodeTrigraphChar(), and isEndOfBlockCommentWithEscapedNewLine().
|
static |
Checks that the given token is the first token that occurs after the given location (this excludes comments and whitespace).
Returns the location immediately after the specified token. If the token is not found or the location is inside a macro, the returned source location will be invalid.
Definition at line 1357 of file Lexer.cpp.
References clang::C, findNextToken(), clang::SourceLocation::getLocWithOffset(), clang::isHorizontalWhitespace(), Loc, and SM.
Referenced by DiagnoseMismatchedNewDelete(), clang::Sema::DiagnoseUnterminatedPragmaAlignPack(), and GenerateFixForUnusedDecl().
|
static |
Finds the token that comes right after the given location.
Returns the next token, or std::nullopt if the location is inside a macro.
Definition at line 1324 of file Lexer.cpp.
References clang::File, getLocForEndOfToken(), isAtEndOfMacroExpansion(), clang::SourceLocation::isMacroID(), LexFromRawLexer(), Loc, and SM.
Referenced by clang::tooling::ExtractionSemicolonPolicy::compute(), findLocationAfterToken(), and fixVarDeclWithArray().
|
inlinestatic |
Definition at line 438 of file Lexer.h.
References getAsCharRange(), Range, and SM.
|
inlinestatic |
Given a token range, produce a corresponding CharSourceRange that is not a token range.
This allows the source range to be used by components that don't have access to the lexer and thus can't find the end of the range for themselves.
Definition at line 430 of file Lexer.h.
References clang::CharSourceRange::getCharRange(), getLocForEndOfToken(), Range, and SM.
Referenced by getAsCharRange(), clang::tooling::maybeExtendRange(), clang::ento::ConditionBRVisitor::patternMatch(), and clang::arcmt::writeARCDiagsToPlist().
|
static |
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of the token in which the original source location lands.
Definition at line 608 of file Lexer.cpp.
References getBeginningOfFileToken(), clang::SourceLocation::getLocWithOffset(), clang::SourceLocation::isFileID(), Loc, and SM.
Referenced by adjustRemoval(), and findPreviousTokenStart().
|
inline |
|
inline |
Return the current location in the buffer.
Definition at line 308 of file Lexer.h.
Referenced by getBeginningOfFileToken().
|
inlinestatic |
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
Definition at line 586 of file Lexer.h.
Referenced by getSpellingSlow(), and getTokenPrefixLength().
|
inline |
Returns the current lexing offset.
Definition at line 311 of file Lexer.h.
Referenced by clang::syntax::tokenize().
|
inline |
getFileLoc - Return the File Location for the file we are lexing out of.
The physical location encodes the location where the characters come from, the virtual location encodes where we should claim the characters came from. Currently this is only used by _Pragma handling.
|
static |
Retrieve the name of the immediate macro expansion.
This routine starts from a source location, and finds the name of the macro responsible for its immediate expansion. It looks through any intervening macro argument expansions to compute this. It returns a StringRef which refers to the SourceManager-owned buffer of the source where that macro name is spelled. Thus, the result shouldn't out-live that SourceManager.
Definition at line 1059 of file Lexer.cpp.
References E, clang::PreprocessorLexer::FID, clang::SrcMgr::ExpansionInfo::getExpansionLocStart(), clang::SrcMgr::ExpansionInfo::getSpellingLoc(), clang::SourceLocation::isFileID(), clang::SrcMgr::ExpansionInfo::isMacroArgExpansion(), clang::SourceLocation::isMacroID(), Loc, MeasureTokenLength(), and SM.
Referenced by BuiltinOverflow(), CheckMemaccessSize(), clang::Preprocessor::getImmediateMacroName(), getMacroName(), clang::ento::CheckerContext::getMacroNameOrSpelling(), IsEnumConstOrFromMacro(), clang::data_collection::printMacroName(), and clang::Sema::ProcessAPINotes().
|
static |
Retrieve the name of the immediate macro expansion.
This routine starts from a source location, and finds the name of the macro responsible for its immediate expansion. It looks through any intervening macro argument expansions to compute this. It returns a StringRef which refers to the SourceManager-owned buffer of the source where that macro name is spelled. Thus, the result shouldn't out-live that SourceManager.
This differs from Lexer::getImmediateMacroName in that any macro argument location will result in the topmost function macro that accepted it. For example, given the expansion
MAC1( MAC2(foo) )
for the location of the 'foo' token, this function will return "MAC1" while Lexer::getImmediateMacroName will return "MAC2".
Definition at line 1106 of file Lexer.cpp.
References clang::SourceLocation::isFileID(), clang::SourceLocation::isMacroID(), Loc, MeasureTokenLength(), and SM.
Referenced by DiagnoseNullConversion().
|
static |
Returns the leading whitespace for the line that corresponds to the given location Loc.
Definition at line 1157 of file Lexer.cpp.
References findBeginningOfLine(), clang::Invalid, clang::SourceLocation::isInvalid(), clang::SourceLocation::isMacroID(), clang::Line, Loc, and SM.
|
static |
Computes the source location just past the end of the token at this source location.
This routine can be used to produce a source location that points just past the end of the token referenced by Loc, and is generally used when a diagnostic needs to point just after a token where it expected something different than it received. If the returned source location would not be meaningful (e.g., if it points into a macro), this routine returns an invalid source location.
Offset | an offset from the end of the token, where the source location should refer to. The default offset (0) produces a source location pointing just past the end of the token; an offset of 1 produces a source location pointing to the last character in the token, etc. |
Definition at line 849 of file Lexer.cpp.
References clang::SourceLocation::getLocWithOffset(), isAtEndOfMacroExpansion(), clang::SourceLocation::isInvalid(), clang::SourceLocation::isMacroID(), Loc, MeasureTokenLength(), and SM.
Referenced by AnalyzeImplicitConversions(), convertTokenRangeToCharRange(), createAttributeInsertion(), findNextToken(), clang::arcmt::trans::findSemiAfterLocation(), getAsCharRange(), getFunNameText(), clang::Preprocessor::getLocForEndOfToken(), clang::Sema::getLocForEndOfToken(), getPastLoc(), getPointeeTypeText(), getRange(), clang::diff::SyntaxTree::getSourceRangeOffsets(), getSourceRangeToTokenEnd(), getVarDeclIdentifierText(), makeRangeFromFileLocs(), clang::tooling::RecursiveSymbolVisitor< T >::VisitCXXConstructorDecl(), clang::tooling::RecursiveSymbolVisitor< T >::VisitTypedefTypeLoc(), and clang::tooling::RecursiveSymbolVisitor< T >::VisitTypeLoc().
|
static |
Relex the token at the specified location.
Definition at line 509 of file Lexer.cpp.
References clang::Invalid, clang::isWhitespace(), LexFromRawLexer(), Loc, clang::Result, SetCommentRetentionState(), and SM.
Referenced by atOrBeforeSeparation(), findPreviousTokenKind(), clang::Preprocessor::getRawToken(), clang::tooling::maybeExtendRange(), and MeasureTokenLength().
|
inlineoverridevirtual |
getSourceLocation - Return a source location for the next character in the current file.
Implements clang::PreprocessorLexer.
Definition at line 303 of file Lexer.h.
References getSourceLocation().
Referenced by Diag(), and getSourceLocation().
SourceLocation Lexer::getSourceLocation | ( | const char * | Loc, |
unsigned | TokLen = 1 |
||
) | const |
getSourceLocation - Return a source location identifier for the specified offset in the current file.
Definition at line 1212 of file Lexer.cpp.
References clang::SourceLocation::getLocWithOffset(), GetMappedTokenLoc(), clang::SourceLocation::isFileID(), Loc, and clang::PreprocessorLexer::PP.
Referenced by makeCharRange(), and clang::ModuleMap::parseModuleMapFile().
|
static |
Returns a string for the source that the range encompasses.
Definition at line 1023 of file Lexer.cpp.
References clang::SourceRange::getBegin(), clang::SourceRange::getEnd(), clang::Invalid, clang::SourceRange::isInvalid(), makeFileCharRange(), Range, and SM.
Referenced by AnalyzeImplicitConversions(), constructDebugPieceForTrackedCondition(), clang::tooling::createRenameAtomicChanges(), DiagnoseBaseOrMemInitializerOrder(), diagnoseXorMisusedAsPow(), GCRewriteFinalize(), clang::diff::SyntaxTree::Impl::getDeclValue(), GetDefaultValueString(), getExprText(), getLiteralInfo(), clang::MacroExpansionContext::getOriginalText(), getRangeText(), clang::tooling::getText(), clang::tooling::fixit::internal::getText(), clang::ento::ConditionBRVisitor::patternMatch(), printExprAsWritten(), clang::tooling::replaceStmtWithStmt(), and clang::tooling::ReplaceNodeWithTemplate::run().
|
static |
getSpelling - This method is used to get the spelling of a token into a preallocated buffer, instead of as an std::string.
The caller is required to allocate enough space for the token, which is guaranteed to be at least Tok.getLength() bytes long. The length of the actual result is returned.
Note that this method may do two possible things: it may either fill in the buffer specified with characters, or it may change the input pointer to point to a constant buffer with the data already in it (avoiding a copy). The caller is not allowed to modify the returned buffer pointer if an internal buffer is returned.
The caller is required to allocate enough space for the token, which is guaranteed to be at least Tok.getLength() bytes long. The actual length of the token is returned.
Note that this method may do two possible things: it may either fill in the buffer specified with characters, or it may change the input pointer to point to a constant buffer with the data already in it (avoiding a copy). The caller is not allowed to modify the returned buffer pointer if an internal buffer is returned.
Definition at line 451 of file Lexer.cpp.
References clang::SourceManager::getCharacterData(), clang::Token::getIdentifierInfo(), clang::Token::getLength(), clang::Token::getLiteralData(), clang::Token::getLocation(), clang::Token::getRawIdentifier(), getSpellingSlow(), clang::Token::hasUCN(), clang::Invalid, clang::Token::is(), clang::Token::isLiteral(), and clang::Token::needsCleaning().
Referenced by findDirectives(), clang::ento::CheckerContext::getMacroNameOrSpelling(), clang::StringLiteralParser::getOffsetOfStringByte(), clang::Preprocessor::getSpelling(), highlightLines(), clang::ast_matchers::internal::isTokenAtLoc(), and ReadOriginalFileName().
|
static |
getSpelling() - Return the 'spelling' of the Tok token.
getSpelling() - Return the 'spelling' of this token.
The spelling of a token is the characters used to represent the token in the source file after trigraph expansion and escaped-newline folding. In particular, this wants to get the true, uncanonicalized, spelling of things like digraphs, UCNs, etc.
The spelling of a token is the characters used to represent the token in the source file after trigraph expansion and escaped-newline folding. In particular, this wants to get the true, uncanonicalized, spelling of things like digraphs, UCNs, etc.
Definition at line 419 of file Lexer.cpp.
References clang::SourceManager::getCharacterData(), clang::Token::getLength(), clang::Token::getLocation(), getSpellingSlow(), clang::Invalid, clang::Token::needsCleaning(), and clang::Result.
|
static |
getSpelling - This method is used to get the spelling of the token at the given source location.
getSpelling() - Return the 'spelling' of this token.
If, as is usually true, it is not necessary to copy any data, then the returned string may not point into the provided buffer.
This method lexes at the expansion depth of the given location and does not jump to the expansion or spelling location.
The spelling of a token is the characters used to represent the token in the source file after trigraph expansion and escaped-newline folding. In particular, this wants to get the true, uncanonicalized, spelling of things like digraphs, UCNs, etc.
Definition at line 378 of file Lexer.cpp.
References clang::Token::getLength(), getSpellingSlow(), length(), LexFromRawLexer(), clang::Token::needsCleaning(), and SM.
|
static |
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters of the token starting at TokStart.
Definition at line 790 of file Lexer.cpp.
References getCharAndSizeNoWarn(), clang::Invalid, and SM.
Referenced by AdvanceToTokenCharacter().
|
inline |
|
static |
Returns true if the given character could appear in an identifier.
Definition at line 1133 of file Lexer.cpp.
References c, and clang::isAsciiIdentifierContinue().
Referenced by addFixitForObjCARCConversion(), and canBeJoined().
|
static |
Returns true if the given MacroID location points at the last token of the macro expansion.
MacroEnd | If non-null and function returns true, it is set to end location of the macro. |
Definition at line 893 of file Lexer.cpp.
References clang::SourceLocation::getLocWithOffset(), isAtEndOfMacroExpansion(), clang::SourceLocation::isFileID(), clang::SourceLocation::isMacroID(), clang::SourceLocation::isValid(), MeasureTokenLength(), and SM.
Referenced by FindLocsWithCommonFileID(), findNextToken(), clang::arcmt::trans::findSemiAfterLocation(), getLocForEndOfToken(), getVarDeclIdentifierText(), isAtEndOfMacroExpansion(), clang::Preprocessor::isAtEndOfMacroExpansion(), makeFileCharRange(), and clang::ento::ConditionBRVisitor::patternMatch().
|
static |
Returns true if the given MacroID location points at the first token of the macro expansion.
MacroBegin | If non-null and function returns true, it is set to begin location of the macro. |
Definition at line 871 of file Lexer.cpp.
References isAtStartOfMacroExpansion(), clang::SourceLocation::isFileID(), clang::SourceLocation::isMacroID(), clang::SourceLocation::isValid(), and SM.
Referenced by FindLocsWithCommonFileID(), isAtStartOfMacroExpansion(), clang::Preprocessor::isAtStartOfMacroExpansion(), makeFileCharRange(), and clang::ento::ConditionBRVisitor::patternMatch().
|
inline |
|
inline |
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file, including whitespace and comments.
This should only be used in raw mode, as the preprocessor is not prepared to deal with the excess tokens.
Definition at line 248 of file Lexer.h.
Referenced by SetCommentRetentionState().
|
static |
Checks whether the newline pointed to by Str is preceded by an escape sequence.
Definition at line 1137 of file Lexer.cpp.
References clang::isHorizontalWhitespace(), and clang::isVerticalWhitespace().
Referenced by findBeginningOfLine().
|
inline |
Lex - Return the next token in the file.
If this is the end of file, it returns the tok::eof token. This implicitly involves the preprocessor.
Definition at line 3679 of file Lexer.cpp.
References clang::PreprocessorLexer::isLexingRawMode(), clang::Token::LeadingEmptyMacro, clang::Token::LeadingSpace, clang::Result, and clang::Token::StartOfLine.
Referenced by LexFromRawLexer(), and ReadToEndOfLine().
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object).
Return true if the 'next character to read' pointer points at the end of the lexer buffer, false otherwise.
Definition at line 236 of file Lexer.h.
References Lex(), clang::PreprocessorLexer::LexingRawMode, and clang::Result.
Referenced by clang::arcmt::trans::MigrationContext::addPropertyAttribute(), ComputePreamble(), EmitAlphaCounter(), clang::DumpRawTokensAction::ExecuteAction(), findDirectives(), findNextToken(), clang::arcmt::trans::findSemiAfterLocation(), getBeginningOfFileToken(), getEntityEndLoc(), clang::StringLiteral::getLocationOfByte(), getRawToken(), getSpelling(), LexRawTokensFromMainFile(), LocPropertyAttribute(), NormalizeLine(), clang::format::IntegerLiteralSeparatorFixer::process(), clang::arcmt::trans::MigrationContext::rewritePropertyAttribute(), clang::syntax::tokenize(), and clang::TokenRewriter::TokenRewriter().
|
static |
Accepts a range and returns a character range with file locations.
Returns a null range if a part of the range resides inside a macro expansion or the range does not reside on the same FileID.
This function is trying to deal with macros and return a range based on file locations. The cases where it can successfully handle macros are:
- The begin or end of the range lies at the start or end of a macro expansion, in which case the location will be set to the expansion point, e.g.:
    #define M 1 2
    a M
  If you have a range [a, 2] (where 2 came from the macro), the function will return a range for "a M". If you have a range [a, 1], the function will fail because the range overlaps with only a part of the macro.
- The macro is a function macro and the range can be mapped to the macro arguments, e.g.:
    #define M 1 2
    #define FM(x) x
    FM(a b M)
  If you have a range [b, 2], the function will return the file range "b M" inside the macro arguments. If you have a range [a, 2], the function will return the file range "FM(a b M)", since the range includes all of the macro expansion.
Definition at line 954 of file Lexer.cpp.
References Begin, clang::SourceRange::getBegin(), clang::SourceRange::getEnd(), clang::SrcMgr::SLocEntry::getExpansion(), clang::SrcMgr::ExpansionInfo::getExpansionLocStart(), clang::Invalid, isAtEndOfMacroExpansion(), isAtStartOfMacroExpansion(), clang::SourceLocation::isFileID(), isInExpansionTokenRange(), clang::SourceLocation::isInvalid(), clang::SrcMgr::ExpansionInfo::isMacroArgExpansion(), clang::SourceLocation::isMacroID(), makeFileCharRange(), makeRangeFromFileLocs(), Range, clang::SourceRange::setBegin(), clang::SourceRange::setEnd(), and SM.
Referenced by clang::transformer::after(), clang::tooling::getAssociatedRange(), getRange(), getSourceText(), makeFileCharRange(), and makeStandaloneRange().
|
static |
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the input file.
If the token needs cleaning (e.g. includes a trigraph or an escaped newline) then this count includes bytes that are part of that.
Definition at line 498 of file Lexer.cpp.
References clang::Token::getLength(), getRawToken(), Loc, and SM.
Referenced by clang::TextDiagnostic::emitDiagnosticLoc(), getEndCharLoc(), getImmediateMacroName(), getImmediateMacroNameForDiagnostics(), getLocForEndOfToken(), clang::Rewriter::getRangeSize(), getRangeSize(), clang::Rewriter::getRewrittenText(), clang::html::HighlightRange(), isAtEndOfMacroExpansion(), prepareAndFilterRanges(), and clang::PrintingCodeCompleteConsumer::ProcessCodeCompleteResults().
void Lexer::ReadToEndOfLine | ( | SmallVectorImpl< char > * | Result = nullptr | ) |
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
This switches the lexer out of directive mode.
Definition at line 3066 of file Lexer.cpp.
References clang::Preprocessor::CodeCompleteNaturalLanguage(), clang::Token::is(), Lex(), clang::PreprocessorLexer::ParsingFilename, clang::PreprocessorLexer::ParsingPreprocessorDirective, clang::PreprocessorLexer::PP, clang::Result, and clang::Token::startToken().
void Lexer::resetExtendedTokenMode | ( | ) |
Sets the extended token mode back to its initial value, according to the language options and preprocessor.
This controls whether the lexer produces comment and whitespace tokens.
This requires the lexer to have an associated preprocessor. A standalone lexer has nothing to reset to.
Definition at line 219 of file Lexer.cpp.
References clang::Preprocessor::getCommentRetentionState(), clang::PreprocessorLexer::PP, SetCommentRetentionState(), and SetKeepWhitespaceMode().
Referenced by Lexer().
|
inline |
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
This is really only useful when lexing in raw mode, because otherwise the lexer needs to manage this.
Definition at line 269 of file Lexer.h.
References isKeepWhitespaceMode().
Referenced by ComputePreamble(), findDirectives(), getBeginningOfFileToken(), getRawToken(), LexRawTokensFromMainFile(), clang::format::IntegerLiteralSeparatorFixer::process(), and resetExtendedTokenMode().
|
inline |
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
Definition at line 254 of file Lexer.h.
References clang::PreprocessorLexer::LexingRawMode.
Referenced by clang::DumpRawTokensAction::ExecuteAction(), getEntityEndLoc(), resetExtendedTokenMode(), and clang::TokenRewriter::TokenRewriter().
|
static |
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) replacing newline character(s) with "\n".
Definition at line 316 of file Lexer.cpp.
References StringifyImpl().
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) replacing newline character(s) with "\n".
If Charify is true, this escapes the ' character instead of ".
Definition at line 309 of file Lexer.cpp.
References clang::Result, and StringifyImpl().
Referenced by clang::Sema::ExpandFunctionLocalPredefinedMacros(), PrintHeaderInfo(), and clang::MacroArgs::StringifyArgument().
|
friend |