Go to the documentation of this file.
13 #ifndef LLVM_CLANG_LEX_LEXER_H
14 #define LLVM_CLANG_LEX_LEXER_H
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringRef.h"
31 class MemoryBufferRef;
37 class DiagnosticBuilder;
81 void anchor()
override;
/// Lexer state fields. The leading numbers on each line are listing line
/// numbers carried over from the rendered page, not part of the code.
///
/// Start of the character buffer. Visible code builds
/// StringRef(BufferStart, BufferEnd - BufferStart) and measures the current
/// offset as BufferPtr - BufferStart.
87 const char *BufferStart;
/// End of the character buffer; BufferPtr == BufferEnd is the end-of-buffer
/// test that appears in this listing.
90 const char *BufferEnd;
/// Small integer mode. Visible code stores 0 or 2 next to an assert about
/// retaining whitespace, stores 0 or 1 next to an assert about comment
/// retention, and tests the value with > 0 and > 1.
/// NOTE(review): so 1 looks like keep-comments and 2 like keep-whitespace;
/// confirm against the full header.
122 unsigned char ExtendedTokenMode;
/// Current position in the buffer. It is asserted to be >= BufferStart when
/// the offset is computed, and cutOffLexing() sets it to BufferEnd.
131 const char *BufferPtr;
/// Boolean state flags. How they are set and cleared is not visible in this
/// listing.
135 bool IsAtStartOfLine;
137 bool IsAtPhysicalStartOfLine;
139 bool HasLeadingSpace;
141 bool HasLeadingEmptyMacro;
/// NOTE(review): presumably the value reported by isFirstTimeLexingFile();
/// confirm, the accessor body is not visible here.
144 bool IsFirstTimeLexingFile;
148 const char *NewLinePtr;
/// Starts at 0 through its default member initializer.
158 unsigned NextDepDirectiveTokenIndex = 0;
/// Declaration only; the definition is not part of this listing.
/// Takes three pointers to const char named as buffer start, current
/// position and buffer end.
/// NOTE(review): presumably these seed BufferStart, BufferPtr and BufferEnd;
/// confirm against the definition.
160 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
168 bool IsFirstIncludeOfFile =
true);
174 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
175 bool IsFirstIncludeOfFile =
true);
182 bool IsFirstIncludeOfFile =
true);
/// Main lexing entry point visible in this listing: takes the token by
/// non-const reference and returns a bool. IndirectLex() forwards to this
/// function and discards the returned value.
/// NOTE(review): the meaning of the bool result is not visible here; check
/// the definition before relying on it.
204 bool Lex(
Token &Result);
207 bool LexDependencyDirectiveToken(
Token &Result);
211 bool LexDependencyDirectiveTokenWhileSkipping(
Token &Result);
/// Returns true when DepDirectives is non-empty, false when it is empty.
/// Const member: it only queries DepDirectives.empty().
215 bool isDependencyDirectivesLexer()
const {
return !DepDirectives.empty(); }
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &Result);
/// Override that simply forwards to Lex(Result). Lex returns a bool, which
/// is intentionally dropped here because this override returns void.
230 void IndirectLex(
Token &Result)
override { Lex(Result); }
241 return BufferPtr == BufferEnd;
249 return ExtendedTokenMode > 1;
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
263 return ExtendedTokenMode > 0;
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
285 return StringRef(BufferStart, BufferEnd - BufferStart);
312 assert(BufferPtr >= BufferStart &&
"Invalid buffer state");
313 return BufferPtr - BufferStart;
/// Sets the buffer pointer of the lexer to Offset (per the generated
/// reference text for this member). Declaration only; the definition is not
/// part of this listing.
/// NOTE(review): the second parameter has the same name as the
/// IsAtStartOfLine data member, so it would shadow that member inside the
/// definition; presumably it carries the new value of the flag - confirm.
317 void seek(
unsigned Offset,
bool IsAtStartOfLine);
365 bool *invalid =
nullptr);
380 bool IgnoreWhiteSpace =
false);
436 Range.getBegin(),
End);
441 return Range.isTokenRange()
550 unsigned MaxLines = 0);
568 bool SkipTrailingWhitespaceAndNewLine);
584 if (isObviouslySimpleCharacter(Ptr[0])) {
590 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
608 bool LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine);
610 bool CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
const char *CurPtr);
612 bool LexUnicodeIdentifierStart(
Token &Result, uint32_t
C,
const char *CurPtr);
619 void FormTokenWithChars(
Token &Result,
const char *TokEnd,
621 unsigned TokLen = TokEnd-BufferPtr;
622 Result.setLength(TokLen);
624 Result.setKind(
Kind);
631 unsigned isNextPPTokenLParen();
655 static bool isObviouslySimpleCharacter(
char C) {
656 return C !=
'?' &&
C !=
'\\';
663 inline char getAndAdvanceChar(
const char *&Ptr, Token &Tok) {
666 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
669 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
678 const char *ConsumeChar(
const char *Ptr,
unsigned Size, Token &Tok) {
686 getCharAndSizeSlow(Ptr, Size, &Tok);
694 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
697 if (isObviouslySimpleCharacter(Ptr[0])) {
703 return getCharAndSizeSlow(Ptr, Size);
/// Slow-path character reader. The inline helpers in this listing call it
/// only after isObviouslySimpleCharacter(Ptr[0]) has failed, i.e. when the
/// first character is a question mark or a backslash.
/// Tok defaults to nullptr; the visible call sites either pass the address of
/// a Token or omit the argument.
/// NOTE(review): Size is a non-const reference, presumably receiving the
/// number of bytes the returned character occupies - confirm against the
/// definition, which is not part of this listing.
708 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
709 Token *Tok =
nullptr);
714 static unsigned getEscapedNewLineSize(
const char *
P);
719 static const char *SkipEscapedNewLines(
const char *
P);
/// Static variant that takes LangOptions instead of a Token pointer. One
/// call site is visible in this listing: it returns
/// getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts) after the
/// isObviouslySimpleCharacter(Ptr[0]) check.
/// NOTE(review): by its name this is the variant that never emits warnings;
/// confirm against the definition, which is not part of this listing.
723 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
724 const LangOptions &LangOpts);
729 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
731 void PropagateLineStartLeadingSpaceInfo(Token &Result);
733 const char *LexUDSuffix(Token &Result,
const char *CurPtr,
734 bool IsStringLiteral);
740 bool LexIdentifierContinue(Token &Result,
const char *CurPtr);
742 bool LexNumericConstant (Token &Result,
const char *CurPtr);
743 bool LexStringLiteral (Token &Result,
const char *CurPtr,
745 bool LexRawStringLiteral (Token &Result,
const char *CurPtr,
747 bool LexAngledStringLiteral(Token &Result,
const char *CurPtr);
748 bool LexCharConstant (Token &Result,
const char *CurPtr,
750 bool LexEndOfFile (Token &Result,
const char *CurPtr);
751 bool SkipWhitespace (Token &Result,
const char *CurPtr,
752 bool &TokAtPhysicalStartOfLine);
753 bool SkipLineComment (Token &Result,
const char *CurPtr,
754 bool &TokAtPhysicalStartOfLine);
755 bool SkipBlockComment (Token &Result,
const char *CurPtr,
756 bool &TokAtPhysicalStartOfLine);
757 bool SaveLineComment (Token &Result,
const char *CurPtr);
759 bool IsStartOfConflictMarker(
const char *CurPtr);
760 bool HandleEndOfConflictMarker(
const char *CurPtr);
762 bool lexEditorPlaceholder(Token &Result,
const char *CurPtr);
764 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Moves BufferPtr to BufferEnd, so the BufferPtr == BufferEnd end-of-buffer
/// test seen elsewhere in this listing becomes true. Nothing else is touched.
765 void cutOffLexing() { BufferPtr = BufferEnd; }
767 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
769 void codeCompleteIncludedFile(
const char *PathStart,
770 const char *CompletionPoint,
bool IsAngled);
772 std::optional<uint32_t>
773 tryReadNumericUCN(
const char *&StartPtr,
const char *SlashLoc, Token *Result);
774 std::optional<uint32_t> tryReadNamedUCN(
const char *&StartPtr,
775 const char *SlashLoc, Token *Result);
789 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc, Token *Result);
802 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
810 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
815 #endif // LLVM_CLANG_LEX_LEXER_H
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
YAML serialization mapping.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
A little helper class used to produce diagnostics.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
A trivial tuple used to represent a source range.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
bool LexingRawMode
True if in raw mode.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Token - This structure provides full information about a lexed token.
This class handles loading and caching of source files into memory.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static CharSourceRange getCharRange(SourceRange R)
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
unsigned Size
Size of the preamble in bytes.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
const char * getBufferLocation() const
Return the current location in the buffer.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
Lexer & operator=(const Lexer &)=delete
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
Represents a character-granular source range.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
StringRef getBuffer() const
Gets source code buffer.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
__device__ __2f16 float c
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
@ CMK_None
Not within a conflict marker.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.