13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
37class DiagnosticBuilder;
81 void anchor()
override;
87 const char *BufferStart;
90 const char *BufferEnd;
122 unsigned char ExtendedTokenMode;
131 const char *BufferPtr;
135 bool IsAtStartOfLine;
137 bool IsAtPhysicalStartOfLine;
139 bool HasLeadingSpace;
141 bool HasLeadingEmptyMacro;
144 bool IsFirstTimeLexingFile;
148 const char *NewLinePtr;
158 unsigned NextDepDirectiveTokenIndex = 0;
160 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
168 bool IsFirstIncludeOfFile =
true);
174 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
175 bool IsFirstIncludeOfFile =
true);
182 bool IsFirstIncludeOfFile =
true);
211 bool LexDependencyDirectiveTokenWhileSkipping(
Token &
Result);
/// Returns true when DepDirectives holds at least one entry and false when
/// it is empty. Const: it reads DepDirectives and modifies nothing.
215 bool isDependencyDirectivesLexer()
const {
return !DepDirectives.empty(); }
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &
Result);
241 return BufferPtr == BufferEnd;
249 return ExtendedTokenMode > 1;
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
263 return ExtendedTokenMode > 0;
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
285 return StringRef(BufferStart, BufferEnd - BufferStart);
312 assert(BufferPtr >= BufferStart &&
"Invalid buffer state");
313 return BufferPtr - BufferStart;
/// Set the lexer's buffer pointer to \p Offset.
///
/// NOTE(review): the effect of \p IsAtStartOfLine is not visible from this
/// declaration; presumably it seeds the lexer's start-of-line state after the
/// jump. Confirm against the definition.
317 void seek(
unsigned Offset,
bool IsAtStartOfLine);
322 static std::string
Stringify(StringRef Str,
bool Charify =
false);
365 bool *invalid =
nullptr);
380 bool IgnoreWhiteSpace =
false);
436 Range.getBegin(), End);
441 return Range.isTokenRange()
550 unsigned MaxLines = 0);
568 bool SkipTrailingWhitespaceAndNewLine);
584 if (isObviouslySimpleCharacter(Ptr[0])) {
590 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
608 bool LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine);
610 bool CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
const char *CurPtr);
612 bool LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
const char *CurPtr);
619 void FormTokenWithChars(
Token &
Result,
const char *TokEnd,
621 unsigned TokLen = TokEnd-BufferPtr;
631 unsigned isNextPPTokenLParen();
655 static bool isObviouslySimpleCharacter(
char C) {
656 return C !=
'?' &&
C !=
'\\';
663 inline char getAndAdvanceChar(
const char *&Ptr, Token &Tok) {
666 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
669 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
678 const char *ConsumeChar(
const char *Ptr,
unsigned Size, Token &Tok) {
686 getCharAndSizeSlow(Ptr, Size, &Tok);
694 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
697 if (isObviouslySimpleCharacter(Ptr[0])) {
703 return getCharAndSizeSlow(Ptr, Size);
708 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
709 Token *Tok =
nullptr);
714 static unsigned getEscapedNewLineSize(
const char *
P);
719 static const char *SkipEscapedNewLines(
const char *
P);
723 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
724 const LangOptions &LangOpts);
729 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
731 void PropagateLineStartLeadingSpaceInfo(Token &
Result);
733 const char *LexUDSuffix(Token &
Result,
const char *CurPtr,
734 bool IsStringLiteral);
740 bool LexIdentifierContinue(Token &
Result,
const char *CurPtr);
742 bool LexNumericConstant (Token &
Result,
const char *CurPtr);
743 bool LexStringLiteral (Token &
Result,
const char *CurPtr,
745 bool LexRawStringLiteral (Token &
Result,
const char *CurPtr,
747 bool LexAngledStringLiteral(Token &
Result,
const char *CurPtr);
748 bool LexCharConstant (Token &
Result,
const char *CurPtr,
750 bool LexEndOfFile (Token &
Result,
const char *CurPtr);
751 bool SkipWhitespace (Token &
Result,
const char *CurPtr,
752 bool &TokAtPhysicalStartOfLine);
753 bool SkipLineComment (Token &
Result,
const char *CurPtr,
754 bool &TokAtPhysicalStartOfLine);
755 bool SkipBlockComment (Token &
Result,
const char *CurPtr,
756 bool &TokAtPhysicalStartOfLine);
757 bool SaveLineComment (Token &
Result,
const char *CurPtr);
759 bool IsStartOfConflictMarker(
const char *CurPtr);
760 bool HandleEndOfConflictMarker(
const char *CurPtr);
762 bool lexEditorPlaceholder(Token &
Result,
const char *CurPtr);
764 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Moves BufferPtr to BufferEnd; no other member is touched.
/// NOTE(review): presumably this makes the rest of the buffer look already
/// consumed so lexing stops early. Confirm against the callers.
765 void cutOffLexing() { BufferPtr = BufferEnd; }
767 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
769 void codeCompleteIncludedFile(
const char *PathStart,
770 const char *CompletionPoint,
bool IsAngled);
772 std::optional<uint32_t>
773 tryReadNumericUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
774 std::optional<uint32_t> tryReadNamedUCN(
const char *&StartPtr,
775 const char *SlashLoc, Token *
Result);
789 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
802 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
810 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
const char * getBufferLocation() const
Return the current location in the buffer.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
@ C
Languages that the frontend can parse and compile.
@ Result
The result type of a method or function.
YAML serialization mapping.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
unsigned Size
Size of the preamble in bytes.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)