13 #ifndef LLVM_CLANG_LEX_LEXER_H 14 #define LLVM_CLANG_LEX_LEXER_H 21 #include "llvm/ADT/Optional.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringRef.h" 36 class DiagnosticBuilder;
69 : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
/// Overrides a virtual anchor() inherited from a base class. Only the
/// declaration is visible in this view; the definition lives elsewhere.
/// NOTE(review): presumably the usual out-of-line "anchor" method used to pin
/// the class's vtable to a single translation unit -- confirm.
79 void anchor()
override;
/// Start of the buffer being lexed. Together with BufferEnd it delimits the
/// range handed out as StringRef(BufferStart, BufferEnd - BufferStart).
85 const char *BufferStart;
/// End of the buffer being lexed. "BufferPtr == BufferEnd" is tested as an
/// end-of-buffer condition, and cutOffLexing() moves BufferPtr here.
88 const char *BufferEnd;
/// Token-retention mode. Values assigned in this header: 0 = off,
/// 1 = comment retention enabled, 2 = whitespace retention enabled.
/// Predicates in this header test "> 0" and "> 1", so the value 2 satisfies
/// both of them.
110 unsigned char ExtendedTokenMode;
/// Current position in the buffer. It is passed to getSourceLocation() to
/// obtain the location of the next character, and it is the start from which
/// a token's length (TokEnd - BufferPtr) is measured.
119 const char *BufferPtr;
// Line/spacing state flags. Only the declarations are visible in this view;
// the descriptions below are read off the member names.
// NOTE(review): confirm each meaning against the lexer implementation.

/// Presumably: the next token lexed will be the first token on its line.
123 bool IsAtStartOfLine;
/// Presumably: like IsAtStartOfLine, but for the physical source line.
125 bool IsAtPhysicalStartOfLine;
/// Presumably: whitespace was seen before the next token.
127 bool HasLeadingSpace;
/// Presumably: an empty macro expansion was seen before the next token.
129 bool HasLeadingEmptyMacro;
/// Declaration only (the definition is not in this view). Takes three
/// pointers into a character buffer; the parameter names parallel the
/// BufferStart / BufferPtr / BufferEnd members.
/// NOTE(review): presumably initializes those members and the per-lexer
/// state flags -- confirm against the definition.
134 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
147 const char *BufStart,
const char *BufPtr,
const char *BufEnd);
152 Lexer(
FileID FID,
const llvm::MemoryBuffer *FromFile,
/// Lexing entry point; declaration only. \p Result is taken by non-const
/// reference, presumably as the token to fill in. IndirectLex() forwards to
/// this function and ignores the returned bool.
/// NOTE(review): the meaning of the bool result is not visible here --
/// confirm against the definition.
179 bool Lex(
Token &Result);
/// Override that simply forwards to Lex(). The bool that Lex() returns is
/// intentionally not propagated, since this function returns void.
188 void IndirectLex(Token &Result) override {
  Lex(Result);
}
195 assert(LexingRawMode &&
"Not already in raw mode!");
199 return BufferPtr == BufferEnd;
207 return ExtendedTokenMode > 1;
213 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
214 "Can only retain whitespace in raw mode or -traditional-cpp");
215 ExtendedTokenMode = Val ? 2 : 0;
221 return ExtendedTokenMode > 0;
228 assert(!isKeepWhitespaceMode() &&
229 "Can't play with comment retention state when retaining whitespace");
230 ExtendedTokenMode = Mode ? 1 : 0;
/// Declaration only; the definition is not in this view.
/// NOTE(review): presumably restores ExtendedTokenMode (0 = off, 1 = keep
/// comments, 2 = keep whitespace) to its default -- confirm what that
/// default is derived from.
239 void resetExtendedTokenMode();
243 return StringRef(BufferStart, BufferEnd - BufferStart);
/// Map a pointer into the lexer's buffer to a SourceLocation. Declaration
/// only; const, so it does not modify the lexer.
///
/// \param Loc    Pointer to translate. Callers in this header pass BufferPtr.
/// \param TokLen Length associated with the location; defaults to 1. Token
///               formation passes the token length (TokEnd - BufferPtr).
/// \returns The location, which token formation stores via
///          Token::setLocation().
257 SourceLocation getSourceLocation(
const char *Loc,
unsigned TokLen = 1)
const;
262 return getSourceLocation(BufferPtr);
/// Static helper returning a new std::string built from \p Str. Declaration
/// only.
/// NOTE(review): presumably escapes \p Str so it can be placed inside a
/// string literal, or a character literal when \p Charify is true (default
/// false) -- confirm against the definition.
271 static std::string Stringify(StringRef Str,
bool Charify =
false);
287 static unsigned getSpelling(
const Token &
Tok,
const char *&Buffer,
290 bool *Invalid =
nullptr);
297 static std::string getSpelling(
const Token &Tok,
300 bool *Invalid =
nullptr);
314 bool *invalid =
nullptr);
329 bool IgnoreWhiteSpace =
false);
353 getTokenPrefixLength(TokStart, Characters, SM, LangOpts));
384 : CharSourceRange::getCharRange(
449 bool *Invalid =
nullptr);
479 static StringRef getImmediateMacroNameForDiagnostics(
499 unsigned MaxLines = 0);
517 bool SkipTrailingWhitespaceAndNewLine);
/// Static predicate on a single character \p c, parameterized by the
/// language options. Declaration only.
/// NOTE(review): presumably true when \p c may appear inside an identifier
/// under \p LangOpts -- confirm against the definition.
520 static bool isIdentifierBodyChar(
char c,
const LangOptions &LangOpts);
/// Static predicate taking the start of a buffer and a position \p Str.
/// Declaration only.
/// NOTE(review): presumably reports whether the newline at \p Str is
/// preceded by an escaping backslash, scanning back no further than
/// \p BufferStart -- confirm against the definition.
524 static bool isNewLineEscaped(
const char *BufferStart,
const char *Str);
532 if (isObviouslySimpleCharacter(Ptr[0])) {
538 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
/// Internal lexing routine; declaration only. Receives the token to fill in
/// and a flag describing whether the token starts a physical line.
/// NOTE(review): presumably the main per-token dispatch called from Lex() --
/// confirm against the definition.
553 bool LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine);
/// Declaration only. Takes the token being formed, a 32-bit value \p C and a
/// buffer position \p CurPtr.
/// NOTE(review): presumably \p C is a code point and the result says whether
/// it was treated as whitespace -- confirm against the definition.
555 bool CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
const char *CurPtr);
/// Declaration only. Takes the token being formed, a 32-bit value \p C and a
/// buffer position \p CurPtr.
/// NOTE(review): presumably lexes a token that begins with the code point
/// \p C -- confirm against the definition.
560 bool LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr);
567 void FormTokenWithChars(
Token &Result,
const char *TokEnd,
569 unsigned TokLen = TokEnd-BufferPtr;
571 Result.
setLocation(getSourceLocation(BufferPtr, TokLen));
/// Declaration only. Note the return type is unsigned, not bool, so callers
/// should not assume a plain yes/no answer.
/// NOTE(review): presumably peeks at the next preprocessing token to see
/// whether it is '(' and uses a third value for "cannot tell" -- confirm the
/// exact return values against the definition.
579 unsigned isNextPPTokenLParen();
603 static bool isObviouslySimpleCharacter(
char C) {
604 return C !=
'?' && C !=
'\\';
611 inline char getAndAdvanceChar(
const char *&Ptr,
Token &Tok) {
614 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
617 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
626 const char *ConsumeChar(
const char *Ptr,
unsigned Size,
Token &Tok) {
634 getCharAndSizeSlow(Ptr, Size, &Tok);
642 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
645 if (isObviouslySimpleCharacter(Ptr[0])) {
651 return getCharAndSizeSlow(Ptr, Size);
/// Slow-path companion of the inline character readers in this header.
/// Those readers call it when the first character at \p Ptr is not
/// "obviously simple", i.e. when it is '?' or a backslash.
///
/// \param Ptr  Position in the buffer to read from.
/// \param Size Passed by reference by every caller in this header.
/// \param Tok  Optional token; defaults to nullptr. Callers that have a token
///             in hand pass its address, others omit it.
/// NOTE(review): presumably decodes trigraphs / escaped newlines and updates
/// \p Size with the number of bytes consumed -- confirm against the
/// definition.
656 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
657 Token *Tok =
nullptr);
/// Static helper taking a buffer position \p P and returning an unsigned
/// size. Declaration only.
/// NOTE(review): presumably the byte length of an escaped newline starting
/// at \p P, or 0 if there is none -- confirm against the definition.
662 static unsigned getEscapedNewLineSize(
const char *
P);
/// Static helper mapping a buffer position \p P to another buffer position.
/// Declaration only.
/// NOTE(review): presumably returns \p P advanced past any escaped newlines
/// -- confirm against the definition.
667 static const char *SkipEscapedNewLines(
const char *P);
671 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
/// Declaration only. Takes a byte offset and a start-of-line flag.
/// NOTE(review): presumably repositions BufferPtr to BufferStart + Offset and
/// updates the start-of-line state from \p StartOfLine -- confirm against
/// the definition.
677 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
/// Declaration only. Takes the token \p Result by non-const reference.
/// NOTE(review): presumably carries line-start / leading-space information
/// between \p Result and the lexer's state flags -- confirm the direction
/// and the flags involved against the definition.
679 void PropagateLineStartLeadingSpaceInfo(
Token &Result);
/// Declaration only. Takes the token being formed, a buffer position and a
/// flag distinguishing string literals; returns a buffer position.
/// NOTE(review): presumably consumes a user-defined-literal suffix at
/// \p CurPtr and returns the position after it -- confirm against the
/// definition.
681 const char *LexUDSuffix(
Token &Result,
const char *CurPtr,
682 bool IsStringLiteral);
/// Token-kind helper; declaration only. Takes the token to fill in and the
/// current buffer position.
/// NOTE(review): presumably lexes the remainder of an identifier -- confirm
/// the meaning of the bool result against the definition.
685 bool LexIdentifier (
Token &Result,
const char *CurPtr);
/// Token-kind helper; declaration only. Takes the token to fill in and the
/// current buffer position.
/// NOTE(review): presumably lexes the remainder of a numeric constant --
/// confirm the meaning of the bool result against the definition.
686 bool LexNumericConstant (
Token &Result,
const char *CurPtr);
687 bool LexStringLiteral (
Token &Result,
const char *CurPtr,
689 bool LexRawStringLiteral (
Token &Result,
const char *CurPtr,
/// Token-kind helper; declaration only. Takes the token to fill in and the
/// current buffer position.
/// NOTE(review): presumably lexes an angle-bracketed string such as the
/// <file> of an #include -- confirm against the definition.
691 bool LexAngledStringLiteral(
Token &Result,
const char *CurPtr);
692 bool LexCharConstant (
Token &Result,
const char *CurPtr,
/// Token-kind helper; declaration only. Takes the token to fill in and the
/// current buffer position.
/// NOTE(review): presumably handles reaching the end of the buffer --
/// confirm the meaning of the bool result against the definition.
694 bool LexEndOfFile (
Token &Result,
const char *CurPtr);
/// Skipping helper; declaration only. \p TokAtPhysicalStartOfLine is taken by
/// non-const reference, so the helper can update the caller's flag.
/// NOTE(review): presumably skips whitespace starting at \p CurPtr and may
/// instead produce a token in \p Result when whitespace is being retained --
/// confirm against the definition.
695 bool SkipWhitespace (
Token &Result,
const char *CurPtr,
696 bool &TokAtPhysicalStartOfLine);
/// Skipping helper; declaration only. \p TokAtPhysicalStartOfLine is taken by
/// non-const reference, so the helper can update the caller's flag.
/// NOTE(review): presumably skips a line comment starting at \p CurPtr and
/// may instead produce a token in \p Result when comments are being retained
/// -- confirm against the definition.
697 bool SkipLineComment (
Token &Result,
const char *CurPtr,
698 bool &TokAtPhysicalStartOfLine);
/// Skipping helper; declaration only. \p TokAtPhysicalStartOfLine is taken by
/// non-const reference, so the helper can update the caller's flag.
/// NOTE(review): presumably skips a block comment starting at \p CurPtr and
/// may instead produce a token in \p Result when comments are being retained
/// -- confirm against the definition.
699 bool SkipBlockComment (
Token &Result,
const char *CurPtr,
700 bool &TokAtPhysicalStartOfLine);
/// Declaration only. Takes the token to fill in and the current buffer
/// position.
/// NOTE(review): presumably turns a line comment into a comment token when
/// comment retention is enabled -- confirm against the definition.
701 bool SaveLineComment (
Token &Result,
const char *CurPtr);
/// Declaration only. Takes the current buffer position.
/// NOTE(review): presumably detects the opening of a version-control
/// conflict marker (normal/diff3 or Perforce style) at \p CurPtr -- confirm
/// against the definition, including any lexer state it changes.
703 bool IsStartOfConflictMarker(
const char *CurPtr);
/// Declaration only. Takes the current buffer position.
/// NOTE(review): presumably recognizes the terminator of a conflict marker
/// previously opened via IsStartOfConflictMarker() -- confirm against the
/// definition, including any lexer state it changes.
704 bool HandleEndOfConflictMarker(
const char *CurPtr);
/// Declaration only. Takes the token to fill in and the current buffer
/// position.
/// NOTE(review): presumably lexes an editor placeholder into \p Result --
/// confirm the accepted syntax and the bool result against the definition.
706 bool lexEditorPlaceholder(
Token &Result,
const char *CurPtr);
/// Const predicate on a buffer position; declaration only.
/// NOTE(review): presumably true when \p CurPtr is the position at which
/// code completion was requested -- confirm against the definition.
708 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Move the current buffer position to the end of the buffer, so that the
/// "BufferPtr == BufferEnd" end-of-buffer condition holds from now on.
709 void cutOffLexing() {
  BufferPtr = BufferEnd;
}
/// Declaration only. Takes a buffer position \p Start and the language
/// options.
/// NOTE(review): presumably true when the text at \p Start begins a
/// hexadecimal literal -- confirm against the definition.
711 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
/// Declaration only. Takes two buffer positions and a flag.
/// NOTE(review): presumably triggers code completion for an #include path
/// that starts at \p PathStart, with the completion position at
/// \p CompletionPoint and \p IsAngled selecting <...> over "..." -- confirm
/// against the definition.
713 void codeCompleteIncludedFile(
const char *PathStart,
714 const char *CompletionPoint,
bool IsAngled);
/// Declaration only. \p StartPtr is a reference to a pointer, so the callee
/// can advance the caller's position; \p Result is a pointer and may
/// therefore be optional.
/// NOTE(review): presumably reads a universal character name whose backslash
/// is at \p SlashLoc and returns its code point, with 0 meaning "not a UCN"
/// -- confirm against the definition.
728 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
Token *Result);
741 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
/// Declaration only. \p CurPtr is a reference to a pointer, so the callee can
/// advance the caller's position.
/// NOTE(review): presumably consumes one UTF-8 encoded character if it is
/// valid inside an identifier, advancing \p CurPtr and returning true --
/// confirm against the definition.
749 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
754 #endif // LLVM_CLANG_LEX_LEXER_H Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
const char * getBufferLocation() const
Return the current location in the buffer.
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
StringRef getBuffer() const
Gets source code buffer.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
A little helper class used to produce diagnostics.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
Defines the clang::LangOptions interface.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
SourceLocation getEnd() const
Encodes a location in the source.
void setLength(unsigned Len)
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
SourceRange getAsRange() const
Dataflow Directional Tag Classes.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
unsigned Size
Size of the preamble in bytes.
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
Not within a conflict marker.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
void setLocation(SourceLocation L)
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
Defines the PreprocessorLexer interface.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.