clang 22.0.0git
Lexer.cpp File Reference
#include "clang/Lex/Lexer.h"
#include "UnicodeCharSets.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MultipleIncludeOpt.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/UnicodeCharRanges.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <optional>
#include <string>
#include <tuple>
#include "clang/Basic/TransformTypeTraits.def"

Go to the source code of this file.

Macros

#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait)

Functions

template<typename T>
static void StringifyImpl (T &Str, char Quote)
static size_t getSpellingSlow (const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
 Slow case of getSpelling.
static const char * findBeginningOfLine (StringRef Buffer, unsigned Offset)
 Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.
static SourceLocation getBeginningOfFileToken (SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static CharSourceRange makeRangeFromFileLocs (CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isInExpansionTokenRange (const SourceLocation Loc, const SourceManager &SM)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc (Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
 GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all expanded at a single point, perform the mapping.
static char GetTrigraphCharForLetter (char Letter)
 GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char DecodeTrigraphChar (const char *CP, Lexer *L, bool Trigraphs)
 DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ??, emit a trigraph warning.
static bool isUnicodeWhitespace (uint32_t Codepoint)
static llvm::SmallString< 5 > codepointAsHexString (uint32_t C)
static bool isMathematicalExtensionID (uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static bool isAllowedIDChar (uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static bool isAllowedInitiallyIDChar (uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void diagnoseExtensionInIdentifier (DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static CharSourceRange makeCharRange (Lexer &L, const char *Begin, const char *End)
static void maybeDiagnoseIDCharCompat (DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static void maybeDiagnoseUTF8Homoglyph (DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
 After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.
static void diagnoseInvalidUnicodeCodepointInIdentifier (DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static const char * fastParseASCIIIdentifier (const char *CurPtr, const char *BufferEnd)
static bool isEndOfBlockCommentWithEscapedNewLine (const char *CurPtr, Lexer *L, bool Trigraphs)
 isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) is part of an escaped newline sequence.
static const char * FindConflictEnd (const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
 Find the end of a version control conflict marker.
static const char * findPlaceholderEnd (const char *CurPtr, const char *BufferEnd)

Macro Definition Documentation

◆ TRANSFORM_TYPE_TRAIT_DEF

#define TRANSFORM_TYPE_TRAIT_DEF ( _,
Trait )
Value:
case tok::kw___##Trait:

Function Documentation

◆ codepointAsHexString()

llvm::SmallString< 5 > codepointAsHexString ( uint32_t C)
static

◆ DecodeTrigraphChar()

char DecodeTrigraphChar ( const char * CP,
Lexer * L,
bool Trigraphs )
static

DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ??, emit a trigraph warning. If trigraphs are enabled, return the result character. Finally, emit a warning about trigraph use whether trigraphs are enabled or not.

Definition at line 1257 of file Lexer.cpp.

References clang::Lexer::Diag(), GetTrigraphCharForLetter(), and clang::PreprocessorLexer::isLexingRawMode().

◆ diagnoseExtensionInIdentifier()

void diagnoseExtensionInIdentifier ( DiagnosticsEngine & Diags,
uint32_t C,
CharSourceRange Range )
static

◆ diagnoseInvalidUnicodeCodepointInIdentifier()

void diagnoseInvalidUnicodeCodepointInIdentifier ( DiagnosticsEngine & Diags,
const LangOptions & LangOpts,
uint32_t CodePoint,
CharSourceRange Range,
bool IsFirst )
static

◆ fastParseASCIIIdentifier()

const char * fastParseASCIIIdentifier ( const char * CurPtr,
const char * BufferEnd )
static

◆ findBeginningOfLine()

const char * findBeginningOfLine ( StringRef Buffer,
unsigned Offset )
static

Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.

Definition at line 543 of file Lexer.cpp.

References clang::Lexer::isNewLineEscaped(), and clang::isVerticalWhitespace().

Referenced by getBeginningOfFileToken(), and clang::Lexer::getIndentationForLine().

◆ FindConflictEnd()

const char * FindConflictEnd ( const char * CurPtr,
const char * BufferEnd,
ConflictMarkerKind CMK )
static

Find the end of a version control conflict marker.

Definition at line 3245 of file Lexer.cpp.

References clang::CMK_Perforce.

◆ findPlaceholderEnd()

const char * findPlaceholderEnd ( const char * CurPtr,
const char * BufferEnd )
static

Definition at line 3349 of file Lexer.cpp.

◆ getBeginningOfFileToken()

◆ GetMappedTokenLoc()

SourceLocation GetMappedTokenLoc ( Preprocessor & PP,
SourceLocation FileLoc,
unsigned CharNo,
unsigned TokLen )
static

GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all expanded at a single point, perform the mapping.

This is currently only used for _Pragma implementation, so it is the slow path of the hot getSourceLocation method. Do not allow it to be inlined.

Definition at line 1185 of file Lexer.cpp.

References clang::CharSourceRange::getBegin(), clang::CharSourceRange::getEnd(), clang::SourceLocation::getLocWithOffset(), clang::Preprocessor::getSourceManager(), clang::SourceLocation::isMacroID(), and SM.

Referenced by clang::Lexer::getSourceLocation().

◆ getSpellingSlow()

size_t getSpellingSlow ( const Token & Tok,
const char * BufPtr,
const LangOptions & LangOpts,
char * Spelling )
static

Slow case of getSpelling.

Extract the characters comprising the spelling of this token from the provided input buffer.

Definition at line 324 of file Lexer.cpp.

References clang::Lexer::getCharAndSizeNoWarn(), clang::tok::isStringLiteral(), memcpy(), and Tok.

Referenced by clang::Lexer::getSpelling(), clang::Lexer::getSpelling(), and clang::Lexer::getSpelling().

◆ GetTrigraphCharForLetter()

char GetTrigraphCharForLetter ( char Letter)
static

GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph letter it corresponds to, or '\0' if nothing.

Definition at line 1238 of file Lexer.cpp.

Referenced by DecodeTrigraphChar().

◆ isAllowedIDChar()

bool isAllowedIDChar ( uint32_t C,
const LangOptions & LangOpts,
bool & IsExtension )
static

◆ isAllowedInitiallyIDChar()

bool isAllowedInitiallyIDChar ( uint32_t C,
const LangOptions & LangOpts,
bool & IsExtension )
static

◆ isEndOfBlockCommentWithEscapedNewLine()

bool isEndOfBlockCommentWithEscapedNewLine ( const char * CurPtr,
Lexer * L,
bool Trigraphs )
static

isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) is part of an escaped newline sequence.

Issue a diagnostic if so. We know that the newline is inside of a block comment.

Definition at line 2778 of file Lexer.cpp.

References clang::Lexer::Diag(), clang::isHorizontalWhitespace(), and clang::PreprocessorLexer::isLexingRawMode().

◆ isInExpansionTokenRange()

bool isInExpansionTokenRange ( const SourceLocation Loc,
const SourceManager & SM )
static

Definition at line 944 of file Lexer.cpp.

References SM.

Referenced by clang::Lexer::makeFileCharRange().

◆ isMathematicalExtensionID()

bool isMathematicalExtensionID ( uint32_t C,
const LangOptions & LangOpts,
bool IsStart,
bool & IsExtension )
static

◆ isUnicodeWhitespace()

bool isUnicodeWhitespace ( uint32_t Codepoint)
static

Definition at line 1545 of file Lexer.cpp.

References UnicodeWhitespaceCharRanges.

◆ makeCharRange()

CharSourceRange makeCharRange ( Lexer & L,
const char * Begin,
const char * End )
inline static

◆ makeRangeFromFileLocs()

◆ maybeDiagnoseIDCharCompat()

void maybeDiagnoseIDCharCompat ( DiagnosticsEngine & Diags,
uint32_t C,
CharSourceRange Range,
bool IsFirst )
static

◆ maybeDiagnoseUTF8Homoglyph()

void maybeDiagnoseUTF8Homoglyph ( DiagnosticsEngine & Diags,
uint32_t C,
CharSourceRange Range )
static

After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.

Definition at line 1683 of file Lexer.cpp.

References clang::C, codepointAsHexString(), clang::operator<(), and clang::DiagnosticsEngine::Report().

◆ StringifyImpl()

template<typename T>
void StringifyImpl ( T & Str,
char Quote )
static

Definition at line 284 of file Lexer.cpp.

References clang::T.

Referenced by clang::Lexer::Stringify(), and clang::Lexer::Stringify().