doxygen/Token_8h_source.html

//===--- Token.h - Token interface ------------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

//  This file defines the Token interface.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_CLANG_LEX_TOKEN_H

#define LLVM_CLANG_LEX_TOKEN_H


#include "clang/Basic/SourceLocation.h"

#include "clang/Basic/TokenKinds.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/StringRef.h"

#include <cassert>


namespace clang {


class IdentifierInfo;

class LangOptions;


/// Token - This structure provides full information about a lexed token.

/// It is not intended to be space efficient, it is intended to return as much

/// information as possible about each returned token.  This is expected to be

/// compressed into a smaller form if memory footprint is important.

///

/// The parser can create a special "annotation token" representing a stream of

/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"

/// can be represented by a single typename annotation token that carries

/// information about the SourceRange of the tokens and the type object.

class Token {

  /// The location of the token. This is actually a SourceLocation.

  SourceLocation::UIntTy Loc;


  // Conceptually these next two fields could be in a union.  However, this

  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical

  // routine. Keeping as separate members with casts until a more beautiful fix

  // presents itself.


  /// UintData - This holds either the length of the token text, when

  /// a normal token, or the end of the SourceRange when an annotation

  /// token.

  SourceLocation::UIntTy UintData;


  /// PtrData - This is a union of four different pointer types, which depends

  /// on what type of token this is:

  ///  Identifiers, keywords, etc:

  ///    This is an IdentifierInfo*, which contains the uniqued identifier

  ///    spelling.

  ///  Literals:  isLiteral() returns true.

  ///    This is a pointer to the start of the token in a text buffer, which

  ///    may be dirty (have trigraphs / escaped newlines).

  ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().

  ///    This is a pointer to sema-specific data for the annotation token.

  ///  Eof:

  ///    This is a pointer to a Decl.

  ///  Other:

  ///    This is null.

  void *PtrData;


  /// Kind - The actual flavor of token this is.

  tok::TokenKind Kind;


  /// Flags - Bits we track about this token, members of the TokenFlags enum.

  unsigned short Flags;


public:

  // Various flags set per token:

  enum TokenFlags {

    StartOfLine = 0x01,   // At start of line or only after whitespace

                          // (considering the line after macro expansion).

    LeadingSpace = 0x02,  // Whitespace exists before this token (considering

                          // whitespace after macro expansion).

    DisableExpand = 0x04, // This identifier may never be macro expanded.

    NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.

    LeadingEmptyMacro = 0x10, // Empty macro exists before this token.

    HasUDSuffix = 0x20,  // This string or character literal has a ud-suffix.

    HasUCN = 0x40,       // This identifier contains a UCN.

    IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).

    StringifiedInMacro = 0x100, // This string or character literal is formed by

                                // macro stringizing or charizing operator.

    CommaAfterElided = 0x200, // The comma following this token was elided (MS).

    IsEditorPlaceholder = 0x400, // This identifier is a placeholder.

    IsReinjected = 0x800, // A phase 4 token that was produced before and

                          // re-added, e.g. via EnterTokenStream. Annotation

                          // tokens are *not* reinjected.

  };


  tok::TokenKind getKind() const { return Kind; }

  void setKind(tok::TokenKind K) { Kind = K; }


  /// is/isNot - Predicates to check if this token is a specific kind, as in

  /// "if (Tok.is(tok::l_brace)) {...}".

  bool is(tok::TokenKind K) const { return Kind == K; }

  bool isNot(tok::TokenKind K) const { return Kind != K; }

  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {

    return is(K1) || is(K2);

  }

  template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {

    return is(K1) || isOneOf(Ks...);

  }


  /// Return true if this is a raw identifier (when lexing

  /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).

  bool isAnyIdentifier() const {

    return tok::isAnyIdentifier(getKind());

  }


  /// Return true if this is a "literal", like a numeric

  /// constant, string, etc.

  bool isLiteral() const {

    return tok::isLiteral(getKind());

  }


  /// Return true if this is any of tok::annot_* kind tokens.

  bool isAnnotation() const { return tok::isAnnotation(getKind()); }


  /// Return true if the token is a keyword that is parsed in the same

  /// position as a standard attribute, but that has semantic meaning

  /// and so cannot be a true attribute.

  bool isRegularKeywordAttribute() const {

    return tok::isRegularKeywordAttribute(getKind());

  }


  /// Return a source location identifier for the specified

  /// offset in the current file.

  SourceLocation getLocation() const {

    return SourceLocation::getFromRawEncoding(Loc);

  }

  unsigned getLength() const {

    assert(!isAnnotation() && "Annotation tokens have no length field");

    return UintData;

  }


  void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }

  void setLength(unsigned Len) {

    assert(!isAnnotation() && "Annotation tokens have no length field");

    UintData = Len;

  }


  SourceLocation getAnnotationEndLoc() const {

    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");

    return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);

  }

  void setAnnotationEndLoc(SourceLocation L) {

    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");

    UintData = L.getRawEncoding();

  }


  SourceLocation getLastLoc() const {

    return isAnnotation() ? getAnnotationEndLoc() : getLocation();

  }


  SourceLocation getEndLoc() const {

    return isAnnotation() ? getAnnotationEndLoc()

                          : getLocation().getLocWithOffset(getLength());

  }


  /// SourceRange of the group of tokens that this annotation token

  /// represents.

  SourceRange getAnnotationRange() const {

    return SourceRange(getLocation(), getAnnotationEndLoc());

  }

  void setAnnotationRange(SourceRange R) {

    setLocation(R.getBegin());

    setAnnotationEndLoc(R.getEnd());

  }


  const char *getName() const { return tok::getTokenName(Kind); }


  /// Reset all flags to cleared.

  void startToken() {

    Kind = tok::unknown;

    Flags = 0;

    PtrData = nullptr;

    UintData = 0;

    Loc = SourceLocation().getRawEncoding();

  }


  bool hasPtrData() const { return PtrData != nullptr; }


  IdentifierInfo *getIdentifierInfo() const {

    assert(isNot(tok::raw_identifier) &&

           "getIdentifierInfo() on a tok::raw_identifier token!");

    assert(!isAnnotation() &&

           "getIdentifierInfo() on an annotation token!");

    if (isLiteral()) return nullptr;

    if (is(tok::eof)) return nullptr;

    return (IdentifierInfo*) PtrData;

  }

  void setIdentifierInfo(IdentifierInfo *II) {

    PtrData = (void*) II;

  }


  const void *getEofData() const {

    assert(is(tok::eof));

    return reinterpret_cast<const void *>(PtrData);

  }

  void setEofData(const void *D) {

    assert(is(tok::eof));

    assert(!PtrData);

    PtrData = const_cast<void *>(D);

  }


  /// getRawIdentifier - For a raw identifier token (i.e., an identifier

  /// lexed in raw mode), returns a reference to the text substring in the

  /// buffer if known.

  StringRef getRawIdentifier() const {

    assert(is(tok::raw_identifier));

    return StringRef(reinterpret_cast<const char *>(PtrData), getLength());

  }

  void setRawIdentifierData(const char *Ptr) {

    assert(is(tok::raw_identifier));

    PtrData = const_cast<char*>(Ptr);

  }


  /// getLiteralData - For a literal token (numeric constant, string, etc), this

  /// returns a pointer to the start of it in the text buffer if known, null

  /// otherwise.

  const char *getLiteralData() const {

    assert(isLiteral() && "Cannot get literal data of non-literal");

    return reinterpret_cast<const char*>(PtrData);

  }

  void setLiteralData(const char *Ptr) {

    assert(isLiteral() && "Cannot set literal data of non-literal");

    PtrData = const_cast<char*>(Ptr);

  }


  void *getAnnotationValue() const {

    assert(isAnnotation() && "Used AnnotVal on non-annotation token");

    return PtrData;

  }

  void setAnnotationValue(void *val) {

    assert(isAnnotation() && "Used AnnotVal on non-annotation token");

    PtrData = val;

  }


  /// Set the specified flag.

  void setFlag(TokenFlags Flag) {

    Flags |= Flag;

  }


  /// Get the specified flag.

  bool getFlag(TokenFlags Flag) const {

    return (Flags & Flag) != 0;

  }


  /// Unset the specified flag.

  void clearFlag(TokenFlags Flag) {

    Flags &= ~Flag;

  }


  /// Return the internal represtation of the flags.

  ///

  /// This is only intended for low-level operations such as writing tokens to

  /// disk.

  unsigned getFlags() const {

    return Flags;

  }


  /// Set a flag to either true or false.

  void setFlagValue(TokenFlags Flag, bool Val) {

    if (Val)

      setFlag(Flag);

    else

      clearFlag(Flag);

  }


  /// isAtStartOfLine - Return true if this token is at the start of a line.

  ///

  bool isAtStartOfLine() const { return getFlag(StartOfLine); }


  /// Return true if this token has whitespace before it.

  ///

  bool hasLeadingSpace() const { return getFlag(LeadingSpace); }


  /// Return true if this identifier token should never

  /// be expanded in the future, due to C99 6.10.3.4p2.

  bool isExpandDisabled() const { return getFlag(DisableExpand); }


  /// Return true if we have an ObjC keyword identifier.

  bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;


  /// Return the ObjC keyword kind.

  tok::ObjCKeywordKind getObjCKeywordID() const;


  bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;


  /// Return true if this token has trigraphs or escaped newlines in it.

  bool needsCleaning() const { return getFlag(NeedsCleaning); }


  /// Return true if this token has an empty macro before it.

  ///

  bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); }


  /// Return true if this token is a string or character literal which

  /// has a ud-suffix.

  bool hasUDSuffix() const { return getFlag(HasUDSuffix); }


  /// Returns true if this token contains a universal character name.

  bool hasUCN() const { return getFlag(HasUCN); }


  /// Returns true if this token is formed by macro by stringizing or charizing

  /// operator.

  bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); }


  /// Returns true if the comma after this token was elided.

  bool commaAfterElided() const { return getFlag(CommaAfterElided); }


  /// Returns true if this token is an editor placeholder.

  ///

  /// Editor placeholders are produced by the code-completion engine and are

  /// represented as characters between '<#' and '#>' in the source code. The

  /// lexer uses identifier tokens to represent placeholders.

  bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); }

};


/// Information about the conditional stack (\#if directives)

/// currently active.

///

/// NOTE: the members have no default initializers, so the bool fields of a

/// default-initialized instance are indeterminate until assigned.

struct PPConditionalInfo {

  /// Location where the conditional started.

  SourceLocation IfLoc;


  /// True if this was contained in a skipping directive, e.g.,

  /// in a "\#if 0" block.

  bool WasSkipping;


  /// True if we have emitted tokens already, and now we're in

  /// an \#else block or something.  Only useful in Skipping blocks.

  bool FoundNonSkip;


  /// True if we've seen a \#else in this block.  If so,

  /// \#elif/\#else directives are not allowed.

  bool FoundElse;

};


/// Extra information needed for annotation tokens.

///

/// NOTE(review): judging by the name this is the payload for loop-hint pragma

/// annotation tokens; confirm against the code that creates it.

struct PragmaLoopHintInfo {

  /// Presumably the token naming the pragma -- confirm at the producer.

  Token PragmaName;

  /// Presumably the token naming the loop-hint option -- confirm at the

  /// producer.

  Token Option;

  /// Further tokens associated with the hint. ArrayRef is a non-owning view,

  /// so the underlying token storage must outlive this struct.

  ArrayRef<Token> Toks;

};

} // end namespace clang


#endif // LLVM_CLANG_LEX_TOKEN_H

isOneOf
static constexpr bool isOneOf()
Definition: RetainSummaryManager.cpp:28

SourceLocation.h
Defines the clang::SourceLocation class and associated facilities.

TokenKinds.h
Defines the clang::TokenKind enum and support functions.

clang::IdentifierInfo
One of these records is kept for each identifier that is lexed.
Definition: IdentifierTable.h:117

clang::LangOptions
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:461

clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:88

clang::SourceLocation::getFromRawEncoding
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
Definition: SourceLocation.h:154

clang::SourceLocation::getLocWithOffset
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
Definition: SourceLocation.h:136

clang::SourceLocation::getRawEncoding
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
Definition: SourceLocation.h:148

clang::SourceLocation::UIntTy
uint32_t UIntTy
Definition: SourceLocation.h:95

clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:212

clang::SourceRange::getEnd
SourceLocation getEnd() const
Definition: SourceLocation.h:222

clang::SourceRange::getBegin
SourceLocation getBegin() const
Definition: SourceLocation.h:221

clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:36

clang::Token::getIdentifierInfo
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187

clang::Token::setLiteralData
void setLiteralData(const char *Ptr)
Definition: Token.h:229

clang::Token::isAnyIdentifier
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:110

clang::Token::getEndLoc
SourceLocation getEndLoc() const
Definition: Token.h:159

clang::Token::getFlags
unsigned getFlags() const
Return the internal representation of the flags.
Definition: Token.h:262

clang::Token::setAnnotationEndLoc
void setAnnotationEndLoc(SourceLocation L)
Definition: Token.h:150

clang::Token::hasUCN
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:306

clang::Token::clearFlag
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:254

clang::Token::isLiteral
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116

clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132

clang::Token::getName
const char * getName() const
Definition: Token.h:174

clang::Token::getLength
unsigned getLength() const
Definition: Token.h:135

clang::Token::setLength
void setLength(unsigned Len)
Definition: Token.h:141

clang::Token::isEditorPlaceholder
bool isEditorPlaceholder() const
Returns true if this token is an editor placeholder.
Definition: Token.h:320

clang::Token::isExpandDisabled
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition: Token.h:284

clang::Token::setKind
void setKind(tok::TokenKind K)
Definition: Token.h:95

clang::Token::commaAfterElided
bool commaAfterElided() const
Returns true if the comma after this token was elided.
Definition: Token.h:313

clang::Token::getAnnotationEndLoc
SourceLocation getAnnotationEndLoc() const
Definition: Token.h:146

clang::Token::getObjCKeywordID
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:70

clang::Token::is
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99

clang::Token::getAnnotationValue
void * getAnnotationValue() const
Definition: Token.h:234

clang::Token::isOneOf
bool isOneOf(tok::TokenKind K1, Ts... Ks) const
Definition: Token.h:104

clang::Token::getKind
tok::TokenKind getKind() const
Definition: Token.h:94

clang::Token::isRegularKeywordAttribute
bool isRegularKeywordAttribute() const
Return true if the token is a keyword that is parsed in the same position as a standard attribute,...
Definition: Token.h:126

clang::Token::isAtStartOfLine
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276

clang::Token::setEofData
void setEofData(const void *D)
Definition: Token.h:204

clang::Token::getFlag
bool getFlag(TokenFlags Flag) const
Get the specified flag.
Definition: Token.h:249

clang::Token::TokenFlags
TokenFlags
Definition: Token.h:74

clang::Token::DisableExpand
@ DisableExpand
Definition: Token.h:79

clang::Token::HasUCN
@ HasUCN
Definition: Token.h:83

clang::Token::IsEditorPlaceholder
@ IsEditorPlaceholder
Definition: Token.h:88

clang::Token::IgnoredComma
@ IgnoredComma
Definition: Token.h:84

clang::Token::IsReinjected
@ IsReinjected
Definition: Token.h:89

clang::Token::LeadingEmptyMacro
@ LeadingEmptyMacro
Definition: Token.h:81

clang::Token::LeadingSpace
@ LeadingSpace
Definition: Token.h:77

clang::Token::StartOfLine
@ StartOfLine
Definition: Token.h:75

clang::Token::StringifiedInMacro
@ StringifiedInMacro
Definition: Token.h:85

clang::Token::HasUDSuffix
@ HasUDSuffix
Definition: Token.h:82

clang::Token::CommaAfterElided
@ CommaAfterElided
Definition: Token.h:87

clang::Token::NeedsCleaning
@ NeedsCleaning
Definition: Token.h:80

clang::Token::hasLeadingSpace
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280

clang::Token::getAnnotationRange
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition: Token.h:166

clang::Token::setLocation
void setLocation(SourceLocation L)
Definition: Token.h:140

clang::Token::hasLeadingEmptyMacro
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:299

clang::Token::setRawIdentifierData
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:217

clang::Token::isOneOf
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101

clang::Token::isNot
bool isNot(tok::TokenKind K) const
Definition: Token.h:100

clang::Token::hasPtrData
bool hasPtrData() const
Definition: Token.h:185

clang::Token::isAnnotation
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:121

clang::Token::setAnnotationValue
void setAnnotationValue(void *val)
Definition: Token.h:238

clang::Token::getEofData
const void * getEofData() const
Definition: Token.h:200

clang::Token::hasUDSuffix
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix.
Definition: Token.h:303

clang::Token::stringifiedInMacro
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition: Token.h:310

clang::Token::isObjCAtKeyword
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:61

clang::Token::setAnnotationRange
void setAnnotationRange(SourceRange R)
Definition: Token.h:169

clang::Token::isSimpleTypeSpecifier
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
Definition: Lexer.cpp:78

clang::Token::startToken
void startToken()
Reset all flags to cleared.
Definition: Token.h:177

clang::Token::needsCleaning
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:295

clang::Token::setIdentifierInfo
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:196

clang::Token::getLastLoc
SourceLocation getLastLoc() const
Definition: Token.h:155

clang::Token::setFlagValue
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:267

clang::Token::getRawIdentifier
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition: Token.h:213

clang::Token::getLiteralData
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:225

clang::Token::setFlag
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:244

llvm::ArrayRef
Definition: LLVM.h:31

clang::tok::getTokenName
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:24

clang::tok::isAnyIdentifier
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition: TokenKinds.h:83

clang::tok::ObjCKeywordKind
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41

clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25

clang::tok::isRegularKeywordAttribute
constexpr bool isRegularKeywordAttribute(TokenKind K)
Definition: TokenKinds.h:110

clang::tok::isLiteral
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97

clang::tok::isAnnotation
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.cpp:58

clang
The JSON file list parser is used to communicate input to InstallAPI.
Definition: CalledOnceCheck.h:17

clang::PPConditionalInfo
Information about the conditional stack (#if directives) currently active.
Definition: Token.h:325

clang::PPConditionalInfo::FoundNonSkip
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition: Token.h:335

clang::PPConditionalInfo::IfLoc
SourceLocation IfLoc
Location where the conditional started.
Definition: Token.h:327

clang::PPConditionalInfo::WasSkipping
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition: Token.h:331

clang::PPConditionalInfo::FoundElse
bool FoundElse
True if we've seen a #else in this block.
Definition: Token.h:339

clang::PragmaLoopHintInfo
Definition: Token.h:343

clang::PragmaLoopHintInfo::Toks
ArrayRef< Token > Toks
Definition: Token.h:346

clang::PragmaLoopHintInfo::PragmaName
Token PragmaName
Definition: Token.h:344

clang::PragmaLoopHintInfo::Option
Token Option
Definition: Token.h:345