clang API Documentation

Token.h
Go to the documentation of this file.
00001 //===--- Token.h - Token interface ------------------------------*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file defines the Token interface and the PPConditionalInfo struct.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #ifndef LLVM_CLANG_TOKEN_H
00015 #define LLVM_CLANG_TOKEN_H
00016 
00017 #include "clang/Basic/TemplateKinds.h"
00018 #include "clang/Basic/TokenKinds.h"
00019 #include "clang/Basic/SourceLocation.h"
00020 #include "clang/Basic/OperatorKinds.h"
00021 #include <cstdlib>
00022 
00023 namespace clang {
00024 
00025 class IdentifierInfo;
00026 
00027 /// Token - This structure provides full information about a lexed token.
00028 /// It is not intended to be space efficient; it is intended to return as much
00029 /// information as possible about each returned token.  This is expected to be
00030 /// compressed into a smaller form if memory footprint is important.
00031 ///
00032 /// The parser can create a special "annotation token" representing a stream of
00033 /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
00034 /// can be represented by a single typename annotation token that carries
00035 /// information about the SourceRange of the tokens and the type object.
00036 class Token {
00037   /// The location of the token.
00038   SourceLocation Loc;
00039
00040   // Conceptually these next two fields could be in a union.  However, this
00041   // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
00042   // routine. Keeping as separate members with casts until a more beautiful fix
00043   // presents itself.
00044
00045   /// UintData - This holds either the length of the token text, when
00046   /// a normal token, or the raw-encoded end of the SourceRange when an
00047   /// annotation token.
00048   unsigned UintData;
00049
00050   /// PtrData - Conceptually a union of several pointer types; which one is
00051   /// stored depends on what type of token this is:
00052   ///  Identifiers, keywords, etc:
00053   ///    This is an IdentifierInfo*, which contains the uniqued identifier
00054   ///    spelling.
00055   ///  Literals and raw identifiers:  isLiteral() / is(tok::raw_identifier).
00056   ///    This is a pointer to the start of the token in a text buffer, which
00057   ///    may be dirty (have trigraphs / escaped newlines).
00058   ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().
00059   ///    This is a pointer to sema-specific data for the annotation token.
00060   ///  Other:
00061   ///    This is null.
00062   void *PtrData;
00063
00064   /// Kind - The actual flavor of token this is.
00065   /// Holds a tok::TokenKind value narrowed to unsigned short (see getKind()).
00066   unsigned short Kind;
00067
00068   /// Flags - Bits we track about this token, members of the TokenFlags enum.
00069   unsigned char Flags;
00070 public:
00071
00072   // Various flags set per token:
00073   enum TokenFlags {
00074     StartOfLine   = 0x01,  // At start of line or only after whitespace.
00075     LeadingSpace  = 0x02,  // Whitespace exists before this token.
00076     DisableExpand = 0x04,  // This identifier may never be macro expanded.
00077     NeedsCleaning = 0x08,   // Contained an escaped newline or trigraph.
00078     LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
00079     HasUDSuffix = 0x20     // This string or character literal has a ud-suffix.
00080   };
00081
00082   tok::TokenKind getKind() const { return (tok::TokenKind)Kind; }
00083   void setKind(tok::TokenKind K) { Kind = K; }
00084
00085   /// is/isNot - Predicates to check if this token is a specific kind, as in
00086   /// "if (Tok.is(tok::l_brace)) {...}".
00087   bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
00088   bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
00089
00090   /// isAnyIdentifier - Return true if this is a raw identifier (when lexing
00091   /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
00092   bool isAnyIdentifier() const {
00093     return is(tok::identifier) || is(tok::raw_identifier);
00094   }
00095
00096   /// isLiteral - Return true if this is a "literal", like a numeric
00097   /// constant, string, etc.
00098   bool isLiteral() const {
00099     return is(tok::numeric_constant) || is(tok::char_constant) ||
00100            is(tok::wide_char_constant) || is(tok::utf16_char_constant) ||
00101            is(tok::utf32_char_constant) || is(tok::string_literal) ||
00102            is(tok::wide_string_literal) || is(tok::utf8_string_literal) ||
00103            is(tok::utf16_string_literal) || is(tok::utf32_string_literal) ||
00104            is(tok::angle_string_literal);
00105   }
00106
00107   bool isAnnotation() const {  // True iff Kind matches an ANNOTATION() entry.
00108 #define ANNOTATION(NAME) \
00109     if (is(tok::annot_##NAME)) \
00110       return true;
00111 #include "clang/Basic/TokenKinds.def"
00112     return false;
00113   }
00114
00115   /// getLocation - Return the source location stored for this token (the
00116   /// value last set by setLocation() or by setAnnotationRange()'s begin).
00117   SourceLocation getLocation() const { return Loc; }
00118   unsigned getLength() const {  // Text length; asserts on annotation tokens.
00119     assert(!isAnnotation() && "Annotation tokens have no length field");
00120     return UintData;
00121   }
00122
00123   void setLocation(SourceLocation L) { Loc = L; }
00124   void setLength(unsigned Len) {
00125     assert(!isAnnotation() && "Annotation tokens have no length field");
00126     UintData = Len;
00127   }
00128
00129   SourceLocation getAnnotationEndLoc() const {
00130     assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
00131     return SourceLocation::getFromRawEncoding(UintData);
00132   }
00133   void setAnnotationEndLoc(SourceLocation L) {
00134     assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
00135     UintData = L.getRawEncoding();
00136   }
00137
00138   SourceLocation getLastLoc() const {  // Annotation end, else getLocation().
00139     return isAnnotation() ? getAnnotationEndLoc() : getLocation();
00140   }
00141
00142   /// getAnnotationRange - SourceRange of the group of tokens that this
00143   /// annotation token represents.
00144   SourceRange getAnnotationRange() const {
00145     return SourceRange(getLocation(), getAnnotationEndLoc());
00146   }
00147   void setAnnotationRange(SourceRange R) {
00148     setLocation(R.getBegin());
00149     setAnnotationEndLoc(R.getEnd());
00150   }
00151
00152   const char *getName() const {  // Name of this kind, via tok::getTokenName.
00153     return tok::getTokenName( (tok::TokenKind) Kind);
00154   }
00155
00156   /// startToken - Reset the token: the kind becomes tok::unknown, and the
00157   /// flags, pointer data, length/end data and location are all cleared.
00158   void startToken() {
00159     Kind = tok::unknown;
00160     Flags = 0;
00161     PtrData = 0;
00162     UintData = 0;
00163     Loc = SourceLocation();
00164   }
00165
00166   IdentifierInfo *getIdentifierInfo() const {
00167     assert(isNot(tok::raw_identifier) &&
00168            "getIdentifierInfo() on a tok::raw_identifier token!");
00169     assert(!isAnnotation() &&
00170            "getIdentifierInfo() on an annotation token!");
00171     if (isLiteral()) return 0;  // Literals keep a text pointer in PtrData.
00172     return (IdentifierInfo*) PtrData;
00173   }
00174   void setIdentifierInfo(IdentifierInfo *II) {
00175     PtrData = (void*) II;
00176   }
00177
00178   /// getRawIdentifierData - For a raw identifier token (i.e., an identifier
00179   /// lexed in raw mode), returns a pointer to the start of it in the text
00180   /// buffer if known, null otherwise.
00181   const char *getRawIdentifierData() const {
00182     assert(is(tok::raw_identifier));
00183     return reinterpret_cast<const char*>(PtrData);
00184   }
00185   void setRawIdentifierData(const char *Ptr) {
00186     assert(is(tok::raw_identifier));
00187     PtrData = const_cast<char*>(Ptr);
00188   }
00189
00190   /// getLiteralData - For a literal token (numeric constant, string, etc), this
00191   /// returns a pointer to the start of it in the text buffer if known, null
00192   /// otherwise.
00193   const char *getLiteralData() const {
00194     assert(isLiteral() && "Cannot get literal data of non-literal");
00195     return reinterpret_cast<const char*>(PtrData);
00196   }
00197   void setLiteralData(const char *Ptr) {
00198     assert(isLiteral() && "Cannot set literal data of non-literal");
00199     PtrData = const_cast<char*>(Ptr);
00200   }
00201
00202   void *getAnnotationValue() const {  // Opaque annotation payload (PtrData).
00203     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
00204     return PtrData;
00205   }
00206   void setAnnotationValue(void *val) {
00207     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
00208     PtrData = val;
00209   }
00210
00211   /// setFlag - Set the specified flag.
00212   void setFlag(TokenFlags Flag) {
00213     Flags |= Flag;
00214   }
00215
00216   /// clearFlag - Unset the specified flag.
00217   void clearFlag(TokenFlags Flag) {
00218     Flags &= ~Flag;
00219   }
00220
00221   /// getFlags - Return the internal representation of the flags.
00222   ///  Only intended for low-level operations such as writing tokens to
00223   ///  disk.
00224   unsigned getFlags() const {
00225     return Flags;
00226   }
00227
00228   /// setFlagValue - Set a flag to either true or false.
00229   void setFlagValue(TokenFlags Flag, bool Val) {
00230     if (Val)
00231       setFlag(Flag);
00232     else
00233       clearFlag(Flag);
00234   }
00235
00236   /// isAtStartOfLine - Return true if this token is at the start of a line.
00237   ///
00238   bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; }
00239
00240   /// hasLeadingSpace - Return true if this token has whitespace before it.
00241   ///
00242   bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; }
00243
00244   /// isExpandDisabled - Return true if this identifier token should never
00245   /// be expanded in the future, due to C99 6.10.3.4p2.
00246   bool isExpandDisabled() const {
00247     return (Flags & DisableExpand) ? true : false;
00248   }
00249
00250   /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
00251   bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
00252
00253   /// getObjCKeywordID - Return the ObjC keyword kind.
00254   tok::ObjCKeywordKind getObjCKeywordID() const;
00255
00256   /// needsCleaning - Return true if this token has trigraphs or escaped
00257   /// newlines in it.
00258   ///
00259   bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; }
00260
00261   /// \brief Return true if this token has an empty macro before it.
00262   ///
00263   bool hasLeadingEmptyMacro() const {
00264     return (Flags & LeadingEmptyMacro) ? true : false;
00265   }
00266
00267   /// \brief Return true if this token is a string or character literal which
00268   /// has a ud-suffix.
00269   bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? true : false; }
00270 };
00271 
00272 /// PPConditionalInfo - Information about one entry of the conditional stack
00273 /// (#if directives) currently active.
00274 struct PPConditionalInfo {
00275   /// IfLoc - Location where the conditional started.
00276   ///
00277   SourceLocation IfLoc;
00278
00279   /// WasSkipping - True if this was contained in a skipping directive, e.g.
00280   /// in a "#if 0" block.
00281   bool WasSkipping;
00282
00283   /// FoundNonSkip - True if we have emitted tokens already, and are now in
00284   /// a later branch such as an #else block.  Only useful in skipping blocks.
00285   bool FoundNonSkip;
00286
00287   /// FoundElse - True if we've seen a #else in this block.  If so,
00288   /// further #elif/#else directives are not allowed.
00289   bool FoundElse;
00290 };
00291 
00292 }  // end namespace clang
00293 
00294 namespace llvm {
00295   template <>  // Explicit specialization: report clang::Token as POD-like.
00296   struct isPodLike<clang::Token> { static const bool value = true; };
00297 }  // end namespace llvm
00298 
00299 #endif