clang API Documentation

Preprocessor.h
Go to the documentation of this file.
00001 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file defines the Preprocessor interface.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
00015 #define LLVM_CLANG_LEX_PREPROCESSOR_H
00016 
00017 #include "clang/Lex/MacroInfo.h"
00018 #include "clang/Lex/Lexer.h"
00019 #include "clang/Lex/PTHLexer.h"
00020 #include "clang/Lex/PPCallbacks.h"
00021 #include "clang/Lex/TokenLexer.h"
00022 #include "clang/Lex/PTHManager.h"
00023 #include "clang/Basic/Builtins.h"
00024 #include "clang/Basic/Diagnostic.h"
00025 #include "clang/Basic/IdentifierTable.h"
00026 #include "clang/Basic/SourceLocation.h"
00027 #include "llvm/ADT/DenseMap.h"
00028 #include "llvm/ADT/IntrusiveRefCntPtr.h"
00029 #include "llvm/ADT/SmallPtrSet.h"
00030 #include "llvm/ADT/OwningPtr.h"
00031 #include "llvm/ADT/SmallVector.h"
00032 #include "llvm/ADT/ArrayRef.h"
00033 #include "llvm/Support/Allocator.h"
00034 #include <vector>
00035 
00036 namespace llvm {
00037   template<unsigned InternalLen> class SmallString;
00038 }
00039 
00040 namespace clang {
00041 
00042 class SourceManager;
00043 class ExternalPreprocessorSource;
00044 class FileManager;
00045 class FileEntry;
00046 class HeaderSearch;
00047 class PragmaNamespace;
00048 class PragmaHandler;
00049 class CommentHandler;
00050 class ScratchBuffer;
00051 class TargetInfo;
00052 class PPCallbacks;
00053 class CodeCompletionHandler;
00054 class DirectoryLookup;
00055 class PreprocessingRecord;
00056 class ModuleLoader;
00057 
00058 /// Preprocessor - This object engages in a tight little dance with the lexer to
00059 /// efficiently preprocess tokens.  Lexers know only about tokens within a
00060 /// single source file, and don't know anything about preprocessor-level issues
00061 /// like the #include stack, token expansion, etc.
00062 ///
00063 class Preprocessor : public RefCountedBase<Preprocessor> {
00064   DiagnosticsEngine        *Diags;
00065   LangOptions       &LangOpts;
00066   const TargetInfo  *Target;
00067   FileManager       &FileMgr;
00068   SourceManager     &SourceMgr;
00069   ScratchBuffer     *ScratchBuf;
00070   HeaderSearch      &HeaderInfo;
00071   ModuleLoader      &TheModuleLoader;
00072 
00073   /// \brief External source of macros.
00074   ExternalPreprocessorSource *ExternalSource;
00075 
00076 
00077   /// PTH - An optional PTHManager object used for getting tokens from
00078   ///  a token cache rather than lexing the original source file.
00079   OwningPtr<PTHManager> PTH;
00080 
00081   /// BP - A BumpPtrAllocator object used to quickly allocate and release
00082   ///  objects internal to the Preprocessor.
00083   llvm::BumpPtrAllocator BP;
00084 
00085   /// Identifiers for builtin macros and other builtins.
00086   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
00087   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
00088   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
00089   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
00090   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
00091   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
00092   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
00093   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
00094   IdentifierInfo *Ident__has_feature;              // __has_feature
00095   IdentifierInfo *Ident__has_extension;            // __has_extension
00096   IdentifierInfo *Ident__has_builtin;              // __has_builtin
00097   IdentifierInfo *Ident__has_attribute;            // __has_attribute
00098   IdentifierInfo *Ident__has_include;              // __has_include
00099   IdentifierInfo *Ident__has_include_next;         // __has_include_next
00100   IdentifierInfo *Ident__has_warning;              // __has_warning
00101 
00102   SourceLocation DATELoc, TIMELoc;
00103   unsigned CounterValue;  // Next __COUNTER__ value.
00104 
00105   enum {
00106     /// MaxAllowedIncludeStackDepth - Maximum depth of #includes.
00107     MaxAllowedIncludeStackDepth = 200
00108   };
00109 
00110   // State that is set before the preprocessor begins.
00111   bool KeepComments : 1;
00112   bool KeepMacroComments : 1;
00113   bool SuppressIncludeNotFoundError : 1;
00114 
00115   // State that changes while the preprocessor runs:
00116   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
00117 
00118   /// Whether the preprocessor owns the header search object.
00119   bool OwnsHeaderSearch : 1;
00120 
00121   /// DisableMacroExpansion - True if macro expansion is disabled.
00122   bool DisableMacroExpansion : 1;
00123 
00124   /// \brief Whether we have already loaded macros from the external source.
00125   mutable bool ReadMacrosFromExternalSource : 1;
00126 
00127   /// \brief True if we are pre-expanding macro arguments.
00128   bool InMacroArgPreExpansion;
00129 
00130   /// Identifiers - This is mapping/lookup information for all identifiers in
00131   /// the program, including program keywords.
00132   mutable IdentifierTable Identifiers;
00133 
00134   /// Selectors - This table contains all the selectors in the program. Unlike
00135   /// IdentifierTable above, this table *isn't* populated by the preprocessor.
00136   /// It is declared/expanded here because its role/lifetime is
00137   /// conceptually similar to the IdentifierTable. In addition, the current
00138   /// control flow (in clang::ParseAST()) makes it convenient to put here.
00139   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
00140   /// the lifetime of the preprocessor.
00141   SelectorTable Selectors;
00142 
00143   /// BuiltinInfo - Information about builtins.
00144   Builtin::Context BuiltinInfo;
00145 
00146   /// PragmaHandlers - This tracks all of the pragmas that the client registered
00147   /// with this preprocessor.
00148   PragmaNamespace *PragmaHandlers;
00149 
00150   /// \brief Tracks all of the comment handlers that the client registered
00151   /// with this preprocessor.
00152   std::vector<CommentHandler *> CommentHandlers;
00153 
00154   /// \brief True if we want to ignore the EOF token and continue later on
00155   /// (thus avoiding tearing down the Lexer, etc.).
00156   bool IncrementalProcessing;
00157 
00158   /// \brief The code-completion handler.
00159   CodeCompletionHandler *CodeComplete;
00160 
00161   /// \brief The file that we're performing code-completion for, if any.
00162   const FileEntry *CodeCompletionFile;
00163 
00164   /// \brief The offset in file for the code-completion point.
00165   unsigned CodeCompletionOffset;
00166 
00167   /// \brief The location for the code-completion point. This gets instantiated
00168   /// when the CodeCompletionFile gets #include'ed for preprocessing.
00169   SourceLocation CodeCompletionLoc;
00170 
00171   /// \brief The start location for the file of the code-completion point.
00172   /// This gets instantiated when the CodeCompletionFile gets #include'ed
00173   /// for preprocessing.
00174   SourceLocation CodeCompletionFileLoc;
00175 
00176   /// \brief The source location of the 'import' contextual keyword we just 
00177   /// lexed, if any.
00178   SourceLocation ModuleImportLoc;
00179 
00180   /// \brief The module import path that we're currently processing.
00181   llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> 
00182     ModuleImportPath;
00183   
00184   /// \brief Whether the module import expects an identifier next. Otherwise,
00185   /// it expects a '.' or ';'.
00186   bool ModuleImportExpectsIdentifier;
00187   
00188   /// \brief The source location of the currently-active
00189   /// #pragma clang arc_cf_code_audited begin.
00190   SourceLocation PragmaARCCFCodeAuditedLoc;
00191 
00192   /// \brief True if we hit the code-completion point.
00193   bool CodeCompletionReached;
00194 
00195   /// \brief The number of bytes that we will initially skip when entering the
00196   /// main file, which is used when loading a precompiled preamble, along
00197   /// with a flag that indicates whether skipping this number of bytes will
00198   /// place the lexer at the start of a line.
00199   std::pair<unsigned, bool> SkipMainFilePreamble;
00200 
00201   /// CurLexer - This is the current top of the stack that we're lexing from if
00202   /// not expanding a macro and we are lexing directly from source code.
00203   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
00204   OwningPtr<Lexer> CurLexer;
00205 
00206   /// CurPTHLexer - This is the current top of stack that we're lexing from if
00207   ///  not expanding from a macro and we are lexing from a PTH cache.
00208   ///  Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
00209   OwningPtr<PTHLexer> CurPTHLexer;
00210 
00211   /// CurPPLexer - This is the current top of the stack that we're lexing from
00212   ///  if not expanding a macro.  This is an alias for either CurLexer or
00213   ///  CurPTHLexer.
00214   PreprocessorLexer *CurPPLexer;
00215 
00216   /// CurDirLookup - The DirectoryLookup structure used to find the current
00217   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
00218   /// implement #include_next and find directory-specific properties.
00219   const DirectoryLookup *CurDirLookup;
00220 
00221   /// CurTokenLexer - This is the current macro we are expanding, if we are
00222   /// expanding a macro.  One of CurLexer and CurTokenLexer must be null.
00223   OwningPtr<TokenLexer> CurTokenLexer;
00224 
00225   /// \brief The kind of lexer we're currently working with.
00226   enum CurLexerKind {
00227     CLK_Lexer,                // Lex() forwards to CurLexer.
00228     CLK_PTHLexer,             // Lex() forwards to CurPTHLexer.
00229     CLK_TokenLexer,           // Lex() forwards to CurTokenLexer.
00230     CLK_CachingLexer,         // Lex() calls CachingLex().
00231     CLK_LexAfterModuleImport  // Lex() calls LexAfterModuleImport().
00232   } CurLexerKind;
00233 
00234   /// IncludeMacroStack - This keeps track of the stack of files currently
00235   /// #included, and macros currently being expanded from, not counting
00236   /// CurLexer/CurTokenLexer.
00237   struct IncludeStackInfo {
00238     enum CurLexerKind     CurLexerKind;
00239     Lexer                 *TheLexer;
00240     PTHLexer              *ThePTHLexer;
00241     PreprocessorLexer     *ThePPLexer;
00242     TokenLexer            *TheTokenLexer;
00243     const DirectoryLookup *TheDirLookup;
00244     /// Member-wise constructor: each argument initializes exactly one field.
00245     IncludeStackInfo(enum CurLexerKind Kind, Lexer *SrcLexer,
00246                      PTHLexer *CachedLexer, PreprocessorLexer *PPLexer,
00247                      TokenLexer *MacroLexer, const DirectoryLookup *Dir)
00248       : CurLexerKind(Kind), TheLexer(SrcLexer), ThePTHLexer(CachedLexer),
00249         ThePPLexer(PPLexer), TheTokenLexer(MacroLexer), TheDirLookup(Dir) {}
00250   };
00251   std::vector<IncludeStackInfo> IncludeMacroStack;
00252 
00253   /// Callbacks - These are actions invoked when some preprocessor activity is
00254   /// encountered (e.g. a file is #included, etc).
00255   PPCallbacks *Callbacks;
00256 
00257   struct MacroExpandsInfo {  // Element type of DelayedMacroExpandsCallbacks.
00258     Token Tok;
00259     MacroInfo *MI;
00260     SourceRange Range;
00261     MacroExpandsInfo(Token T, MacroInfo *Info, SourceRange R)
00262       : Tok(T), MI(Info), Range(R) {}  // Each argument fills one field.
00263   };
00264   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
00265 
00266   /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
00267   /// to the actual definition of the macro.
00268   llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
00269 
00270   /// \brief Macros that we want to warn about because they are not used at the
00271   /// end of the translation unit; we store just their SourceLocations instead
00272   /// of something like MacroInfo*. The benefit of this is that when we are
00273   /// deserializing from PCH, we don't need to deserialize identifier & macros
00274   /// just so that we can report that they are unused, we just warn using
00275   /// the SourceLocations of this set (that will be filled by the ASTReader).
00276   /// We are using SmallPtrSet instead of a vector for faster removal.
00277   typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
00278   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
00279 
00280   /// MacroArgCache - This is a "freelist" of MacroArgs objects that can be
00281   /// reused for quick allocation.
00282   MacroArgs *MacroArgCache;
00283   friend class MacroArgs;
00284 
00285   /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
00286   /// push_macro directive, we keep a MacroInfo stack used to restore
00287   /// previous macro value.
00288   llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
00289 
00290   // Various statistics we track for performance analysis.
00291   unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
00292   unsigned NumIf, NumElse, NumEndif;
00293   unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
00294   unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
00295   unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
00296   unsigned NumSkipped;
00297 
00298   /// Predefines - This string is the predefined macros that preprocessor
00299   /// should use from the command line etc.
00300   std::string Predefines;
00301 
00302   /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
00303   enum { TokenLexerCacheSize = 8 };
00304   unsigned NumCachedTokenLexers;
00305   TokenLexer *TokenLexerCache[TokenLexerCacheSize];
00306 
00307   /// \brief Keeps macro expanded tokens for TokenLexers.
00308   ///
00309   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
00310   /// is going to lex to the cache, and when it finishes the tokens are removed
00311   /// from the end of the cache.
00312   SmallVector<Token, 16> MacroExpandedTokens;
00313   std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
00314 
00315   /// \brief A record of the macro definitions and expansions that
00316   /// occurred during preprocessing.
00317   ///
00318   /// This is an optional side structure that can be enabled with
00319   /// \c createPreprocessingRecord() prior to preprocessing.
00320   PreprocessingRecord *Record;
00321 
00322 private:  // Cached tokens state.
00323   typedef SmallVector<Token, 1> CachedTokensTy;
00324 
00325   /// CachedTokens - Cached tokens are stored here when we do backtracking or
00326   /// lookahead. They are "lexed" by the CachingLex() method.
00327   CachedTokensTy CachedTokens;
00328 
00329   /// CachedLexPos - The position of the cached token that CachingLex() should
00330   /// "lex" next. If it points beyond the CachedTokens vector, it means that
00331   /// a normal Lex() should be invoked.
00332   CachedTokensTy::size_type CachedLexPos;
00333 
00334   /// BacktrackPositions - Stack of backtrack positions, allowing nested
00335   /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
00336   /// indicate where CachedLexPos should be set when the Backtrack() method is
00337   /// invoked (at which point the last position is popped).
00338   std::vector<CachedTokensTy::size_type> BacktrackPositions;
00339 
00340   struct MacroInfoChain {  // Node type used by MIChainHead and MICache.
00341     MacroInfo MI;          // The MacroInfo stored by value in this node.
00342     MacroInfoChain *Next;  // Link to another node of the chain.
00343     MacroInfoChain *Prev;  // Link to another node of the chain.
00344   };
00345 
00346   /// MacroInfos are managed as a chain for easy disposal.  This is the head
00347   /// of that list.
00348   MacroInfoChain *MIChainHead;
00349 
00350   /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
00351   /// allocation.
00352   MacroInfoChain *MICache;
00353 
00354   MacroInfo *getInfoForMacro(IdentifierInfo *II) const;
00355 
00356 public:
00357   Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,
00358                const TargetInfo *target,
00359                SourceManager &SM, HeaderSearch &Headers,
00360                ModuleLoader &TheModuleLoader,
00361                IdentifierInfoLookup *IILookup = 0,
00362                bool OwnsHeaderSearch = false,
00363                bool DelayInitialization = false,
00364                bool IncrProcessing = false);
00365 
00366   ~Preprocessor();
00367 
00368   /// \brief Initialize the preprocessor, if the constructor did not already
00369   /// perform the initialization.
00370   ///
00371   /// \param Target Information about the target.
00372   void Initialize(const TargetInfo &Target);
00373 
00374   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
00375   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
00376 
00377   const LangOptions &getLangOpts() const { return LangOpts; }
00378   const TargetInfo &getTargetInfo() const { return *Target; }
00379   FileManager &getFileManager() const { return FileMgr; }
00380   SourceManager &getSourceManager() const { return SourceMgr; }
00381   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
00382 
00383   IdentifierTable &getIdentifierTable() { return Identifiers; }
00384   SelectorTable &getSelectorTable() { return Selectors; }
00385   Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
00386   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
00387 
00388   void setPTHManager(PTHManager* pm);
00389 
00390   PTHManager *getPTHManager() { return PTH.get(); }
00391 
00392   void setExternalSource(ExternalPreprocessorSource *Source) {
00393     ExternalSource = Source;
00394   }
00395 
00396   ExternalPreprocessorSource *getExternalSource() const {
00397     return ExternalSource;
00398   }
00399 
00400   /// \brief Retrieve the module loader associated with this preprocessor.
00401   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
00402 
00403   /// SetCommentRetentionState - Control whether the preprocessor retains
00404   /// comments in its output.  Keeping macro comments also sets KeepComments.
00405   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
00406     this->KeepMacroComments = KeepMacroComments;
00407     this->KeepComments = KeepMacroComments || KeepComments;
00408   }
00409 
00410   bool getCommentRetentionState() const { return KeepComments; }
00411 
00412   void SetSuppressIncludeNotFoundError(bool Suppress) {
00413     SuppressIncludeNotFoundError = Suppress;
00414   }
00415   /// Return the flag last stored by SetSuppressIncludeNotFoundError().
00416   bool GetSuppressIncludeNotFoundError() const {
00417     return SuppressIncludeNotFoundError;
00418   }
00419 
00420   /// isCurrentLexer - Return true if \arg L is the lexer that CurPPLexer
00421   /// currently points at, i.e. the one we are lexing directly from.
00422   bool isCurrentLexer(const PreprocessorLexer *L) const {
00423     return L == CurPPLexer;
00424   }
00425 
00426   /// getCurrentLexer - Return the current lexer being lexed from.  Note
00427   /// that this ignores any potentially active macro expansions and _Pragma
00428   /// expansions going on at the time.
00429   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
00430 
00431   /// getCurrentFileLexer - Return the current file lexer being lexed from.
00432   /// Note that this ignores any potentially active macro expansions and _Pragma
00433   /// expansions going on at the time.
00434   PreprocessorLexer *getCurrentFileLexer() const;
00435 
00436   /// getPPCallbacks/addPPCallbacks - Read or extend the preprocessor
00437   /// callbacks.  Note that this class takes ownership of any PPCallbacks
00438   /// object given to it.
00439   PPCallbacks *getPPCallbacks() const { return Callbacks; }
00440   void addPPCallbacks(PPCallbacks *C) {
00441     PPCallbacks *Head = C;  // Used alone if nothing is installed yet.
00442     if (Callbacks) Head = new PPChainedCallbacks(C, Callbacks);  // Chain.
00443     Callbacks = Head;
00444   }
00445 
00446   /// getMacroInfo - Look up the MacroInfo that an identifier is #defined to.
00447   /// Returns null when the identifier isn't #define'd.
00448   MacroInfo *getMacroInfo(IdentifierInfo *II) const {
00449     // Only identifiers reporting a macro definition are looked up.
00450     if (II->hasMacroDefinition())
00451       return getInfoForMacro(II);
00452     return 0;
00453   }
00454 
00455   /// setMacroInfo - Specify a macro for this identifier.
00456   ///
00457   void setMacroInfo(IdentifierInfo *II, MacroInfo *MI,
00458                     bool LoadedFromAST = false);
00459 
00460   /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
00461   /// state of the macro table.  This visits every currently-defined macro.
00462   typedef llvm::DenseMap<IdentifierInfo*,
00463                          MacroInfo*>::const_iterator macro_iterator;
00464   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
00465   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
00466 
00467   const std::string &getPredefines() const { return Predefines; }
00468   /// setPredefines - Set the predefines for this Preprocessor.  These
00469   /// predefines are automatically injected when parsing the main file.
00470   void setPredefines(const char *P) { Predefines = P; }
00471   void setPredefines(const std::string &P) { Predefines = P; }
00472 
00473   /// getIdentifierInfo - Return information about the specified preprocessor
00474   /// identifier token.  The version of this method that takes two character
00475   /// pointers is preferred unless the identifier is already available as a
00476   /// string (this avoids allocation and copying of memory to construct an
00477   /// std::string).
00478   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
00479     return &Identifiers.get(Name);
00480   }
00481 
00482   /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
00483   /// If 'Namespace' is non-null, then it is a token required to exist on the
00484   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
00485   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
00486   void AddPragmaHandler(PragmaHandler *Handler) {
00487     AddPragmaHandler(StringRef(), Handler);
00488   }
00489 
00490   /// RemovePragmaHandler - Remove the specific pragma handler from
00491   /// the preprocessor. If \arg Namespace is non-null, then it should
00492   /// be the namespace that \arg Handler was added to. It is an error
00493   /// to remove a handler that has not been registered.
00494   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
00495   void RemovePragmaHandler(PragmaHandler *Handler) {
00496     RemovePragmaHandler(StringRef(), Handler);
00497   }
00498 
00499   /// \brief Add the specified comment handler to the preprocessor.
00500   void AddCommentHandler(CommentHandler *Handler);
00501 
00502   /// \brief Remove the specified comment handler.
00503   ///
00504   /// It is an error to remove a handler that has not been registered.
00505   void RemoveCommentHandler(CommentHandler *Handler);
00506 
00507   /// \brief Set the code completion handler to the given object.
00508   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
00509     CodeComplete = &Handler;
00510   }
00511 
00512   /// \brief Retrieve the current code-completion handler.
00513   CodeCompletionHandler *getCodeCompletionHandler() const {
00514     return CodeComplete;
00515   }
00516 
00517   /// \brief Clear out the code completion handler.
00518   void clearCodeCompletionHandler() {
00519     CodeComplete = 0;
00520   }
00521 
00522   /// \brief Hook used by the lexer to invoke the "natural language" code
00523   /// completion point.
00524   void CodeCompleteNaturalLanguage();
00525 
00526   /// \brief Retrieve the preprocessing record, or NULL if there is no
00527   /// preprocessing record.
00528   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
00529 
00530   /// \brief Create a new preprocessing record, which will keep track of
00531   /// all macro expansions, macro definitions, etc.
00532   void createPreprocessingRecord(bool RecordConditionalDirectives);
00533 
00534   /// EnterMainSourceFile - Enter the specified FileID as the main source file,
00535   /// which implicitly adds the builtin defines etc.
00536   void EnterMainSourceFile();
00537 
00538   /// EndSourceFile - Inform the preprocessor callbacks that processing is
00539   /// complete.
00540   void EndSourceFile();
00541 
00542   /// EnterSourceFile - Add a source file to the top of the include stack and
00543   /// start lexing tokens from it instead of the current buffer.  Emit an error
00544   /// and don't enter the file on error.
00545   void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
00546                        SourceLocation Loc);
00547 
00548   /// EnterMacro - Add a Macro to the top of the include stack and start lexing
00549   /// tokens from it instead of the current buffer.  Args specifies the
00550   /// tokens input to a function-like macro.
00551   ///
00552   /// ILEnd specifies the location of the ')' for a function-like macro or the
00553   /// identifier for an object-like macro.
00554   void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroArgs *Args);
00555 
00556   /// EnterTokenStream - Add a "macro" context to the top of the include stack,
00557   /// which will cause the lexer to start returning the specified tokens.
00558   ///
00559   /// If DisableMacroExpansion is true, tokens lexed from the token stream will
00560   /// not be subject to further macro expansion.  Otherwise, these tokens will
00561   /// be re-macro-expanded when/if expansion is enabled.
00562   ///
00563   /// If OwnsTokens is false, this method assumes that the specified stream of
00564   /// tokens has a permanent owner somewhere, so they do not need to be copied.
00565   /// If it is true, it assumes the array of tokens is allocated with new[] and
00566   /// must be freed.
00567   ///
00568   void EnterTokenStream(const Token *Toks, unsigned NumToks,
00569                         bool DisableMacroExpansion, bool OwnsTokens);
00570 
00571   /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
00572   /// lexer stack.  This should only be used in situations where the current
00573   /// state of the top-of-stack lexer is known.
00574   void RemoveTopOfLexerStack();
00575 
00576   /// EnableBacktrackAtThisPos - From the point that this method is called, and
00577   /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
00578   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
00579   /// make the Preprocessor re-lex the same tokens.
00580   ///
00581   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
00582   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
00583   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
00584   ///
00585   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
00586   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
00587   /// tokens will continue indefinitely.
00588   ///
00589   void EnableBacktrackAtThisPos();
00590 
00591   /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
00592   void CommitBacktrackedTokens();
00593 
00594   /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
00595   /// EnableBacktrackAtThisPos() was previously called.
00596   void Backtrack();
00597 
00598   /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
00599   /// caching of tokens is on.
00600   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
00601 
00602   /// Lex - Fetch the next token into \arg Result by delegating to whichever
00603   /// lexing mode CurLexerKind currently selects.
00604   void Lex(Token &Result) {
00605     switch (CurLexerKind) {
00606     case CLK_LexAfterModuleImport: LexAfterModuleImport(Result);  return;
00607     case CLK_CachingLexer:         CachingLex(Result);            return;
00608     case CLK_TokenLexer:           CurTokenLexer->Lex(Result);    return;
00609     case CLK_PTHLexer:             CurPTHLexer->Lex(Result);      return;
00610     case CLK_Lexer:                CurLexer->Lex(Result);         return;
00611     }
00612   }
00613 
00614   void LexAfterModuleImport(Token &Result);
00615 
00616   /// LexNonComment - Lex tokens until one that is not a comment has been
00617   /// stored in \arg Result.  This is useful in -E -C mode where comments
00618   /// would foul up preprocessor directive handling.
00619   void LexNonComment(Token &Result) {
00620     Lex(Result);
00621     while (Result.getKind() == tok::comment)
00622       Lex(Result);
00623   }
00624 
00625   /// LexUnexpandedToken - Lex one token exactly as Lex() would, except that
00626   /// macro expansion is switched off for the duration of the call.
00627   void LexUnexpandedToken(Token &Result) {
00628     // Remember the caller's setting so it can be put back afterwards.
00629     const bool SavedDisable = DisableMacroExpansion;
00630 
00631     DisableMacroExpansion = true;
00632     Lex(Result);
00633 
00634     // Restore whatever state was in effect on entry.
00635     DisableMacroExpansion = SavedDisable;
00636   }
00637 
00638   /// LexUnexpandedNonComment - Same as LexNonComment, except that each token
00639   /// is fetched with LexUnexpandedToken, i.e. without macro expansion.
00640   void LexUnexpandedNonComment(Token &Result) {
00641     LexUnexpandedToken(Result);
00642     while (Result.getKind() == tok::comment)
00643       LexUnexpandedToken(Result);
00644   }
00645 
00646   /// LookAhead - Peek at the token N positions ahead without consuming
00647   /// anything.  LookAhead(0) is the token the next Lex() call would return,
00648   /// LookAhead(1) the one after it, and so on.  The tokens returned are
00649   /// normal tokens after phase 5, so this corresponds to 'Lex' rather than
00650   /// 'LexUnexpandedToken'.
00651   const Token &LookAhead(unsigned N) {
00652     const CachedTokensTy::size_type Pos = CachedLexPos + N;
00653     if (Pos >= CachedTokens.size())
00654       return PeekAhead(N+1);
00655     return CachedTokens[Pos];
00656   }
00657 
00658   /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
00659   /// this moves the cached lex position back by N tokens.
00660   /// Note that the number of tokens being reverted should be up to the last
00661   /// backtrack position, not more.
00662   void RevertCachedTokens(unsigned N) {
00663     assert(isBacktrackEnabled() &&
00664            "Should only be called when tokens are cached for backtracking");
00665     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
00666          && "Should revert tokens up to the last backtrack position, not more");
00667     assert(signed(CachedLexPos) - signed(N) >= 0 &&
00668            "Corrupted backtrack positions ?");
00669     CachedLexPos -= N;
00670   }
00671 
00672   /// EnterToken - Enters a token in the token stream to be lexed next. If
00673   /// Backtrack() is called afterwards, the token will remain at the insertion
00674   /// point.
00675   void EnterToken(const Token &Tok) {
00676     EnterCachingLexMode();
00677     CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
00678   }
00679 
00680   /// AnnotateCachedTokens - Tell the Preprocessor that, if it is caching
00681   /// tokens because backtracking is enabled, the most recent cached tokens
00682   /// should be replaced with the given annotation token.  Without
00683   /// backtracking this is a no-op.
00684   ///
00685   /// This exists purely as an optimization: it keeps the cached tokens from
00686   /// being re-parsed and re-resolved after a backtrack.
00687   void AnnotateCachedTokens(const Token &Tok) {
00688     assert(Tok.isAnnotation() && "Expected annotation token");
00689     if (CachedLexPos == 0 || !isBacktrackEnabled())
00690       return;
00691     AnnotatePreviousCachedTokens(Tok);
00692   }
00693 
00694   /// \brief Replace the last token with an annotation token.
00695   ///
00696   /// Like AnnotateCachedTokens(), this swaps an already-parsed (and resolved)
00697   /// token for an annotation token, but only the single cached token just
00698   /// before the current cache position is overwritten; no other cached token
00699   /// is touched. This is a no-op when backtracking is not enabled or
00700   /// CachedLexPos is zero.
00701   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
00702     assert(Tok.isAnnotation() && "Expected annotation token");
00703     if (CachedLexPos == 0 || !isBacktrackEnabled())
00704       return;
00705     CachedTokens[CachedLexPos - 1] = Tok;
00706   }
00707 
00708   /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
00709   /// CurTokenLexer pointers.
00710   void recomputeCurLexerKind();
00711 
00712   /// \brief Returns true if incremental processing is enabled
00713   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
00714 
00715   /// \brief Enables the incremental processing
00716   void enableIncrementalProcessing(bool value = true) {
00717     IncrementalProcessing = value;
00718   }
00719   
00720   /// \brief Specify the point at which code-completion will be performed.
00721   ///
00722   /// \param File the file in which code completion should occur. If
00723   /// this file is included multiple times, code-completion will
00724   /// perform completion the first time it is included. If NULL, this
00725   /// function clears out the code-completion point.
00726   ///
00727   /// \param Line the line at which code completion should occur
00728   /// (1-based).
00729   ///
00730   /// \param Column the column at which code completion should occur
00731   /// (1-based).
00732   ///
00733   /// \returns true if an error occurred, false otherwise.
00734   bool SetCodeCompletionPoint(const FileEntry *File,
00735                               unsigned Line, unsigned Column);
00736 
00737   /// \brief Determine if we are performing code completion.
00738   bool isCodeCompletionEnabled() const { return CodeCompletionFile != 0; }
00739 
00740   /// \brief Returns the location of the code-completion point.
00741   /// Returns an invalid location if code-completion is not enabled or the file
00742   /// containing the code-completion point has not been lexed yet.
00743   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
00744 
00745   /// \brief Returns the start location of the file of code-completion point.
00746   /// Returns an invalid location if code-completion is not enabled or the file
00747   /// containing the code-completion point has not been lexed yet.
00748   SourceLocation getCodeCompletionFileLoc() const {
00749     return CodeCompletionFileLoc;
00750   }
00751 
00752   /// \brief Returns true if code-completion is enabled and we have hit the
00753   /// code-completion point.
00754   bool isCodeCompletionReached() const { return CodeCompletionReached; }
00755 
00756   /// \brief Note that we hit the code-completion point.
00757   void setCodeCompletionReached() {
00758     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
00759     CodeCompletionReached = true;
00760     // Silence any diagnostics that occur after we hit the code-completion.
00761     getDiagnostics().setSuppressAllDiagnostics(true);
00762   }
00763 
00764   /// \brief The location of the currently-active #pragma clang
00765   /// arc_cf_code_audited begin.  Returns an invalid location if there
00766   /// is no such pragma active.
00767   SourceLocation getPragmaARCCFCodeAuditedLoc() const {
00768     return PragmaARCCFCodeAuditedLoc;
00769   }
00770 
00771   /// \brief Set the location of the currently-active #pragma clang
00772   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
00773   void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
00774     PragmaARCCFCodeAuditedLoc = Loc;
00775   }
00776 
00777   /// \brief Instruct the preprocessor to skip part of the main source
00778   /// file (the preamble).
00779   ///
00780   /// \param Bytes The number of bytes in the preamble to skip.
00781   ///
00782   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
00783   /// start of a line.
00784   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
00785     SkipMainFilePreamble.first = Bytes;
00786     SkipMainFilePreamble.second = StartOfLine;
00787   }
00788 
00789   /// Diag - Forwarding function for diagnostics.  This emits a diagnostic at
00790   /// the specified Token's location, translating the token's start
00791   /// position in the current buffer into a SourcePosition object for rendering.
00792   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
00793     return Diags->Report(Loc, DiagID);
00794   }
00795 
00796   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
00797     return Diag(Tok.getLocation(), DiagID);  // Report at the token's location.
00798   }
00799 
00800   /// getSpelling() - Return the 'spelling' of the token at the given
00801   /// location; does not go up to the spelling location or down to the
00802   /// expansion location.
00803   ///
00804   /// \param buffer A buffer which will be used only if the token requires
00805   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
00806   /// \param invalid If non-null, will be set \c true if an error occurs.
00807   StringRef getSpelling(SourceLocation loc,
00808                               SmallVectorImpl<char> &buffer,
00809                               bool *invalid = 0) const {
00810     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
00811   }
00812 
00813   /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
00814   /// token is the characters used to represent the token in the source file
00815   /// after trigraph expansion and escaped-newline folding.  In particular, this
00816   /// wants to get the true, uncanonicalized, spelling of things like digraphs
00817   /// UCNs, etc.
00818   ///
00819   /// \param Invalid If non-null, will be set \c true if an error occurs.
00820   std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
00821     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
00822   }
00823 
00824   /// getSpelling - This method is used to get the spelling of a token into a
00825   /// preallocated buffer, instead of as an std::string.  The caller is required
00826   /// to allocate enough space for the token, which is guaranteed to be at least
00827   /// Tok.getLength() bytes long.  The length of the actual result is returned.
00828   ///
00829   /// Note that this method may do two possible things: it may either fill in
00830   /// the buffer specified with characters, or it may *change the input pointer*
00831   /// to point to a constant buffer with the data already in it (avoiding a
00832   /// copy).  The caller is not allowed to modify the returned buffer pointer
00833   /// if an internal buffer is returned.
00834   unsigned getSpelling(const Token &Tok, const char *&Buffer,
00835                        bool *Invalid = 0) const {
00836     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
00837   }
00838 
00839   /// getSpelling - This method is used to get the spelling of a token into a
00840   /// SmallVector. Note that the returned StringRef may not point to the
00841   /// supplied buffer if a copy can be avoided.
00842   StringRef getSpelling(const Token &Tok,
00843                         SmallVectorImpl<char> &Buffer,
00844                         bool *Invalid = 0) const;
00845 
00846   /// getSpellingOfSingleCharacterNumericConstant - Return the single character
00847   /// that spells \p Tok, which must be a numeric constant of length 1.
00848   char getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
00849                                                    bool *Invalid = 0) const {
00850     assert(Tok.is(tok::numeric_constant) &&
00851            Tok.getLength() == 1 && "Called on unsupported token");
00852     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
00853
00854     // Prefer the literal data pointer carried by the token, when there is one;
00855     // otherwise fall back on getCharacterData, which is slower but always works.
00856     const char *Spelling = Tok.getLiteralData();
00857     if (!Spelling)
00858       Spelling = SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
00859
00860     return *Spelling;
00861   }
00862 
00863   /// \brief Retrieve the name of the immediate macro expansion.
00864   ///
00865   /// Starting from \p Loc, find the name of the macro responsible for its
00866   /// immediate expansion, looking through any intervening macro argument
00867   /// expansions. The returned StringRef refers to the SourceManager-owned
00868   /// buffer of the source where that macro name is spelled, so it must not
00869   /// outlive the SourceManager.
00870   StringRef getImmediateMacroName(SourceLocation Loc) const {
00871     return Lexer::getImmediateMacroName(Loc, SourceMgr, LangOpts);
00872   }
00873 
00874   /// CreateString - Plop the specified string into a scratch buffer and set the
00875   /// specified token's location and length to it.  If specified, the source
00876   /// location provides a location of the expansion point of the token.
00877   void CreateString(const char *Buf, unsigned Len, Token &Tok,
00878                     SourceLocation ExpansionLocStart = SourceLocation(),
00879                     SourceLocation ExpansionLocEnd = SourceLocation());
00880 
00881   /// \brief Computes the source location just past the end of the
00882   /// token at this source location.
00883   ///
00884   /// This routine can be used to produce a source location that points
00885   /// just past the end of the token referenced by \p Loc, and is generally
00886   /// used when a diagnostic needs to point just after a token where it
00887   /// expected something different than it received. If the returned source
00888   /// location would not be meaningful (e.g., if it points into a macro),
00889   /// this routine returns an invalid source location.
00890   ///
00891   /// \param Offset an offset from the end of the token, where the source
00892   /// location should refer to. The default offset (0) produces a source
00893   /// location pointing just past the end of the token; an offset of 1 produces
00894   /// a source location pointing to the last character in the token, etc.
00895   SourceLocation getLocForEndOfToken(SourceLocation Loc,
00896                                      unsigned Offset = 0) const {
00897     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
00898   }
00899 
00900   /// \brief Returns true if the given MacroID location points at the first
00901   /// token of the macro expansion.
00902   ///
00903   /// \param MacroBegin If non-null and function returns true, it is set to
00904   /// begin location of the macro.
00905   bool isAtStartOfMacroExpansion(SourceLocation loc,
00906                                  SourceLocation *MacroBegin = 0) const {
00907     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
00908                                             MacroBegin);
00909   }
00910 
00911   /// \brief Returns true if the given MacroID location points at the last
00912   /// token of the macro expansion.
00913   ///
00914   /// \param MacroEnd If non-null and function returns true, it is set to
00915   /// end location of the macro.
00916   bool isAtEndOfMacroExpansion(SourceLocation loc,
00917                                SourceLocation *MacroEnd = 0) const {
00918     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
00919   }
00920 
00921   /// DumpToken - Print the token to stderr, used for debugging.
00922   ///
00923   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
00924   void DumpLocation(SourceLocation Loc) const;
00925   void DumpMacro(const MacroInfo &MI) const;
00926 
00927   /// AdvanceToTokenCharacter - Given a location that specifies the start of a
00928   /// token, return a new location that specifies a character within the token.
00929   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
00930                                          unsigned Char) const {
00931     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
00932   }
00933 
00934   /// IncrementPasteCounter - Bump the counter of token paste operations:
00935   /// NumFastTokenPaste when \p isFast is set (a 'fast paste' case we
00936   /// handled), NumTokenPaste otherwise.
00937   void IncrementPasteCounter(bool isFast) {
00938     if (!isFast) {
00939       ++NumTokenPaste;
00940       return;
00941     }
00942     ++NumFastTokenPaste;
00943   }
00944 
00945   void PrintStats();
00946 
00947   size_t getTotalMemory() const;
00948 
00949   /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
00950   /// comment (/##/) in microsoft mode, this method handles updating the current
00951   /// state, returning the token on the next source line.
00952   void HandleMicrosoftCommentPaste(Token &Tok);
00953 
00954   //===--------------------------------------------------------------------===//
00955   // Preprocessor callback methods.  These are invoked by a lexer as various
00956   // directives and events are found.
00957 
00958   /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
00959   /// identifier information for the token and install it into the token,
00960   /// updating the token kind accordingly.
00961   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
00962 
00963 private:
00964   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
00965 
00966 public:
00967 
00968   /// SetPoisonReason - Call this function to indicate the reason for
00969   /// poisoning an identifier. If that identifier is accessed while
00970   /// poisoned, then this reason will be used instead of the default
00971   /// "poisoned" diagnostic.
00972   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
00973 
00974   /// HandlePoisonedIdentifier - Display reason for poisoned
00975   /// identifier.
00976   void HandlePoisonedIdentifier(Token & Tok);
00977 
00978   void MaybeHandlePoisonedIdentifier(Token &Identifier) {
00979     // Nothing to do unless the token has identifier info that is poisoned.
00980     IdentifierInfo *II = Identifier.getIdentifierInfo();
00981     if (!II || !II->isPoisoned())
00982       return;
00983     HandlePoisonedIdentifier(Identifier);
00984   }
00985 
00986 private:
00987   /// Identifiers used for SEH handling in Borland. These are only
00988   /// allowed in particular circumstances
00989   // __except block
00990   IdentifierInfo *Ident__exception_code,
00991                  *Ident___exception_code,
00992                  *Ident_GetExceptionCode;
00993   // __except filter expression
00994   IdentifierInfo *Ident__exception_info,
00995                  *Ident___exception_info,
00996                  *Ident_GetExceptionInfo;
00997   // __finally
00998   IdentifierInfo *Ident__abnormal_termination,
00999                  *Ident___abnormal_termination,
01000                  *Ident_AbnormalTermination;
01001 public:
01002   void PoisonSEHIdentifiers(bool Poison = true); // Borland
01003 
01004   /// HandleIdentifier - This callback is invoked when the lexer reads an
01005   /// identifier and has filled in the tokens IdentifierInfo member.  This
01006   /// callback potentially macro expands it or turns it into a named token (like
01007   /// 'for').
01008   void HandleIdentifier(Token &Identifier);
01009 
01010 
01011   /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
01012   /// the current file.  This either returns the EOF token and returns true, or
01013   /// pops a level off the include stack and returns false, at which point the
01014   /// client should call lex again.
01015   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
01016 
01017   /// HandleEndOfTokenLexer - This callback is invoked when the current
01018   /// TokenLexer hits the end of its token stream.
01019   bool HandleEndOfTokenLexer(Token &Result);
01020 
01021   /// HandleDirective - This callback is invoked when the lexer sees a # token
01022   /// at the start of a line.  This consumes the directive, modifies the
01023   /// lexer/preprocessor state, and advances the lexer(s) so that the next token
01024   /// read is the correct one.
01025   void HandleDirective(Token &Result);
01026 
01027   /// CheckEndOfDirective - Ensure that the next token is a tok::eod token.  If
01028   /// not, emit a diagnostic and consume up until the eod.  If EnableMacros is
01029   /// true, then we consider macros that expand to zero tokens as being ok.
01030   void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
01031 
01032   /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
01033   /// current line until the tok::eod token is found.
01034   void DiscardUntilEndOfDirective();
01035 
01036   /// SawDateOrTime - True if a use of __DATE__ or __TIME__ was seen so far.
01037   bool SawDateOrTime() const {
01038     const SourceLocation NoLoc = SourceLocation();
01039     return DATELoc != NoLoc || TIMELoc != NoLoc;
01040   }
01041   unsigned getCounterValue() const { return CounterValue; }
01042   void setCounterValue(unsigned V) { CounterValue = V; }
01043 
01044   /// \brief Retrieves the module that we're currently building, if any.
01045   Module *getCurrentModule();
01046   
01047   /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided
01048   ///  SourceLocation.
01049   MacroInfo *AllocateMacroInfo(SourceLocation L);
01050 
01051   /// CloneMacroInfo - Allocate a new MacroInfo object which is clone of MI.
01052   MacroInfo *CloneMacroInfo(const MacroInfo &MI);
01053 
01054   /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
01055   /// checked and spelled filename, e.g. as an operand of #include. This returns
01056   /// true if the input filename was in <>'s or false if it were in ""'s.  The
01057   /// caller is expected to provide a buffer that is large enough to hold the
01058   /// spelling of the filename, but is also expected to handle the case when
01059   /// this method decides to use a different buffer.
01060   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
01061 
01062   /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
01063   /// return null on failure.  isAngled indicates whether the file reference is
01064   /// for system #include's or not (i.e. using <> instead of "").
01065   const FileEntry *LookupFile(StringRef Filename,
01066                               bool isAngled, const DirectoryLookup *FromDir,
01067                               const DirectoryLookup *&CurDir,
01068                               SmallVectorImpl<char> *SearchPath,
01069                               SmallVectorImpl<char> *RelativePath,
01070                               Module **SuggestedModule,
01071                               bool SkipCache = false);
01072 
01073   /// GetCurDirLookup - The DirectoryLookup structure used to find the current
01074   /// FileEntry, if CurLexer is non-null and if applicable.  This allows us to
01075   /// implement #include_next and find directory-specific properties.
01076   const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
01077 
01078   /// isInPrimaryFile - Return true if we're in the top-level file, not in a
01079   /// #include.
01080   bool isInPrimaryFile() const;
01081 
01082   /// ConcatenateIncludeName - Handle cases where the #include name is expanded
01083   /// from a macro as multiple tokens, which need to be glued together.  This
01084   /// occurs for code like:
01085   ///    #define FOO <a/b.h>
01086   ///    #include FOO
01087   /// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
01088   ///
01089   /// This code concatenates and consumes tokens up to the '>' token.  It
01090   /// returns false if the > was found, otherwise it returns true if it finds
01091   /// and consumes the EOD marker.
01092   bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
01093                               SourceLocation &End);
01094 
01095   /// LexOnOffSwitch - Lex an on-off-switch (C99 6.10.6p2) and verify that it is
01096   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
01097   bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
01098 
01099 private:
01100   /// PushIncludeMacroStack - Save the current lexer state on IncludeMacroStack.
01101   void PushIncludeMacroStack() {
01102     IncludeMacroStack.push_back(IncludeStackInfo(CurLexerKind,
01103                                                  CurLexer.take(),
01104                                                  CurPTHLexer.take(),
01105                                                  CurPPLexer,
01106                                                  CurTokenLexer.take(),
01107                                                  CurDirLookup));
01108     CurPPLexer = 0;  // The previous value now lives in the pushed entry.
01109   }
01110   /// PopIncludeMacroStack - Restore the state saved atop IncludeMacroStack.
01111   void PopIncludeMacroStack() {
01112     const IncludeStackInfo &Top = IncludeMacroStack.back();
01113     CurLexer.reset(Top.TheLexer);
01114     CurPTHLexer.reset(Top.ThePTHLexer);
01115     CurPPLexer = Top.ThePPLexer;
01116     CurTokenLexer.reset(Top.TheTokenLexer);
01117     CurDirLookup = Top.TheDirLookup;
01118     CurLexerKind = Top.CurLexerKind;
01119     IncludeMacroStack.pop_back();  // Top is dangling from here on.
01120   }
01121   /// AllocateMacroInfo - Allocate a new MacroInfo object.
01122   MacroInfo *AllocateMacroInfo();
01123 
01124   /// ReleaseMacroInfo - Release the specified MacroInfo.  This memory will
01125   ///  be reused for allocating new MacroInfo objects.
01126   void ReleaseMacroInfo(MacroInfo* MI);
01127 
01128   /// ReadMacroName - Lex and validate a macro name, which occurs after a
01129   /// #define or #undef.  This emits a diagnostic, sets the token kind to eod,
01130   /// and discards the rest of the macro line if the macro name is invalid.
01131   void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
01132 
01133   /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
01134   /// definition has just been read.  Lex the rest of the arguments and the
01135   /// closing ), updating MI with what we learn and saving in LastTok the
01136   /// last token read.
01137   /// Return true if an error occurs parsing the arg list.
01138   bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
01139 
01140   /// SkipExcludedConditionalBlock - We just read a #if or related directive and
01141   /// decided that the subsequent tokens are in the #if'd out portion of the
01142   /// file.  Lex the rest of the file, until we see an #endif.  If
01143   /// FoundNonSkipPortion is true, then we have already emitted code for part of
01144   /// this #if directive, so #else/#elif blocks should never be entered. If
01145   /// FoundElse is false, then #else directives are ok, if not, then we have
01146   /// already seen one so a #else directive is a duplicate.  When this returns,
01147   /// the caller can lex the first valid token.
01148   void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
01149                                     bool FoundNonSkipPortion, bool FoundElse,
01150                                     SourceLocation ElseLoc = SourceLocation());
01151 
01152   /// PTHSkipExcludedConditionalBlock - A fast PTH version of
01153   ///  SkipExcludedConditionalBlock.
01154   void PTHSkipExcludedConditionalBlock();
01155 
01156   /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
01157   /// may occur after a #if or #elif directive and return it as a bool.  If the
01158   /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
01159   bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
01160 
01161   /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
01162   /// #pragma GCC poison/system_header/dependency and #pragma once.
01163   void RegisterBuiltinPragmas();
01164 
01165   /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
01166   /// identifier table.
01167   void RegisterBuiltinMacros();
01168 
01169   /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
01170   /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
01171   /// the macro should not be expanded return true, otherwise return false.
01172   bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
01173 
01174   /// \brief Cache macro expanded tokens for TokenLexers.
01175   ///
01176   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
01177   /// going to lex to the cache, and when it finishes the tokens are removed
01178   /// from the end of the cache.
01179   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
01180                                   ArrayRef<Token> tokens);
01181   void removeCachedMacroExpandedTokensOfLastLexer();
01182   friend void TokenLexer::ExpandFunctionArguments();
01183 
01184   /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
01185   /// lexed is a '('.  If so, consume the token and return true, if not, this
01186   /// method should have no observable side-effect on the lexed tokens.
01187   bool isNextPPTokenLParen();
01188 
01189   /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
01190   /// invoked to read all of the formal arguments specified for the macro
01191   /// invocation.  This returns null on error.
01192   MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
01193                                        SourceLocation &ExpansionEnd);
01194 
01195   /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
01196   /// as a builtin macro, handle it and return the next token as 'Tok'.
01197   void ExpandBuiltinMacro(Token &Tok);
01198 
01199   /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
01200   /// return the first token after the directive.  The _Pragma token has just
01201   /// been read into 'Tok'.
01202   void Handle_Pragma(Token &Tok);
01203 
01204   /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
01205   /// is not enclosed within a string literal.
01206   void HandleMicrosoft__pragma(Token &Tok);
01207 
01208   /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
01209   /// start lexing tokens from it instead of the current buffer.
01210   void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
01211 
01212   /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
01213   /// start getting tokens from it using the PTH cache.
01214   void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
01215 
01216   /// IsFileLexer - True when lexing from a file, not from a pragma or macro.
01217   static bool IsFileLexer(const Lexer *L, const PreprocessorLexer *P) {
01218     if (!L) return P != 0;
01219     return !L->isPragmaLexer();
01220   }
01221 
01222   static bool IsFileLexer(const IncludeStackInfo& I) {
01223     return IsFileLexer(I.TheLexer, I.ThePPLexer);
01224   }
01225 
01226   bool IsFileLexer() const {
01227     return IsFileLexer(CurLexer.get(), CurPPLexer);
01228   }
01229 
01230   //===--------------------------------------------------------------------===//
01231   // Caching stuff.
01232   void CachingLex(Token &Result);
01233   bool InCachingLexMode() const {
01234     // Null lexer pointers together with an empty IncludeMacroStack mean that
01235     // we are past EOF, not that we are in CachingLex mode.
01236     if (IncludeMacroStack.empty()) return false;
01237     return CurPPLexer == 0 && CurTokenLexer == 0 && CurPTHLexer == 0;
01238   }
01239   void EnterCachingLexMode();
01240   void ExitCachingLexMode() {
01241     if (!InCachingLexMode()) return;  // Not in caching mode: nothing to pop.
01242     RemoveTopOfLexerStack();
01243   }
01244   const Token &PeekAhead(unsigned N);
01245   void AnnotatePreviousCachedTokens(const Token &Tok);
01246 
01247   //===--------------------------------------------------------------------===//
01248   /// Handle*Directive - implement the various preprocessor directives.  These
01249   /// should side-effect the current preprocessor object so that the next call
01250   /// to Lex() will return the appropriate token next.
01251   void HandleLineDirective(Token &Tok);
01252   void HandleDigitDirective(Token &Tok);
01253   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
01254   void HandleIdentSCCSDirective(Token &Tok);
01255   void HandleMacroPublicDirective(Token &Tok);
01256   void HandleMacroPrivateDirective(Token &Tok);
01257 
01258   // File inclusion.
01259   void HandleIncludeDirective(SourceLocation HashLoc,
01260                               Token &Tok,
01261                               const DirectoryLookup *LookupFrom = 0,
01262                               bool isImport = false);
01263   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
01264   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
01265   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
01266   void HandleMicrosoftImportDirective(Token &Tok);
01267 
01268   // Macro handling.
01269   void HandleDefineDirective(Token &Tok);
01270   void HandleUndefDirective(Token &Tok);
01271 
01272   // Conditional Inclusion.
01273   void HandleIfdefDirective(Token &Tok, bool isIfndef,
01274                             bool ReadAnyTokensBeforeDirective);
01275   void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
01276   void HandleEndifDirective(Token &Tok);
01277   void HandleElseDirective(Token &Tok);
01278   void HandleElifDirective(Token &Tok);
01279 
01280   // Pragmas.
01281   void HandlePragmaDirective(unsigned Introducer);
01282 public:
01283   void HandlePragmaOnce(Token &OnceTok);
01284   void HandlePragmaMark();
01285   void HandlePragmaPoison(Token &PoisonTok);
01286   void HandlePragmaSystemHeader(Token &SysHeaderTok);
01287   void HandlePragmaDependency(Token &DependencyTok);
01288   void HandlePragmaComment(Token &CommentTok);
01289   void HandlePragmaMessage(Token &MessageTok);
01290   void HandlePragmaPushMacro(Token &Tok);
01291   void HandlePragmaPopMacro(Token &Tok);
01292   void HandlePragmaIncludeAlias(Token &Tok);
01293   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
01294 
01295   /// Return true and store the first token only if any CommentHandler
01296   /// has inserted some tokens and getCommentRetentionState() is false.
01297   bool HandleComment(Token &Token, SourceRange Comment);
01298 
01299   /// \brief A macro is used, update information about macros that need unused
01300   /// warnings.
01301   void markMacroAsUsed(MacroInfo *MI);
01302 };
01303 
01304 /// \brief Abstract base class that describes a handler that will receive
01305 /// source ranges for each of the comments encountered in the source file.
01306 class CommentHandler {
01307 public:
01308   virtual ~CommentHandler();
01309 
01310   /// The handler shall return true if it has pushed any tokens
01311   /// to be read using e.g. EnterToken or EnterTokenStream.
01312   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
01313 };
01314 
01315 }  // end namespace clang
01316 
01317 #endif