clang  9.0.0svn
Preprocessor.h
Go to the documentation of this file.
1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Builtins.h"
18 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Basic/LLVM.h"
22 #include "clang/Basic/Module.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Lex/MacroInfo.h"
28 #include "clang/Lex/ModuleLoader.h"
29 #include "clang/Lex/ModuleMap.h"
30 #include "clang/Lex/PPCallbacks.h"
31 #include "clang/Lex/Token.h"
32 #include "clang/Lex/TokenLexer.h"
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/FoldingSet.h"
36 #include "llvm/ADT/None.h"
37 #include "llvm/ADT/Optional.h"
38 #include "llvm/ADT/PointerUnion.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/ADT/TinyPtrVector.h"
44 #include "llvm/ADT/iterator_range.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Registry.h"
48 #include <cassert>
49 #include <cstddef>
50 #include <cstdint>
51 #include <memory>
52 #include <map>
53 #include <string>
54 #include <utility>
55 #include <vector>
56 
57 namespace llvm {
58 
59 template<unsigned InternalLen> class SmallString;
60 
61 } // namespace llvm
62 
63 namespace clang {
64 
65 class CodeCompletionHandler;
66 class CommentHandler;
67 class DirectoryEntry;
68 class DirectoryLookup;
69 class ExternalPreprocessorSource;
70 class FileEntry;
71 class FileManager;
72 class HeaderSearch;
73 class MacroArgs;
74 class PragmaHandler;
75 class PragmaNamespace;
76 class PreprocessingRecord;
77 class PreprocessorLexer;
78 class PreprocessorOptions;
79 class ScratchBuffer;
80 class TargetInfo;
81 
82 /// Stores token information for comparing actual tokens with
83 /// predefined values. Only handles simple tokens and identifiers.
84 class TokenValue {
86  IdentifierInfo *II;
87 
88 public:
89  TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
90  assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
91  assert(Kind != tok::identifier &&
92  "Identifiers should be created by TokenValue(IdentifierInfo *)");
93  assert(!tok::isLiteral(Kind) && "Literals are not supported.");
94  assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
95  }
96 
97  TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
98 
99  bool operator==(const Token &Tok) const {
100  return Tok.getKind() == Kind &&
101  (!II || II == Tok.getIdentifierInfo());
102  }
103 };
104 
105 /// Context in which macro name is used.
enum MacroUse {
  // other than #define or #undef
  MU_Other = 0,

  // macro name specified in #define
  MU_Define = 1,

  // macro name specified in #undef
  MU_Undef = 2
};
116 
117 /// Engages in a tight little dance with the lexer to efficiently
118 /// preprocess tokens.
119 ///
120 /// Lexers know only about tokens within a single source file, and don't
121 /// know anything about preprocessor-level issues like the \#include stack,
122 /// token expansion, etc.
126 
127  std::shared_ptr<PreprocessorOptions> PPOpts;
128  DiagnosticsEngine *Diags;
129  LangOptions &LangOpts;
130  const TargetInfo *Target = nullptr;
131  const TargetInfo *AuxTarget = nullptr;
132  FileManager &FileMgr;
133  SourceManager &SourceMgr;
134  std::unique_ptr<ScratchBuffer> ScratchBuf;
135  HeaderSearch &HeaderInfo;
136  ModuleLoader &TheModuleLoader;
137 
138  /// External source of macros.
139  ExternalPreprocessorSource *ExternalSource;
140 
141  /// A BumpPtrAllocator object used to quickly allocate and release
142  /// objects internal to the Preprocessor.
143  llvm::BumpPtrAllocator BP;
144 
145  /// Identifiers for builtin macros and other builtins.
146  IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
147  IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
148  IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
149  IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
150  IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
151  IdentifierInfo *Ident__COUNTER__; // __COUNTER__
152  IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
153  IdentifierInfo *Ident__identifier; // __identifier
154  IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
155  IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
156  IdentifierInfo *Ident__has_feature; // __has_feature
157  IdentifierInfo *Ident__has_extension; // __has_extension
158  IdentifierInfo *Ident__has_builtin; // __has_builtin
159  IdentifierInfo *Ident__has_attribute; // __has_attribute
160  IdentifierInfo *Ident__has_include; // __has_include
161  IdentifierInfo *Ident__has_include_next; // __has_include_next
162  IdentifierInfo *Ident__has_warning; // __has_warning
163  IdentifierInfo *Ident__is_identifier; // __is_identifier
164  IdentifierInfo *Ident__building_module; // __building_module
165  IdentifierInfo *Ident__MODULE__; // __MODULE__
166  IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
167  IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
168  IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
169  IdentifierInfo *Ident__is_target_arch; // __is_target_arch
170  IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
171  IdentifierInfo *Ident__is_target_os; // __is_target_os
172  IdentifierInfo *Ident__is_target_environment; // __is_target_environment
173 
174  // Weak, only valid (and set) while InMacroArgs is true.
175  Token* ArgMacro;
176 
177  SourceLocation DATELoc, TIMELoc;
178 
179  // Next __COUNTER__ value, starts at 0.
180  unsigned CounterValue = 0;
181 
182  enum {
183  /// Maximum depth of \#includes.
184  MaxAllowedIncludeStackDepth = 200
185  };
186 
187  // State that is set before the preprocessor begins.
188  bool KeepComments : 1;
189  bool KeepMacroComments : 1;
190  bool SuppressIncludeNotFoundError : 1;
191 
192  // State that changes while the preprocessor runs:
193  bool InMacroArgs : 1; // True if parsing fn macro invocation args.
194 
195  /// Whether the preprocessor owns the header search object.
196  bool OwnsHeaderSearch : 1;
197 
198  /// True if macro expansion is disabled.
199  bool DisableMacroExpansion : 1;
200 
201  /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
202  /// when parsing preprocessor directives.
203  bool MacroExpansionInDirectivesOverride : 1;
204 
206 
207  /// Whether we have already loaded macros from the external source.
208  mutable bool ReadMacrosFromExternalSource : 1;
209 
210  /// True if pragmas are enabled.
211  bool PragmasEnabled : 1;
212 
213  /// True if the current build action is a preprocessing action.
214  bool PreprocessedOutput : 1;
215 
216  /// True if we are currently preprocessing a #if or #elif directive
217  bool ParsingIfOrElifDirective;
218 
219  /// True if we are pre-expanding macro arguments.
220  bool InMacroArgPreExpansion;
221 
222  /// Mapping/lookup information for all identifiers in
223  /// the program, including program keywords.
224  mutable IdentifierTable Identifiers;
225 
226  /// This table contains all the selectors in the program.
227  ///
228  /// Unlike IdentifierTable above, this table *isn't* populated by the
229  /// preprocessor. It is declared/expanded here because its role/lifetime is
230  /// conceptually similar to the IdentifierTable. In addition, the current
231  /// control flow (in clang::ParseAST()) makes it convenient to put here.
232  ///
233  /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
234  /// the lifetime of the preprocessor.
235  SelectorTable Selectors;
236 
237  /// Information about builtins.
239 
240  /// Tracks all of the pragmas that the client registered
241  /// with this preprocessor.
242  std::unique_ptr<PragmaNamespace> PragmaHandlers;
243 
244  /// Pragma handlers of the original source are stored here during the
245  /// parsing of a model file.
246  std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
247 
248  /// Tracks all of the comment handlers that the client registered
249  /// with this preprocessor.
250  std::vector<CommentHandler *> CommentHandlers;
251 
252  /// True if we want to ignore EOF token and continue later on (thus
253  /// avoid tearing the Lexer and etc. down).
254  bool IncrementalProcessing = false;
255 
256  /// The kind of translation unit we are processing.
257  TranslationUnitKind TUKind;
258 
259  /// The code-completion handler.
260  CodeCompletionHandler *CodeComplete = nullptr;
261 
262  /// The file that we're performing code-completion for, if any.
263  const FileEntry *CodeCompletionFile = nullptr;
264 
265  /// The offset in file for the code-completion point.
266  unsigned CodeCompletionOffset = 0;
267 
268  /// The location for the code-completion point. This gets instantiated
269  /// when the CodeCompletionFile gets \#include'ed for preprocessing.
270  SourceLocation CodeCompletionLoc;
271 
272  /// The start location for the file of the code-completion point.
273  ///
274  /// This gets instantiated when the CodeCompletionFile gets \#include'ed
275  /// for preprocessing.
276  SourceLocation CodeCompletionFileLoc;
277 
278  /// The source location of the \c import contextual keyword we just
279  /// lexed, if any.
280  SourceLocation ModuleImportLoc;
281 
282  /// The module import path that we're currently processing.
284 
285  /// Whether the last token we lexed was an '@'.
286  bool LastTokenWasAt = false;
287 
288  /// A position within a C++20 import-seq.
class ImportSeq {
public:
  /// Position within the token stream.
  enum State : int {
    // Positive values represent a number of unclosed brackets.
    AtTopLevel = 0,
    AfterTopLevelTokenSeq = -1,
    AfterExport = -2,
    AfterImportSeq = -3,
  };

  /// Start tracking from position \p S. Left implicit so that an ImportSeq
  /// can be initialised directly from a State enumerator.
  ImportSeq(State S) : S(S) {}

  /// Saw any kind of open bracket.
  ///
  /// Any non-positive (top-level) state counts as depth zero, so the result is
  /// always a bracket depth of at least one.
  void handleOpenBracket() {
    S = static_cast<State>(std::max<int>(S, 0) + 1);
  }

  /// Saw any kind of close bracket other than '}'.
  ///
  /// Reduces the bracket depth by one; any top-level state (and depth one)
  /// ends up as AtTopLevel, so the depth never goes negative.
  void handleCloseBracket() {
    S = static_cast<State>(std::max<int>(S, 1) - 1);
  }

  /// Saw a close brace.
  ///
  /// Behaves like any other close bracket, and additionally ends the current
  /// top-level token sequence when it returns us to the top level, unless we
  /// are still in the suffix that follows a header-name.
  void handleCloseBrace() {
    handleCloseBracket();
    if (S == AtTopLevel && !AfterHeaderName)
      S = AfterTopLevelTokenSeq;
  }

  /// Saw a semicolon.
  ///
  /// At top level this ends the current token sequence and clears the
  /// header-name marker; inside brackets it has no effect.
  void handleSemi() {
    if (atTopLevel()) {
      S = AfterTopLevelTokenSeq;
      AfterHeaderName = false;
    }
  }

  /// Saw an 'export' identifier.
  ///
  /// Only advances to AfterExport immediately after a top-level token
  /// sequence; in any other top-level state it is treated as an ordinary
  /// token. Inside brackets the state is left alone.
  void handleExport() {
    if (S == AfterTopLevelTokenSeq)
      S = AfterExport;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw an 'import' identifier.
  ///
  /// Advances to AfterImportSeq when it directly follows a top-level token
  /// sequence or an 'export'; in any other top-level state it is treated as an
  /// ordinary token. Inside brackets the state is left alone.
  void handleImport() {
    if (S == AfterTopLevelTokenSeq || S == AfterExport)
      S = AfterImportSeq;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw a 'header-name' token; do not recognize any more 'import' tokens
  /// until we reach a top-level semicolon.
  void handleHeaderName() {
    if (S == AfterImportSeq)
      AfterHeaderName = true;
    handleMisc();
  }

  /// Saw any other token.
  ///
  /// Collapses every top-level state to AtTopLevel; bracket depths are kept.
  void handleMisc() {
    if (S <= 0)
      S = AtTopLevel;
  }

  /// True when no brackets are open (every non-positive state).
  bool atTopLevel() const { return S <= 0; }

  /// True when the last tokens seen form an import-seq.
  bool afterImportSeq() const { return S == AfterImportSeq; }

private:
  State S;
  /// Whether we're in the pp-import-suffix following the header-name in a
  /// pp-import. If so, a close-brace is not sufficient to end the
  /// top-level-token-seq of an import-seq.
  bool AfterHeaderName = false;
};
362 
363  /// Our current position within a C++20 import-seq.
364  ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
365 
366  /// Whether the module import expects an identifier next. Otherwise,
367  /// it expects a '.' or ';'.
368  bool ModuleImportExpectsIdentifier = false;
369 
370  /// The source location of the currently-active
371  /// \#pragma clang arc_cf_code_audited begin.
372  SourceLocation PragmaARCCFCodeAuditedLoc;
373 
374  /// The source location of the currently-active
375  /// \#pragma clang assume_nonnull begin.
376  SourceLocation PragmaAssumeNonNullLoc;
377 
378  /// True if we hit the code-completion point.
379  bool CodeCompletionReached = false;
380 
381  /// The code completion token containing the information
382  /// on the stem that is to be code completed.
383  IdentifierInfo *CodeCompletionII = nullptr;
384 
385  /// Range for the code completion token.
386  SourceRange CodeCompletionTokenRange;
387 
388  /// The directory that the main file should be considered to occupy,
389  /// if it does not correspond to a real file (as happens when building a
390  /// module).
391  const DirectoryEntry *MainFileDir = nullptr;
392 
393  /// The number of bytes that we will initially skip when entering the
394  /// main file, along with a flag that indicates whether skipping this number
395  /// of bytes will place the lexer at the start of a line.
396  ///
397  /// This is used when loading a precompiled preamble.
398  std::pair<int, bool> SkipMainFilePreamble;
399 
400  /// Whether we hit an error due to reaching max allowed include depth. Allows
401  /// to avoid hitting the same error over and over again.
402  bool HasReachedMaxIncludeDepth = false;
403 
404  /// The number of currently-active calls to Lex.
405  ///
406  /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
407  /// require asking for multiple additional tokens. This counter makes it
408  /// possible for Lex to detect whether it's producing a token for the end
409  /// of phase 4 of translation or for some other situation.
410  unsigned LexLevel = 0;
411 
412 public:
417  bool FoundElse;
419 
421  bool FoundNonSkipPortion, bool FoundElse,
422  SourceLocation ElseLoc)
423  : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
424  FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
425  ElseLoc(ElseLoc) {}
426  };
427 
428 private:
429  friend class ASTReader;
430  friend class MacroArgs;
431 
432  class PreambleConditionalStackStore {
433  enum State {
434  Off = 0,
435  Recording = 1,
436  Replaying = 2,
437  };
438 
439  public:
440  PreambleConditionalStackStore() = default;
441 
442  void startRecording() { ConditionalStackState = Recording; }
443  void startReplaying() { ConditionalStackState = Replaying; }
444  bool isRecording() const { return ConditionalStackState == Recording; }
445  bool isReplaying() const { return ConditionalStackState == Replaying; }
446 
447  ArrayRef<PPConditionalInfo> getStack() const {
448  return ConditionalStack;
449  }
450 
451  void doneReplaying() {
452  ConditionalStack.clear();
453  ConditionalStackState = Off;
454  }
455 
456  void setStack(ArrayRef<PPConditionalInfo> s) {
457  if (!isRecording() && !isReplaying())
458  return;
459  ConditionalStack.clear();
460  ConditionalStack.append(s.begin(), s.end());
461  }
462 
463  bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
464 
465  bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
466 
467  void clearSkipInfo() { SkipInfo.reset(); }
468 
470 
471  private:
472  SmallVector<PPConditionalInfo, 4> ConditionalStack;
473  State ConditionalStackState = Off;
474  } PreambleConditionalStack;
475 
476  /// The current top of the stack that we're lexing from if
477  /// not expanding a macro and we are lexing directly from source code.
478  ///
479  /// Only one of CurLexer, or CurTokenLexer will be non-null.
480  std::unique_ptr<Lexer> CurLexer;
481 
482  /// The current top of the stack that we're lexing from
483  /// if not expanding a macro.
484  ///
485  /// This is an alias for CurLexer.
486  PreprocessorLexer *CurPPLexer = nullptr;
487 
488  /// Used to find the current FileEntry, if CurLexer is non-null
489  /// and if applicable.
490  ///
491  /// This allows us to implement \#include_next and find directory-specific
492  /// properties.
493  const DirectoryLookup *CurDirLookup = nullptr;
494 
495  /// The current macro we are expanding, if we are expanding a macro.
496  ///
497  /// One of CurLexer and CurTokenLexer must be null.
498  std::unique_ptr<TokenLexer> CurTokenLexer;
499 
500  /// The kind of lexer we're currently working with.
501  enum CurLexerKind {
502  CLK_Lexer,
503  CLK_TokenLexer,
504  CLK_CachingLexer,
505  CLK_LexAfterModuleImport
506  } CurLexerKind = CLK_Lexer;
507 
508  /// If the current lexer is for a submodule that is being built, this
509  /// is that submodule.
510  Module *CurLexerSubmodule = nullptr;
511 
512  /// Keeps track of the stack of files currently
513  /// \#included, and macros currently being expanded from, not counting
514  /// CurLexer/CurTokenLexer.
515  struct IncludeStackInfo {
516  enum CurLexerKind CurLexerKind;
517  Module *TheSubmodule;
518  std::unique_ptr<Lexer> TheLexer;
519  PreprocessorLexer *ThePPLexer;
520  std::unique_ptr<TokenLexer> TheTokenLexer;
521  const DirectoryLookup *TheDirLookup;
522 
523  // The following constructors are completely useless copies of the default
524  // versions, only needed to pacify MSVC.
525  IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
526  std::unique_ptr<Lexer> &&TheLexer,
527  PreprocessorLexer *ThePPLexer,
528  std::unique_ptr<TokenLexer> &&TheTokenLexer,
529  const DirectoryLookup *TheDirLookup)
530  : CurLexerKind(std::move(CurLexerKind)),
531  TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
532  ThePPLexer(std::move(ThePPLexer)),
533  TheTokenLexer(std::move(TheTokenLexer)),
534  TheDirLookup(std::move(TheDirLookup)) {}
535  };
536  std::vector<IncludeStackInfo> IncludeMacroStack;
537 
538  /// Actions invoked when some preprocessor activity is
539  /// encountered (e.g. a file is \#included, etc).
540  std::unique_ptr<PPCallbacks> Callbacks;
541 
542  struct MacroExpandsInfo {
543  Token Tok;
544  MacroDefinition MD;
545  SourceRange Range;
546 
547  MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
548  : Tok(Tok), MD(MD), Range(Range) {}
549  };
550  SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
551 
552  /// Information about a name that has been used to define a module macro.
553  struct ModuleMacroInfo {
554  /// The most recent macro directive for this identifier.
555  MacroDirective *MD;
556 
557  /// The active module macros for this identifier.
558  llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
559 
560  /// The generation number at which we last updated ActiveModuleMacros.
561  /// \see Preprocessor::VisibleModules.
562  unsigned ActiveModuleMacrosGeneration = 0;
563 
564  /// Whether this macro name is ambiguous.
565  bool IsAmbiguous = false;
566 
567  /// The module macros that are overridden by this macro.
568  llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
569 
570  ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
571  };
572 
573  /// The state of a macro for an identifier.
574  class MacroState {
575  mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
576 
577  ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
578  const IdentifierInfo *II) const {
579  if (II->isOutOfDate())
580  PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
581  // FIXME: Find a spare bit on IdentifierInfo and store a
582  // HasModuleMacros flag.
583  if (!II->hasMacroDefinition() ||
584  (!PP.getLangOpts().Modules &&
585  !PP.getLangOpts().ModulesLocalVisibility) ||
586  !PP.CurSubmoduleState->VisibleModules.getGeneration())
587  return nullptr;
588 
589  auto *Info = State.dyn_cast<ModuleMacroInfo*>();
590  if (!Info) {
591  Info = new (PP.getPreprocessorAllocator())
592  ModuleMacroInfo(State.get<MacroDirective *>());
593  State = Info;
594  }
595 
596  if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
597  Info->ActiveModuleMacrosGeneration)
598  PP.updateModuleMacroInfo(II, *Info);
599  return Info;
600  }
601 
602  public:
603  MacroState() : MacroState(nullptr) {}
604  MacroState(MacroDirective *MD) : State(MD) {}
605 
606  MacroState(MacroState &&O) noexcept : State(O.State) {
607  O.State = (MacroDirective *)nullptr;
608  }
609 
610  MacroState &operator=(MacroState &&O) noexcept {
611  auto S = O.State;
612  O.State = (MacroDirective *)nullptr;
613  State = S;
614  return *this;
615  }
616 
617  ~MacroState() {
618  if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
619  Info->~ModuleMacroInfo();
620  }
621 
622  MacroDirective *getLatest() const {
623  if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
624  return Info->MD;
625  return State.get<MacroDirective*>();
626  }
627 
628  void setLatest(MacroDirective *MD) {
629  if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
630  Info->MD = MD;
631  else
632  State = MD;
633  }
634 
635  bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
636  auto *Info = getModuleInfo(PP, II);
637  return Info ? Info->IsAmbiguous : false;
638  }
639 
641  getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
642  if (auto *Info = getModuleInfo(PP, II))
643  return Info->ActiveModuleMacros;
644  return None;
645  }
646 
647  MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
648  SourceManager &SourceMgr) const {
649  // FIXME: Incorporate module macros into the result of this.
650  if (auto *Latest = getLatest())
651  return Latest->findDirectiveAtLoc(Loc, SourceMgr);
652  return {};
653  }
654 
655  void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
656  if (auto *Info = getModuleInfo(PP, II)) {
657  Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
658  Info->ActiveModuleMacros.begin(),
659  Info->ActiveModuleMacros.end());
660  Info->ActiveModuleMacros.clear();
661  Info->IsAmbiguous = false;
662  }
663  }
664 
665  ArrayRef<ModuleMacro*> getOverriddenMacros() const {
666  if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
667  return Info->OverriddenMacros;
668  return None;
669  }
670 
671  void setOverriddenMacros(Preprocessor &PP,
672  ArrayRef<ModuleMacro *> Overrides) {
673  auto *Info = State.dyn_cast<ModuleMacroInfo*>();
674  if (!Info) {
675  if (Overrides.empty())
676  return;
677  Info = new (PP.getPreprocessorAllocator())
678  ModuleMacroInfo(State.get<MacroDirective *>());
679  State = Info;
680  }
681  Info->OverriddenMacros.clear();
682  Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
683  Overrides.begin(), Overrides.end());
684  Info->ActiveModuleMacrosGeneration = 0;
685  }
686  };
687 
688  /// For each IdentifierInfo that was associated with a macro, we
689  /// keep a mapping to the history of all macro definitions and #undefs in
690  /// the reverse order (the latest one is in the head of the list).
691  ///
692  /// This mapping lives within the \p CurSubmoduleState.
693  using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
694 
695  struct SubmoduleState;
696 
697  /// Information about a submodule that we're currently building.
698  struct BuildingSubmoduleInfo {
699  /// The module that we are building.
700  Module *M;
701 
702  /// The location at which the module was included.
703  SourceLocation ImportLoc;
704 
705  /// Whether we entered this submodule via a pragma.
706  bool IsPragma;
707 
708  /// The previous SubmoduleState.
709  SubmoduleState *OuterSubmoduleState;
710 
711  /// The number of pending module macro names when we started building this.
712  unsigned OuterPendingModuleMacroNames;
713 
714  BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
715  SubmoduleState *OuterSubmoduleState,
716  unsigned OuterPendingModuleMacroNames)
717  : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
718  OuterSubmoduleState(OuterSubmoduleState),
719  OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
720  };
721  SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
722 
723  /// Information about a submodule's preprocessor state.
724  struct SubmoduleState {
725  /// The macros for the submodule.
726  MacroMap Macros;
727 
728  /// The set of modules that are visible within the submodule.
729  VisibleModuleSet VisibleModules;
730 
731  // FIXME: CounterValue?
732  // FIXME: PragmaPushMacroInfo?
733  };
734  std::map<Module *, SubmoduleState> Submodules;
735 
736  /// The preprocessor state for preprocessing outside of any submodule.
737  SubmoduleState NullSubmoduleState;
738 
739  /// The current submodule state. Will be \p NullSubmoduleState if we're not
740  /// in a submodule.
741  SubmoduleState *CurSubmoduleState;
742 
743  /// The set of known macros exported from modules.
744  llvm::FoldingSet<ModuleMacro> ModuleMacros;
745 
746  /// The names of potential module macros that we've not yet processed.
747  llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
748 
749  /// The list of module macros, for each identifier, that are not overridden by
750  /// any other module macro.
751  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
752  LeafModuleMacros;
753 
754  /// Macros that we want to warn because they are not used at the end
755  /// of the translation unit.
756  ///
757  /// We store just their SourceLocations instead of
758  /// something like MacroInfo*. The benefit of this is that when we are
759  /// deserializing from PCH, we don't need to deserialize identifier & macros
760  /// just so that we can report that they are unused, we just warn using
761  /// the SourceLocations of this set (that will be filled by the ASTReader).
762  /// We are using SmallPtrSet instead of a vector for faster removal.
763  using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
764  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
765 
766  /// A "freelist" of MacroArg objects that can be
767  /// reused for quick allocation.
768  MacroArgs *MacroArgCache = nullptr;
769 
770  /// For each IdentifierInfo used in a \#pragma push_macro directive,
771  /// we keep a MacroInfo stack used to restore the previous macro value.
772  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
773  PragmaPushMacroInfo;
774 
775  // Various statistics we track for performance analysis.
776  unsigned NumDirectives = 0;
777  unsigned NumDefined = 0;
778  unsigned NumUndefined = 0;
779  unsigned NumPragma = 0;
780  unsigned NumIf = 0;
781  unsigned NumElse = 0;
782  unsigned NumEndif = 0;
783  unsigned NumEnteredSourceFiles = 0;
784  unsigned MaxIncludeStackDepth = 0;
785  unsigned NumMacroExpanded = 0;
786  unsigned NumFnMacroExpanded = 0;
787  unsigned NumBuiltinMacroExpanded = 0;
788  unsigned NumFastMacroExpanded = 0;
789  unsigned NumTokenPaste = 0;
790  unsigned NumFastTokenPaste = 0;
791  unsigned NumSkipped = 0;
792 
793  /// The predefined macros that preprocessor should use from the
794  /// command line etc.
795  std::string Predefines;
796 
797  /// The file ID for the preprocessor predefines.
798  FileID PredefinesFileID;
799 
800  /// The file ID for the PCH through header.
801  FileID PCHThroughHeaderFileID;
802 
803  /// Whether tokens are being skipped until a #pragma hdrstop is seen.
804  bool SkippingUntilPragmaHdrStop = false;
805 
806  /// Whether tokens are being skipped until the through header is seen.
807  bool SkippingUntilPCHThroughHeader = false;
808 
809  /// \{
810  /// Cache of macro expanders to reduce malloc traffic.
811  enum { TokenLexerCacheSize = 8 };
812  unsigned NumCachedTokenLexers;
813  std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
814  /// \}
815 
816  /// Keeps macro expanded tokens for TokenLexers.
817  //
818  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
819  /// is going to lex to the cache, and when it finishes the tokens are removed
820  /// from the end of the cache.
821  SmallVector<Token, 16> MacroExpandedTokens;
822  std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
823 
824  /// A record of the macro definitions and expansions that
825  /// occurred during preprocessing.
826  ///
827  /// This is an optional side structure that can be enabled with
828  /// \c createPreprocessingRecord() prior to preprocessing.
829  PreprocessingRecord *Record = nullptr;
830 
831  /// Cached tokens state.
833 
834  /// Cached tokens are stored here when we do backtracking or
835  /// lookahead. They are "lexed" by the CachingLex() method.
837 
838  /// The position of the cached token that CachingLex() should
839  /// "lex" next.
840  ///
841  /// If it points beyond the CachedTokens vector, it means that a normal
842  /// Lex() should be invoked.
843  CachedTokensTy::size_type CachedLexPos = 0;
844 
845  /// Stack of backtrack positions, allowing nested backtracks.
846  ///
847  /// The EnableBacktrackAtThisPos() method pushes a position to
848  /// indicate where CachedLexPos should be set when the BackTrack() method is
849  /// invoked (at which point the last position is popped).
850  std::vector<CachedTokensTy::size_type> BacktrackPositions;
851 
852  struct MacroInfoChain {
853  MacroInfo MI;
854  MacroInfoChain *Next;
855  };
856 
857  /// MacroInfos are managed as a chain for easy disposal. This is the head
858  /// of that list.
859  MacroInfoChain *MIChainHead = nullptr;
860 
861  void updateOutOfDateIdentifier(IdentifierInfo &II) const;
862 
863 public:
864  Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
866  HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
867  IdentifierInfoLookup *IILookup = nullptr,
868  bool OwnsHeaderSearch = false,
870 
871  ~Preprocessor();
872 
873  /// Initialize the preprocessor using information about the target.
874  ///
875  /// \param Target is owned by the caller and must remain valid for the
876  /// lifetime of the preprocessor.
877  /// \param AuxTarget is owned by the caller and must remain valid for
878  /// the lifetime of the preprocessor.
879  void Initialize(const TargetInfo &Target,
880  const TargetInfo *AuxTarget = nullptr);
881 
882  /// Initialize the preprocessor to parse a model file
883  ///
884  /// To parse model files the preprocessor of the original source is reused to
885  /// preserve the identifier table. However, to avoid some duplicate
886  /// information in the preprocessor some cleanup is needed before it is used
887  /// to parse model files. This method does that cleanup.
888  void InitializeForModelFile();
889 
890  /// Cleanup after model file parsing
891  void FinalizeForModelFile();
892 
893  /// Retrieve the preprocessor options used to initialize this
894  /// preprocessor.
895  PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
896 
897  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
898  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
899 
900  const LangOptions &getLangOpts() const { return LangOpts; }
901  const TargetInfo &getTargetInfo() const { return *Target; }
902  const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
903  FileManager &getFileManager() const { return FileMgr; }
904  SourceManager &getSourceManager() const { return SourceMgr; }
905  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
906 
907  IdentifierTable &getIdentifierTable() { return Identifiers; }
908  const IdentifierTable &getIdentifierTable() const { return Identifiers; }
909  SelectorTable &getSelectorTable() { return Selectors; }
911  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
912 
914  ExternalSource = Source;
915  }
916 
918  return ExternalSource;
919  }
920 
921  /// Retrieve the module loader associated with this preprocessor.
922  ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
923 
925  return TheModuleLoader.HadFatalFailure;
926  }
927 
928  /// True if we are currently preprocessing a #if or #elif directive
930  return ParsingIfOrElifDirective;
931  }
932 
933  /// Set comment retention; keeping macro comments also keeps comments.
934  void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
935    this->KeepMacroComments = KeepMacroComments;
936    this->KeepComments = KeepComments || KeepMacroComments;
937  }
938 
939  bool getCommentRetentionState() const { return KeepComments; }
940 
941  void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
942  bool getPragmasEnabled() const { return PragmasEnabled; }
943 
944  void SetSuppressIncludeNotFoundError(bool Suppress) {
945  SuppressIncludeNotFoundError = Suppress;
946  }
947 
949  return SuppressIncludeNotFoundError;
950  }
951 
952  /// Sets whether the preprocessor is responsible for producing output or if
953  /// it is producing tokens to be consumed by Parse and Sema.
954  void setPreprocessedOutput(bool IsPreprocessedOutput) {
955  PreprocessedOutput = IsPreprocessedOutput;
956  }
957 
958  /// Returns true if the preprocessor is responsible for generating output,
959  /// false if it is producing tokens to be consumed by Parse and Sema.
960  bool isPreprocessedOutput() const { return PreprocessedOutput; }
961 
962  /// Return true if we are lexing directly from the specified lexer.
963  bool isCurrentLexer(const PreprocessorLexer *L) const {
964  return CurPPLexer == L;
965  }
966 
967  /// Return the current lexer being lexed from.
968  ///
969  /// Note that this ignores any potentially active macro expansions and _Pragma
970  /// expansions going on at the time.
971  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
972 
973  /// Return the current file lexer being lexed from.
974  ///
975  /// Note that this ignores any potentially active macro expansions and _Pragma
976  /// expansions going on at the time.
977  PreprocessorLexer *getCurrentFileLexer() const;
978 
979  /// Return the submodule owning the file being lexed. This may not be
980  /// the current module if we have changed modules since entering the file.
981  Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
982 
983  /// Returns the FileID for the preprocessor predefines.
984  FileID getPredefinesFileID() const { return PredefinesFileID; }
985 
986  /// \{
987  /// Accessors for preprocessor callbacks.
988  ///
989  /// The preprocessor owns every PPCallbacks object passed to addPPCallbacks.
990  PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
991  void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
992    if (!Callbacks)
993      Callbacks = std::move(C);
994    else // Chain the new callbacks with the ones already installed.
995      Callbacks = llvm::make_unique<PPChainedCallbacks>(std::move(C),
996                                                         std::move(Callbacks));
997  }
998  /// \}
999 
1000  /// Return true if the given identifier has a macro definition.
1001  bool isMacroDefined(StringRef Id) { return isMacroDefined(&Identifiers.get(Id)); }
1002  bool isMacroDefined(const IdentifierInfo *II) {
1003    if (!II->hasMacroDefinition())
1004      return false;
1005    return !getLangOpts().Modules || static_cast<bool>(getMacroDefinition(II));
1006  }
1007 
1008  /// Determine whether II is defined as a macro within the module M,
1009  /// if that is a module that we've already preprocessed. Does not check for
1010  /// macros imported into M.
1012  if (!II->hasMacroDefinition())
1013  return false;
1014  auto I = Submodules.find(M);
1015  if (I == Submodules.end())
1016  return false;
1017  auto J = I->second.Macros.find(II);
1018  if (J == I->second.Macros.end())
1019  return false;
1020  auto *MD = J->second.getLatest();
1021  return MD && MD->isDefined();
1022  }
1023 
1025  if (!II->hasMacroDefinition())
1026  return {};
1027 
1028  MacroState &S = CurSubmoduleState->Macros[II];
1029  auto *MD = S.getLatest();
1030  while (MD && isa<VisibilityMacroDirective>(MD))
1031  MD = MD->getPrevious();
1032  return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1033  S.getActiveModuleMacros(*this, II),
1034  S.isAmbiguous(*this, II));
1035  }
1036 
1038  SourceLocation Loc) {
1039  if (!II->hadMacroDefinition())
1040  return {};
1041 
1042  MacroState &S = CurSubmoduleState->Macros[II];
1044  if (auto *MD = S.getLatest())
1045  DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1046  // FIXME: Compute the set of active module macros at the specified location.
1047  return MacroDefinition(DI.getDirective(),
1048  S.getActiveModuleMacros(*this, II),
1049  S.isAmbiguous(*this, II));
1050  }
1051 
1052  /// Given an identifier, return its latest non-imported MacroDirective
1053  /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1055  if (!II->hasMacroDefinition())
1056  return nullptr;
1057 
1058  auto *MD = getLocalMacroDirectiveHistory(II);
1059  if (!MD || MD->getDefinition().isUndefined())
1060  return nullptr;
1061 
1062  return MD;
1063  }
1064 
1065  const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1066  return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1067  }
1068 
1070  if (!II->hasMacroDefinition())
1071  return nullptr;
1072  if (auto MD = getMacroDefinition(II))
1073  return MD.getMacroInfo();
1074  return nullptr;
1075  }
1076 
1077  /// Given an identifier, return the latest non-imported macro
1078  /// directive for that identifier.
1079  ///
1080  /// One can iterate over all previous macro directives from the most recent
1081  /// one.
1082  MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1083 
1084  /// Add a directive to the macro directive history for this identifier.
1085  void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1087  SourceLocation Loc) {
1088  DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1089  appendMacroDirective(II, MD);
1090  return MD;
1091  }
1093  MacroInfo *MI) {
1094  return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1095  }
1096 
1097  /// Set a MacroDirective that was loaded from a PCH file.
1098  void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1099  MacroDirective *MD);
1100 
1101  /// Register an exported macro for a module and identifier.
1102  ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1103  ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1104  ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
1105 
1106  /// Get the list of leaf (non-overridden) module macros for a name.
1108  if (II->isOutOfDate())
1109  updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1110  auto I = LeafModuleMacros.find(II);
1111  if (I != LeafModuleMacros.end())
1112  return I->second;
1113  return None;
1114  }
1115 
1116  /// \{
1117  /// Iterators for the macro history table. Currently defined macros have
1118  /// IdentifierInfo::hasMacroDefinition() set and an empty
1119  /// MacroInfo::getUndefLoc() at the head of the list.
1120  using macro_iterator = MacroMap::const_iterator;
1121 
1122  macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1123  macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1124 
1125  llvm::iterator_range<macro_iterator>
1126  macros(bool IncludeExternalMacros = true) const { // Range over the macro table.
1127    const macro_iterator First = macro_begin(IncludeExternalMacros);
1128    const macro_iterator Last = macro_end(IncludeExternalMacros);
1129    return llvm::make_range(First, Last);
1130  }
1131 
1132  /// \}
1133 
1134  /// Return the name of the macro defined before \p Loc that has
1135  /// spelling \p Tokens. If there are multiple macros with the same spelling,
1136  /// return the last one defined.
1137  StringRef getLastMacroWithSpelling(SourceLocation Loc,
1138  ArrayRef<TokenValue> Tokens) const;
1139 
1140  const std::string &getPredefines() const { return Predefines; }
1141 
1142  /// Set the predefines for this Preprocessor.
1143  ///
1144  /// These predefines are automatically injected when parsing the main file.
1145  void setPredefines(const char *P) { Predefines = P; }
1146  void setPredefines(StringRef P) { Predefines = P; }
1147 
1148  /// Return information about the specified preprocessor
1149  /// identifier token.
1150  IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1151  return &Identifiers.get(Name);
1152  }
1153 
1154  /// Add the specified pragma handler to this preprocessor.
1155  ///
1156  /// If \p Namespace is non-empty, then it is a token required to exist on the
1157  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1158  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1160  AddPragmaHandler(StringRef(), Handler);
1161  }
1162 
1163  /// Remove the specific pragma handler from this preprocessor.
1164  ///
1165  /// If \p Namespace is non-empty, then it should be the namespace that
1166  /// \p Handler was added to. It is an error to remove a handler that
1167  /// has not been registered.
1168  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1170  RemovePragmaHandler(StringRef(), Handler);
1171  }
1172 
1173  /// Install empty handlers for all pragmas (making them ignored).
1174  void IgnorePragmas();
1175 
1176  /// Add the specified comment handler to the preprocessor.
1177  void addCommentHandler(CommentHandler *Handler);
1178 
1179  /// Remove the specified comment handler.
1180  ///
1181  /// It is an error to remove a handler that has not been registered.
1182  void removeCommentHandler(CommentHandler *Handler);
1183 
1184  /// Set the code completion handler to the given object.
1186  CodeComplete = &Handler;
1187  }
1188 
1189  /// Retrieve the current code-completion handler.
1191  return CodeComplete;
1192  }
1193 
1194  /// Clear out the code completion handler.
1196  CodeComplete = nullptr;
1197  }
1198 
1199  /// Hook used by the lexer to invoke the "included file" code
1200  /// completion point.
1201  void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1202 
1203  /// Hook used by the lexer to invoke the "natural language" code
1204  /// completion point.
1205  void CodeCompleteNaturalLanguage();
1206 
1207  /// Set the code completion token for filtering purposes.
1209  CodeCompletionII = Filter;
1210  }
1211 
1212  /// Set the code completion token range for detecting replacement range later
1213  /// on.
1215  const SourceLocation End) {
1216  CodeCompletionTokenRange = {Start, End};
1217  }
1219  return CodeCompletionTokenRange;
1220  }
1221 
1222  /// Get the code completion token for filtering purposes.
1224  if (CodeCompletionII)
1225  return CodeCompletionII->getName();
1226  return {};
1227  }
1228 
1229  /// Retrieve the preprocessing record, or NULL if there is no
1230  /// preprocessing record.
1231  PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1232 
1233  /// Create a new preprocessing record, which will keep track of
1234  /// all macro expansions, macro definitions, etc.
1235  void createPreprocessingRecord();
1236 
1237  /// Returns true if the FileEntry is the PCH through header.
1238  bool isPCHThroughHeader(const FileEntry *FE);
1239 
1240  /// True if creating a PCH with a through header.
1241  bool creatingPCHWithThroughHeader();
1242 
1243  /// True if using a PCH with a through header.
1244  bool usingPCHWithThroughHeader();
1245 
1246  /// True if creating a PCH with a #pragma hdrstop.
1247  bool creatingPCHWithPragmaHdrStop();
1248 
1249  /// True if using a PCH with a #pragma hdrstop.
1250  bool usingPCHWithPragmaHdrStop();
1251 
1252  /// Skip tokens until after the #include of the through header or
1253  /// until after a #pragma hdrstop.
1254  void SkipTokensWhileUsingPCH();
1255 
1256  /// Process directives while skipping until the through header or
1257  /// #pragma hdrstop is found.
1258  void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1259  SourceLocation HashLoc);
1260 
1261  /// Enter the specified FileID as the main source file,
1262  /// which implicitly adds the builtin defines etc.
1263  void EnterMainSourceFile();
1264 
1265  /// Inform the preprocessor callbacks that processing is complete.
1266  void EndSourceFile();
1267 
1268  /// Add a source file to the top of the include stack and
1269  /// start lexing tokens from it instead of the current buffer.
1270  ///
1271  /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1272  bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
1273  SourceLocation Loc);
1274 
1275  /// Add a Macro to the top of the include stack and start lexing
1276  /// tokens from it instead of the current buffer.
1277  ///
1278  /// \param Args specifies the tokens input to a function-like macro.
1279  /// \param ILEnd specifies the location of the ')' for a function-like macro
1280  /// or the identifier for an object-like macro.
1281  void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1282  MacroArgs *Args);
1283 
1284  /// Add a "macro" context to the top of the include stack,
1285  /// which will cause the lexer to start returning the specified tokens.
1286  ///
1287  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1288  /// will not be subject to further macro expansion. Otherwise, these tokens
1289  /// will be re-macro-expanded when/if expansion is enabled.
1290  ///
1291  /// If \p OwnsTokens is false, this method assumes that the specified stream
1292  /// of tokens has a permanent owner somewhere, so they do not need to be
1293  /// copied. If it is true, it assumes the array of tokens is allocated with
1294  /// \c new[] and the Preprocessor will delete[] it.
1295 private:
1296  void EnterTokenStream(const Token *Toks, unsigned NumToks,
1297  bool DisableMacroExpansion, bool OwnsTokens);
1298 
1299 public:
1300  void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1301  bool DisableMacroExpansion) { // Releases \p Toks to the private overload.
1302  EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, /*OwnsTokens=*/true);
1303  }
1304  /// Overload taking an ArrayRef; forwards with OwnsTokens set to false.
1305  void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
1306  EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, /*OwnsTokens=*/false);
1307  }
1308 
1309  /// Pop the current lexer/macro exp off the top of the lexer stack.
1310  ///
1311  /// This should only be used in situations where the current state of the
1312  /// top-of-stack lexer is known.
1313  void RemoveTopOfLexerStack();
1314 
1315  /// From the point that this method is called, and until
1316  /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1317  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1318  /// make the Preprocessor re-lex the same tokens.
1319  ///
1320  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1321  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1322  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1323  ///
1324  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1325  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1326  /// tokens will continue indefinitely.
1327  ///
1328  void EnableBacktrackAtThisPos();
1329 
1330  /// Disable the last EnableBacktrackAtThisPos call.
1331  void CommitBacktrackedTokens();
1332 
1333  /// Make Preprocessor re-lex the tokens that were lexed since
1334  /// EnableBacktrackAtThisPos() was previously called.
1335  void Backtrack();
1336 
1337  /// True if EnableBacktrackAtThisPos() was called and
1338  /// caching of tokens is on.
1339  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1340 
1341  /// Lex the next token for this preprocessor.
1342  void Lex(Token &Result);
1343 
1344  /// Lex a token, forming a header-name token if possible.
1345  bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1346 
1347  bool LexAfterModuleImport(Token &Result);
1348  void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1349 
1350  void makeModuleVisible(Module *M, SourceLocation Loc);
1351 
1353  return CurSubmoduleState->VisibleModules.getImportLoc(M);
1354  }
1355 
1356  /// Lex a string literal, possibly formed by concatenating several string
1357  /// literals and possibly coming from macro expansion.
1358  /// \returns true on success, false if an error diagnostic has been generated.
1359  bool LexStringLiteral(Token &Result, std::string &String,
1360                        const char *DiagnosticTag, bool AllowMacroExpansion) {
1361    if (!AllowMacroExpansion) // Fetch the first token of the literal.
1362      LexUnexpandedToken(Result);
1363    else
1364      Lex(Result);
1365    return FinishLexStringLiteral(Result, String, DiagnosticTag,
1366                                  AllowMacroExpansion);
1367  }
1368 
1369  /// Complete the lexing of a string literal where the first token has
1370  /// already been lexed (see LexStringLiteral).
1371  bool FinishLexStringLiteral(Token &Result, std::string &String,
1372  const char *DiagnosticTag,
1373  bool AllowMacroExpansion);
1374 
1375  /// Lex tokens until one that is not a comment is found, and leave that
1376  /// token in \p Result.
1377  ///
1378  /// This is useful in -E -C mode where comments would foul up preprocessor
1379  /// directive handling.
1380  void LexNonComment(Token &Result) {
1381    Lex(Result);
1382    while (Result.is(tok::comment))
1383      Lex(Result);
1384  }
1385 
1386  /// Just like Lex, except that identifier tokens are not macro-expanded.
1387  void LexUnexpandedToken(Token &Result) {
1388    // Remember the current setting so it can be put back afterwards.
1389    const bool SavedDisableMacroExpansion = DisableMacroExpansion;
1390    DisableMacroExpansion = true;
1391    Lex(Result);
1392    // Restore the saved value rather than unconditionally re-enabling: if
1393    // expansion was already disabled on entry, it stays disabled.
1394    DisableMacroExpansion = SavedDisableMacroExpansion;
1395  }
1397 
1398  /// Like LexNonComment, but this disables macro expansion of
1399  /// identifier tokens.
1401  do
1402  LexUnexpandedToken(Result);
1403  while (Result.getKind() == tok::comment);
1404  }
1405 
1406  /// Parses a simple integer literal to get its numeric value. Floating
1407  /// point literals and user defined literals are rejected. Used primarily to
1408  /// handle pragmas that accept integer arguments.
1409  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1410 
1411  /// Disables macro expansion everywhere except for preprocessor directives.
1413  DisableMacroExpansion = true;
1414  MacroExpansionInDirectivesOverride = true;
1415  }
1416 
1417  /// Return the token \p N positions ahead without consuming any tokens.
1418  ///
1419  /// LookAhead(0) is the token the next call to Lex() would return,
1420  /// LookAhead(1) is the one after it, and so on. The tokens returned are
1421  /// normal tokens after phase 5, so this is equivalent to using 'Lex',
1422  /// not 'LexUnexpandedToken'.
1423  const Token &LookAhead(unsigned N) {
1424    assert(LexLevel == 0 && "cannot use lookahead while lexing");
1425    const auto Index = CachedLexPos + N;
1426    if (Index >= CachedTokens.size())
1427      return PeekAhead(N + 1); // Past the end of what is cached so far.
1428    return CachedTokens[Index];
1429  }
1431 
1432  /// When backtracking is enabled and tokens are cached, this moves the
1433  /// cached lex position back by \p N tokens.
1434  ///
1435  /// Note that the number of tokens being reverted should be up to the last
1436  /// backtrack position, not more; this is checked by assertion only.
1437  void RevertCachedTokens(unsigned N) {
1438  assert(isBacktrackEnabled() &&
1439  "Should only be called when tokens are cached for backtracking");
1440  assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1441  && "Should revert tokens up to the last backtrack position, not more");
1442  assert(signed(CachedLexPos) - signed(N) >= 0 &&
1443  "Corrupted backtrack positions ?");
1444  CachedLexPos -= N; // Step the position back by N cached tokens.
1445  }
1446 
1447  /// Enters a token in the token stream to be lexed next.
1448  ///
1449  /// If Backtrack() is called afterwards, the token will remain at the
1450  /// insertion point.
1451  void EnterToken(const Token &Tok) {
1452    if (!LexLevel) {
1453      EnterCachingLexMode();
1454      CachedTokens.insert(CachedTokens.begin() + CachedLexPos, Tok);
1455      return;
1456    }
1457    // It's not correct in general to enter caching lex mode while in the
1458    // middle of a nested lexing action, so use a one-token stream instead.
1459    auto Single = llvm::make_unique<Token[]>(1);
1460    Single[0] = Tok;
1461    EnterTokenStream(std::move(Single), 1, /*DisableMacroExpansion=*/true);
1462  }
1463 
1464  /// Notify the Preprocessor that, if it is caching tokens (because
1465  /// backtracking is enabled), the most recent cached tokens should be
1466  /// replaced with the given annotation token. This has no effect if
1467  /// backtracking is not enabled.
1468  ///
1469  /// Note that the use of this function is just for optimization, so that the
1470  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1471  /// invoked.
1472  void AnnotateCachedTokens(const Token &Tok) {
1473    assert(Tok.isAnnotation() && "Expected annotation token");
1474    if (isBacktrackEnabled() && CachedLexPos != 0)
1475      AnnotatePreviousCachedTokens(Tok);
1476  }
1477 
1478  /// Get the location of the last cached token, suitable for setting the end
1479  /// location of an annotation token.
1481  assert(CachedLexPos != 0);
1482  return CachedTokens[CachedLexPos-1].getLastLoc();
1483  }
1484 
1485  /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1486  /// CachedTokens.
1487  bool IsPreviousCachedToken(const Token &Tok) const;
1488 
1489  /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1490  /// in \p NewToks.
1491  ///
1492  /// Useful when a token needs to be split into smaller ones and the most
1493  /// recent token in CachedTokens must be updated to reflect that.
1494  void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1495 
1496  /// Replace the last token with an annotation token.
1497  ///
1498  /// Like AnnotateCachedTokens(), this routine replaces an
1499  /// already-parsed (and resolved) token with an annotation
1500  /// token. However, this routine only replaces the last token with
1501  /// the annotation token; it does not affect any other cached
1502  /// tokens. This function has no effect if backtracking is not
1503  /// enabled.
1505  assert(Tok.isAnnotation() && "Expected annotation token");
1506  if (CachedLexPos != 0 && isBacktrackEnabled())
1507  CachedTokens[CachedLexPos-1] = Tok;
1508  }
1509 
1510  /// Enter an annotation token into the token stream.
1511  void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1512  void *AnnotationVal);
1513 
1514  /// Overwrite the most recently cached token with the provided identifier
1515  /// token, in order to cache an action performed by typo correction.
1516  void TypoCorrectToken(const Token &Tok) {
1517    assert(Tok.getIdentifierInfo() && "Expected identifier token");
1518    if (isBacktrackEnabled() && CachedLexPos != 0)
1519      CachedTokens[CachedLexPos - 1] = Tok;
1520  }
1521 
1522  /// Recompute the current lexer kind based on the CurLexer/
1523  /// CurTokenLexer pointers.
1524  void recomputeCurLexerKind();
1525 
1526  /// Returns true if incremental processing is enabled
1527  bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1528 
1529  /// Enables the incremental processing
1530  void enableIncrementalProcessing(bool value = true) {
1531  IncrementalProcessing = value;
1532  }
1533 
1534  /// Specify the point at which code-completion will be performed.
1535  ///
1536  /// \param File the file in which code completion should occur. If
1537  /// this file is included multiple times, code-completion will
1538  /// perform completion the first time it is included. If NULL, this
1539  /// function clears out the code-completion point.
1540  ///
1541  /// \param Line the line at which code completion should occur
1542  /// (1-based).
1543  ///
1544  /// \param Column the column at which code completion should occur
1545  /// (1-based).
1546  ///
1547  /// \returns true if an error occurred, false otherwise.
1548  bool SetCodeCompletionPoint(const FileEntry *File,
1549  unsigned Line, unsigned Column);
1550 
1551  /// Determine if we are performing code completion.
1552  bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1553 
1554  /// Returns the location of the code-completion point.
1555  ///
1556  /// Returns an invalid location if code-completion is not enabled or the file
1557  /// containing the code-completion point has not been lexed yet.
1558  SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1559 
1560  /// Returns the start location of the file of code-completion point.
1561  ///
1562  /// Returns an invalid location if code-completion is not enabled or the file
1563  /// containing the code-completion point has not been lexed yet.
1565  return CodeCompletionFileLoc;
1566  }
1567 
1568  /// Returns true if code-completion is enabled and we have hit the
1569  /// code-completion point.
1570  bool isCodeCompletionReached() const { return CodeCompletionReached; }
1571 
1572  /// Note that we hit the code-completion point.
1574  assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1575  CodeCompletionReached = true;
1576  // Silence any diagnostics that occur after we hit the code-completion.
1577  getDiagnostics().setSuppressAllDiagnostics(true);
1578  }
1579 
1580  /// The location of the currently-active \#pragma clang
1581  /// arc_cf_code_audited begin.
1582  ///
1583  /// Returns an invalid location if there is no such pragma active.
1585  return PragmaARCCFCodeAuditedLoc;
1586  }
1587 
1588  /// Set the location of the currently-active \#pragma clang
1589  /// arc_cf_code_audited begin. An invalid location ends the pragma.
1591  PragmaARCCFCodeAuditedLoc = Loc;
1592  }
1593 
1594  /// The location of the currently-active \#pragma clang
1595  /// assume_nonnull begin.
1596  ///
1597  /// Returns an invalid location if there is no such pragma active.
1599  return PragmaAssumeNonNullLoc;
1600  }
1601 
1602  /// Set the location of the currently-active \#pragma clang
1603  /// assume_nonnull begin. An invalid location ends the pragma.
1605  PragmaAssumeNonNullLoc = Loc;
1606  }
1607 
1608  /// Set the directory in which the main file should be considered
1609  /// to have been found, if it is not a real file.
1610  void setMainFileDir(const DirectoryEntry *Dir) {
1611  MainFileDir = Dir;
1612  }
1613 
1614  /// Instruct the preprocessor to skip part of the main source file.
1615  ///
1616  /// \param Bytes The number of bytes in the preamble to skip.
1617  ///
1618  /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1619  /// start of a line.
1620  void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1621  SkipMainFilePreamble.first = Bytes;
1622  SkipMainFilePreamble.second = StartOfLine;
1623  }
1624 
1625  /// Forwarding function for diagnostics. Reports diagnostic \p DiagID at
1626  /// \p Loc through the preprocessor's DiagnosticsEngine and returns the
1627  /// resulting builder.
1628  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1629    return Diags->Report(Loc, DiagID);
1630  }
1631  /// Overload that reports at the location of \p Tok.
1632  DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1633    return Diag(Tok.getLocation(), DiagID);
1634  }
1635 
1636  /// Return the 'spelling' of the token at the given
1637  /// location; does not go up to the spelling location or down to the
1638  /// expansion location.
1639  ///
1640  /// \param buffer A buffer which will be used only if the token requires
1641  /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1642  /// \param invalid If non-null, will be set \c true if an error occurs.
1644  SmallVectorImpl<char> &buffer,
1645  bool *invalid = nullptr) const {
1646  return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1647  }
1648 
1649  /// Return the 'spelling' of the Tok token.
1650  ///
1651  /// The spelling of a token is the characters used to represent the token in
1652  /// the source file after trigraph expansion and escaped-newline folding. In
1653  /// particular, this wants to get the true, uncanonicalized, spelling of
1654  /// things like digraphs, UCNs, etc.
1655  ///
1656  /// \param Invalid If non-null, will be set \c true if an error occurs.
1657  std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1658  return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1659  }
1660 
1661  /// Get the spelling of a token into a preallocated buffer, instead
1662  /// of as an std::string.
1663  ///
1664  /// The caller is required to allocate enough space for the token, which is
1665  /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1666  /// actual result is returned.
1667  ///
1668  /// Note that this method may do two possible things: it may either fill in
1669  /// the buffer specified with characters, or it may *change the input pointer*
1670  /// to point to a constant buffer with the data already in it (avoiding a
1671  /// copy). The caller is not allowed to modify the returned buffer pointer
1672  /// if an internal buffer is returned.
1673  unsigned getSpelling(const Token &Tok, const char *&Buffer,
1674  bool *Invalid = nullptr) const {
1675  return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1676  }
1677 
1678  /// Get the spelling of a token into a SmallVector.
1679  ///
1680  /// Note that the returned StringRef may not point to the
1681  /// supplied buffer if a copy can be avoided.
1682  StringRef getSpelling(const Token &Tok,
1683  SmallVectorImpl<char> &Buffer,
1684  bool *Invalid = nullptr) const;
1685 
1686  /// Relex the token at the specified location.
1687  /// \returns true if there was a failure, false on success.
1688  bool getRawToken(SourceLocation Loc, Token &Result,
1689  bool IgnoreWhiteSpace = false) {
1690  return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1691  }
1692 
1693  /// Given a Token \p Tok that is a numeric constant with length 1,
1694  /// return the character.
1695  char
1697  bool *Invalid = nullptr) const {
1698  assert(Tok.is(tok::numeric_constant) &&
1699  Tok.getLength() == 1 && "Called on unsupported token");
1700  assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1701 
1702  // If the token is carrying a literal data pointer, just use it.
1703  if (const char *D = Tok.getLiteralData())
1704  return *D;
1705 
1706  // Otherwise, fall back on getCharacterData, which is slower, but always
1707  // works.
1708  return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1709  }
1710 
1711  /// Retrieve the name of the immediate macro expansion.
1712  ///
1713  /// This routine starts from a source location, and finds the name of the
1714  /// macro responsible for its immediate expansion. It looks through any
1715  /// intervening macro argument expansions to compute this. It returns a
1716  /// StringRef that refers to the SourceManager-owned buffer of the source
1717  /// where that macro name is spelled. Thus, the result shouldn't out-live
1718  /// the SourceManager.
1720  return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1721  }
1722 
1723  /// Plop the specified string into a scratch buffer and set the
1724  /// specified token's location and length to it.
1725  ///
1726  /// If specified, the source location provides a location of the expansion
1727  /// point of the token.
1728  void CreateString(StringRef Str, Token &Tok,
1729  SourceLocation ExpansionLocStart = SourceLocation(),
1730  SourceLocation ExpansionLocEnd = SourceLocation());
1731 
1732  /// Split the first Length characters out of the token starting at TokLoc
1733  /// and return a location pointing to the split token. Re-lexing from the
1734  /// split token will return the split token rather than the original.
1735  SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1736 
1737  /// Computes the source location just past the end of the
1738  /// token at this source location.
1739  ///
1740  /// This routine can be used to produce a source location that
1741  /// points just past the end of the token referenced by \p Loc, and
1742  /// is generally used when a diagnostic needs to point just after a
1743  /// token where it expected something different that it received. If
1744  /// the returned source location would not be meaningful (e.g., if
1745  /// it points into a macro), this routine returns an invalid
1746  /// source location.
1747  ///
1748  /// \param Offset an offset from the end of the token, where the source
1749  /// location should refer to. The default offset (0) produces a source
1750  /// location pointing just past the end of the token; an offset of 1 produces
1751  /// a source location pointing to the last character in the token, etc.
1753  return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1754  }
1755 
1756  /// Returns true if the given MacroID location points at the first
1757  /// token of the macro expansion.
1758  ///
1759  /// \param MacroBegin If non-null and function returns true, it is set to
1760  /// begin location of the macro.
1762  SourceLocation *MacroBegin = nullptr) const {
1763  return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1764  MacroBegin);
1765  }
1766 
1767  /// Returns true if the given MacroID location points at the last
1768  /// token of the macro expansion.
1769  ///
1770  /// \param MacroEnd If non-null and function returns true, it is set to
1771  /// end location of the macro.
1773  SourceLocation *MacroEnd = nullptr) const {
1774  return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1775  }
1776 
1777  /// Print the token to stderr, used for debugging.
1778  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1779  void DumpLocation(SourceLocation Loc) const;
1780  void DumpMacro(const MacroInfo &MI) const;
1781  void dumpMacroInfo(const IdentifierInfo *II);
1782 
1783  /// Given a location that specifies the start of a
1784  /// token, return a new location that specifies a character within the token.
1786  unsigned Char) const {
1787  return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1788  }
1789 
1790  /// Increment the counters for the number of token paste operations
1791  /// performed.
1792  ///
1793  /// If fast was specified, this is a 'fast paste' case we handled.
1794  void IncrementPasteCounter(bool isFast) {
1795  if (isFast)
1796  ++NumFastTokenPaste;
1797  else
1798  ++NumTokenPaste;
1799  }
1800 
1801  void PrintStats();
1802 
1803  size_t getTotalMemory() const;
1804 
1805  /// When the macro expander pastes together a comment (/##/) in Microsoft
1806  /// mode, this method handles updating the current state, returning the
1807  /// token on the next source line.
1808  void HandleMicrosoftCommentPaste(Token &Tok);
1809 
1810  //===--------------------------------------------------------------------===//
1811  // Preprocessor callback methods. These are invoked by a lexer as various
1812  // directives and events are found.
1813 
1814  /// Given a tok::raw_identifier token, look up the
1815  /// identifier information for the token and install it into the token,
1816  /// updating the token kind accordingly.
1817  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1818 
1819 private:
1820  llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
      ///< Keyed by identifier. NOTE(review): presumably holds the DiagID given
      ///< to SetPoisonReason for that identifier — confirm in the out-of-line
      ///< definition.
1821 
1822 public:
1823  /// Specifies the reason for poisoning an identifier.
1824  ///
1825  /// If that identifier is accessed while poisoned, then this reason will be
1826  /// used instead of the default "poisoned" diagnostic.
1827  void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1828 
1829  /// Display reason for poisoned identifier.
1830  void HandlePoisonedIdentifier(Token & Identifier);
1831 
1833  if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1834  if(II->isPoisoned()) {
1835  HandlePoisonedIdentifier(Identifier);
1836  }
1837  }
1838  }
1839 
1840 private:
1841  /// Identifiers used for SEH handling in Borland. These are only
1842  /// allowed in particular circumstances.
1843  // __except block
1844  IdentifierInfo *Ident__exception_code,
1845  *Ident___exception_code,
1846  *Ident_GetExceptionCode;
1847  // __except filter expression
1848  IdentifierInfo *Ident__exception_info,
1849  *Ident___exception_info,
1850  *Ident_GetExceptionInfo;
1851  // __finally
1852  IdentifierInfo *Ident__abnormal_termination,
1853  *Ident___abnormal_termination,
1854  *Ident_AbnormalTermination;
1855 
1856  const char *getCurLexerEndPos();
1857  void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1858 
1859 public:
1860  void PoisonSEHIdentifiers(bool Poison = true); // Borland
1861 
1862  /// Callback invoked when the lexer reads an identifier and has
1863  /// filled in the tokens IdentifierInfo member.
1864  ///
1865  /// This callback potentially macro expands it or turns it into a named
1866  /// token (like 'for').
1867  ///
1868  /// \returns true if we actually computed a token, false if we need to
1869  /// lex again.
1870  bool HandleIdentifier(Token &Identifier);
1871 
1872  /// Callback invoked when the lexer hits the end of the current file.
1873  ///
1874  /// This either returns the EOF token and returns true, or
1875  /// pops a level off the include stack and returns false, at which point the
1876  /// client should call lex again.
1877  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1878 
1879  /// Callback invoked when the current TokenLexer hits the end of its
1880  /// token stream.
1881  bool HandleEndOfTokenLexer(Token &Result);
1882 
1883  /// Callback invoked when the lexer sees a # token at the start of a
1884  /// line.
1885  ///
1886  /// This consumes the directive, modifies the lexer/preprocessor state, and
1887  /// advances the lexer(s) so that the next token read is the correct one.
1888  void HandleDirective(Token &Result);
1889 
1890  /// Ensure that the next token is a tok::eod token.
1891  ///
1892  /// If not, emit a diagnostic and consume up until the eod.
1893  /// If \p EnableMacros is true, then we consider macros that expand to zero
1894  /// tokens as being ok.
1895  ///
1896  /// \return The location of the end of the directive (the terminating
1897  /// newline).
1898  SourceLocation CheckEndOfDirective(const char *DirType,
1899  bool EnableMacros = false);
1900 
1901  /// Read and discard all tokens remaining on the current line until
1902  /// the tok::eod token is found. Returns the range of the skipped tokens.
1903  SourceRange DiscardUntilEndOfDirective();
1904 
1905  /// Returns true if the preprocessor has seen a use of
1906  /// __DATE__ or __TIME__ in the file so far.
1907  bool SawDateOrTime() const {
1908  return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1909  }
1910  unsigned getCounterValue() const { return CounterValue; }
1911  void setCounterValue(unsigned V) { CounterValue = V; }
1912 
1913  /// Retrieves the module that we're currently building, if any.
1914  Module *getCurrentModule();
1915 
1916  /// Allocate a new MacroInfo object with the provided SourceLocation.
1917  MacroInfo *AllocateMacroInfo(SourceLocation L);
1918 
1919  /// Turn the specified lexer token into a fully checked and spelled
1920  /// filename, e.g. as an operand of \#include.
1921  ///
1922  /// The caller is expected to provide a buffer that is large enough to hold
1923  /// the spelling of the filename, but is also expected to handle the case
1924  /// when this method decides to use a different buffer.
1925  ///
1926  /// \returns true if the input filename was in <>'s or false if it was
1927  /// in ""'s.
1928  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
1929 
1930  /// Given a "foo" or <foo> reference, look up the indicated file.
1931  ///
1932  /// Returns null on failure. \p isAngled indicates whether the file
1933  /// reference is for system \#include's or not (i.e. using <> instead of "").
1934  const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1935  bool isAngled, const DirectoryLookup *FromDir,
1936  const FileEntry *FromFile,
1937  const DirectoryLookup *&CurDir,
1938  SmallVectorImpl<char> *SearchPath,
1939  SmallVectorImpl<char> *RelativePath,
1940  ModuleMap::KnownHeader *SuggestedModule,
1941  bool *IsMapped, bool *IsFrameworkFound,
1942  bool SkipCache = false);
1943 
1944  /// Get the DirectoryLookup structure used to find the current
1945  /// FileEntry, if CurLexer is non-null and if applicable.
1946  ///
1947  /// This allows us to implement \#include_next and find directory-specific
1948  /// properties.
1949  const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1950 
1951  /// Return true if we're in the top-level file, not in a \#include.
1952  bool isInPrimaryFile() const;
1953 
1954  /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1955  /// followed by EOD. Return true if the token is not a valid on-off-switch.
1956  bool LexOnOffSwitch(tok::OnOffSwitch &Result);
1957 
1958  bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
1959  bool *ShadowFlag = nullptr);
1960 
1961  void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
1962  Module *LeaveSubmodule(bool ForPragma);
1963 
1964 private:
1965  friend void TokenLexer::ExpandFunctionArguments();
1966 
1967  void PushIncludeMacroStack() {
      // Save the current lexer state as a new top entry of IncludeMacroStack.
      // A caching lexer must never be pushed (asserted below).
1968  assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
1969  IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
1970  std::move(CurLexer), CurPPLexer,
1971  std::move(CurTokenLexer), CurDirLookup);
      // CurLexer and CurTokenLexer were handed over with std::move; CurPPLexer
      // was only copied into the entry, so it is reset explicitly.
1972  CurPPLexer = nullptr;
1973  }
1974 
1975  void PopIncludeMacroStack() {
1976  CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1977  CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1978  CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1979  CurDirLookup = IncludeMacroStack.back().TheDirLookup;
1980  CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
1981  CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1982  IncludeMacroStack.pop_back();
1983  }
1984 
1985  void PropagateLineStartLeadingSpaceInfo(Token &Result);
1986 
1987  /// Determine whether we need to create module macros for #defines in the
1988  /// current context.
1989  bool needModuleMacros() const;
1990 
1991  /// Update the set of active module macros and ambiguity flag for a module
1992  /// macro name.
1993  void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
1994 
1995  DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
1996  SourceLocation Loc);
1997  UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
1998  VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1999  bool isPublic);
2000 
2001  /// Lex and validate a macro name, which occurs after a
2002  /// \#define or \#undef.
2003  ///
2004  /// \param MacroNameTok Token that represents the name defined or undefined.
2005  /// \param IsDefineUndef Kind of preprocessor directive.
2006  /// \param ShadowFlag Points to flag that is set if macro name shadows
2007  /// a keyword.
2008  ///
2009  /// This emits a diagnostic, sets the token kind to eod,
2010  /// and discards the rest of the macro line if the macro name is invalid.
2011  void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2012  bool *ShadowFlag = nullptr);
2013 
2014  /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2015  /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2016  /// doing so performs certain validity checks including (but not limited to):
2017  /// - # (stringization) is followed by a macro parameter
2018  /// \param MacroNameTok - Token that represents the macro name
2019  /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2020  ///
2021  /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2022  /// returns a nullptr if an invalid sequence of tokens is encountered.
2023  MacroInfo *ReadOptionalMacroParameterListAndBody(
2024  const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2025 
2026  /// The ( starting an argument list of a macro definition has just been read.
2027  /// Lex the rest of the parameters and the closing ), updating \p MI with
2028  /// what we learn and saving in \p LastTok the last token read.
2029  /// Return true if an error occurs parsing the arg list.
2030  bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2031 
2032  /// We just read a \#if or related directive and decided that the
2033  /// subsequent tokens are in the \#if'd out portion of the
2034  /// file. Lex the rest of the file, until we see an \#endif. If \p
2035  /// FoundNonSkipPortion is true, then we have already emitted code for part of
2036  /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2037  /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2038  /// already seen one so a \#else directive is a duplicate. When this returns,
2039  /// the caller can lex the first valid token.
2040  void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2041  SourceLocation IfTokenLoc,
2042  bool FoundNonSkipPortion, bool FoundElse,
2043  SourceLocation ElseLoc = SourceLocation());
2044 
2045  /// Information about the result for evaluating an expression for a
2046  /// preprocessor directive.
2047  struct DirectiveEvalResult {
2048  /// Whether the expression was evaluated as true or not.
2049  bool Conditional;
2050 
2051  /// True if the expression contained identifiers that were undefined.
2052  bool IncludedUndefinedIds;
2053 
2054  /// The source range for the expression.
2055  SourceRange ExprRange;
2056  };
2057 
2058  /// Evaluate an integer constant expression that may occur after a
2059  /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2060  ///
2061  /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2062  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2063 
2064  /// Install the standard preprocessor pragmas:
2065  /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2066  void RegisterBuiltinPragmas();
2067 
2068  /// Register builtin macros such as __LINE__ with the identifier table.
2069  void RegisterBuiltinMacros();
2070 
2071  /// If an identifier token is read that is to be expanded as a macro, handle
2072  /// it and return the next token as 'Tok'. If we lexed a token, return true;
2073  /// otherwise the caller should lex again.
2074  bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2075 
2076  /// Cache macro expanded tokens for TokenLexers.
2077  ///
2078  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
2079  /// going to lex in the cache and when it finishes the tokens are removed
2080  /// from the end of the cache.
2081  Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2082  ArrayRef<Token> tokens);
2083 
2084  void removeCachedMacroExpandedTokensOfLastLexer();
2085 
2086  /// Determine whether the next preprocessor token to be
2087  /// lexed is a '('. If so, consume the token and return true, if not, this
2088  /// method should have no observable side-effect on the lexed tokens.
2089  bool isNextPPTokenLParen();
2090 
2091  /// After reading "MACRO(", this method is invoked to read all of the formal
2092  /// arguments specified for the macro invocation. Returns null on error.
2093  MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2094  SourceLocation &MacroEnd);
2095 
2096  /// If an identifier token is read that is to be expanded
2097  /// as a builtin macro, handle it and return the next token as 'Tok'.
2098  void ExpandBuiltinMacro(Token &Tok);
2099 
2100  /// Read a \c _Pragma directive, slice it up, process it, then
2101  /// return the first token after the directive.
2102  /// This assumes that the \c _Pragma token has just been read into \p Tok.
2103  void Handle_Pragma(Token &Tok);
2104 
2105  /// Like Handle_Pragma except the pragma text is not enclosed within
2106  /// a string literal.
2107  void HandleMicrosoft__pragma(Token &Tok);
2108 
2109  /// Add a lexer to the top of the include stack and
2110  /// start lexing tokens from it instead of the current buffer.
2111  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2112 
2113  /// Set the FileID for the preprocessor predefines.
2114  void setPredefinesFileID(FileID FID) {
2115  assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2116  PredefinesFileID = FID;
2117  }
2118 
2119  /// Set the FileID for the PCH through header.
2120  void setPCHThroughHeaderFileID(FileID FID);
2121 
2122  /// Returns true if we are lexing from a file and not a
2123  /// pragma or a macro.
2124  static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2125  return L ? !L->isPragmaLexer() : P != nullptr;
2126  }
2127 
2128  static bool IsFileLexer(const IncludeStackInfo& I) {
2129  return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2130  }
2131 
2132  bool IsFileLexer() const {
2133  return IsFileLexer(CurLexer.get(), CurPPLexer);
2134  }
2135 
2136  //===--------------------------------------------------------------------===//
2137  // Caching stuff.
2138  void CachingLex(Token &Result, bool &IsNewToken);
2139 
2140  bool InCachingLexMode() const {
2141  // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2142  // that we are past EOF, not that we are in CachingLex mode.
2143  return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2144  }
2145 
2146  void EnterCachingLexMode();
2147  void EnterCachingLexModeUnchecked();
2148 
2149  void ExitCachingLexMode() {
2150  if (InCachingLexMode())
2151  RemoveTopOfLexerStack();
2152  }
2153 
2154  const Token &PeekAhead(unsigned N);
2155  void AnnotatePreviousCachedTokens(const Token &Tok);
2156 
2157  //===--------------------------------------------------------------------===//
2158  /// Handle*Directive - implement the various preprocessor directives. These
2159  /// should side-effect the current preprocessor object so that the next call
2160  /// to Lex() will return the appropriate token next.
2161  void HandleLineDirective();
2162  void HandleDigitDirective(Token &Tok);
2163  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2164  void HandleIdentSCCSDirective(Token &Tok);
2165  void HandleMacroPublicDirective(Token &Tok);
2166  void HandleMacroPrivateDirective();
2167 
2168  /// An additional notification that can be produced by a header inclusion or
2169  /// import to tell the parser what happened.
2170  struct ImportAction {
      /// Discriminator for the notification. None is the only kind that may be
      /// constructed without a module (see the constructor's assertion).
2171  enum ActionKind {
2172  None,
2173  ModuleBegin,
2174  ModuleImport,
2175  SkippedModuleImport,
2176  } Kind;
      /// Module attached to the action; null unless the constructor is given one.
2177  Module *ModuleForHeader = nullptr;
2178 
      /// Build an action of kind \p AK carrying module \p Mod.
2179  ImportAction(ActionKind AK, Module *Mod = nullptr)
2180  : Kind(AK), ModuleForHeader(Mod) {
      // Every kind other than None must come with a non-null module.
2181  assert((AK == None || Mod) && "no module for module action");
2182  }
2183  };
2184 
2185  // File inclusion.
2186  void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2187  const DirectoryLookup *LookupFrom = nullptr,
2188  const FileEntry *LookupFromFile = nullptr);
2189  ImportAction
2190  HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2191  Token &FilenameTok, SourceLocation EndLoc,
2192  const DirectoryLookup *LookupFrom = nullptr,
2193  const FileEntry *LookupFromFile = nullptr);
2194  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2195  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2196  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2197  void HandleMicrosoftImportDirective(Token &Tok);
2198 
2199 public:
2200  /// Check that the given module is available, producing a diagnostic if not.
2201  /// \return \c true if the check failed (because the module is not available).
2202  /// \c false if the module appears to be usable.
2203  static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2204  const TargetInfo &TargetInfo,
2205  DiagnosticsEngine &Diags, Module *M);
2206 
2207  // Module inclusion testing.
2208  /// Find the module that owns the source or header file that
2209  /// \p Loc points to. If the location is in a file that was included
2210  /// into a module, or is outside any module, returns nullptr.
2211  Module *getModuleForLocation(SourceLocation Loc);
2212 
2213  /// We want to produce a diagnostic at location IncLoc concerning a
2214  /// missing module import.
2215  ///
2216  /// \param IncLoc The location at which the missing import was detected.
2217  /// \param M The desired module.
2218  /// \param MLoc A location within the desired module at which some desired
2219  /// effect occurred (e.g., where a desired entity was declared).
2220  ///
2221  /// \return A file that can be #included to import a module containing MLoc.
2222  /// Null if no such file could be determined or if a #include is not
2223  /// appropriate.
2224  const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2225  Module *M,
2226  SourceLocation MLoc);
2227 
2228  bool isRecordingPreamble() const {
2229  return PreambleConditionalStack.isRecording();
2230  }
2231 
2232  bool hasRecordedPreamble() const {
2233  return PreambleConditionalStack.hasRecordedPreamble();
2234  }
2235 
2237  return PreambleConditionalStack.getStack();
2238  }
2239 
      // NOTE(review): the declarator row of this method (listing row 2240) was
      // lost in extraction. Presumably
      //   void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
      // — confirm against the upstream header.
      // The body forwards \p s to PreambleConditionalStack.setStack().
2241  PreambleConditionalStack.setStack(s);
2242  }
2243 
      // NOTE(review): the declarator rows of this method (listing rows
      // 2244-2245) were lost in extraction. The body shows two parameters,
      // `s` and `SkipInfo`; presumably this is
      // setReplayablePreambleConditionalStack — confirm the exact signature
      // against the upstream header.
      // Switches PreambleConditionalStack into replay, installs \p s as its
      // stack, and stores \p SkipInfo in its SkipInfo field.
2246  PreambleConditionalStack.startReplaying();
2247  PreambleConditionalStack.setStack(s);
2248  PreambleConditionalStack.SkipInfo = SkipInfo;
2249  }
2250 
      // NOTE(review): the declarator row of this method (listing row 2251) was
      // lost in extraction; presumably an accessor named getPreambleSkipInfo —
      // confirm name and return type against the upstream header.
      // Returns the SkipInfo field of PreambleConditionalStack.
2252  return PreambleConditionalStack.SkipInfo;
2253  }
2254 
2255 private:
2256  /// After processing predefined file, initialize the conditional stack from
2257  /// the preamble.
2258  void replayPreambleConditionalStack();
2259 
2260  // Macro handling.
2261  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2262  void HandleUndefDirective();
2263 
2264  // Conditional Inclusion.
2265  void HandleIfdefDirective(Token &Result, const Token &HashToken,
2266  bool isIfndef, bool ReadAnyTokensBeforeDirective);
2267  void HandleIfDirective(Token &IfToken, const Token &HashToken,
2268  bool ReadAnyTokensBeforeDirective);
2269  void HandleEndifDirective(Token &EndifToken);
2270  void HandleElseDirective(Token &Result, const Token &HashToken);
2271  void HandleElifDirective(Token &ElifToken, const Token &HashToken);
2272 
2273  // Pragmas.
2274  void HandlePragmaDirective(SourceLocation IntroducerLoc,
2275  PragmaIntroducerKind Introducer);
2276 
2277 public:
2278  void HandlePragmaOnce(Token &OnceTok);
2279  void HandlePragmaMark();
2280  void HandlePragmaPoison();
2281  void HandlePragmaSystemHeader(Token &SysHeaderTok);
2282  void HandlePragmaDependency(Token &DependencyTok);
2283  void HandlePragmaPushMacro(Token &Tok);
2284  void HandlePragmaPopMacro(Token &Tok);
2285  void HandlePragmaIncludeAlias(Token &Tok);
2286  void HandlePragmaModuleBuild(Token &Tok);
2287  void HandlePragmaHdrstop(Token &Tok);
2288  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2289 
2290  // Return true and store the first token only if any CommentHandler
2291  // has inserted some tokens and getCommentRetentionState() is false.
2292  bool HandleComment(Token &result, SourceRange Comment);
2293 
2294  /// A macro is used, update information about macros that need unused
2295  /// warnings.
2296  void markMacroAsUsed(MacroInfo *MI);
2297 };
2298 
2299 /// Abstract base class that describes a handler that will receive
2300 /// source ranges for each of the comments encountered in the source file.
2302 public:
2303  virtual ~CommentHandler();
2304 
2305  // The handler shall return true if it has pushed any tokens
2306  // to be read using e.g. EnterToken or EnterTokenStream.
2307  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
2308 };
2309 
2310 /// Registry of pragma handlers added by plugins
2311 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2312 
2313 } // namespace clang
2314 
2315 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, bool FoundNonSkipPortion, bool FoundElse, SourceLocation ElseLoc)
Definition: Preprocessor.h:420
A set of visible modules.
Definition: Module.h:595
StringRef Identifier
Definition: Format.cpp:1714
llvm::BumpPtrAllocator & getPreprocessorAllocator()
Definition: Preprocessor.h:911
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
Definition: Lexer.h:76
SelectorTable & getSelectorTable()
Definition: Preprocessor.h:909
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
Definition: Builtins.h:67
Implements support for file system lookup, file system caching, and directory search management...
Definition: FileManager.h:116
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
std::string getSpelling(const Token &Tok, bool *Invalid=nullptr) const
Return the 'spelling' of the Tok token.
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:29
const Token & LookAhead(unsigned N)
Peeks ahead N tokens and returns that token without consuming any tokens.
void MaybeHandlePoisonedIdentifier(Token &Identifier)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:94
Defines the SourceManager interface.
static const Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:20
bool LexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Lex a string literal, which may be the concatenation of multiple string literals and may even come fr...
FileManager & getFileManager() const
Definition: Preprocessor.h:903
Defines the clang::Module class, which describes a module in the source code.
bool isCodeCompletionReached() const
Returns true if code-completion is enabled and we have hit the code-completion point.
SourceLocation getPragmaARCCFCodeAuditedLoc() const
The location of the currently-active #pragma clang arc_cf_code_audited begin.
StringRef P
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
Defines the clang::MacroInfo and clang::MacroDirective classes.
A description of the current definition of a macro.
Definition: MacroInfo.h:564
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1296
A directive for an undefined macro.
Definition: MacroInfo.h:429
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool hadModuleLoaderFatalFailure() const
Definition: Preprocessor.h:924
void EnterToken(const Token &Tok)
Enters a token in the token stream to be lexed next.
bool getRawToken(SourceLocation Loc, Token &Result, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
bool operator==(const Token &Tok) const
Definition: Preprocessor.h:99
Builtin::Context & getBuiltinInfo()
Definition: Preprocessor.h:910
void setPredefines(const char *P)
Set the predefines for this Preprocessor.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
char getSpellingOfSingleCharacterNumericConstant(const Token &Tok, bool *Invalid=nullptr) const
Given a Token Tok that is a numeric constant with length 1, return the character. ...
This interface provides a way to observe the actions of the preprocessor as it does its thing...
Definition: PPCallbacks.h:35
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:117
const MacroDirective * getPrevious() const
Get previous definition of the macro with the same name.
Definition: MacroInfo.h:328
tok::TokenKind getKind() const
Definition: Token.h:89
TokenValue(tok::TokenKind Kind)
Definition: Preprocessor.h:89
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset=0)
Computes the source location just past the end of the token at this source location.
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, SourceLocation Loc)
One of these records is kept for each identifier that is lexed.
ArrayRef< PPConditionalInfo > getPreambleConditionalStack() const
Represents a macro directive exported by a module.
Definition: MacroInfo.h:488
A directive for a defined macro or a macro imported from a module.
Definition: MacroInfo.h:406
This table allows us to fully hide how we implement multi-keyword caching.
LineState State
Definition: Format.h:2222
const TargetInfo & getTargetInfo() const
Definition: Preprocessor.h:901
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
Definition: Preprocessor.h:934
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:900
Describes a module or submodule.
Definition: Module.h:64
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
Definition: Lexer.h:183
A directive for setting the module visibility of a macro.
Definition: MacroInfo.h:444
bool isInvalid() const
void SetSuppressIncludeNotFoundError(bool Suppress)
Definition: Preprocessor.h:944
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Char) const
Given a location that specifies the start of a token, return a new location that specifies a characte...
MacroUse
Context in which macro name is used.
Definition: Preprocessor.h:106
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
Module * getCurrentLexerSubmodule() const
Return the submodule owning the file being lexed.
Definition: Preprocessor.h:981
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
HeaderSearch & getHeaderSearchInfo() const
Definition: Preprocessor.h:905
const FormatToken & Tok
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
void LexNonComment(Token &Result)
Lex a token.
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:148
bool isMacroDefined(const IdentifierInfo *II)
bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M)
Determine whether II is defined as a macro within the module M, if that is a module that we've alread...
Defines the Diagnostic-related interfaces.
const TargetInfo * getAuxTargetInfo() const
Definition: Preprocessor.h:902
TokenLexer - This implements a lexer that returns tokens from a macro body or token stream instead of...
Definition: TokenLexer.h:30
void setCodeCompletionHandler(CodeCompletionHandler &Handler)
Set the code completion handler to the given object.
void setPreprocessedOutput(bool IsPreprocessedOutput)
Sets whether the preprocessor is responsible for producing output or if it is producing tokens to be ...
Definition: Preprocessor.h:954
PragmaIntroducerKind
Describes how the pragma was introduced, e.g., with #pragma, _Pragma, or __pragma.
Definition: Pragma.h:31
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
Encapsulates the information needed to find the file referenced by a #include or #include_next, (sub-)framework lookup, etc.
Definition: HeaderSearch.h:158
A little helper class used to produce diagnostics.
Definition: Diagnostic.h:1044
void clearCodeCompletionHandler()
Clear out the code completion handler.
StringRef Filename
Definition: Format.cpp:1707
Provides lookups to, and iteration over, IdentifierInfo objects.
unsigned Offset
Definition: Format.cpp:1709
Exposes information about the current target.
Definition: TargetInfo.h:161
const IdentifierTable & getIdentifierTable() const
Definition: Preprocessor.h:908
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
Abstract interface for external sources of preprocessor information.
Defines the clang::LangOptions interface.
unsigned getCounterValue() const
SourceLocation End
const DefMacroDirective * getDirective() const
Definition: MacroInfo.h:349
int Id
Definition: ASTDiff.cpp:190
void AnnotateCachedTokens(const Token &Tok)
We notify the Preprocessor that if it is caching tokens (because backtrack is enabled) it should repl...
const AnnotatedLine * Line
bool isDefined() const
Definition: MacroInfo.h:384
void SetMacroExpansionOnlyInDirectives()
Disables macro expansion everywhere except for preprocessor directives.
bool isCurrentLexer(const PreprocessorLexer *L) const
Return true if we are lexing directly from the specified lexer.
Definition: Preprocessor.h:963
Implements an efficient mapping from strings to IdentifierInfo nodes.
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:29
void RevertCachedTokens(unsigned N)
When backtracking is enabled and tokens are cached, this allows reverting a specific number of tokens...
A class for tracking whether we're inside a __VA_OPT__ during a traversal of the tokens of a variadic mac...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:123
#define bool
Definition: stdbool.h:15
DefInfo getDefinition()
Traverses the macro directives history and returns the next macro definition directive along with inf...
Definition: MacroInfo.cpp:178
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
Stores token information for comparing actual tokens with predefined values.
Definition: Preprocessor.h:84
llvm::Optional< PreambleSkipInfo > getPreambleSkipInfo() const
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const
void setPragmaAssumeNonNullLoc(SourceLocation Loc)
Set the location of the currently-active #pragma clang assume_nonnull begin.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
bool getCommentRetentionState() const
Definition: Preprocessor.h:939
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
TokenValue(IdentifierInfo *II)
Definition: Preprocessor.h:97
PPCallbacks * getPPCallbacks() const
Definition: Preprocessor.h:991
const SourceManager & SM
Definition: Format.cpp:1568
DirectoryLookup - This class represents one entry in the search list that specifies the search order ...
bool isRecordingPreamble() const
const DirectoryLookup * GetCurDirLookup()
Get the DirectoryLookup structure used to find the current FileEntry, if CurLexer is non-null and if ...
SourceManager & getSourceManager() const
Definition: Preprocessor.h:904
MacroDirective * getLocalMacroDirective(const IdentifierInfo *II) const
Given an identifier, return its latest non-imported MacroDirective if it is #define'd and not #undef'...
const DefInfo findDirectiveAtLoc(SourceLocation L, const SourceManager &SM) const
Find macro definition active in the specified source location.
Definition: MacroInfo.cpp:202
ExternalPreprocessorSource * getExternalSource() const
Definition: Preprocessor.h:917
llvm::cl::opt< std::string > Filter
Encapsulates changes to the "macros namespace" (the location where the macro name became active...
Definition: MacroInfo.h:290
Kind
void TypoCorrectToken(const Token &Tok)
Update the current token to represent the provided identifier, in order to cache an action performed ...
void setExternalSource(ExternalPreprocessorSource *Source)
Definition: Preprocessor.h:913
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.h:93
void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc)
Set the location of the currently-active #pragma clang arc_cf_code_audited begin. ...
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176
IdentifierTable & getIdentifierTable()
Definition: Preprocessor.h:907
void setPragmasEnabled(bool Enabled)
Definition: Preprocessor.h:941
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
CodeCompletionHandler * getCodeCompletionHandler() const
Retrieve the current code-completion handler.
bool isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the first token of the macro expansion...
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:85
bool isPoisoned() const
Return true if this token has been poisoned.
bool SawDateOrTime() const
Returns true if the preprocessor has seen a use of __DATE__ or __TIME__ in the file so far.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:24
void enableIncrementalProcessing(bool value=true)
Enables the incremental processing.
MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, SourceLocation Loc)
PreprocessorLexer * getCurrentLexer() const
Return the current lexer being lexed from.
Definition: Preprocessor.h:971
bool isAtEndOfMacroExpansion(SourceLocation loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the last token of the macro expansion.
void ReplaceLastTokenWithAnnotation(const Token &Tok)
Replace the last token with an annotation token.
SourceLocation getPragmaAssumeNonNullLoc() const
The location of the currently-active #pragma clang assume_nonnull begin.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
const MacroInfo * getMacroInfo() const
Definition: MacroInfo.h:390
StringRef getName() const
Return the actual identifier string.
bool isMacroDefined(StringRef Id)
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
bool hadMacroDefinition() const
Returns true if this identifier was #defined to some value at any moment.
Dataflow Directional Tag Classes.
void EnterTokenStream(ArrayRef< Token > Toks, bool DisableMacroExpansion)
SmallVector< Token, 4 > CachedTokens
A set of tokens that has been cached for later parsing.
Definition: DeclSpec.h:1141
void EnterTokenStream(std::unique_ptr< Token[]> Toks, unsigned NumToks, bool DisableMacroExpansion)
SourceRange getCodeCompletionTokenRange() const
PreprocessingRecord * getPreprocessingRecord() const
Retrieve the preprocessing record, or NULL if there is no preprocessing record.
Reads an AST files chain containing the contents of a translation unit.
Definition: ASTReader.h:354
PragmaHandler - Instances of this interface are defined to handle the various pragmas that the language f...
Definition: Pragma.h:58
llvm::iterator_range< macro_iterator > macros(bool IncludeExternalMacros=true) const
SourceLocation getLastCachedTokenLocation() const
Get the location of the last cached token, suitable for setting the end location of an annotation tok...
StringRef getCodeCompletionFilter()
Get the code completion token for filtering purposes.
Abstract interface for a module loader.
Definition: ModuleLoader.h:73
unsigned getLength() const
Definition: Token.h:126
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
OnOffSwitch
Defines the possible values of an on-off-switch (C99 6.10.6p2).
Definition: TokenKinds.h:47
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:214
bool getPragmasEnabled() const
Definition: Preprocessor.h:942
unsigned getSpelling(const Token &Tok, const char *&Buffer, bool *Invalid=nullptr) const
Get the spelling of a token into a preallocated buffer, instead of as an std::string.
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition: MacroInfo.h:123
ModuleLoader & getModuleLoader() const
Retrieve the module loader associated with this preprocessor.
Definition: Preprocessor.h:922
void LexUnexpandedNonComment(Token &Result)
Like LexNonComment, but this disables macro expansion of identifier tokens.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
Definition: Preprocessor.h:895
Cached information about one directory (either on disk or in the virtual file system).
Definition: FileManager.h:45
Defines the PPCallbacks interface.
Defines the clang::TokenKind enum and support functions.
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI)
SourceLocation getModuleImportLoc(Module *M) const
Defines the clang::SourceLocation class and associated facilities.
void RemovePragmaHandler(PragmaHandler *Handler)
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
Definition: Preprocessor.h:984
DiagnosticsEngine & getDiagnostics() const
Definition: Preprocessor.h:897
void setMainFileDir(const DirectoryEntry *Dir)
Set the directory in which the main file should be considered to have been found, if it is not a real...
static bool isMacroDefined(const Sema &S, SourceLocation Loc, StringRef Name)
TranslationUnitKind
Describes the kind of translation unit being processed.
Definition: LangOptions.h:361
bool hasRecordedPreamble() const
const std::string & getPredefines() const
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:282
void setPredefines(StringRef P)
ArrayRef< ModuleMacro * > getLeafModuleMacros(const IdentifierInfo *II) const
Get the list of leaf (non-overridden) module macros for a name.
bool isIncrementalProcessingEnabled() const
Returns true if incremental processing is enabled.
The translation unit is a complete translation unit.
Definition: LangOptions.h:363
StringRef getImmediateMacroName(SourceLocation Loc)
Retrieve the name of the immediate macro expansion.
MacroMap::const_iterator macro_iterator
Abstract base class that describes a handler that will receive source ranges for each of the comments...
void setCounterValue(unsigned V)
bool isParsingIfOrElifDirective() const
True if we are currently preprocessing a #if or #elif directive.
Definition: Preprocessor.h:929
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine)
Instruct the preprocessor to skip part of the main source file.
A trivial tuple used to represent a source range.
bool GetSuppressIncludeNotFoundError()
Definition: Preprocessor.h:948
void setReplayablePreambleConditionalStack(ArrayRef< PPConditionalInfo > s, llvm::Optional< PreambleSkipInfo > SkipInfo)
MacroInfo * getMacroInfo(const IdentifierInfo *II)
Callback handler that receives notifications when performing code completion within the preprocessor...
A header that is known to reside within a given module, whether it was included or excluded...
Definition: ModuleMap.h:150
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
Definition: Preprocessor.h:960
bool hasMacroDefinition() const
Return true if this identifier is #defined to some other value.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:992
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
Defines enum values for all the target-independent builtin functions.
An RAII class that tracks when the Preprocessor starts and stops lexing the definition of a (ISO C/C+...
void AddPragmaHandler(PragmaHandler *Handler)
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:123
void setDiagnostics(DiagnosticsEngine &D)
Definition: Preprocessor.h:898