clang 23.0.0git
Preprocessor.h
Go to the documentation of this file.
1//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the clang::Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
20#include "clang/Basic/LLVM.h"
22#include "clang/Basic/Module.h"
27#include "clang/Lex/Lexer.h"
28#include "clang/Lex/MacroInfo.h"
30#include "clang/Lex/ModuleMap.h"
33#include "clang/Lex/Token.h"
36#include "llvm/ADT/APSInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/FoldingSet.h"
40#include "llvm/ADT/FunctionExtras.h"
41#include "llvm/ADT/PointerUnion.h"
42#include "llvm/ADT/STLExtras.h"
43#include "llvm/ADT/SmallPtrSet.h"
44#include "llvm/ADT/SmallVector.h"
45#include "llvm/ADT/StringRef.h"
46#include "llvm/ADT/TinyPtrVector.h"
47#include "llvm/ADT/iterator_range.h"
48#include "llvm/Support/Allocator.h"
49#include "llvm/Support/Casting.h"
50#include "llvm/Support/Registry.h"
51#include "llvm/Support/TrailingObjects.h"
52#include <cassert>
53#include <cstddef>
54#include <cstdint>
55#include <map>
56#include <memory>
57#include <optional>
58#include <string>
59#include <utility>
60#include <vector>
61
62namespace llvm {
63
64template<unsigned InternalLen> class SmallString;
65
66} // namespace llvm
67
68namespace clang {
69
71class CommentHandler;
72class DirectoryEntry;
75class FileEntry;
76class FileManager;
77class HeaderSearch;
78class MacroArgs;
79class PragmaHandler;
80class PragmaNamespace;
84class ScratchBuffer;
85class TargetInfo;
87
88namespace Builtin {
89class Context;
90}
91
92/// Stores token information for comparing actual tokens with
93/// predefined values. Only handles simple tokens and identifiers.
95 tok::TokenKind Kind;
97
98public:
99 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
100 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
101 assert(Kind != tok::identifier &&
102 "Identifiers should be created by TokenValue(IdentifierInfo *)");
103 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
104 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
105 }
106
107 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
108
109 bool operator==(const Token &Tok) const {
110 return Tok.getKind() == Kind &&
111 (!II || II == Tok.getIdentifierInfo());
112 }
113};
114
115/// Context in which macro name is used.
117 // other than #define or #undef
119
120 // macro name specified in #define
122
123 // macro name specified in #undef
125};
126
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,

  /// Corresponds to __STDC_EMBED_NOT_FOUND__
  NotFound = 0,

  /// Corresponds to __STDC_EMBED_FOUND__
  Found = 1,

  /// Corresponds to __STDC_EMBED_EMPTY__
  Empty = 2,
};
133
139
140class ModuleNameLoc final
141 : llvm::TrailingObjects<ModuleNameLoc, IdentifierLoc> {
142 friend TrailingObjects;
143 unsigned NumIdentifierLocs;
144 unsigned numTrailingObjects(OverloadToken<IdentifierLoc>) const {
145 return getNumIdentifierLocs();
146 }
147
148 ModuleNameLoc(ModuleIdPath Path) : NumIdentifierLocs(Path.size()) {
149 (void)llvm::copy(Path, getTrailingObjectsNonStrict<IdentifierLoc>());
150 }
151
152public:
153 static ModuleNameLoc *Create(Preprocessor &PP, ModuleIdPath Path);
154 unsigned getNumIdentifierLocs() const { return NumIdentifierLocs; }
156 return {getTrailingObjectsNonStrict<IdentifierLoc>(),
158 }
159
161 return getModuleIdPath().front().getLoc();
162 }
164 auto &Last = getModuleIdPath().back();
165 return Last.getLoc().getLocWithOffset(
166 Last.getIdentifierInfo()->getLength());
167 }
168 SourceRange getRange() const { return {getBeginLoc(), getEndLoc()}; }
169 std::string str() const {
171 }
172};
173
174/// Engages in a tight little dance with the lexer to efficiently
175/// preprocess tokens.
176///
177/// Lexers know only about tokens within a single source file, and don't
178/// know anything about preprocessor-level issues like the \#include stack,
179/// token expansion, etc.
183
184 llvm::unique_function<void(const clang::Token &)> OnToken;
185 /// Functor for getting the dependency preprocessor directives of a file.
186 ///
187 /// These are directives derived from a special form of lexing where the
188 /// source input is scanned for the preprocessor directives that might have an
189 /// effect on the dependencies for a compilation unit.
190 DependencyDirectivesGetter *GetDependencyDirectives = nullptr;
191 const PreprocessorOptions &PPOpts;
192 DiagnosticsEngine *Diags;
193 const LangOptions &LangOpts;
194 const TargetInfo *Target = nullptr;
195 const TargetInfo *AuxTarget = nullptr;
196 FileManager &FileMgr;
197 SourceManager &SourceMgr;
198 std::unique_ptr<ScratchBuffer> ScratchBuf;
199 HeaderSearch &HeaderInfo;
200 ModuleLoader &TheModuleLoader;
201
202 /// External source of macros.
203 ExternalPreprocessorSource *ExternalSource;
204
205 /// A BumpPtrAllocator object used to quickly allocate and release
206 /// objects internal to the Preprocessor.
207 llvm::BumpPtrAllocator BP;
208
209 /// Identifiers for builtin macros and other builtins.
210 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
211 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
212 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
213 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
214 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
215 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
216 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
217 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
218 IdentifierInfo *Ident__identifier; // __identifier
219 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
220 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
221 IdentifierInfo *Ident__has_feature; // __has_feature
222 IdentifierInfo *Ident__has_extension; // __has_extension
223 IdentifierInfo *Ident__has_builtin; // __has_builtin
224 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin
225 IdentifierInfo *Ident__has_attribute; // __has_attribute
226 IdentifierInfo *Ident__has_embed; // __has_embed
227 IdentifierInfo *Ident__has_include; // __has_include
228 IdentifierInfo *Ident__has_include_next; // __has_include_next
229 IdentifierInfo *Ident__has_warning; // __has_warning
230 IdentifierInfo *Ident__is_identifier; // __is_identifier
231 IdentifierInfo *Ident__building_module; // __building_module
232 IdentifierInfo *Ident__MODULE__; // __MODULE__
233 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
234 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
235 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
236 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
237 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
238 IdentifierInfo *Ident__is_target_os; // __is_target_os
239 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
240 IdentifierInfo *Ident__is_target_variant_os;
241 IdentifierInfo *Ident__is_target_variant_environment;
242 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD
243
244 // Weak, only valid (and set) while InMacroArgs is true.
245 Token* ArgMacro;
246
247 SourceLocation DATELoc, TIMELoc;
248
249 // FEM_UnsetOnCommandLine means that an explicit evaluation method was
250 // not specified on the command line. The target is queried to set the
251 // default evaluation method.
252 LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
254
255 // The most recent pragma location where the floating point evaluation
256 // method was modified. This is used to determine whether the
257 // 'pragma clang fp eval_method' was used within the current scope.
258 SourceLocation LastFPEvalPragmaLocation;
259
260 LangOptions::FPEvalMethodKind TUFPEvalMethod =
262
263 // Next __COUNTER__ value, starts at 0.
264 uint32_t CounterValue = 0;
265
266 enum {
267 /// Maximum depth of \#includes.
268 MaxAllowedIncludeStackDepth = 200
269 };
270
271 // State that is set before the preprocessor begins.
272 bool KeepComments : 1;
273 bool KeepMacroComments : 1;
274 bool SuppressIncludeNotFoundError : 1;
275
276 // State that changes while the preprocessor runs:
277 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
278
279 /// Whether the preprocessor owns the header search object.
280 bool OwnsHeaderSearch : 1;
281
282 /// True if macro expansion is disabled.
283 bool DisableMacroExpansion : 1;
284
285 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
286 /// when parsing preprocessor directives.
287 bool MacroExpansionInDirectivesOverride : 1;
288
289 class ResetMacroExpansionHelper;
290
291 /// Whether we have already loaded macros from the external source.
292 mutable bool ReadMacrosFromExternalSource : 1;
293
294 /// True if pragmas are enabled.
295 bool PragmasEnabled : 1;
296
297 /// True if the current build action is a preprocessing action.
298 bool PreprocessedOutput : 1;
299
300 /// True if we are currently preprocessing a #if or #elif directive
301 bool ParsingIfOrElifDirective;
302
303 /// True if we are pre-expanding macro arguments.
304 bool InMacroArgPreExpansion;
305
306 /// Mapping/lookup information for all identifiers in
307 /// the program, including program keywords.
308 mutable IdentifierTable Identifiers;
309
310 /// This table contains all the selectors in the program.
311 ///
312 /// Unlike IdentifierTable above, this table *isn't* populated by the
313 /// preprocessor. It is declared/expanded here because its role/lifetime is
314 /// conceptually similar to the IdentifierTable. In addition, the current
315 /// control flow (in clang::ParseAST()), make it convenient to put here.
316 ///
317 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
318 /// the lifetime of the preprocessor.
319 SelectorTable Selectors;
320
321 /// Information about builtins.
322 std::unique_ptr<Builtin::Context> BuiltinInfo;
323
324 /// Tracks all of the pragmas that the client registered
325 /// with this preprocessor.
326 std::unique_ptr<PragmaNamespace> PragmaHandlers;
327
328 /// Pragma handlers of the original source is stored here during the
329 /// parsing of a model file.
330 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
331
332 /// Tracks all of the comment handlers that the client registered
333 /// with this preprocessor.
334 std::vector<CommentHandler *> CommentHandlers;
335
336 /// Empty line handler.
337 EmptylineHandler *Emptyline = nullptr;
338
339 /// True to avoid tearing down the lexer etc on EOF
340 bool IncrementalProcessing = false;
341
342public:
343 /// The kind of translation unit we are processing.
345
346 /// Returns a pointer into the given file's buffer that's guaranteed
347 /// to be between tokens. The returned pointer is always before \p Start.
348 /// The maximum distance between the returned pointer and \p Start is
349 /// limited by a constant value, but also an implementation detail.
350 /// If no such check point exists, \c nullptr is returned.
351 const char *getCheckPoint(FileID FID, const char *Start) const;
352
353private:
354 /// The code-completion handler.
355 CodeCompletionHandler *CodeComplete = nullptr;
356
357 /// The file that we're performing code-completion for, if any.
358 const FileEntry *CodeCompletionFile = nullptr;
359
360 /// The offset in file for the code-completion point.
361 unsigned CodeCompletionOffset = 0;
362
363 /// The location for the code-completion point. This gets instantiated
364 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
365 SourceLocation CodeCompletionLoc;
366
367 /// The start location for the file of the code-completion point.
368 ///
369 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
370 /// for preprocessing.
371 SourceLocation CodeCompletionFileLoc;
372
373 /// The source location of the \c import contextual keyword we just
374 /// lexed, if any.
375 SourceLocation ModuleImportLoc;
376
377 /// The source location of the \c module contextual keyword we just
378 /// lexed, if any.
379 SourceLocation ModuleDeclLoc;
380
381 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
382 unsigned CheckPointCounter = 0;
383
384 /// Whether we're importing a standard C++20 named module.
385 bool ImportingCXXNamedModules = false;
386
387 /// Whether the last token we lexed was an 'export' keyword.
388 Token LastExportKeyword;
389
390 /// First pp-token source location in current translation unit.
391 SourceLocation FirstPPTokenLoc;
392
393 /// A preprocessor directive tracer to trace whether the preprocessing
394 /// state changed. These changes would mean most semantically observable
395 /// preprocessor state, particularly anything that is order dependent.
396 NoTrivialPPDirectiveTracer *DirTracer = nullptr;
397
398 /// A position within a C++20 import-seq.
class StdCXXImportSeq {
public:
  /// The tracked position. Non-positive values are named positions at the
  /// top level; positive values count unclosed brackets.
  enum State : int {
    // Positive values represent a number of unclosed brackets.
    AtTopLevel = 0,
    AfterTopLevelTokenSeq = -1,
    AfterExport = -2,
    AfterImportSeq = -3,
  };

  /// Deliberately implicit: the tracker is copy-initialized from a bare
  /// State value.
  StdCXXImportSeq(State S) : S(S) {}

  /// Saw any kind of open bracket.
  ///
  /// Any named (non-positive) position is first clamped to a depth of zero,
  /// then the depth is incremented.
  void handleOpenBracket() {
    S = static_cast<State>(std::max<int>(S, 0) + 1);
  }

  /// Saw any kind of close bracket other than '}'.
  ///
  /// Decrements the bracket depth. A named (non-positive) position is
  /// treated as depth one, so the result is never below AtTopLevel.
  void handleCloseBracket() {
    S = static_cast<State>(std::max<int>(S, 1) - 1);
  }

  /// Saw a close brace.
  ///
  /// Behaves like any other close bracket, except that returning to the top
  /// level also ends the current top-level token sequence, unless we are
  /// still in the suffix that follows a header-name.
  void handleCloseBrace() {
    handleCloseBracket();
    if (S == AtTopLevel && !AfterHeaderName)
      S = AfterTopLevelTokenSeq;
  }

  /// Saw a semicolon.
  ///
  /// At the top level this ends the current token sequence and clears the
  /// header-name flag; inside brackets it has no effect.
  void handleSemi() {
    if (atTopLevel()) {
      S = AfterTopLevelTokenSeq;
      AfterHeaderName = false;
    }
  }

  /// Saw an 'export' identifier.
  ///
  /// Only an 'export' that directly follows a completed top-level token
  /// sequence is recorded; at any other non-positive position it counts as
  /// an ordinary token. Inside brackets the state is left alone.
  void handleExport() {
    if (S == AfterTopLevelTokenSeq)
      S = AfterExport;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw an 'import' identifier.
  ///
  /// Recorded only directly after a completed top-level token sequence or
  /// after 'export'; at any other non-positive position it counts as an
  /// ordinary token. Inside brackets the state is left alone.
  void handleImport() {
    if (S == AfterTopLevelTokenSeq || S == AfterExport)
      S = AfterImportSeq;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw a 'header-name' token; do not recognize any more 'import' tokens
  /// until we reach a top-level semicolon.
  void handleHeaderName() {
    if (S == AfterImportSeq)
      AfterHeaderName = true;
    handleMisc();
  }

  /// Saw any other token.
  ///
  /// Collapses every named (non-positive) position to AtTopLevel; a positive
  /// bracket depth is preserved.
  void handleMisc() {
    if (S <= 0)
      S = AtTopLevel;
  }

  /// \returns true when no brackets are open, i.e. for every non-positive
  /// state.
  bool atTopLevel() const { return S <= 0; }

  /// \returns true when the state is exactly AfterImportSeq.
  bool afterImportSeq() const { return S == AfterImportSeq; }

  /// \returns true when the state is exactly AfterTopLevelTokenSeq.
  bool afterTopLevelSeq() const { return S == AfterTopLevelTokenSeq; }

private:
  State S;

  /// Whether we're in the pp-import-suffix following the header-name in a
  /// pp-import. If so, a close-brace is not sufficient to end the
  /// top-level-token-seq of an import-seq.
  bool AfterHeaderName = false;
};
473
474 /// Our current position within a C++20 import-seq.
475 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
476
477 /// Track whether we are in a Global Module Fragment
class TrackGMF {
public:
  /// Where we are relative to a possible Global Module Fragment.
  enum GMFState : int {
    GMFActive = 1,
    MaybeGMF = 0,
    BeforeGMFIntroducer = -1,
    GMFAbsentOrEnded = -2,
  };

  /// Deliberately implicit: the tracker is copy-initialized from a bare
  /// GMFState value.
  TrackGMF(GMFState S) : S(S) {}

  /// Saw a semicolon.
  void handleSemi() {
    // If it is immediately after the first instance of the module keyword,
    // then that introduces the GMF.
    if (S == MaybeGMF)
      S = GMFActive;
  }

  /// Saw an 'export' identifier.
  void handleExport() {
    // The presence of an 'export' keyword always ends or excludes a GMF.
    S = GMFAbsentOrEnded;
  }

  /// Saw an 'import' identifier.
  ///
  /// \param AfterTopLevelTokenSeq whether the caller considers this token to
  /// directly follow a completed top-level token sequence.
  void handleImport(bool AfterTopLevelTokenSeq) {
    // If we see this before any 'module' kw, then we have no GMF.
    if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
      S = GMFAbsentOrEnded;
  }

  /// Saw a 'module' identifier.
  ///
  /// \param AfterTopLevelTokenSeq whether the caller considers this token to
  /// directly follow a completed top-level token sequence.
  void handleModule(bool AfterTopLevelTokenSeq) {
    // This was the first module identifier and not preceded by any token
    // that would exclude a GMF. It could begin a GMF, but only if directly
    // followed by a semicolon.
    if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
      S = MaybeGMF;
    else
      // Any other 'module' keyword, including one seen while a GMF is
      // active, ends or excludes the GMF.
      S = GMFAbsentOrEnded;
  }

  /// Saw any other token.
  void handleMisc() {
    // We saw something other than ; after the 'module' kw, so not a GMF.
    if (S == MaybeGMF)
      S = GMFAbsentOrEnded;
  }

  /// \returns true only while the state is GMFActive.
  bool inGMF() const { return S == GMFActive; }

private:
  /// Track the transitions into and out of a Global Module Fragment,
  /// if one is present.
  GMFState S;
};
535
536 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
537
538 /// Track the status of the c++20 module decl.
539 ///
540 /// module-declaration:
541 /// 'export'[opt] 'module' module-name module-partition[opt]
542 /// attribute-specifier-seq[opt] ';'
543 ///
544 /// module-name:
545 /// module-name-qualifier[opt] identifier
546 ///
547 /// module-partition:
548 /// ':' module-name-qualifier[opt] identifier
549 ///
550 /// module-name-qualifier:
551 /// identifier '.'
552 /// module-name-qualifier identifier '.'
553 ///
554 /// Transition state:
555 ///
556 /// NotAModuleDecl --- export ---> FoundExport
557 /// NotAModuleDecl --- module ---> ImplementationCandidate
558 /// FoundExport --- module ---> InterfaceCandidate
559 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate
560 /// ImplementationCandidate --- period ---> ImplementationCandidate
561 /// ImplementationCandidate --- colon ---> ImplementationCandidate
562 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate
563 /// InterfaceCandidate --- period ---> InterfaceCandidate
564 /// InterfaceCandidate --- colon ---> InterfaceCandidate
565 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation
566 /// InterfaceCandidate --- Semi ---> NamedModuleInterface
567 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation
568 /// NamedModuleInterface --- Anything ---> NamedModuleInterface
569 ///
570 /// FIXME: We don't handle attribute-specifier-seq here yet. This should not
571 /// cause problems for now, since we don't support any module attributes yet.
572 class ModuleDeclSeq {
573 enum ModuleDeclState : int {
574 NotAModuleDecl,
575 FoundExport,
576 InterfaceCandidate,
577 ImplementationCandidate,
578 NamedModuleInterface,
579 NamedModuleImplementation,
580 };
581
582 public:
583 ModuleDeclSeq() = default;
584
585 void handleExport() {
586 if (State == NotAModuleDecl)
587 State = FoundExport;
588 else if (!isNamedModule())
589 reset();
590 }
591
592 void handleModule() {
593 if (State == FoundExport)
594 State = InterfaceCandidate;
595 else if (State == NotAModuleDecl)
596 State = ImplementationCandidate;
597 else if (!isNamedModule())
598 reset();
599 }
600
601 void handleModuleName(ModuleNameLoc *NameLoc) {
602 if (isModuleCandidate() && NameLoc)
603 Name += NameLoc->str();
604 else if (!isNamedModule())
605 reset();
606 }
607
608 void handleColon() {
609 if (isModuleCandidate())
610 Name += ":";
611 else if (!isNamedModule())
612 reset();
613 }
614
615 void handleSemi() {
616 if (!Name.empty() && isModuleCandidate()) {
617 if (State == InterfaceCandidate)
618 State = NamedModuleInterface;
619 else if (State == ImplementationCandidate)
620 State = NamedModuleImplementation;
621 else
622 llvm_unreachable("Unimaged ModuleDeclState.");
623 } else if (!isNamedModule())
624 reset();
625 }
626
627 void handleMisc() {
628 if (!isNamedModule())
629 reset();
630 }
631
632 bool isModuleCandidate() const {
633 return State == InterfaceCandidate || State == ImplementationCandidate;
634 }
635
636 bool isNamedModule() const {
637 return State == NamedModuleInterface ||
638 State == NamedModuleImplementation;
639 }
640
641 bool isNamedInterface() const { return State == NamedModuleInterface; }
642
643 bool isImplementationUnit() const {
644 return State == NamedModuleImplementation && !getName().contains(':');
645 }
646
647 bool isNotAModuleDecl() const { return State == NotAModuleDecl; }
648
649 StringRef getName() const {
650 assert(isNamedModule() && "Can't get name from a non named module");
651 return Name;
652 }
653
654 StringRef getPrimaryName() const {
655 assert(isNamedModule() && "Can't get name from a non named module");
656 return getName().split(':').first;
657 }
658
659 void reset() {
660 Name.clear();
661 State = NotAModuleDecl;
662 }
663
664 private:
665 ModuleDeclState State = NotAModuleDecl;
666 std::string Name;
667 };
668
669 ModuleDeclSeq ModuleDeclState;
670
671 /// The identifier and source location of the currently-active
672 /// \#pragma clang arc_cf_code_audited begin.
673 IdentifierLoc PragmaARCCFCodeAuditedInfo;
674
675 /// The source location of the currently-active
676 /// \#pragma clang assume_nonnull begin.
677 SourceLocation PragmaAssumeNonNullLoc;
678
679 /// Set only for preambles which end with an active
680 /// \#pragma clang assume_nonnull begin.
681 ///
682 /// When the preamble is loaded into the main file,
683 /// `PragmaAssumeNonNullLoc` will be set to this to
684 /// replay the unterminated assume_nonnull.
685 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
686
687 /// True if we hit the code-completion point.
688 bool CodeCompletionReached = false;
689
690 /// The code completion token containing the information
691 /// on the stem that is to be code completed.
692 IdentifierInfo *CodeCompletionII = nullptr;
693
694 /// Range for the code completion token.
695 SourceRange CodeCompletionTokenRange;
696
697 /// The directory that the main file should be considered to occupy,
698 /// if it does not correspond to a real file (as happens when building a
699 /// module).
700 OptionalDirectoryEntryRef MainFileDir;
701
702 /// The number of bytes that we will initially skip when entering the
703 /// main file, along with a flag that indicates whether skipping this number
704 /// of bytes will place the lexer at the start of a line.
705 ///
706 /// This is used when loading a precompiled preamble.
707 std::pair<int, bool> SkipMainFilePreamble;
708
709 /// Whether we hit an error due to reaching max allowed include depth. Allows
710 /// to avoid hitting the same error over and over again.
711 bool HasReachedMaxIncludeDepth = false;
712
713 /// The number of currently-active calls to Lex.
714 ///
715 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
716 /// require asking for multiple additional tokens. This counter makes it
717 /// possible for Lex to detect whether it's producing a token for the end
718 /// of phase 4 of translation or for some other situation.
719 unsigned LexLevel = 0;
720
721 /// The number of (LexLevel 0) preprocessor tokens.
722 unsigned TokenCount = 0;
723
724 /// Preprocess every token regardless of LexLevel.
725 bool PreprocessToken = false;
726
727 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
728 /// warning, or zero for unlimited.
729 unsigned MaxTokens = 0;
730 SourceLocation MaxTokensOverrideLoc;
731
732public:
747
748 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
749
750private:
751 friend class ASTReader;
752 friend class MacroArgs;
753
754 class PreambleConditionalStackStore {
755 enum State {
756 Off = 0,
757 Recording = 1,
758 Replaying = 2,
759 };
760
761 public:
762 PreambleConditionalStackStore() = default;
763
764 void startRecording() { ConditionalStackState = Recording; }
765 void startReplaying() { ConditionalStackState = Replaying; }
766 bool isRecording() const { return ConditionalStackState == Recording; }
767 bool isReplaying() const { return ConditionalStackState == Replaying; }
768
769 ArrayRef<PPConditionalInfo> getStack() const {
770 return ConditionalStack;
771 }
772
773 void doneReplaying() {
774 ConditionalStack.clear();
775 ConditionalStackState = Off;
776 }
777
778 void setStack(ArrayRef<PPConditionalInfo> s) {
779 if (!isRecording() && !isReplaying())
780 return;
781 ConditionalStack.clear();
782 ConditionalStack.append(s.begin(), s.end());
783 }
784
785 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
786
787 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
788
789 void clearSkipInfo() { SkipInfo.reset(); }
790
791 std::optional<PreambleSkipInfo> SkipInfo;
792
793 private:
794 SmallVector<PPConditionalInfo, 4> ConditionalStack;
795 State ConditionalStackState = Off;
796 } PreambleConditionalStack;
797
798 /// The current top of the stack that we're lexing from if
799 /// not expanding a macro and we are lexing directly from source code.
800 ///
801 /// Only one of CurLexer, or CurTokenLexer will be non-null.
802 std::unique_ptr<Lexer> CurLexer;
803
804 /// Lexers that are pending destruction, deferred until the current
805 /// Stack of Lexer unwinds completely (LexLevel returns to 0).
806 /// This avoids use-after-free when HandleEndOfFile is called from
807 /// within a Lexer method that still needs to access its members.
808 SmallVector<std::unique_ptr<Lexer>, 2> PendingDestroyLexers;
809
810 /// The current top of the stack that we're lexing from
811 /// if not expanding a macro.
812 ///
813 /// This is an alias for CurLexer.
814 PreprocessorLexer *CurPPLexer = nullptr;
815
816 /// Used to find the current FileEntry, if CurLexer is non-null
817 /// and if applicable.
818 ///
819 /// This allows us to implement \#include_next and find directory-specific
820 /// properties.
821 ConstSearchDirIterator CurDirLookup = nullptr;
822
823 /// The current macro we are expanding, if we are expanding a macro.
824 ///
825 /// One of CurLexer and CurTokenLexer must be null.
826 std::unique_ptr<TokenLexer> CurTokenLexer;
827
828 /// The kind of lexer we're currently working with.
829 typedef bool (*LexerCallback)(Preprocessor &, Token &);
830 LexerCallback CurLexerCallback = &CLK_Lexer;
831
832 /// If the current lexer is for a submodule that is being built, this
833 /// is that submodule.
834 Module *CurLexerSubmodule = nullptr;
835
836 /// Keeps track of the stack of files currently
837 /// \#included, and macros currently being expanded from, not counting
838 /// CurLexer/CurTokenLexer.
839 struct IncludeStackInfo {
840 LexerCallback CurLexerCallback;
841 Module *TheSubmodule;
842 std::unique_ptr<Lexer> TheLexer;
843 PreprocessorLexer *ThePPLexer;
844 std::unique_ptr<TokenLexer> TheTokenLexer;
845 ConstSearchDirIterator TheDirLookup;
846
847 // The following constructors are completely useless copies of the default
848 // versions, only needed to pacify MSVC.
849 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
850 std::unique_ptr<Lexer> &&TheLexer,
851 PreprocessorLexer *ThePPLexer,
852 std::unique_ptr<TokenLexer> &&TheTokenLexer,
853 ConstSearchDirIterator TheDirLookup)
854 : CurLexerCallback(std::move(CurLexerCallback)),
855 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
856 ThePPLexer(std::move(ThePPLexer)),
857 TheTokenLexer(std::move(TheTokenLexer)),
858 TheDirLookup(std::move(TheDirLookup)) {}
859 };
860 std::vector<IncludeStackInfo> IncludeMacroStack;
861
862 /// Actions invoked when some preprocessor activity is
863 /// encountered (e.g. a file is \#included, etc).
864 std::unique_ptr<PPCallbacks> Callbacks;
865
866 struct MacroExpandsInfo {
867 Token Tok;
868 MacroDefinition MD;
869 SourceRange Range;
870
871 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
872 : Tok(Tok), MD(MD), Range(Range) {}
873 };
874 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
875
876 /// Information about a name that has been used to define a module macro.
877 struct ModuleMacroInfo {
878 /// The most recent macro directive for this identifier.
879 MacroDirective *MD;
880
881 /// The active module macros for this identifier.
882 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
883
884 /// The generation number at which we last updated ActiveModuleMacros.
885 /// \see Preprocessor::VisibleModules.
886 unsigned ActiveModuleMacrosGeneration = 0;
887
888 /// Whether this macro name is ambiguous.
889 bool IsAmbiguous = false;
890
891 /// The module macros that are overridden by this macro.
892 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
893
894 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
895 };
896
897 /// The state of a macro for an identifier.
898 class MacroState {
899 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
900
901 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
902 const IdentifierInfo *II) const {
903 if (II->isOutOfDate())
904 PP.updateOutOfDateIdentifier(*II);
905 // FIXME: Find a spare bit on IdentifierInfo and store a
906 // HasModuleMacros flag.
907 if (!II->hasMacroDefinition() ||
908 (!PP.getLangOpts().Modules &&
909 !PP.getLangOpts().ModulesLocalVisibility) ||
910 !PP.CurSubmoduleState->VisibleModules.getGeneration())
911 return nullptr;
912
913 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
914 if (!Info) {
915 Info = new (PP.getPreprocessorAllocator())
916 ModuleMacroInfo(cast<MacroDirective *>(State));
917 State = Info;
918 }
919
920 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
921 Info->ActiveModuleMacrosGeneration)
922 PP.updateModuleMacroInfo(II, *Info);
923 return Info;
924 }
925
926 public:
927 MacroState() : MacroState(nullptr) {}
928 MacroState(MacroDirective *MD) : State(MD) {}
929
930 MacroState(MacroState &&O) noexcept : State(O.State) {
931 O.State = (MacroDirective *)nullptr;
932 }
933
934 MacroState &operator=(MacroState &&O) noexcept {
935 auto S = O.State;
936 O.State = (MacroDirective *)nullptr;
937 State = S;
938 return *this;
939 }
940
941 ~MacroState() {
942 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
943 Info->~ModuleMacroInfo();
944 }
945
946 MacroDirective *getLatest() const {
947 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
948 return Info->MD;
949 return cast<MacroDirective *>(State);
950 }
951
952 void setLatest(MacroDirective *MD) {
953 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
954 Info->MD = MD;
955 else
956 State = MD;
957 }
958
959 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
960 auto *Info = getModuleInfo(PP, II);
961 return Info ? Info->IsAmbiguous : false;
962 }
963
964 ArrayRef<ModuleMacro *>
965 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
966 if (auto *Info = getModuleInfo(PP, II))
967 return Info->ActiveModuleMacros;
968 return {};
969 }
970
971 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
972 SourceManager &SourceMgr) const {
973 // FIXME: Incorporate module macros into the result of this.
974 if (auto *Latest = getLatest())
975 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
976 return {};
977 }
978
979 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
980 if (auto *Info = getModuleInfo(PP, II)) {
981 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
982 Info->ActiveModuleMacros.begin(),
983 Info->ActiveModuleMacros.end());
984 Info->ActiveModuleMacros.clear();
985 Info->IsAmbiguous = false;
986 }
987 }
988
989 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
990 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
991 return Info->OverriddenMacros;
992 return {};
993 }
994
995 void setOverriddenMacros(Preprocessor &PP,
996 ArrayRef<ModuleMacro *> Overrides) {
997 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
998 if (!Info) {
999 if (Overrides.empty())
1000 return;
1001 Info = new (PP.getPreprocessorAllocator())
1002 ModuleMacroInfo(cast<MacroDirective *>(State));
1003 State = Info;
1004 }
1005 Info->OverriddenMacros.clear();
1006 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
1007 Overrides.begin(), Overrides.end());
1008 Info->ActiveModuleMacrosGeneration = 0;
1009 }
1010 };
1011
1012 /// For each IdentifierInfo that was associated with a macro, we
1013 /// keep a mapping to the history of all macro definitions and #undefs in
1014 /// the reverse order (the latest one is in the head of the list).
1015 ///
1016 /// This mapping lives within the \p CurSubmoduleState.
1017 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
1018
1019 struct SubmoduleState;
1020
1021 /// Information about a submodule that we're currently building.
1022 struct BuildingSubmoduleInfo {
1023 /// The module that we are building.
1024 Module *M;
1025
1026 /// The location at which the module was included.
1027 SourceLocation ImportLoc;
1028
1029 /// Whether we entered this submodule via a pragma.
1030 bool IsPragma;
1031
1032 /// The previous SubmoduleState.
1033 SubmoduleState *OuterSubmoduleState;
1034
1035 /// The number of pending module macro names when we started building this.
1036 unsigned OuterPendingModuleMacroNames;
1037
1038 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
1039 SubmoduleState *OuterSubmoduleState,
1040 unsigned OuterPendingModuleMacroNames)
1041 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
1042 OuterSubmoduleState(OuterSubmoduleState),
1043 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
1044 };
1045 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
1046
1047 /// Information about a submodule's preprocessor state.
1048 struct SubmoduleState {
1049 /// The macros for the submodule.
1050 MacroMap Macros;
1051
1052 /// The set of modules that are visible within the submodule.
1053 VisibleModuleSet VisibleModules;
1054
1055 // FIXME: CounterValue?
1056 // FIXME: PragmaPushMacroInfo?
1057 };
1058 std::map<Module *, SubmoduleState> Submodules;
1059
1060 /// The preprocessor state for preprocessing outside of any submodule.
1061 SubmoduleState NullSubmoduleState;
1062
1063 /// The current submodule state. Will be \p NullSubmoduleState if we're not
1064 /// in a submodule.
1065 SubmoduleState *CurSubmoduleState;
1066
1067 /// The files that have been included.
1068 IncludedFilesSet IncludedFiles;
1069
1070 /// The set of top-level modules that affected preprocessing, but were not
1071 /// imported.
1072 llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1073
1074 /// The set of known macros exported from modules.
1075 llvm::FoldingSet<ModuleMacro> ModuleMacros;
1076
1077 /// The names of potential module macros that we've not yet processed.
1078 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1079
1080 /// The list of module macros, for each identifier, that are not overridden by
1081 /// any other module macro.
1082 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1083 LeafModuleMacros;
1084
1085 /// Macros that we want to warn about because they are not used at the end
1086 /// of the translation unit.
1087 ///
1088 /// We store just their SourceLocations instead of
1089 /// something like MacroInfo*. The benefit of this is that when we are
1090 /// deserializing from PCH, we don't need to deserialize identifier & macros
1091 /// just so that we can report that they are unused, we just warn using
1092 /// the SourceLocations of this set (that will be filled by the ASTReader).
1093 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1094 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1095
1096 /// This is a pair of an optional message and source location used for pragmas
1097 /// that annotate macros like pragma clang restrict_expansion and pragma clang
1098 /// deprecated. This pair stores the optional message and the location of the
1099 /// annotation pragma for use producing diagnostics and notes.
1100 using MsgLocationPair = std::pair<std::string, SourceLocation>;
1101
1102 struct MacroAnnotationInfo {
1103 SourceLocation Location;
1104 std::string Message;
1105 };
1106
1107 struct MacroAnnotations {
1108 std::optional<MacroAnnotationInfo> DeprecationInfo;
1109 std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1110 std::optional<SourceLocation> FinalAnnotationLoc;
1111 };
1112
1113 /// Warning information for macro annotations.
1114 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1115
1116 /// A "freelist" of MacroArgs objects that can be
1117 /// reused for quick allocation.
1118 MacroArgs *MacroArgCache = nullptr;
1119
1120 /// For each IdentifierInfo used in a \#pragma push_macro directive,
1121 /// we keep a MacroInfo stack used to restore the previous macro value.
1122 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1123 PragmaPushMacroInfo;
1124
1125 // Various statistics we track for performance analysis.
1126 unsigned NumDirectives = 0;
1127 unsigned NumDefined = 0;
1128 unsigned NumUndefined = 0;
1129 unsigned NumPragma = 0;
1130 unsigned NumIf = 0;
1131 unsigned NumElse = 0;
1132 unsigned NumEndif = 0;
1133 unsigned NumEnteredSourceFiles = 0;
1134 unsigned MaxIncludeStackDepth = 0;
1135 unsigned NumMacroExpanded = 0;
1136 unsigned NumFnMacroExpanded = 0;
1137 unsigned NumBuiltinMacroExpanded = 0;
1138 unsigned NumFastMacroExpanded = 0;
1139 unsigned NumTokenPaste = 0;
1140 unsigned NumFastTokenPaste = 0;
1141 unsigned NumSkipped = 0;
1142
1143 /// The predefined macros that preprocessor should use from the
1144 /// command line etc.
1145 std::string Predefines;
1146
1147 /// The file ID for the preprocessor predefines.
1148 FileID PredefinesFileID;
1149
1150 /// The file ID for the PCH through header.
1151 FileID PCHThroughHeaderFileID;
1152
1153 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1154 bool SkippingUntilPragmaHdrStop = false;
1155
1156 /// Whether tokens are being skipped until the through header is seen.
1157 bool SkippingUntilPCHThroughHeader = false;
1158
1159 /// Whether the main file is a preprocessed module file.
1160 bool MainFileIsPreprocessedModuleFile = false;
1161
1162 /// \{
1163 /// Cache of macro expanders to reduce malloc traffic.
1164 enum { TokenLexerCacheSize = 8 };
1165 unsigned NumCachedTokenLexers;
1166 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1167 /// \}
1168
1169 /// Keeps macro expanded tokens for TokenLexers.
1170 ///
1171 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1172 /// going to lex to the cache, and when it finishes the tokens are removed
1173 /// from the end of the cache.
1174 SmallVector<Token, 16> MacroExpandedTokens;
1175 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1176
1177 /// A record of the macro definitions and expansions that
1178 /// occurred during preprocessing.
1179 ///
1180 /// This is an optional side structure that can be enabled with
1181 /// \c createPreprocessingRecord() prior to preprocessing.
1182 PreprocessingRecord *Record = nullptr;
1183
1184 /// Cached tokens state.
1185 using CachedTokensTy = SmallVector<Token, 1>;
1186
1187 /// Cached tokens are stored here when we do backtracking or
1188 /// lookahead. They are "lexed" by the CachingLex() method.
1189 CachedTokensTy CachedTokens;
1190
1191 /// The position of the cached token that CachingLex() should
1192 /// "lex" next.
1193 ///
1194 /// If it points beyond the CachedTokens vector, it means that a normal
1195 /// Lex() should be invoked.
1196 CachedTokensTy::size_type CachedLexPos = 0;
1197
1198 /// Stack of backtrack positions, allowing nested backtracks.
1199 ///
1200 /// The EnableBacktrackAtThisPos() method pushes a position to
1201 /// indicate where CachedLexPos should be set when the BackTrack() method is
1202 /// invoked (at which point the last position is popped).
1203 std::vector<CachedTokensTy::size_type> BacktrackPositions;
1204
1205 /// Stack of cached tokens/initial number of cached tokens pairs, allowing
1206 /// nested unannotated backtracks.
1207 std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1208 UnannotatedBacktrackTokens;
1209
1210 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1211 /// This is used to guard against calling this function recursively.
1212 ///
1213 /// See comments at the use-site for more context about why it is needed.
1214 bool SkippingExcludedConditionalBlock = false;
1215
1216 /// Keeps track of skipped range mappings that were recorded while skipping
1217 /// excluded conditional directives. It maps the source buffer pointer at
1218 /// the beginning of a skipped block, to the number of bytes that should be
1219 /// skipped.
1220 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1221
1222 void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1223
1224public:
1225 Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags,
1226 const LangOptions &LangOpts, SourceManager &SM,
1227 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
1228 IdentifierInfoLookup *IILookup = nullptr,
1229 bool OwnsHeaderSearch = false,
1231
1232 ~Preprocessor();
1233
1234 /// Initialize the preprocessor using information about the target.
1235 ///
1236 /// \param Target is owned by the caller and must remain valid for the
1237 /// lifetime of the preprocessor.
1238 /// \param AuxTarget is owned by the caller and must remain valid for
1239 /// the lifetime of the preprocessor.
1240 void Initialize(const TargetInfo &Target,
1241 const TargetInfo *AuxTarget = nullptr);
1242
1243 /// Initialize the preprocessor to parse a model file
1244 ///
1245 /// To parse model files the preprocessor of the original source is reused to
1246 /// preserve the identifier table. However to avoid some duplicate
1247 /// information in the preprocessor some cleanup is needed before it is used
1248 /// to parse model files. This method does that cleanup.
1250
1251 /// Cleanup after model file parsing
1252 void FinalizeForModelFile();
1253
1254 /// Retrieve the preprocessor options used to initialize this preprocessor.
1255 const PreprocessorOptions &getPreprocessorOpts() const { return PPOpts; }
1256
1257 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
1258 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1259
1260 const LangOptions &getLangOpts() const { return LangOpts; }
1261 const TargetInfo &getTargetInfo() const { return *Target; }
1262 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
1263 FileManager &getFileManager() const { return FileMgr; }
1264 SourceManager &getSourceManager() const { return SourceMgr; }
1265 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1266
1267 IdentifierTable &getIdentifierTable() { return Identifiers; }
1268 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
1269 SelectorTable &getSelectorTable() { return Selectors; }
1270 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
1271 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1272
1274 ExternalSource = Source;
1275 }
1276
1278 return ExternalSource;
1279 }
1280
1281 /// Retrieve the module loader associated with this preprocessor.
1282 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1283
1285 return TheModuleLoader.HadFatalFailure;
1286 }
1287
1288 /// Retrieve the number of Directives that have been processed by the
1289 /// Preprocessor.
1290 unsigned getNumDirectives() const {
1291 return NumDirectives;
1292 }
1293
1294 /// True if we are currently preprocessing a #if or #elif directive
1296 return ParsingIfOrElifDirective;
1297 }
1298
1299 /// Control whether the preprocessor retains comments in output.
1300 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1301 this->KeepComments = KeepComments | KeepMacroComments;
1302 this->KeepMacroComments = KeepMacroComments;
1303 }
1304
1305 bool getCommentRetentionState() const { return KeepComments; }
1306
1307 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
1308 bool getPragmasEnabled() const { return PragmasEnabled; }
1309
1311 SuppressIncludeNotFoundError = Suppress;
1312 }
1313
1315 return SuppressIncludeNotFoundError;
1316 }
1317
1318 /// Sets whether the preprocessor is responsible for producing output or if
1319 /// it is producing tokens to be consumed by Parse and Sema.
1320 void setPreprocessedOutput(bool IsPreprocessedOutput) {
1321 PreprocessedOutput = IsPreprocessedOutput;
1322 }
1323
1324 /// Returns true if the preprocessor is responsible for generating output,
1325 /// false if it is producing tokens to be consumed by Parse and Sema.
1326 bool isPreprocessedOutput() const { return PreprocessedOutput; }
1327
1328 /// Return true if we are lexing directly from the specified lexer.
1329 bool isCurrentLexer(const PreprocessorLexer *L) const {
1330 return CurPPLexer == L;
1331 }
1332
1333 /// Return the current lexer being lexed from.
1334 ///
1335 /// Note that this ignores any potentially active macro expansions and _Pragma
1336 /// expansions going on at the time.
1337 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1338
1339 /// Return the current file lexer being lexed from.
1340 ///
1341 /// Note that this ignores any potentially active macro expansions and _Pragma
1342 /// expansions going on at the time.
1344
1345 /// Return the submodule owning the file being lexed. This may not be
1346 /// the current module if we have changed modules since entering the file.
1347 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1348
1349 /// Returns the FileID for the preprocessor predefines.
1350 FileID getPredefinesFileID() const { return PredefinesFileID; }
1351
1352 /// \{
1353 /// Accessors for preprocessor callbacks.
1354 ///
1355 /// Note that this class takes ownership of any PPCallbacks object given to
1356 /// it.
1357 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1358 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1359 if (Callbacks)
1360 C = std::make_unique<PPChainedCallbacks>(std::move(C),
1361 std::move(Callbacks));
1362 Callbacks = std::move(C);
1363 }
1364 void removePPCallbacks() { Callbacks.reset(); }
1365 /// \}
1366
1367 /// Get the number of tokens processed so far.
1368 unsigned getTokenCount() const { return TokenCount; }
1369
1370 /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1371 unsigned getMaxTokens() const { return MaxTokens; }
1372
1374 MaxTokens = Value;
1375 MaxTokensOverrideLoc = Loc;
1376 };
1377
1378 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1379
1380 /// Register a function that would be called on each token in the final
1381 /// expanded token stream.
1382 /// This also reports annotation tokens produced by the parser.
1383 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1384 OnToken = std::move(F);
1385 }
1386
1388 GetDependencyDirectives = &Get;
1389 }
1390
1391 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1392
1393 bool isMacroDefined(StringRef Id) {
1394 return isMacroDefined(&Identifiers.get(Id));
1395 }
1397 return II->hasMacroDefinition() &&
1398 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1399 }
1400
1401 /// Determine whether II is defined as a macro within the module M,
1402 /// if that is a module that we've already preprocessed. Does not check for
1403 /// macros imported into M.
1405 if (!II->hasMacroDefinition())
1406 return false;
1407 auto I = Submodules.find(M);
1408 if (I == Submodules.end())
1409 return false;
1410 auto J = I->second.Macros.find(II);
1411 if (J == I->second.Macros.end())
1412 return false;
1413 auto *MD = J->second.getLatest();
1414 return MD && MD->isDefined();
1415 }
1416
1418 if (!II->hasMacroDefinition())
1419 return {};
1420
1421 MacroState &S = CurSubmoduleState->Macros[II];
1422 auto *MD = S.getLatest();
1423 while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1424 MD = MD->getPrevious();
1425 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1426 S.getActiveModuleMacros(*this, II),
1427 S.isAmbiguous(*this, II));
1428 }
1429
1431 SourceLocation Loc) {
1432 if (!II->hadMacroDefinition())
1433 return {};
1434
1435 MacroState &S = CurSubmoduleState->Macros[II];
1437 if (auto *MD = S.getLatest())
1438 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1439 // FIXME: Compute the set of active module macros at the specified location.
1440 return MacroDefinition(DI.getDirective(),
1441 S.getActiveModuleMacros(*this, II),
1442 S.isAmbiguous(*this, II));
1443 }
1444
1445 /// Given an identifier, return its latest non-imported MacroDirective
1446 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1448 if (!II->hasMacroDefinition())
1449 return nullptr;
1450
1451 auto *MD = getLocalMacroDirectiveHistory(II);
1452 if (!MD || MD->getDefinition().isUndefined())
1453 return nullptr;
1454
1455 return MD;
1456 }
1457
1458 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1459 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1460 }
1461
1463 if (!II->hasMacroDefinition())
1464 return nullptr;
1465 if (auto MD = getMacroDefinition(II))
1466 return MD.getMacroInfo();
1467 return nullptr;
1468 }
1469
1470 /// Given an identifier, return the latest non-imported macro
1471 /// directive for that identifier.
1472 ///
1473 /// One can iterate over all previous macro directives from the most recent
1474 /// one.
1476
1477 /// Add a directive to the macro directive history for this identifier.
1480 SourceLocation Loc) {
1481 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1482 appendMacroDirective(II, MD);
1483 return MD;
1484 }
1489
1490 /// Set a MacroDirective that was loaded from a PCH file.
1492 MacroDirective *MD);
1493
1494 /// Register an exported macro for a module and identifier.
1497 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1499
1500 /// Get the list of leaf (non-overridden) module macros for a name.
1502 if (II->isOutOfDate())
1503 updateOutOfDateIdentifier(*II);
1504 auto I = LeafModuleMacros.find(II);
1505 if (I != LeafModuleMacros.end())
1506 return I->second;
1507 return {};
1508 }
1509
1510 /// Get the list of submodules that we're currently building.
1512 return BuildingSubmoduleStack;
1513 }
1514
1515 /// \{
1516 /// Iterators for the macro history table. Currently defined macros have
1517 /// IdentifierInfo::hasMacroDefinition() set and an empty
1518 /// MacroInfo::getUndefLoc() at the head of the list.
1519 using macro_iterator = MacroMap::const_iterator;
1520
1521 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1522 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1523
1524 llvm::iterator_range<macro_iterator>
1525 macros(bool IncludeExternalMacros = true) const {
1526 macro_iterator begin = macro_begin(IncludeExternalMacros);
1527 macro_iterator end = macro_end(IncludeExternalMacros);
1528 return llvm::make_range(begin, end);
1529 }
1530
1531 /// \}
1532
1533 /// Mark the given clang module as affecting the current clang module or translation unit.
1535 assert(M->isModuleMapModule());
1536 if (!BuildingSubmoduleStack.empty()) {
1537 if (M != BuildingSubmoduleStack.back().M)
1538 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
1539 } else {
1540 AffectingClangModules.insert(M);
1541 }
1542 }
1543
1544 /// Get the set of top-level clang modules that affected preprocessing, but were not
1545 /// imported.
1547 return AffectingClangModules;
1548 }
1549
1550 /// Mark the file as included.
1551 /// Returns true if this is the first time the file was included.
1553 HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
1554 return IncludedFiles.insert(File).second;
1555 }
1556
1557 /// Return true if this header has already been included.
1559 HeaderInfo.getFileInfo(File);
1560 return IncludedFiles.count(File);
1561 }
1562
1563 /// Get the set of included files.
1564 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1565 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1566
1567 /// Return the name of the macro defined before \p Loc that has
1568 /// spelling \p Tokens. If there are multiple macros with same spelling,
1569 /// return the last one defined.
1571 ArrayRef<TokenValue> Tokens) const;
1572
1573 /// Get the predefines for this processor.
1574 /// Used by some third-party tools to inspect and add predefines (see
1575 /// https://github.com/llvm/llvm-project/issues/57483).
1576 const std::string &getPredefines() const { return Predefines; }
1577
1578 /// Set the predefines for this Preprocessor.
1579 ///
1580 /// These predefines are automatically injected when parsing the main file.
1581 void setPredefines(std::string P) { Predefines = std::move(P); }
1582
1583 /// Return information about the specified preprocessor
1584 /// identifier token.
1585 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1586 return &Identifiers.get(Name);
1587 }
1588
1589 /// Add the specified pragma handler to this preprocessor.
1590 ///
1591 /// If \p Namespace is non-null, then it is a token required to exist on the
1592 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1593 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1595 AddPragmaHandler(StringRef(), Handler);
1596 }
1597
1598 /// Remove the specific pragma handler from this preprocessor.
1599 ///
1600 /// If \p Namespace is non-null, then it should be the namespace that
1601 /// \p Handler was added to. It is an error to remove a handler that
1602 /// has not been registered.
1603 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1605 RemovePragmaHandler(StringRef(), Handler);
1606 }
1607
1608 /// Install empty handlers for all pragmas (making them ignored).
1609 void IgnorePragmas();
1610
1611 /// Set empty line handler.
1612 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1613
1614 EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1615
1616 /// Add the specified comment handler to the preprocessor.
1617 void addCommentHandler(CommentHandler *Handler);
1618
1619 /// Remove the specified comment handler.
1620 ///
1621 /// It is an error to remove a handler that has not been registered.
1622 void removeCommentHandler(CommentHandler *Handler);
1623
1624 /// Set the code completion handler to the given object.
1626 CodeComplete = &Handler;
1627 }
1628
1629 /// Retrieve the current code-completion handler.
1631 return CodeComplete;
1632 }
1633
1634 /// Clear out the code completion handler.
1636 CodeComplete = nullptr;
1637 }
1638
1639 /// Hook used by the lexer to invoke the "included file" code
1640 /// completion point.
1641 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1642
1643 /// Hook used by the lexer to invoke the "natural language" code
1644 /// completion point.
1646
1647 /// Set the code completion token for filtering purposes.
1649 CodeCompletionII = Filter;
1650 }
1651
1652 /// Set the code completion token range for detecting replacement range later
1653 /// on.
1655 const SourceLocation End) {
1656 CodeCompletionTokenRange = {Start, End};
1657 }
1659 return CodeCompletionTokenRange;
1660 }
1661
1662 /// Get the code completion token for filtering purposes.
1664 if (CodeCompletionII)
1665 return CodeCompletionII->getName();
1666 return {};
1667 }
1668
1669 /// Retrieve the preprocessing record, or NULL if there is no
1670 /// preprocessing record.
1672
1673 /// Create a new preprocessing record, which will keep track of
1674 /// all macro expansions, macro definitions, etc.
1676
1677 /// Returns true if the FileEntry is the PCH through header.
1678 bool isPCHThroughHeader(const FileEntry *FE);
1679
1680 /// True if creating a PCH with a through header.
1682
1683 /// True if using a PCH with a through header.
1685
1686 /// True if creating a PCH with a #pragma hdrstop.
1688
1689 /// True if using a PCH with a #pragma hdrstop.
1691
1692 /// Skip tokens until after the #include of the through header or
1693 /// until after a #pragma hdrstop.
1695
1696 /// Process directives while skipping until the through header or
1697 /// #pragma hdrstop is found.
1699 SourceLocation HashLoc);
1700
1701 /// Enter the specified FileID as the main source file,
1702 /// which implicitly adds the builtin defines etc.
1703 void EnterMainSourceFile();
1704
1705 /// Inform the preprocessor callbacks that processing is complete.
1706 void EndSourceFile();
1707
1708 /// Add a source file to the top of the include stack and
1709 /// start lexing tokens from it instead of the current buffer.
1710 ///
1711 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1713 SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1714
1715 /// Add a Macro to the top of the include stack and start lexing
1716 /// tokens from it instead of the current buffer.
1717 ///
1718 /// \param Args specifies the tokens input to a function-like macro.
1719 /// \param ILEnd specifies the location of the ')' for a function-like macro
1720 /// or the identifier for an object-like macro.
1722 MacroArgs *Args);
1723
1724private:
1725 /// Add a "macro" context to the top of the include stack,
1726 /// which will cause the lexer to start returning the specified tokens.
1727 ///
1728 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1729 /// will not be subject to further macro expansion. Otherwise, these tokens
1730 /// will be re-macro-expanded when/if expansion is enabled.
1731 ///
1732 /// If \p OwnsTokens is false, this method assumes that the specified stream
1733 /// of tokens has a permanent owner somewhere, so they do not need to be
1734 /// copied. If it is true, it assumes the array of tokens is allocated with
1735 /// \c new[] and the Preprocessor will delete[] it.
1736 ///
1737 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1738 /// set, see the flag documentation for details.
1739 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1740 bool DisableMacroExpansion, bool OwnsTokens,
1741 bool IsReinject);
1742
1743public:
1744 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1745 bool DisableMacroExpansion, bool IsReinject) {
1746 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1747 IsReinject);
1748 }
1749
1750 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1751 bool IsReinject) {
1752 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1753 IsReinject);
1754 }
1755
1756 /// Pop the current lexer/macro exp off the top of the lexer stack.
1757 ///
1758 /// This should only be used in situations where the current state of the
1759 /// top-of-stack lexer is known.
1760 void RemoveTopOfLexerStack();
1761
1762 /// From the point that this method is called, and until
1763 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1764 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1765 /// make the Preprocessor re-lex the same tokens.
1766 ///
1767 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1768 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1769 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1770 ///
1771 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1772 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1773 /// tokens will continue indefinitely.
1774 ///
1775 /// \param Unannotated Whether token annotations are reverted upon calling
1776 /// Backtrack().
1777 void EnableBacktrackAtThisPos(bool Unannotated = false);
1778
1779private:
1780 std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1781
1782 CachedTokensTy PopUnannotatedBacktrackTokens();
1783
1784public:
1785 /// Disable the last EnableBacktrackAtThisPos call.
1787
1788 /// Make Preprocessor re-lex the tokens that were lexed since
1789 /// EnableBacktrackAtThisPos() was previously called.
1790 void Backtrack();
1791
1792 /// True if EnableBacktrackAtThisPos() was called and
1793 /// caching of tokens is on.
1794 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1795
1796 /// True if EnableBacktrackAtThisPos() was called and
1797 /// caching of unannotated tokens is on.
1799 return !UnannotatedBacktrackTokens.empty();
1800 }
1801
1802 /// Lex the next token for this preprocessor.
1803 void Lex(Token &Result);
1804
1805 /// Lex all tokens for this preprocessor until (and excluding) end of file.
1806 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1807
1808 /// Lex a token, forming a header-name token if possible.
1809 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1810
1811 /// Lex the parameters for an #embed directive, returns nullopt on error.
1812 std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1813 bool ForHasEmbed);
1814
1815 /// Whether the main file is a preprocessed module file.
1817 return MainFileIsPreprocessedModuleFile;
1818 }
1819
1820 /// Mark the main file as a preprocessed module file, then the 'module' and
1821 /// 'import' directive recognition will be suppressed. Only
1822 /// '__preprocessed_module' and '__preprocessed_import' are allowed.
1824 MainFileIsPreprocessedModuleFile = true;
1825 }
1826
1828 SmallVectorImpl<Token> &Suffix,
1830 bool AllowMacroExpansion, bool IsPartition);
1831 bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok,
1833 SmallVectorImpl<Token> &DirToks,
1834 bool AllowMacroExpansion, bool IsPartition);
1836 void HandleCXXImportDirective(Token Import);
1838
1839 /// Callback invoked when the lexer sees one of export, import or module token
1840 /// at the start of a line.
1841 ///
1842 /// This consumes the import/module directive, modifies the
1843 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1844 /// read is the correct one.
1846
1847 /// Get the start location of the first pp-token in main file.
1849 assert(FirstPPTokenLoc.isValid() &&
1850 "Did not see the first pp-token in the main file");
1851 return FirstPPTokenLoc;
1852 }
1853
1855 bool StopUntilEOD = false);
1857 bool StopUntilEOD = false);
1858
1860 bool IncludeExports = true);
1861
1863 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1864 }
1865
1866 /// Lex a string literal, which may be the concatenation of multiple
1867 /// string literals and may even come from macro expansion.
1868 /// \returns true on success, false if an error diagnostic has been generated.
1869 bool LexStringLiteral(Token &Result, std::string &String,
1870 const char *DiagnosticTag, bool AllowMacroExpansion) {
1871 if (AllowMacroExpansion)
1872 Lex(Result);
1873 else
1875 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1876 AllowMacroExpansion);
1877 }
1878
1879 /// Complete the lexing of a string literal where the first token has
1880 /// already been lexed (see LexStringLiteral).
1881 bool FinishLexStringLiteral(Token &Result, std::string &String,
1882 const char *DiagnosticTag,
1883 bool AllowMacroExpansion);
1884
1885 /// Lex a token. If it's a comment, keep lexing until we get
1886 /// something not a comment.
1887 ///
1888 /// This is useful in -E -C mode where comments would foul up preprocessor
1889 /// directive handling.
1891 do
1892 Lex(Result);
1893 while (Result.getKind() == tok::comment);
1894 }
1895
1896 /// Just like Lex, but disables macro expansion of identifier tokens.
1898 // Disable macro expansion.
1899 bool OldVal = DisableMacroExpansion;
1900 DisableMacroExpansion = true;
1901 // Lex the token.
1902 Lex(Result);
1903
1904 // Reenable it.
1905 DisableMacroExpansion = OldVal;
1906 }
1907
1908 /// Like LexNonComment, but this disables macro expansion of
1909 /// identifier tokens.
1911 do
1913 while (Result.getKind() == tok::comment);
1914 }
1915
1916 /// Parses a simple integer literal to get its numeric value. Floating
1917 /// point literals and user defined literals are rejected. Used primarily to
1918 /// handle pragmas that accept integer arguments.
1919 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1920
1921 /// Disables macro expansion everywhere except for preprocessor directives.
1923 DisableMacroExpansion = true;
1924 MacroExpansionInDirectivesOverride = true;
1925 }
1926
1928 DisableMacroExpansion = MacroExpansionInDirectivesOverride = false;
1929 }
1930
1931 /// Peeks ahead N tokens and returns that token without consuming any
1932 /// tokens.
1933 ///
1934 /// LookAhead(0) returns the next token that would be returned by Lex(),
1935 /// LookAhead(1) returns the token after it, etc. This returns normal
1936 /// tokens after phase 5. As such, it is equivalent to using
1937 /// 'Lex', not 'LexUnexpandedToken'.
1938 const Token &LookAhead(unsigned N) {
1939 assert(LexLevel == 0 && "cannot use lookahead while lexing");
1940 if (CachedLexPos + N < CachedTokens.size())
1941 return CachedTokens[CachedLexPos+N];
1942 else
1943 return PeekAhead(N+1);
1944 }
1945
1946 /// When backtracking is enabled and tokens are cached,
1947 /// this allows to revert a specific number of tokens.
1948 ///
1949 /// Note that the number of tokens being reverted should be up to the last
1950 /// backtrack position, not more.
1951 void RevertCachedTokens(unsigned N) {
1952 assert(isBacktrackEnabled() &&
1953 "Should only be called when tokens are cached for backtracking");
1954 assert(signed(CachedLexPos) - signed(N) >=
1955 signed(LastBacktrackPos().first) &&
1956 "Should revert tokens up to the last backtrack position, not more");
1957 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1958 "Corrupted backtrack positions ?");
1959 CachedLexPos -= N;
1960 }
1961
1962 /// Enters a token in the token stream to be lexed next.
1963 ///
1964 /// If BackTrack() is called afterwards, the token will remain at the
1965 /// insertion point.
1966 /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1967 /// flag set. See the flag documentation for details.
1968 void EnterToken(const Token &Tok, bool IsReinject) {
1969 if (LexLevel) {
1970 // It's not correct in general to enter caching lex mode while in the
1971 // middle of a nested lexing action.
1972 auto TokCopy = std::make_unique<Token[]>(1);
1973 TokCopy[0] = Tok;
1974 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1975 } else {
1976 EnterCachingLexMode();
1977 assert(IsReinject && "new tokens in the middle of cached stream");
1978 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1979 }
1980 }
1981
1982 /// We notify the Preprocessor that if it is caching tokens (because
1983 /// backtrack is enabled) it should replace the most recent cached tokens
1984 /// with the given annotation token. This function has no effect if
1985 /// backtracking is not enabled.
1986 ///
1987 /// Note that the use of this function is just for optimization, so that the
1988 /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1989 /// invoked.
1991 assert(Tok.isAnnotation() && "Expected annotation token");
1992 if (CachedLexPos != 0 && isBacktrackEnabled())
1993 AnnotatePreviousCachedTokens(Tok);
1994 }
1995
1996 /// Get the location of the last cached token, suitable for setting the end
1997 /// location of an annotation token.
1999 assert(CachedLexPos != 0);
2000 return CachedTokens[CachedLexPos-1].getLastLoc();
2001 }
2002
2003 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
2004 /// CachedTokens.
2005 bool IsPreviousCachedToken(const Token &Tok) const;
2006
2007 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
2008 /// in \p NewToks.
2009 ///
2010 /// Useful when a token needs to be split into smaller ones and the most
2011 /// recent token in CachedTokens must be updated to reflect that.
2013
2014 /// Replace the last token with an annotation token.
2015 ///
2016 /// Like AnnotateCachedTokens(), this routine replaces an
2017 /// already-parsed (and resolved) token with an annotation
2018 /// token. However, this routine only replaces the last token with
2019 /// the annotation token; it does not affect any other cached
2020 /// tokens. This function has no effect if backtracking is not
2021 /// enabled.
2023 assert(Tok.isAnnotation() && "Expected annotation token");
2024 if (CachedLexPos != 0 && isBacktrackEnabled())
2025 CachedTokens[CachedLexPos-1] = Tok;
2026 }
2027
2028 /// Enter an annotation token into the token stream.
2030 void *AnnotationVal);
2031
2032 /// Determine whether it's possible for a future call to Lex to produce an
2033 /// annotation token created by a previous call to EnterAnnotationToken.
2035 return CurLexerCallback != CLK_Lexer;
2036 }
2037
2038 /// Update the current token to represent the provided
2039 /// identifier, in order to cache an action performed by typo correction.
2041 assert(Tok.getIdentifierInfo() && "Expected identifier token");
2042 if (CachedLexPos != 0 && isBacktrackEnabled())
2043 CachedTokens[CachedLexPos-1] = Tok;
2044 }
2045
2046 /// Recompute the current lexer kind based on the CurLexer/
2047 /// CurTokenLexer pointers.
2048 void recomputeCurLexerKind();
2049
2050 /// Returns true if incremental processing is enabled
2051 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
2052
2053 /// Enables the incremental processing
2054 void enableIncrementalProcessing(bool value = true) {
2055 IncrementalProcessing = value;
2056 }
2057
2058 /// Specify the point at which code-completion will be performed.
2059 ///
2060 /// \param File the file in which code completion should occur. If
2061 /// this file is included multiple times, code-completion will
2062 /// perform completion the first time it is included. If NULL, this
2063 /// function clears out the code-completion point.
2064 ///
2065 /// \param Line the line at which code completion should occur
2066 /// (1-based).
2067 ///
2068 /// \param Column the column at which code completion should occur
2069 /// (1-based).
2070 ///
2071 /// \returns true if an error occurred, false otherwise.
2073 unsigned Column);
2074
2075 /// Determine if we are performing code completion.
2076 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
2077
2078 /// Returns the location of the code-completion point.
2079 ///
2080 /// Returns an invalid location if code-completion is not enabled or the file
2081 /// containing the code-completion point has not been lexed yet.
2082 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
2083
2084 /// Returns the start location of the file of code-completion point.
2085 ///
2086 /// Returns an invalid location if code-completion is not enabled or the file
2087 /// containing the code-completion point has not been lexed yet.
2089 return CodeCompletionFileLoc;
2090 }
2091
2092 /// Returns true if code-completion is enabled and we have hit the
2093 /// code-completion point.
2094 bool isCodeCompletionReached() const { return CodeCompletionReached; }
2095
2096 /// Note that we hit the code-completion point.
2098 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
2099 CodeCompletionReached = true;
2100 // Silence any diagnostics that occur after we hit the code-completion.
2102 }
2103
2104 /// The location of the currently-active \#pragma clang
2105 /// arc_cf_code_audited begin.
2106 ///
2107 /// Returns an invalid location if there is no such pragma active.
2109 return PragmaARCCFCodeAuditedInfo;
2110 }
2111
2112 /// Set the location of the currently-active \#pragma clang
2113 /// arc_cf_code_audited begin. An invalid location ends the pragma.
2115 SourceLocation Loc) {
2116 PragmaARCCFCodeAuditedInfo = IdentifierLoc(Loc, Ident);
2117 }
2118
2119 /// The location of the currently-active \#pragma clang
2120 /// assume_nonnull begin.
2121 ///
2122 /// Returns an invalid location if there is no such pragma active.
2124 return PragmaAssumeNonNullLoc;
2125 }
2126
2127 /// Set the location of the currently-active \#pragma clang
2128 /// assume_nonnull begin. An invalid location ends the pragma.
2130 PragmaAssumeNonNullLoc = Loc;
2131 }
2132
2133 /// Get the location of the recorded unterminated \#pragma clang
2134 /// assume_nonnull begin in the preamble, if one exists.
2135 ///
2136 /// Returns an invalid location if the preamble did not end with
2137 /// such a pragma active or if there is no recorded preamble.
2139 return PreambleRecordedPragmaAssumeNonNullLoc;
2140 }
2141
2142 /// Record the location of the unterminated \#pragma clang
2143 /// assume_nonnull begin in the preamble.
2145 PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2146 }
2147
2148 /// Set the directory in which the main file should be considered
2149 /// to have been found, if it is not a real file.
2150 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2151
2152 /// Instruct the preprocessor to skip part of the main source file.
2153 ///
2154 /// \param Bytes The number of bytes in the preamble to skip.
2155 ///
2156 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2157 /// start of a line.
2158 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2159 SkipMainFilePreamble.first = Bytes;
2160 SkipMainFilePreamble.second = StartOfLine;
2161 }
2162
2163 /// Forwarding function for diagnostics. This emits a diagnostic at
2164 /// the specified Token's location, translating the token's start
2165 /// position in the current buffer into a SourcePosition object for rendering.
2166 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2167 return Diags->Report(Loc, DiagID);
2168 }
2169
2170 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2171 return Diags->Report(Tok.getLocation(), DiagID);
2172 }
2173
2174 /// Return the 'spelling' of the token at the given
2175 /// location; does not go up to the spelling location or down to the
2176 /// expansion location.
2177 ///
2178 /// \param buffer A buffer which will be used only if the token requires
2179 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
2180 /// \param invalid If non-null, will be set \c true if an error occurs.
2182 SmallVectorImpl<char> &buffer,
2183 bool *invalid = nullptr) const {
2184 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2185 }
2186
2187 /// Return the 'spelling' of the Tok token.
2188 ///
2189 /// The spelling of a token is the characters used to represent the token in
2190 /// the source file after trigraph expansion and escaped-newline folding. In
2191 /// particular, this wants to get the true, uncanonicalized, spelling of
2192 /// things like digraphs, UCNs, etc.
2193 ///
2194 /// \param Invalid If non-null, will be set \c true if an error occurs.
2195 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2196 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2197 }
2198
2199 /// Get the spelling of a token into a preallocated buffer, instead
2200 /// of as an std::string.
2201 ///
2202 /// The caller is required to allocate enough space for the token, which is
2203 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2204 /// actual result is returned.
2205 ///
2206 /// Note that this method may do two possible things: it may either fill in
2207 /// the buffer specified with characters, or it may *change the input pointer*
2208 /// to point to a constant buffer with the data already in it (avoiding a
2209 /// copy). The caller is not allowed to modify the returned buffer pointer
2210 /// if an internal buffer is returned.
2211 unsigned getSpelling(const Token &Tok, const char *&Buffer,
2212 bool *Invalid = nullptr) const {
2213 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2214 }
2215
2216 /// Get the spelling of a token into a SmallVector.
2217 ///
2218 /// Note that the returned StringRef may not point to the
2219 /// supplied buffer if a copy can be avoided.
2220 StringRef getSpelling(const Token &Tok,
2221 SmallVectorImpl<char> &Buffer,
2222 bool *Invalid = nullptr) const;
2223
2224 /// Relex the token at the specified location.
2225 /// \returns true if there was a failure, false on success.
2227 bool IgnoreWhiteSpace = false) {
2228 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2229 }
2230
2231 /// Given a Token \p Tok that is a numeric constant with length 1,
2232 /// return the value of constant as an unsigned 8-bit integer.
2233 uint8_t
2235 bool *Invalid = nullptr) const {
2236 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2237 Tok.getLength() == 1 && "Called on unsupported token");
2238 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2239
2240 // If the token is carrying a literal data pointer, just use it.
2241 if (const char *D = Tok.getLiteralData())
2242 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2243
2244 assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2245 // Otherwise, fall back on getCharacterData, which is slower, but always
2246 // works.
2247 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2248 }
2249
2250 /// Retrieve the name of the immediate macro expansion.
2251 ///
2252 /// This routine starts from a source location, and finds the name of the
2253 /// macro responsible for its immediate expansion. It looks through any
2254 /// intervening macro argument expansions to compute this. It returns a
2255 /// StringRef that refers to the SourceManager-owned buffer of the source
2256 /// where that macro name is spelled. Thus, the result shouldn't out-live
2257 /// the SourceManager.
2259 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2260 }
2261
2262 /// Plop the specified string into a scratch buffer and set the
2263 /// specified token's location and length to it.
2264 ///
2265 /// If specified, the source location provides a location of the expansion
2266 /// point of the token.
2267 void CreateString(StringRef Str, Token &Tok,
2268 SourceLocation ExpansionLocStart = SourceLocation(),
2269 SourceLocation ExpansionLocEnd = SourceLocation());
2270
2271 /// Split the first Length characters out of the token starting at TokLoc
2272 /// and return a location pointing to the split token. Re-lexing from the
2273 /// split token will return the split token rather than the original.
2274 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2275
2276 /// Computes the source location just past the end of the
2277 /// token at this source location.
2278 ///
2279 /// This routine can be used to produce a source location that
2280 /// points just past the end of the token referenced by \p Loc, and
2281 /// is generally used when a diagnostic needs to point just after a
2282 /// token where it expected something different than it received. If
2283 /// the returned source location would not be meaningful (e.g., if
2284 /// it points into a macro), this routine returns an invalid
2285 /// source location.
2286 ///
2287 /// \param Offset an offset from the end of the token, where the source
2288 /// location should refer to. The default offset (0) produces a source
2289 /// location pointing just past the end of the token; an offset of 1 produces
2290 /// a source location pointing to the last character in the token, etc.
2292 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2293 }
2294
2295 /// Returns true if the given MacroID location points at the first
2296 /// token of the macro expansion.
2297 ///
2298 /// \param MacroBegin If non-null and function returns true, it is set to
2299 /// begin location of the macro.
2301 SourceLocation *MacroBegin = nullptr) const {
2302 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2303 MacroBegin);
2304 }
2305
2306 /// Returns true if the given MacroID location points at the last
2307 /// token of the macro expansion.
2308 ///
2309 /// \param MacroEnd If non-null and function returns true, it is set to
2310 /// end location of the macro.
2312 SourceLocation *MacroEnd = nullptr) const {
2313 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2314 }
2315
2316 /// Print the token to stderr, used for debugging.
2317 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2318 void DumpLocation(SourceLocation Loc) const;
2319 void DumpMacro(const MacroInfo &MI) const;
2320 void dumpMacroInfo(const IdentifierInfo *II);
2321
2322 /// Given a location that specifies the start of a
2323 /// token, return a new location that specifies a character within the token.
2325 unsigned Char) const {
2326 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2327 }
2328
2329 /// Increment the counters for the number of token paste operations
2330 /// performed.
2331 ///
2332 /// If fast was specified, this is a 'fast paste' case we handled.
2333 void IncrementPasteCounter(bool isFast) {
2334 if (isFast)
2335 ++NumFastTokenPaste;
2336 else
2337 ++NumTokenPaste;
2338 }
2339
2340 void PrintStats();
2341
2342 size_t getTotalMemory() const;
2343
2344 /// When the macro expander pastes together a comment (/##/) in Microsoft
2345 /// mode, this method handles updating the current state, returning the
2346 /// token on the next source line.
2348
2349 //===--------------------------------------------------------------------===//
2350 // Preprocessor callback methods. These are invoked by a lexer as various
2351 // directives and events are found.
2352
2353 /// Given a tok::raw_identifier token, look up the
2354 /// identifier information for the token and install it into the token,
2355 /// updating the token kind accordingly.
2356 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2357
2358private:
2359 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2360
2361public:
2362 /// Specifies the reason for poisoning an identifier.
2363 ///
2364 /// If that identifier is accessed while poisoned, then this reason will be
2365 /// used instead of the default "poisoned" diagnostic.
2366 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2367
2368 /// Display reason for poisoned identifier.
2369 void HandlePoisonedIdentifier(Token & Identifier);
2370
2372 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2373 if(II->isPoisoned()) {
2374 HandlePoisonedIdentifier(Identifier);
2375 }
2376 }
2377 }
2378
2379 /// isNextPPTokenOneOf - Check whether the next pp-token is one of the
2380 /// specificed token kind. this method should have no observable side-effect
2381 /// on the lexed tokens.
2382 template <typename... Ts> bool isNextPPTokenOneOf(Ts... Ks) const {
2383 static_assert(sizeof...(Ts) > 0,
2384 "requires at least one tok::TokenKind specified");
2385 auto NextTokOpt = peekNextPPToken();
2386 return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false;
2387 }
2388
2389private:
2390 /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
2391 /// buffer controlled by this lexer, otherwise return the next unexpanded
2392 /// token.
2393 std::optional<Token> peekNextPPToken() const;
2394
2395 /// Identifiers used for SEH handling in Borland. These are only
2396 /// allowed in particular circumstances
2397 // __except block
2398 IdentifierInfo *Ident__exception_code,
2399 *Ident___exception_code,
2400 *Ident_GetExceptionCode;
2401 // __except filter expression
2402 IdentifierInfo *Ident__exception_info,
2403 *Ident___exception_info,
2404 *Ident_GetExceptionInfo;
2405 // __finally
2406 IdentifierInfo *Ident__abnormal_termination,
2407 *Ident___abnormal_termination,
2408 *Ident_AbnormalTermination;
2409
2410 const char *getCurLexerEndPos();
2411 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2412
2413public:
2414 void PoisonSEHIdentifiers(bool Poison = true); // Borland
2415
2416 /// Callback invoked when the lexer reads an identifier and has
2417 /// filled in the tokens IdentifierInfo member.
2418 ///
2419 /// This callback potentially macro expands it or turns it into a named
2420 /// token (like 'for').
2421 ///
2422 /// \returns true if we actually computed a token, false if we need to
2423 /// lex again.
2424 bool HandleIdentifier(Token &Identifier);
2425
2426 /// Callback invoked when the lexer hits the end of the current file.
2427 ///
2428 /// This either returns the EOF token and returns true, or
2429 /// pops a level off the include stack and returns false, at which point the
2430 /// client should call lex again.
2431 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2432
2433 /// Callback invoked when the current TokenLexer hits the end of its
2434 /// token stream.
2436
2437 /// Callback invoked when the lexer sees a # token at the start of a
2438 /// line.
2439 ///
2440 /// This consumes the directive, modifies the lexer/preprocessor state, and
2441 /// advances the lexer(s) so that the next token read is the correct one.
2443
2444 /// Ensure that the next token is a tok::eod token.
2445 ///
2446 /// If not, emit a diagnostic and consume up until the eod.
2447 /// If \p EnableMacros is true, then we consider macros that expand to zero
2448 /// tokens as being ok.
2449 ///
2450 /// If \p ExtraToks is not null, the extra tokens will be saved in this
2451 /// container.
2452 ///
2453 /// \return The location of the end of the directive (the terminating
2454 /// newline).
2456 CheckEndOfDirective(StringRef DirType, bool EnableMacros = false,
2457 SmallVectorImpl<Token> *ExtraToks = nullptr);
2458
2459 /// Read and discard all tokens remaining on the current line until
2460 /// the tok::eod token is found. Returns the range of the skipped tokens.
2463 Token Tmp;
2464 return DiscardUntilEndOfDirective(Tmp, DiscardedToks);
2465 }
2466
2467 /// Same as above except retains the token that was found.
2470 SmallVectorImpl<Token> *DiscardedToks = nullptr);
2471
2472 /// Returns true if the preprocessor has seen a use of
2473 /// __DATE__ or __TIME__ in the file so far.
2474 bool SawDateOrTime() const {
2475 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2476 }
2477 uint32_t getCounterValue() const { return CounterValue; }
2478 void setCounterValue(uint32_t V) { CounterValue = V; }
2479
2481 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2482 "FPEvalMethod should be set either from command line or from the "
2483 "target info");
2484 return CurrentFPEvalMethod;
2485 }
2486
2488 return TUFPEvalMethod;
2489 }
2490
2492 return LastFPEvalPragmaLocation;
2493 }
2494
2498 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2499 // This is the location of the '#pragma float_control' where the
2500 // execution state is modified.
2501 LastFPEvalPragmaLocation = PragmaLoc;
2502 CurrentFPEvalMethod = Val;
2503 TUFPEvalMethod = Val;
2504 }
2505
2508 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2509 TUFPEvalMethod = Val;
2510 }
2511
2512 /// Retrieves the module that we're currently building, if any.
2514
2515 /// Retrieves the module whose implementation we're currently compiling, if any.
2517
2518 /// If we are preprocessing a named module.
2519 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2520
2521 /// If we are preprocessing a named interface unit.
2522 /// Note that a module implementation partition is not considered a
2523 /// named interface unit here, although it is importable,
2524 /// to ease the parsing.
2526 return ModuleDeclState.isNamedInterface();
2527 }
2528
2529 /// Get the named module name we're preprocessing.
2530 /// Requires we're preprocessing a named module.
2531 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2532
2533 /// If we are implementing an implementation module unit.
2534 /// Note that the module implementation partition is not considered as an
2535 /// implementation unit.
2537 return ModuleDeclState.isImplementationUnit();
2538 }
2539
2540 /// If we're importing a standard C++20 named module.
2542 assert(getLangOpts().CPlusPlusModules &&
2543 "Import C++ named modules are only valid for C++20 modules");
2544 return ImportingCXXNamedModules;
2545 }
2546
2547 /// Allocate a new MacroInfo object with the provided SourceLocation.
2549
2550 /// Turn the specified lexer token into a fully checked and spelled
2551 /// filename, e.g. as an operand of \#include.
2552 ///
2553 /// The caller is expected to provide a buffer that is large enough to hold
2554 /// the spelling of the filename, but is also expected to handle the case
2555 /// when this method decides to use a different buffer.
2556 ///
2557 /// \returns true if the input filename was in <>'s or false if it was
2558 /// in ""'s.
2559 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2560
2561 /// Given a "foo" or <foo> reference, look up the indicated file.
2562 ///
2563 /// Returns std::nullopt on failure. \p isAngled indicates whether the file
2564 /// reference is for system \#include's or not (i.e. using <> instead of "").
2566 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2567 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2568 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2569 SmallVectorImpl<char> *RelativePath,
2570 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2571 bool *IsFrameworkFound, bool SkipCache = false,
2572 bool OpenFile = true, bool CacheFailures = true);
2573
2574 /// Given a "Filename" or <Filename> reference, look up the indicated embed
2575 /// resource. \p isAngled indicates whether the file reference is for
2576 /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
2577 /// is true, the file looked up is opened for reading, otherwise it only
2578 /// validates that the file exists.
2579 ///
2580 /// Returns std::nullopt on failure.
2581 OptionalFileEntryRef LookupEmbedFile(StringRef Filename, bool isAngled,
2582 bool OpenFile);
2583
2584 /// Return true if we're in the top-level file, not in a \#include.
2585 bool isInPrimaryFile() const;
2586
2587 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2588 /// followed by EOD. Return true if the token is not a valid on-off-switch.
2590
2591 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2592 bool *ShadowFlag = nullptr);
2593
2594 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2595 Module *LeaveSubmodule(bool ForPragma);
2596
2597private:
2598 friend void TokenLexer::ExpandFunctionArguments();
2599
/// Save the current lexer state as a new entry on IncludeMacroStack.
///
/// The entry captures the lexer callback, the lexer submodule, the current
/// lexer, the current PP lexer, the current token lexer and the current
/// directory lookup. Must not be called while the caching lexer is current.
2600 void PushIncludeMacroStack() {
2601 assert(CurLexerCallback != CLK_CachingLexer &&
2602 "cannot push a caching lexer");
// CurLexer and CurTokenLexer are handed over with std::move; the remaining
// values are passed as they are.
2603 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
2604 std::move(CurLexer), CurPPLexer,
2605 std::move(CurTokenLexer), CurDirLookup);
// CurPPLexer was not moved from, so it has to be cleared explicitly.
2606 CurPPLexer = nullptr;
2607 }
2608
2609 void PopIncludeMacroStack() {
2610 if (CurLexer)
2611 PendingDestroyLexers.push_back(std::move(CurLexer));
2612 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2613 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2614 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2615 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
2616 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2617 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2618 IncludeMacroStack.pop_back();
2619 }
2620
2621 void PropagateLineStartLeadingSpaceInfo(Token &Result);
2622
2623 /// Determine whether we need to create module macros for #defines in the
2624 /// current context.
2625 bool needModuleMacros() const;
2626
2627 /// Update the set of active module macros and ambiguity flag for a module
2628 /// macro name.
2629 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2630
2631 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2632 SourceLocation Loc);
2633 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2634 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2635 bool isPublic);
2636
2637 /// Lex and validate a macro name, which occurs after a
2638 /// \#define or \#undef.
2639 ///
2640 /// \param MacroNameTok Token that represents the name defined or undefined.
2641 /// \param IsDefineUndef Kind of preprocessor directive.
2642 /// \param ShadowFlag Points to flag that is set if macro name shadows
2643 /// a keyword.
2644 ///
2645 /// This emits a diagnostic, sets the token kind to eod,
2646 /// and discards the rest of the macro line if the macro name is invalid.
2647 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2648 bool *ShadowFlag = nullptr);
2649
2650 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2651 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2652 /// doing so performs certain validity checks including (but not limited to):
2653 /// - # (stringization) is followed by a macro parameter
2654 /// \param MacroNameTok - Token that represents the macro name
2655 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2656 ///
2657 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2658 /// returns a nullptr if an invalid sequence of tokens is encountered.
2659 MacroInfo *ReadOptionalMacroParameterListAndBody(
2660 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2661
2662 /// The ( starting an argument list of a macro definition has just been read.
2663 /// Lex the rest of the parameters and the closing ), updating \p MI with
2664 /// what we learn and saving in \p LastTok the last token read.
2665 /// Return true if an error occurs parsing the arg list.
2666 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2667
2668 /// Provide a suggestion for a typoed directive. If there is no typo, then
2669 /// just skip suggesting.
2670 ///
2671 /// \param Tok - Token that represents the directive
2672 /// \param Directive - String reference for the directive name
2673 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2674
2675 /// We just read a \#if or related directive and decided that the
2676 /// subsequent tokens are in the \#if'd out portion of the
2677 /// file. Lex the rest of the file, until we see an \#endif. If \p
2678 /// FoundNonSkipPortion is true, then we have already emitted code for part of
2679 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2680 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2681 /// already seen one so a \#else directive is a duplicate. When this returns,
2682 /// the caller can lex the first valid token.
2683 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2684 SourceLocation IfTokenLoc,
2685 bool FoundNonSkipPortion, bool FoundElse,
2686 SourceLocation ElseLoc = SourceLocation());
2687
2688 /// Information about the result of evaluating an expression for a
2689 /// preprocessor directive.
///
/// None of the members has a default initializer, so whoever creates a
/// result is responsible for setting every field.
2690 struct DirectiveEvalResult {
2691 /// The integral value of the expression. This is optional, so it may be
/// empty (presumably when no value could be computed -- confirm against
/// the code that fills it in).
2692 std::optional<llvm::APSInt> Value;
2693
2694 /// Whether the expression was evaluated as true or not.
2695 bool Conditional;
2696
2697 /// True if the expression contained identifiers that were undefined.
2698 bool IncludedUndefinedIds;
2699
2700 /// The source range for the expression.
2701 SourceRange ExprRange;
2702 };
2703
2704 /// Evaluate an integer constant expression that may occur after a
2705 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2706 ///
2707 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2708 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2709 bool CheckForEoD = true);
2710
2711 /// Evaluate an integer constant expression that may occur after a
2712 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2713 ///
2714 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2715 /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2716 /// in the evaluated expression or not.
2717 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2718 Token &Tok,
2719 bool &EvaluatedDefined,
2720 bool CheckForEoD = true);
2721
2722 /// Process a '__has_embed("path" [, ...])' expression.
2723 ///
2724 /// Returns predefined `__STDC_EMBED_*` macro values if
2725 /// successful.
2726 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2727
2728 /// Process a '__has_include("path")' expression.
2729 ///
2730 /// Returns true if successful.
2731 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2732
2733 /// Process a '__has_include_next("path")' expression.
2734 ///
2735 /// Returns true if successful.
2736 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2737
2738 /// Get the directory and file from which to start \#include_next lookup.
2739 std::pair<ConstSearchDirIterator, const FileEntry *>
2740 getIncludeNextStart(const Token &IncludeNextTok) const;
2741
2742 /// Install the standard preprocessor pragmas:
2743 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2744 void RegisterBuiltinPragmas();
2745
2746 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
2747 /// table and mark it as a builtin macro to be expanded.
2748 IdentifierInfo *RegisterBuiltinMacro(const char *Name) {
2749 // Get the identifier.
2750 IdentifierInfo *Id = getIdentifierInfo(Name);
2751
2752 // Mark it as being a macro that is builtin.
2753 MacroInfo *MI = AllocateMacroInfo(SourceLocation());
2754 MI->setIsBuiltinMacro();
2756 return Id;
2757 }
2758
2759 /// Register builtin macros such as __LINE__ with the identifier table.
2760 void RegisterBuiltinMacros();
2761
2762 /// If an identifier token is read that is to be expanded as a macro, handle
2763 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2764 /// otherwise the caller should lex again.
2765 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2766
2767 /// Cache macro expanded tokens for TokenLexers.
2768 ///
2769 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
2770 /// is going to lex to the cache, and when it finishes the tokens are removed
2771 /// from the end of the cache.
2772 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2773 ArrayRef<Token> tokens);
2774
2775 void removeCachedMacroExpandedTokensOfLastLexer();
2776
2777 /// After reading "MACRO(", this method is invoked to read all of the formal
2778 /// arguments specified for the macro invocation. Returns null on error.
2779 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2780 SourceLocation &MacroEnd);
2781
2782 /// If an identifier token is read that is to be expanded
2783 /// as a builtin macro, handle it and return the next token as 'Tok'.
2784 void ExpandBuiltinMacro(Token &Tok);
2785
2786 /// Read a \c _Pragma directive, slice it up, process it, then
2787 /// return the first token after the directive.
2788 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2789 void Handle_Pragma(Token &Tok);
2790
2791 /// Like Handle_Pragma except the pragma text is not enclosed within
2792 /// a string literal.
2793 void HandleMicrosoft__pragma(Token &Tok);
2794
2795 /// Add a lexer to the top of the include stack and
2796 /// start lexing tokens from it instead of the current buffer.
2797 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2798
2799 /// Set the FileID for the preprocessor predefines.
2800 void setPredefinesFileID(FileID FID) {
2801 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2802 PredefinesFileID = FID;
2803 }
2804
2805 /// Set the FileID for the PCH through header.
2806 void setPCHThroughHeaderFileID(FileID FID);
2807
2808 /// Returns true if we are lexing from a file and not a
2809 /// pragma or a macro.
2810 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2811 return L ? !L->isPragmaLexer() : P != nullptr;
2812 }
2813
2814 static bool IsFileLexer(const IncludeStackInfo& I) {
2815 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2816 }
2817
2818 bool IsFileLexer() const {
2819 return IsFileLexer(CurLexer.get(), CurPPLexer);
2820 }
2821
2822 //===--------------------------------------------------------------------===//
2823 // Standard Library Identification
2824 std::optional<CXXStandardLibraryVersionInfo> CXXStandardLibraryVersion;
2825
2826public:
2827 std::optional<std::uint64_t> getStdLibCxxVersion();
2828 bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion);
2829
2830private:
2831 //===--------------------------------------------------------------------===//
2832 // Caching stuff.
2833 void CachingLex(Token &Result);
2834
2835 bool InCachingLexMode() const {
2836 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2837 // that we are past EOF, not that we are in CachingLex mode.
2838 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2839 }
2840
2841 void EnterCachingLexMode();
2842 void EnterCachingLexModeUnchecked();
2843
2844 void ExitCachingLexMode() {
2845 if (InCachingLexMode())
2847 }
2848
2849 const Token &PeekAhead(unsigned N);
2850 void AnnotatePreviousCachedTokens(const Token &Tok);
2851
2852 //===--------------------------------------------------------------------===//
2853 /// Handle*Directive - implement the various preprocessor directives. These
2854 /// should side-effect the current preprocessor object so that the next call
2855 /// to Lex() will return the appropriate token next.
2856 void HandleLineDirective();
2857 void HandleDigitDirective(Token &Tok);
2858 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2859 void HandleIdentSCCSDirective(Token &Tok);
2860 void HandleMacroPublicDirective(Token &Tok);
2861 void HandleMacroPrivateDirective();
2862
2863 /// An additional notification that can be produced by a header inclusion or
2864 /// import to tell the parser what happened.
2865 struct ImportAction {
2866 enum ActionKind {
2867 None,
2868 ModuleBegin,
2869 ModuleImport,
2870 HeaderUnitImport,
2871 SkippedModuleImport,
2872 Failure,
2873 } Kind;
2874 Module *ModuleForHeader = nullptr;
2875
2876 ImportAction(ActionKind AK, Module *Mod = nullptr)
2877 : Kind(AK), ModuleForHeader(Mod) {
2878 assert((AK == None || Mod || AK == Failure) &&
2879 "no module for module action");
2880 }
2881 };
2882
2883 OptionalFileEntryRef LookupHeaderIncludeOrImport(
2884 ConstSearchDirIterator *CurDir, StringRef &Filename,
2885 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2886 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2887 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2888 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2889 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2890 ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2891 // Binary data inclusion
2892 void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok);
2893 void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2894 const LexEmbedParametersResult &Params,
2895 StringRef BinaryContents, StringRef FileName);
2896
2897 // File inclusion.
2898 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2899 ConstSearchDirIterator LookupFrom = nullptr,
2900 const FileEntry *LookupFromFile = nullptr);
2901 ImportAction
2902 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2903 Token &FilenameTok, SourceLocation EndLoc,
2904 ConstSearchDirIterator LookupFrom = nullptr,
2905 const FileEntry *LookupFromFile = nullptr);
2906 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2907 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2908 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2909 void HandleMicrosoftImportDirective(Token &Tok);
2910 void HandleObjCImportDirective(Token &AtTok, Token &ImportTok);
2911
2912public:
2913 /// Check that the given module is available, producing a diagnostic if not.
2914 /// \return \c true if the check failed (because the module is not available).
2915 /// \c false if the module appears to be usable.
2916 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2917 const TargetInfo &TargetInfo,
2918 const Module &M, DiagnosticsEngine &Diags);
2919
2920 // Module inclusion testing.
2921 /// Find the module that owns the source or header file that
2922 /// \p Loc points to. If the location is in a file that was included
2923 /// into a module, or is outside any module, returns nullptr.
2924 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2925
2926 /// We want to produce a diagnostic at location IncLoc concerning an
2927 /// unreachable effect at location MLoc (eg, where a desired entity was
2928 /// declared or defined). Determine whether the right way to make MLoc
2929 /// reachable is by #include, and if so, what header should be included.
2930 ///
2931 /// This is not necessarily fast, and might load unexpected module maps, so
2932 /// should only be called by code that intends to produce an error.
2933 ///
2934 /// \param IncLoc The location at which the missing effect was detected.
2935 /// \param MLoc A location within an unimported module at which the desired
2936 /// effect occurred.
2937 /// \return A file that can be #included to provide the desired effect. Null
2938 /// if no such file could be determined or if a #include is not
2939 /// appropriate (eg, if a module should be imported instead).
2941 SourceLocation MLoc);
2942
2943 bool isRecordingPreamble() const {
2944 return PreambleConditionalStack.isRecording();
2945 }
2946
2947 bool hasRecordedPreamble() const {
2948 return PreambleConditionalStack.hasRecordedPreamble();
2949 }
2950
2952 return PreambleConditionalStack.getStack();
2953 }
2954
2956 PreambleConditionalStack.setStack(s);
2957 }
2958
2960 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
2961 PreambleConditionalStack.startReplaying();
2962 PreambleConditionalStack.setStack(s);
2963 PreambleConditionalStack.SkipInfo = SkipInfo;
2964 }
2965
2966 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2967 return PreambleConditionalStack.SkipInfo;
2968 }
2969
2970private:
2971 /// After processing predefined file, initialize the conditional stack from
2972 /// the preamble.
2973 void replayPreambleConditionalStack();
2974
2975 // Macro handling.
2976 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2977 void HandleUndefDirective();
2978
2979 // Conditional Inclusion.
2980 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2981 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2982 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2983 bool ReadAnyTokensBeforeDirective);
2984 void HandleEndifDirective(Token &EndifToken);
2985 void HandleElseDirective(Token &Result, const Token &HashToken);
2986 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2987 tok::PPKeywordKind Kind);
2988
2989 // Pragmas.
2990 void HandlePragmaDirective(PragmaIntroducer Introducer);
2991
2992public:
2993 void HandlePragmaOnce(Token &OnceTok);
2994 void HandlePragmaMark(Token &MarkTok);
2995 void HandlePragmaPoison();
2996 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2997 void HandlePragmaDependency(Token &DependencyTok);
3004
3005 // Return true and store the first token only if any CommentHandler
3006 // has inserted some tokens and getCommentRetentionState() is false.
3007 bool HandleComment(Token &result, SourceRange Comment);
3008
3009 /// A macro is used, update information about macros that need unused
3010 /// warnings.
3011 void markMacroAsUsed(MacroInfo *MI);
3012
3013 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
3014 SourceLocation AnnotationLoc) {
3015 AnnotationInfos[II].DeprecationInfo =
3016 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
3017 }
3018
3019 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
3020 SourceLocation AnnotationLoc) {
3021 AnnotationInfos[II].RestrictExpansionInfo =
3022 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
3023 }
3024
3025 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
3026 AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
3027 }
3028
3029 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
3030 return AnnotationInfos.find(II)->second;
3031 }
3032
3033 void emitMacroExpansionWarnings(const Token &Identifier,
3034 bool IsIfnDef = false) const {
3035 IdentifierInfo *Info = Identifier.getIdentifierInfo();
3036 if (Info->isDeprecatedMacro())
3037 emitMacroDeprecationWarning(Identifier);
3038
3039 if (Info->isRestrictExpansion() &&
3040 !SourceMgr.isInMainFile(Identifier.getLocation()))
3041 emitRestrictExpansionWarning(Identifier);
3042
3043 if (!IsIfnDef) {
3044 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
3045 emitRestrictInfNaNWarning(Identifier, 0);
3046 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
3047 emitRestrictInfNaNWarning(Identifier, 1);
3048 }
3049 }
3050
3052 const LangOptions &LangOpts,
3053 const TargetInfo &TI);
3054
3056 const PresumedLoc &PLoc,
3057 const LangOptions &LangOpts,
3058 const TargetInfo &TI);
3059
3060private:
3061 void emitMacroDeprecationWarning(const Token &Identifier) const;
3062 void emitRestrictExpansionWarning(const Token &Identifier) const;
3063 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
3064 void emitRestrictInfNaNWarning(const Token &Identifier,
3065 unsigned DiagSelection) const;
3066
3067 /// This boolean state keeps track if the current scanned token (by this PP)
3068 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a
3069 /// translation unit in a linear order.
3070 bool InSafeBufferOptOutRegion = false;
3071
3072 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
3073 /// region if PP is currently in such a region. Hold undefined value
3074 /// otherwise.
3075 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.
3076
3077 using SafeBufferOptOutRegionsTy =
3079 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
3080 // translation unit. Each region is represented by a pair of start and
3081 // end locations.
3082 SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
3083
3084 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
3085 // following structure to manage them by their ASTs.
3086 struct {
3087 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a
3088 // loaded AST. See `SourceManager::getUniqueLoadedASTID`.
3089 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
3090
3091 // Returns a reference to the safe buffer opt-out regions of the loaded
3092 // AST where `Loc` belongs to. (Construct if absent)
3093 SafeBufferOptOutRegionsTy &
3094 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
3095 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
3096 }
3097
3098 // Returns a reference to the safe buffer opt-out regions of the loaded
3099 // AST where `Loc` belongs to. (This const function returns nullptr if
3100 // absent.)
3101 const SafeBufferOptOutRegionsTy *
3102 lookupLoadedOptOutMap(SourceLocation Loc,
3103 const SourceManager &SrcMgr) const {
3104 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
3105 auto Iter = LoadedRegions.find(FID);
3106
3107 if (Iter == LoadedRegions.end())
3108 return nullptr;
3109 return &Iter->getSecond();
3110 }
3111 } LoadedSafeBufferOptOutMap;
3112
3113public:
3114 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
3115 /// region. This `Loc` must be a source location that has been pre-processed.
3116 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
3117
3118 /// Alter the state of whether this PP currently is in a
3119 /// "-Wunsafe-buffer-usage" opt-out region.
3120 ///
3121 /// \param isEnter true if this PP is entering a region; otherwise, this PP
3122 /// is exiting a region
3123 /// \param Loc the location of the entry or exit of a
3124 /// region
3125 /// \return true iff it is INVALID to enter or exit a region, i.e.,
3126 /// attempt to enter a region before exiting a previous region, or exiting a
3127 /// region that PP is not currently in.
3128 bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
3129 const SourceLocation &Loc);
3130
3131 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3132 /// opt-out region
3134
3135 /// \param StartLoc output argument. It will be set to the start location of
3136 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
3137 /// returns true.
3138 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3139 /// opt-out region
3140 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3141
3142 /// \return a sequence of SourceLocations representing ordered opt-out regions
3143 /// specified by
3144 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
3145 SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3146
3147 /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3148 /// record of code `PP_UNSAFE_BUFFER_USAGE`.
3149 /// \return true iff the `Preprocessor` has been updated; false if the
3150 /// `Preprocessor` is left unchanged by the call.
3152 const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3153
3154 /// Whether we've seen pp-directives which may have changed the preprocessing
3155 /// state.
3156 bool hasSeenNoTrivialPPDirective() const;
3157
3158private:
3159 /// Helper functions to forward lexing to the actual lexer. They all share the
3160 /// same signature.
3161 static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3162 return P.CurLexer->Lex(Result);
3163 }
3164 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3165 return P.CurTokenLexer->Lex(Result);
3166 }
3167 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3168 P.CachingLex(Result);
3169 return true;
3170 }
3171 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3172 return P.CurLexer->LexDependencyDirectiveToken(Result);
3173 }
3174};
3175
3176/// Abstract base class that describes a handler that will receive
3177/// source ranges for each of the comments encountered in the source file.
3179public:
3181
3182 // The handler shall return true if it has pushed any tokens
3183 // to be read using e.g. EnterToken or EnterTokenStream.
3184 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
3185};
3186
3187/// Abstract base class that describes a handler that will receive
3188/// source ranges for empty lines encountered in the source file.
3190public:
3192
3193 // The handler handles empty lines.
3194 virtual void HandleEmptyline(SourceRange Range) = 0;
3195};
3196
3197/// Helper class to shuttle information about #embed directives from the
3198/// preprocessor to the parser through an annotation token.
3200 StringRef BinaryData;
3201 StringRef FileName;
3202};
3203
3204/// Registry of pragma handlers added by plugins
3205using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3206
3207} // namespace clang
3208
3209namespace llvm {
3210extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
3211} // namespace llvm
3212
3213#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
#define V(N, I)
Defines the Diagnostic-related interfaces.
Defines the Diagnostic IDs-related interfaces.
Token Tok
The Token.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Defines the PPCallbacks interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
VerifyDiagnosticConsumer::Directive Directive
__device__ __2f16 float __ockl_bool s
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
Definition Builtins.h:235
Callback handler that receives notifications when performing code completion within the preprocessor.
Abstract base class that describes a handler that will receive source ranges for each of the comments...
virtual bool HandleComment(Preprocessor &PP, SourceRange Comment)=0
A directive for a defined macro or a macro imported from a module.
Definition MacroInfo.h:432
Functor that returns the dependency directives for a given file.
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:233
void setSuppressAllDiagnostics(bool Val)
Suppress all diagnostics, to silence the front end when we know that we don't want any more diagnosti...
Definition Diagnostic.h:736
A reference to a DirectoryEntry that includes the name of the directory as it was accessed by the Fil...
Cached information about one directory (either on disk or in the virtual file system).
Abstract base class that describes a handler that will receive source ranges for empty lines encounte...
virtual void HandleEmptyline(SourceRange Range)=0
Abstract interface for external sources of preprocessor information.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition FileEntry.h:302
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements support for file system lookup, file system caching, and directory search management.
Definition FileManager.h:53
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
One of these records is kept for each identifier that is lexed.
bool hadMacroDefinition() const
Returns true if this identifier was #defined to some value at any moment.
bool hasMacroDefinition() const
Return true if this identifier is #defined to some other value.
bool isDeprecatedMacro() const
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
StringRef getName() const
Return the actual identifier string.
bool isRestrictExpansion() const
A simple pair of identifier info and location.
Implements an efficient mapping from strings to IdentifierInfo nodes.
FPEvalMethodKind
Possible float expression evaluation method choices.
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
Definition Lexer.cpp:1088
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
Definition Lexer.cpp:889
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition Lexer.h:399
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
Definition Lexer.cpp:911
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition Lexer.cpp:462
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition Lexer.cpp:520
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Definition Lexer.cpp:859
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:590
const DefMacroDirective * getDirective() const
Definition MacroInfo.h:375
Encapsulates changes to the "macros namespace" (the location where the macro name became active,...
Definition MacroInfo.h:313
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:39
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition MacroInfo.h:125
Abstract interface for a module loader.
static std::string getFlatNameFromPath(ModuleIdPath Path)
Represents a macro directive exported by a module.
Definition MacroInfo.h:514
A header that is known to reside within a given module, whether it was included or excluded.
Definition ModuleMap.h:158
unsigned getNumIdentifierLocs() const
std::string str() const
SourceLocation getBeginLoc() const
SourceLocation getEndLoc() const
SourceRange getRange() const
ModuleIdPath getModuleIdPath() const
Describes a module or submodule.
Definition Module.h:246
bool isModuleMapModule() const
Definition Module.h:356
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
PragmaHandler - Instances of this interface defined to handle the various pragmas that the language f...
Definition Pragma.h:65
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getLastFPEvalPragmaLocation() const
bool isMacroDefined(const IdentifierInfo *II)
MacroDirective * getLocalMacroDirective(const IdentifierInfo *II) const
Given an identifier, return its latest non-imported MacroDirective if it is #define'd and not #undef'...
bool markIncluded(FileEntryRef File)
Mark the file as included.
void HandlePragmaPushMacro(Token &Tok)
Handle #pragma push_macro.
Definition Pragma.cpp:634
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
void HandlePragmaPoison()
HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'.
Definition Pragma.cpp:439
void setCodeCompletionHandler(CodeCompletionHandler &Handler)
Set the code completion handler to the given object.
void dumpMacroInfo(const IdentifierInfo *II)
void HandlePragmaSystemHeader(Token &SysHeaderTok)
HandlePragmaSystemHeader - Implement #pragma GCC system_header.
Definition Pragma.cpp:481
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void MaybeHandlePoisonedIdentifier(Token &Identifier)
ModuleMacro * addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, ArrayRef< ModuleMacro * > Overrides, bool &IsNew)
Register an exported macro for a module and identifier.
void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, MacroDirective *MD)
Set a MacroDirective that was loaded from a PCH file.
MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, SourceLocation Loc)
void EnterModuleSuffixTokenStream(ArrayRef< Token > Toks)
void markClangModuleAsAffecting(Module *M)
Mark the given clang module as affecting the current clang module or translation unit.
void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, SourceLocation Loc)
Set the location of the currently-active #pragma clang arc_cf_code_audited begin.
void HandlePragmaModuleBuild(Token &Tok)
Definition Pragma.cpp:811
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
ArrayRef< ModuleMacro * > getLeafModuleMacros(const IdentifierInfo *II) const
Get the list of leaf (non-overridden) module macros for a name.
bool isIncrementalProcessingEnabled() const
Returns true if incremental processing is enabled.
void EnterToken(const Token &Tok, bool IsReinject)
Enters a token in the token stream to be lexed next.
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition Pragma.cpp:2219
void HandleCXXImportDirective(Token Import)
HandleCXXImportDirective - Handle the C++ modules import directives.
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI)
PPCallbacks * getPPCallbacks() const
bool isInNamedInterfaceUnit() const
If we are preprocessing a named interface unit.
ArrayRef< PPConditionalInfo > getPreambleConditionalStack() const
void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)
Record the location of the unterminated #pragma clang assume_nonnull begin in the preamble.
SourceRange DiscardUntilEndOfDirective(SmallVectorImpl< Token > *DiscardedToks=nullptr)
Read and discard all tokens remaining on the current line until the tok::eod token is found.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
ArrayRef< BuildingSubmoduleInfo > getBuildingSubmodules() const
Get the list of submodules that we're currently building.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const
SourceRange getCodeCompletionTokenRange() const
SourceLocation getModuleImportLoc(Module *M) const
void overrideMaxTokens(unsigned Value, SourceLocation Loc)
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void HandleSkippedDirectiveWhileUsingPCH(Token &Result, SourceLocation HashLoc)
Process directives while skipping until the through header or pragma hdrstop is found.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
void enableIncrementalProcessing(bool value=true)
Enables the incremental processing.
void TypoCorrectToken(const Token &Tok)
Update the current token to represent the provided identifier, in order to cache an action performed ...
bool GetSuppressIncludeNotFoundError()
bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M)
Determine whether II is defined as a macro within the module M, if that is a module that we've alread...
void setPragmaAssumeNonNullLoc(SourceLocation Loc)
Set the location of the currently-active #pragma clang assume_nonnull begin.
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void markMacroAsUsed(MacroInfo *MI)
A macro is used, update information about macros that need unused warnings.
LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const
void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma)
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, SourceLocation AnnotationLoc)
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, SourceLocation AnnotationLoc)
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
MacroDirective * getLocalMacroDirectiveHistory(const IdentifierInfo *II) const
Given an identifier, return the latest non-imported macro directive for that identifier.
void setPreprocessedOutput(bool IsPreprocessedOutput)
Sets whether the preprocessor is responsible for producing output or if it is producing tokens to be ...
void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc)
bool IsPreviousCachedToken(const Token &Tok) const
Whether Tok is the most recent token (CachedLexPos - 1) in CachedTokens.
bool SawDateOrTime() const
Returns true if the preprocessor has seen a use of __DATE__ or __TIME__ in the file so far.
const TargetInfo * getAuxTargetInfo() const
void CommitBacktrackedTokens()
Disable the last EnableBacktrackAtThisPos call.
Definition PPCaching.cpp:56
friend class MacroArgs
void DumpMacro(const MacroInfo &MI) const
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
void setDiagnostics(DiagnosticsEngine &D)
IncludedFilesSet & getIncludedFiles()
Get the set of included files.
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void AnnotateCachedTokens(const Token &Tok)
We notify the Preprocessor that if it is caching tokens (because backtrack is enabled) it should repl...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
StringRef getNamedModuleName() const
Get the named module name we're preprocessing.
bool mightHavePendingAnnotationTokens()
Determine whether it's possible for a future call to Lex to produce an annotation token created by a ...
void Lex(Token &Result)
Lex the next token for this preprocessor.
void EnterTokenStream(ArrayRef< Token > Toks, bool DisableMacroExpansion, bool IsReinject)
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
bool isParsingIfOrElifDirective() const
True if we are currently preprocessing a #if or #elif directive.
unsigned getNumDirectives() const
Retrieve the number of Directives that have been processed by the Preprocessor.
bool isInImplementationUnit() const
If we are implementing an implementation module unit.
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
ModuleLoader & getModuleLoader() const
Retrieve the module loader associated with this preprocessor.
void LexNonComment(Token &Result)
Lex a token.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
PreprocessorLexer * getCurrentLexer() const
Return the current lexer being lexed from.
bool LexOnOffSwitch(tok::OnOffSwitch &Result)
Lex an on-off-switch (C99 6.10.6p2) and verify that it is followed by EOD.
Definition Pragma.cpp:972
StringRef getCodeCompletionFilter()
Get the code completion token for filtering purposes.
void setMainFileDir(DirectoryEntryRef Dir)
Set the directory in which the main file should be considered to have been found, if it is not a real...
const IdentifierTable & getIdentifierTable() const
void HandlePragmaDependency(Token &DependencyTok)
HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
Definition Pragma.cpp:513
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
friend class ASTReader
void Backtrack()
Make Preprocessor re-lex the tokens that were lexed since EnableBacktrackAtThisPos() was previously c...
Definition PPCaching.cpp:66
bool isCurrentLexer(const PreprocessorLexer *L) const
Return true if we are lexing directly from the specified lexer.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
IdentifierLoc getPragmaARCCFCodeAuditedInfo() const
The location of the currently-active #pragma clang arc_cf_code_audited begin.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
void setReplayablePreambleConditionalStack(ArrayRef< PPConditionalInfo > s, std::optional< PreambleSkipInfo > SkipInfo)
const Token & LookAhead(unsigned N)
Peeks ahead N tokens and returns that token without consuming any tokens.
friend class VAOptDefinitionContext
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
uint8_t getSpellingOfSingleCharacterNumericConstant(const Token &Tok, bool *Invalid=nullptr) const
Given a Token Tok that is a numeric constant with length 1, return the value of constant as an unsign...
macro_iterator macro_end(bool IncludeExternalMacros=true) const
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, bool *ShadowFlag=nullptr)
std::optional< PreambleSkipInfo > getPreambleSkipInfo() const
void setPreprocessToken(bool Preprocess)
bool isPreprocessedModuleFile() const
Whether the main file is preprocessed module file.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
void HandlePragmaOnce(Token &OnceTok)
HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'.
Definition Pragma.cpp:414
SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros=false, SmallVectorImpl< Token > *ExtraToks=nullptr)
Ensure that the next token is a tok::eod token.
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool isMacroDefined(StringRef Id)
static bool checkModuleIsAvailable(const LangOptions &LangOpts, const TargetInfo &TargetInfo, const Module &M, DiagnosticsEngine &Diags)
Check that the given module is available, producing a diagnostic if not.
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
void SetMacroExpansionOnlyInDirectives()
Disables macro expansion everywhere except for preprocessor directives.
bool hasRecordedPreamble() const
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Char) const
Given a location that specifies the start of a token, return a new location that specifies a characte...
SourceLocation getPragmaAssumeNonNullLoc() const
The location of the currently-active #pragma clang assume_nonnull begin.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
bool isUnannotatedBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of unannotated tokens is on.
void EnterTokenStream(std::unique_ptr< Token[]> Toks, unsigned NumToks, bool DisableMacroExpansion, bool IsReinject)
void RevertCachedTokens(unsigned N)
When backtracking is enabled and tokens are cached, this allows to revert a specific number of tokens...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
std::optional< std::uint64_t > getStdLibCxxVersion()
void RemovePragmaHandler(PragmaHandler *Handler)
unsigned getTokenCount() const
Get the number of tokens processed so far.
OptionalFileEntryRef LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile)
Given a "Filename" or <Filename> reference, look up the indicated embed resource.
unsigned getMaxTokens() const
Get the max number of tokens before issuing a -Wmax-tokens warning.
SourceLocation getMaxTokensOverrideLoc() const
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
bool hadModuleLoaderFatalFailure() const
static void processPathToFileName(SmallVectorImpl< char > &FileName, const PresumedLoc &PLoc, const LangOptions &LangOpts, const TargetInfo &TI)
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
bool HandleModuleContextualKeyword(Token &Result)
Callback invoked when the lexer sees one of export, import or module token at the start of a line.
const TargetInfo & getTargetInfo() const
FileManager & getFileManager() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
std::string getSpelling(const Token &Tok, bool *Invalid=nullptr) const
Return the 'spelling' of the Tok token.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
friend class VariadicMacroScopeGuard
Module * getCurrentLexerSubmodule() const
Return the submodule owning the file being lexed.
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
MacroInfo * AllocateMacroInfo(SourceLocation L)
Allocate a new MacroInfo object with the provided SourceLocation.
void setDependencyDirectivesGetter(DependencyDirectivesGetter &Get)
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
StringRef getImmediateMacroName(SourceLocation Loc)
Retrieve the name of the immediate macro expansion.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a pragma hdrstop.
bool alreadyIncluded(FileEntryRef File) const
Return true if this header has already been included.
llvm::iterator_range< macro_iterator > macros(bool IncludeExternalMacros=true) const
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
void LexUnexpandedNonComment(Token &Result)
Like LexNonComment, but this disables macro expansion of identifier tokens.
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition Pragma.cpp:919
llvm::BumpPtrAllocator & getPreprocessorAllocator()
ModuleMacro * getModuleMacro(Module *Mod, const IdentifierInfo *II)
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
bool GetIncludeFilenameSpelling(SourceLocation Loc, StringRef &Buffer)
Turn the specified lexer token into a fully checked and spelled filename, e.g.
PreprocessorLexer * getCurrentFileLexer() const
Return the current file lexer being lexed from.
HeaderSearch & getHeaderSearchInfo() const
void emitMacroExpansionWarnings(const Token &Identifier, bool IsIfnDef=false) const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
void HandlePragmaPopMacro(Token &Tok)
Handle #pragma pop_macro.
Definition Pragma.cpp:657
void ReplaceLastTokenWithAnnotation(const Token &Tok)
Replace the last token with an annotation token.
ExternalPreprocessorSource * getExternalSource() const
bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion)
Module * LeaveSubmodule(bool ForPragma)
const std::string & getPredefines() const
Get the predefines for this preprocessor.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
CodeCompletionHandler * getCodeCompletionHandler() const
Retrieve the current code-completion handler.
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, void *AnnotationVal)
Enter an annotation token into the token stream.
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream.
MacroInfo * getMacroInfo(const IdentifierInfo *II)
void setPredefines(std::string P)
Set the predefines for this Preprocessor.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, SmallVectorImpl< Token > &Suffix, SmallVectorImpl< IdentifierLoc > &Path, bool AllowMacroExpansion, bool IsPartition)
Builtin::Context & getBuiltinInfo()
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine)
Instruct the preprocessor to skip part of the main source file.
const PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
void ReplacePreviousCachedToken(ArrayRef< Token > NewToks)
Replace token in CachedLexPos - 1 in CachedTokens by the tokens in NewToks.
LangOptions::FPEvalMethodKind getTUFPEvalMethod() const
const LangOptions & getLangOpts() const
bool isImportingCXXNamedModules() const
If we're importing a standard C++20 named module.
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void SetSuppressIncludeNotFoundError(bool Suppress)
static void processPathForFileMacro(SmallVectorImpl< char > &Path, const LangOptions &LangOpts, const TargetInfo &TI)
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
bool isInNamedModule() const
If we are preprocessing a named module.
void EnableBacktrackAtThisPos(bool Unannotated=false)
From the point that this method is called, and until CommitBacktrackedTokens() or Backtrack() is call...
Definition PPCaching.cpp:34
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
void PoisonSEHIdentifiers(bool Poison=true)
bool isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the first token of the macro expansion.
size_t getTotalMemory() const
void setCounterValue(uint32_t V)
void setExternalSource(ExternalPreprocessorSource *Source)
void clearCodeCompletionHandler()
Clear out the code completion handler.
void AddPragmaHandler(PragmaHandler *Handler)
OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, SourceLocation MLoc)
We want to produce a diagnostic at location IncLoc concerning an unreachable effect at location MLoc ...
bool isCodeCompletionReached() const
Returns true if code-completion is enabled and we have hit the code-completion point.
IdentifierInfo * ParsePragmaPushOrPopMacro(Token &Tok)
ParsePragmaPushOrPopMacro - Handle parsing of pragma push_macro/pop_macro.
Definition Pragma.cpp:569
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool getRawToken(SourceLocation Loc, Token &Result, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
bool isNextPPTokenOneOf(Ts... Ks) const
isNextPPTokenOneOf - Check whether the next pp-token is one of the specified token kinds.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
void setPragmasEnabled(bool Enabled)
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, SourceLocation Loc)
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
bool isAtEndOfMacroExpansion(SourceLocation loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getMainFileFirstPPTokenLoc() const
Get the start location of the first pp-token in main file.
void HandlePragmaMark(Token &MarkTok)
Definition Pragma.cpp:429
void CollectPPImportSuffix(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Collect the tokens of a C++20 pp-import-suffix.
bool getPragmasEnabled() const
void HandlePragmaHdrstop(Token &Tok)
Definition Pragma.cpp:885
PreprocessingRecord * getPreprocessingRecord() const
Retrieve the preprocessing record, or NULL if there is no preprocessing record.
void setEmptylineHandler(EmptylineHandler *Handler)
Set empty line handler.
DiagnosticsEngine & getDiagnostics() const
void HandleCXXModuleDirective(Token Module)
HandleCXXModuleDirective - Handle C++ module declaration directives.
SourceLocation getLastCachedTokenLocation() const
Get the location of the last cached token, suitable for setting the end location of an annotation tok...
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
llvm::DenseSet< const FileEntry * > IncludedFilesSet
unsigned getSpelling(const Token &Tok, const char *&Buffer, bool *Invalid=nullptr) const
Get the spelling of a token into a preallocated buffer, instead of as an std::string.
SelectorTable & getSelectorTable()
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition Pragma.cpp:950
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset=0)
Computes the source location just past the end of the token at this source location.
const llvm::SmallSetVector< Module *, 2 > & getAffectingClangModules() const
Get the set of top-level clang modules that affected preprocessing, but were not imported.
std::optional< LexEmbedParametersResult > LexEmbedParameters(Token &Current, bool ForHasEmbed)
Lex the parameters for an embed directive, returns nullopt on error.
const IncludedFilesSet & getIncludedFiles() const
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void HandlePragmaIncludeAlias(Token &Tok)
Definition Pragma.cpp:692
Module * getModuleForLocation(SourceLocation Loc, bool AllowTextual)
Find the module that owns the source or header file that Loc points to.
uint32_t getCounterValue() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok, SmallVectorImpl< IdentifierLoc > &Path, SmallVectorImpl< Token > &DirToks, bool AllowMacroExpansion, bool IsPartition)
SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const
Get the location of the recorded unterminated #pragma clang assume_nonnull begin in the preamble,...
void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, MacroArgs *Args)
Add a Macro to the top of the include stack and start lexing tokens from it instead of the current bu...
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the include of the through header or until after a pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
void markMainFileAsPreprocessedModuleFile()
Mark the main file as a preprocessed module file, then the 'module' and 'import' directive recognitio...
bool LexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Lex a string literal, which may be the concatenation of multiple string literals and may even come fr...
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD)
Add a directive to the macro directive history for this identifier.
Represents an unpacked "presumed" location which can be presented to the user.
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
This table allows us to fully hide how we implement multi-keyword caching.
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Exposes information about the current target.
Definition TargetInfo.h:227
TokenValue(IdentifierInfo *II)
TokenValue(tok::TokenKind Kind)
bool operator==(const Token &Tok) const
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
Public enums and private classes that are part of the SourceManager implementation.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition TokenKinds.h:25
OnOffSwitch
Defines the possible values of an on-off-switch (C99 6.10.6p2).
Definition TokenKinds.h:56
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition TokenKinds.h:101
PPKeywordKind
Provides a namespace for preprocessor keywords which start with a '#' at the beginning of the line.
Definition TokenKinds.h:33
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
detail::SearchDirIteratorImpl< true > ConstSearchDirIterator
@ Create
'create' clause, allowed on Compute and Combined constructs, plus 'data', 'enter data',...
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
MacroUse
Context in which macro name is used.
@ Module
Module linkage, which indicates that the entity can be referred to from other translation units withi...
Definition Linkage.h:54
@ Result
The result type of a method or function.
Definition TypeBase.h:905
TranslationUnitKind
Describes the kind of translation unit being processed.
@ TU_Complete
The translation unit is a complete translation unit.
CustomizableOptional< DirectoryEntryRef > OptionalDirectoryEntryRef
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
Helper class to shuttle information about embed directives from the preprocessor to the parser throug...
Describes how and where the pragma was introduced.
Definition Pragma.h:51
PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, bool FoundNonSkipPortion, bool FoundElse, SourceLocation ElseLoc)