clang 23.0.0git
Preprocessor.h
Go to the documentation of this file.
1//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the clang::Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
20#include "clang/Basic/LLVM.h"
22#include "clang/Basic/Module.h"
27#include "clang/Lex/Lexer.h"
28#include "clang/Lex/MacroInfo.h"
30#include "clang/Lex/ModuleMap.h"
33#include "clang/Lex/Token.h"
36#include "llvm/ADT/APSInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/FoldingSet.h"
40#include "llvm/ADT/FunctionExtras.h"
41#include "llvm/ADT/PointerUnion.h"
42#include "llvm/ADT/STLExtras.h"
43#include "llvm/ADT/SmallPtrSet.h"
44#include "llvm/ADT/SmallVector.h"
45#include "llvm/ADT/StringRef.h"
46#include "llvm/ADT/TinyPtrVector.h"
47#include "llvm/ADT/iterator_range.h"
48#include "llvm/Support/Allocator.h"
49#include "llvm/Support/Casting.h"
50#include "llvm/Support/Registry.h"
51#include "llvm/Support/TrailingObjects.h"
52#include <cassert>
53#include <cstddef>
54#include <cstdint>
55#include <map>
56#include <memory>
57#include <optional>
58#include <string>
59#include <utility>
60#include <vector>
61
62namespace llvm {
63
64template<unsigned InternalLen> class SmallString;
65
66} // namespace llvm
67
68namespace clang {
69
71class CommentHandler;
72class DirectoryEntry;
75class FileEntry;
76class FileManager;
77class HeaderSearch;
78class MacroArgs;
79class PragmaHandler;
80class PragmaNamespace;
84class ScratchBuffer;
85class TargetInfo;
87
88namespace Builtin {
89class Context;
90}
91
92/// Stores token information for comparing actual tokens with
93/// predefined values. Only handles simple tokens and identifiers.
95 tok::TokenKind Kind;
97
98public:
99 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
100 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
101 assert(Kind != tok::identifier &&
102 "Identifiers should be created by TokenValue(IdentifierInfo *)");
103 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
104 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
105 }
106
107 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
108
109 bool operator==(const Token &Tok) const {
110 return Tok.getKind() == Kind &&
111 (!II || II == Tok.getIdentifierInfo());
112 }
113};
114
115/// Context in which macro name is used.
117 // other than #define or #undef
119
120 // macro name specified in #define
122
123 // macro name specified in #undef
125};
126
/// Outcome codes for an embed query. Per the member comments, the
/// non-negative values correspond to the __STDC_EMBED_* macros of the same
/// meaning, while Invalid (-1) is reserved for a parsing error.
/// NOTE(review): presumably produced when evaluating __has_embed -- confirm
/// against the users of this enum.
127enum class EmbedResult {
128 Invalid = -1, // Parsing error occurred.
129 NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__
130 Found = 1, // Corresponds to __STDC_EMBED_FOUND__
131 Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__
132};
133
139
140class ModuleNameLoc final
141 : llvm::TrailingObjects<ModuleNameLoc, IdentifierLoc> {
142 friend TrailingObjects;
143 unsigned NumIdentifierLocs;
144 unsigned numTrailingObjects(OverloadToken<IdentifierLoc>) const {
145 return getNumIdentifierLocs();
146 }
147
148 ModuleNameLoc(ModuleIdPath Path) : NumIdentifierLocs(Path.size()) {
149 (void)llvm::copy(Path, getTrailingObjectsNonStrict<IdentifierLoc>());
150 }
151
152public:
153 static ModuleNameLoc *Create(Preprocessor &PP, ModuleIdPath Path);
154 unsigned getNumIdentifierLocs() const { return NumIdentifierLocs; }
156 return {getTrailingObjectsNonStrict<IdentifierLoc>(),
158 }
159
161 return getModuleIdPath().front().getLoc();
162 }
164 auto &Last = getModuleIdPath().back();
165 return Last.getLoc().getLocWithOffset(
166 Last.getIdentifierInfo()->getLength());
167 }
168 SourceRange getRange() const { return {getBeginLoc(), getEndLoc()}; }
169 std::string str() const {
171 }
172};
173
174/// Engages in a tight little dance with the lexer to efficiently
175/// preprocess tokens.
176///
177/// Lexers know only about tokens within a single source file, and don't
178/// know anything about preprocessor-level issues like the \#include stack,
179/// token expansion, etc.
183
184 llvm::unique_function<void(const clang::Token &)> OnToken;
185 /// Functor for getting the dependency preprocessor directives of a file.
186 ///
187 /// These are directives derived from a special form of lexing where the
188 /// source input is scanned for the preprocessor directives that might have an
189 /// effect on the dependencies for a compilation unit.
190 DependencyDirectivesGetter *GetDependencyDirectives = nullptr;
191 const PreprocessorOptions &PPOpts;
192 DiagnosticsEngine *Diags;
193 const LangOptions &LangOpts;
194 const TargetInfo *Target = nullptr;
195 const TargetInfo *AuxTarget = nullptr;
196 FileManager &FileMgr;
197 SourceManager &SourceMgr;
198 std::unique_ptr<ScratchBuffer> ScratchBuf;
199 HeaderSearch &HeaderInfo;
200 ModuleLoader &TheModuleLoader;
201
202 /// External source of macros.
203 ExternalPreprocessorSource *ExternalSource;
204
205 /// A BumpPtrAllocator object used to quickly allocate and release
206 /// objects internal to the Preprocessor.
207 llvm::BumpPtrAllocator BP;
208
209 /// Identifiers for builtin macros and other builtins.
210 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
211 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
212 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
213 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
214 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
215 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
216 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
217 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
218 IdentifierInfo *Ident__identifier; // __identifier
219 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
220 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
221 IdentifierInfo *Ident__has_feature; // __has_feature
222 IdentifierInfo *Ident__has_extension; // __has_extension
223 IdentifierInfo *Ident__has_builtin; // __has_builtin
224 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin
225 IdentifierInfo *Ident__has_attribute; // __has_attribute
226 IdentifierInfo *Ident__has_embed; // __has_embed
227 IdentifierInfo *Ident__has_include; // __has_include
228 IdentifierInfo *Ident__has_include_next; // __has_include_next
229 IdentifierInfo *Ident__has_warning; // __has_warning
230 IdentifierInfo *Ident__is_identifier; // __is_identifier
231 IdentifierInfo *Ident__building_module; // __building_module
232 IdentifierInfo *Ident__MODULE__; // __MODULE__
233 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
234 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
235 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
236 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
237 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
238 IdentifierInfo *Ident__is_target_os; // __is_target_os
239 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
240 IdentifierInfo *Ident__is_target_variant_os;
241 IdentifierInfo *Ident__is_target_variant_environment;
242 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD__
243
244 // Weak, only valid (and set) while InMacroArgs is true.
245 Token* ArgMacro;
246
247 SourceLocation DATELoc, TIMELoc;
248
249 // FEM_UnsetOnCommandLine means that an explicit evaluation method was
250 // not specified on the command line. The target is queried to set the
251 // default evaluation method.
252 LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
254
255 // The most recent pragma location where the floating point evaluation
256 // method was modified. This is used to determine whether the
257 // 'pragma clang fp eval_method' was used within the current scope.
258 SourceLocation LastFPEvalPragmaLocation;
259
260 LangOptions::FPEvalMethodKind TUFPEvalMethod =
262
263 // Next __COUNTER__ value, starts at 0.
264 uint32_t CounterValue = 0;
265
266 enum {
267 /// Maximum depth of \#includes.
268 MaxAllowedIncludeStackDepth = 200
269 };
270
271 // State that is set before the preprocessor begins.
272 bool KeepComments : 1;
273 bool KeepMacroComments : 1;
274 bool SuppressIncludeNotFoundError : 1;
275
276 // State that changes while the preprocessor runs:
277 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
278
279 /// Whether the preprocessor owns the header search object.
280 bool OwnsHeaderSearch : 1;
281
282 /// True if macro expansion is disabled.
283 bool DisableMacroExpansion : 1;
284
285 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
286 /// when parsing preprocessor directives.
287 bool MacroExpansionInDirectivesOverride : 1;
288
289 class ResetMacroExpansionHelper;
290
291 /// Whether we have already loaded macros from the external source.
292 mutable bool ReadMacrosFromExternalSource : 1;
293
294 /// True if pragmas are enabled.
295 bool PragmasEnabled : 1;
296
297 /// True if the current build action is a preprocessing action.
298 bool PreprocessedOutput : 1;
299
300 /// True if we are currently preprocessing a #if or #elif directive
301 bool ParsingIfOrElifDirective;
302
303 /// True if we are pre-expanding macro arguments.
304 bool InMacroArgPreExpansion;
305
306 /// Mapping/lookup information for all identifiers in
307 /// the program, including program keywords.
308 mutable IdentifierTable Identifiers;
309
310 /// This table contains all the selectors in the program.
311 ///
312 /// Unlike IdentifierTable above, this table *isn't* populated by the
313 /// preprocessor. It is declared/expanded here because its role/lifetime is
314 /// conceptually similar to the IdentifierTable. In addition, the current
315 /// control flow (in clang::ParseAST()) makes it convenient to put it here.
316 ///
317 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
318 /// the lifetime of the preprocessor.
319 SelectorTable Selectors;
320
321 /// Information about builtins.
322 std::unique_ptr<Builtin::Context> BuiltinInfo;
323
324 /// Tracks all of the pragmas that the client registered
325 /// with this preprocessor.
326 std::unique_ptr<PragmaNamespace> PragmaHandlers;
327
328 /// Pragma handlers of the original source are stored here during the
329 /// parsing of a model file.
330 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
331
332 /// Tracks all of the comment handlers that the client registered
333 /// with this preprocessor.
334 std::vector<CommentHandler *> CommentHandlers;
335
336 /// Empty line handler.
337 EmptylineHandler *Emptyline = nullptr;
338
339 /// True to avoid tearing down the lexer etc on EOF
340 bool IncrementalProcessing = false;
341
342public:
343 /// The kind of translation unit we are processing.
345
346 /// Returns a pointer into the given file's buffer that's guaranteed
347 /// to be between tokens. The returned pointer is always before \p Start.
348 /// The maximum distance between the returned pointer and \p Start is
349 /// limited by a constant value, but also an implementation detail.
350 /// If no such check point exists, \c nullptr is returned.
351 const char *getCheckPoint(FileID FID, const char *Start) const;
352
353private:
354 /// The code-completion handler.
355 CodeCompletionHandler *CodeComplete = nullptr;
356
357 /// The file that we're performing code-completion for, if any.
358 const FileEntry *CodeCompletionFile = nullptr;
359
360 /// The offset in file for the code-completion point.
361 unsigned CodeCompletionOffset = 0;
362
363 /// The location for the code-completion point. This gets instantiated
364 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
365 SourceLocation CodeCompletionLoc;
366
367 /// The start location for the file of the code-completion point.
368 ///
369 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
370 /// for preprocessing.
371 SourceLocation CodeCompletionFileLoc;
372
373 /// The source location of the \c import contextual keyword we just
374 /// lexed, if any.
375 SourceLocation ModuleImportLoc;
376
377 /// The source location of the \c module contextual keyword we just
378 /// lexed, if any.
379 SourceLocation ModuleDeclLoc;
380
381 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
382 unsigned CheckPointCounter = 0;
383
384 /// Whether we're importing a standard C++20 named module.
385 bool ImportingCXXNamedModules = false;
386
387 /// Whether the last token we lexed was an 'export' keyword.
388 Token LastExportKeyword;
389
390 /// First pp-token source location in current translation unit.
391 SourceLocation FirstPPTokenLoc;
392
393 /// A preprocessor directive tracer to trace whether the preprocessing
394 /// state changed. These changes would mean most semantically observable
395 /// preprocessor state, particularly anything that is order dependent.
396 NoTrivialPPDirectiveTracer *DirTracer = nullptr;
397
/// A position within a C++20 import-seq.
///
/// The position is encoded in a single integer: values greater than zero
/// count the brackets that are currently open, and values less than or equal
/// to zero are the named top-level positions listed in \c State.
class StdCXXImportSeq {
public:
  enum State : int {
    // Positive values represent a number of unclosed brackets.
    AtTopLevel = 0,
    AfterTopLevelTokenSeq = -1,
    AfterExport = -2,
    AfterImportSeq = -3,
  };

  // Intentionally implicit: the tracker is copy-initialized directly from a
  // State enumerator where it is declared.
  StdCXXImportSeq(State S) : S(S) {}

  /// Saw any kind of open bracket.
  ///
  /// Any top-level position (<= 0) collapses to zero before the nesting depth
  /// is incremented, so the result is always at least 1.
  void handleOpenBracket() {
    S = static_cast<State>(std::max<int>(S, 0) + 1);
  }

  /// Saw any kind of close bracket other than '}'.
  ///
  /// Clamping to 1 before decrementing means an unmatched close bracket at
  /// top level leaves the state at AtTopLevel instead of going negative.
  void handleCloseBracket() {
    S = static_cast<State>(std::max<int>(S, 1) - 1);
  }

  /// Saw a close brace.
  ///
  /// Behaves like any other close bracket, except that returning to top level
  /// also ends the top-level-token-seq unless a header-name has been seen
  /// since the last top-level semicolon.
  void handleCloseBrace() {
    handleCloseBracket();
    if (S == AtTopLevel && !AfterHeaderName)
      S = AfterTopLevelTokenSeq;
  }

  /// Saw a semicolon.
  ///
  /// Only a semicolon outside all brackets ends the top-level-token-seq; it
  /// also clears the header-name marker.
  void handleSemi() {
    if (atTopLevel()) {
      S = AfterTopLevelTokenSeq;
      AfterHeaderName = false;
    }
  }

  /// Saw an 'export' identifier.
  ///
  /// Directly after a top-level-token-seq this records AfterExport; at any
  /// other top-level position it is treated like an ordinary token. Inside
  /// brackets the state is left untouched.
  void handleExport() {
    if (S == AfterTopLevelTokenSeq)
      S = AfterExport;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw an 'import' identifier.
  ///
  /// Recognized as the start of an import only directly after a
  /// top-level-token-seq or after 'export'; otherwise handled like an
  /// ordinary token.
  void handleImport() {
    if (S == AfterTopLevelTokenSeq || S == AfterExport)
      S = AfterImportSeq;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw a 'header-name' token; do not recognize any more 'import' tokens
  /// until we reach a top-level semicolon.
  void handleHeaderName() {
    if (S == AfterImportSeq)
      AfterHeaderName = true;
    handleMisc();
  }

  /// Saw any other token.
  void handleMisc() {
    if (S <= 0)
      S = AtTopLevel;
  }

  /// True when no bracket is open (any of the named, non-positive states).
  bool atTopLevel() const { return S <= 0; }
  /// True when the previous token completed 'import' / 'export import'.
  bool afterImportSeq() const { return S == AfterImportSeq; }
  /// True when the previous token ended a top-level-token-seq.
  bool afterTopLevelSeq() const { return S == AfterTopLevelTokenSeq; }

private:
  State S;
  /// Whether we're in the pp-import-suffix following the header-name in a
  /// pp-import. If so, a close-brace is not sufficient to end the
  /// top-level-token-seq of an import-seq.
  bool AfterHeaderName = false;
};
473
474 /// Our current position within a C++20 import-seq.
475 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
476
/// Track whether we are in a Global Module Fragment.
///
/// The tracker starts in whichever state it is constructed with and is then
/// driven by the handle*() notifications below; inGMF() reports whether the
/// fragment is currently active.
class TrackGMF {
public:
  enum GMFState : int {
    GMFActive = 1,
    MaybeGMF = 0,
    BeforeGMFIntroducer = -1,
    GMFAbsentOrEnded = -2,
  };

  // Intentionally implicit: the tracker is copy-initialized directly from a
  // GMFState enumerator where it is declared.
  TrackGMF(GMFState S) : S(S) {}

  /// Saw a semicolon.
  void handleSemi() {
    // If it is immediately after the first instance of the module keyword,
    // then that introduces the GMF.
    if (S == MaybeGMF)
      S = GMFActive;
  }

  /// Saw an 'export' identifier.
  void handleExport() {
    // The presence of an 'export' keyword always ends or excludes a GMF.
    S = GMFAbsentOrEnded;
  }

  /// Saw an 'import' identifier.
  ///
  /// \param AfterTopLevelTokenSeq whether the identifier directly follows a
  /// top-level-token-seq; otherwise the notification is ignored.
  void handleImport(bool AfterTopLevelTokenSeq) {
    // If we see this before any 'module' kw, then we have no GMF.
    if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
      S = GMFAbsentOrEnded;
  }

  /// Saw a 'module' identifier.
  ///
  /// \param AfterTopLevelTokenSeq whether the identifier directly follows a
  /// top-level-token-seq.
  void handleModule(bool AfterTopLevelTokenSeq) {
    // This was the first module identifier and not preceded by any token
    // that would exclude a GMF. It could begin a GMF, but only if directly
    // followed by a semicolon.
    if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
      S = MaybeGMF;
    else
      S = GMFAbsentOrEnded;
  }

  /// Saw any other token.
  void handleMisc() {
    // We saw something other than ; after the 'module' kw, so not a GMF.
    if (S == MaybeGMF)
      S = GMFAbsentOrEnded;
  }

  /// True while the Global Module Fragment is active.
  bool inGMF() const { return S == GMFActive; }

private:
  /// Track the transitions into and out of a Global Module Fragment,
  /// if one is present.
  GMFState S;
};
535
536 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
537
538 /// Track the status of the c++20 module decl.
539 ///
540 /// module-declaration:
541 /// 'export'[opt] 'module' module-name module-partition[opt]
542 /// attribute-specifier-seq[opt] ';'
543 ///
544 /// module-name:
545 /// module-name-qualifier[opt] identifier
546 ///
547 /// module-partition:
548 /// ':' module-name-qualifier[opt] identifier
549 ///
550 /// module-name-qualifier:
551 /// identifier '.'
552 /// module-name-qualifier identifier '.'
553 ///
554 /// Transition state:
555 ///
556 /// NotAModuleDecl --- export ---> FoundExport
557 /// NotAModuleDecl --- module ---> ImplementationCandidate
558 /// FoundExport --- module ---> InterfaceCandidate
559 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate
560 /// ImplementationCandidate --- period ---> ImplementationCandidate
561 /// ImplementationCandidate --- colon ---> ImplementationCandidate
562 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate
563 /// InterfaceCandidate --- period ---> InterfaceCandidate
564 /// InterfaceCandidate --- colon ---> InterfaceCandidate
565 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation
566 /// InterfaceCandidate --- Semi ---> NamedModuleInterface
567 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation
568 /// NamedModuleInterface --- Anything ---> NamedModuleInterface
569 ///
570 /// FIXME: We don't handle attribute-specifier-seq here yet. This is unlikely
571 /// to matter soon, since we don't support any module attributes yet.
 // State machine that recognizes a module-declaration one token at a time.
 // Every handle*() method either advances the state or, unless a named module
 // has already been recognized, falls back to reset().
572 class ModuleDeclSeq {
573 enum ModuleDeclState : int {
574 NotAModuleDecl,
575 FoundExport,
576 InterfaceCandidate,
577 ImplementationCandidate,
578 NamedModuleInterface,
579 NamedModuleImplementation,
580 };
581
582 public:
583 ModuleDeclSeq() = default;
584
 // Saw 'export': only meaningful as the very first token of a declaration.
585 void handleExport() {
586 if (State == NotAModuleDecl)
587 State = FoundExport;
588 else if (!isNamedModule())
589 reset();
590 }
591
 // Saw 'module': becomes an interface candidate when preceded by 'export',
 // an implementation candidate when it starts the declaration.
592 void handleModule() {
593 if (State == FoundExport)
594 State = InterfaceCandidate;
595 else if (State == NotAModuleDecl)
596 State = ImplementationCandidate;
597 else if (!isNamedModule())
598 reset();
599 }
600
 // Saw a module name: while in a candidate state and NameLoc is non-null,
 // its string form is appended to the accumulated name.
601 void handleModuleName(ModuleNameLoc *NameLoc) {
602 if (isModuleCandidate() && NameLoc)
603 Name += NameLoc->str();
604 else if (!isNamedModule())
605 reset();
606 }
607
 // Saw ':': appended verbatim, so the accumulated name keeps the partition
 // separator that getPrimaryName() and isImplementationUnit() look for.
608 void handleColon() {
609 if (isModuleCandidate())
610 Name += ":";
611 else if (!isNamedModule())
612 reset();
613 }
614
 // Saw ';': a candidate with a non-empty name is promoted to the matching
 // named-module state; anything else resets unless already a named module.
615 void handleSemi() {
616 if (!Name.empty() && isModuleCandidate()) {
617 if (State == InterfaceCandidate)
618 State = NamedModuleInterface;
619 else if (State == ImplementationCandidate)
620 State = NamedModuleImplementation;
621 else
622 llvm_unreachable("Unimaged ModuleDeclState.");
623 } else if (!isNamedModule())
624 reset();
625 }
626
 // Saw any other token: abandons an in-progress declaration.
627 void handleMisc() {
628 if (!isNamedModule())
629 reset();
630 }
631
 // True while a declaration is being accumulated but is not yet terminated.
632 bool isModuleCandidate() const {
633 return State == InterfaceCandidate || State == ImplementationCandidate;
634 }
635
 // True once a complete module-declaration has been recognized. No handle*()
 // method leaves these states; only an explicit reset() does.
636 bool isNamedModule() const {
637 return State == NamedModuleInterface ||
638 State == NamedModuleImplementation;
639 }
640
641 bool isNamedInterface() const { return State == NamedModuleInterface; }
642
 // A non-exported named module whose name contains no ':' (no partition).
643 bool isImplementationUnit() const {
644 return State == NamedModuleImplementation && !getName().contains(':');
645 }
646
647 bool isNotAModuleDecl() const { return State == NotAModuleDecl; }
648
 // Full accumulated name, including any ":partition" suffix. Asserts that a
 // named module has been recognized.
649 StringRef getName() const {
650 assert(isNamedModule() && "Can't get name from a non named module");
651 return Name;
652 }
653
 // The part of the name before the first ':'.
654 StringRef getPrimaryName() const {
655 assert(isNamedModule() && "Can't get name from a non named module");
656 return getName().split(':').first;
657 }
658
 // Discards the accumulated name and returns to NotAModuleDecl.
659 void reset() {
660 Name.clear();
661 State = NotAModuleDecl;
662 }
663
664 private:
665 ModuleDeclState State = NotAModuleDecl;
 // Name text accumulated by handleModuleName() and handleColon().
666 std::string Name;
667 };
668
669 ModuleDeclSeq ModuleDeclState;
670
671 /// The identifier and source location of the currently-active
672 /// \#pragma clang arc_cf_code_audited begin.
673 IdentifierLoc PragmaARCCFCodeAuditedInfo;
674
675 /// The source location of the currently-active
676 /// \#pragma clang assume_nonnull begin.
677 SourceLocation PragmaAssumeNonNullLoc;
678
679 /// Set only for preambles which end with an active
680 /// \#pragma clang assume_nonnull begin.
681 ///
682 /// When the preamble is loaded into the main file,
683 /// `PragmaAssumeNonNullLoc` will be set to this to
684 /// replay the unterminated assume_nonnull.
685 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
686
687 /// True if we hit the code-completion point.
688 bool CodeCompletionReached = false;
689
690 /// The code completion token containing the information
691 /// on the stem that is to be code completed.
692 IdentifierInfo *CodeCompletionII = nullptr;
693
694 /// Range for the code completion token.
695 SourceRange CodeCompletionTokenRange;
696
697 /// The directory that the main file should be considered to occupy,
698 /// if it does not correspond to a real file (as happens when building a
699 /// module).
700 OptionalDirectoryEntryRef MainFileDir;
701
702 /// The number of bytes that we will initially skip when entering the
703 /// main file, along with a flag that indicates whether skipping this number
704 /// of bytes will place the lexer at the start of a line.
705 ///
706 /// This is used when loading a precompiled preamble.
707 std::pair<int, bool> SkipMainFilePreamble;
708
709 /// Whether we hit an error due to reaching max allowed include depth. Allows
710 /// to avoid hitting the same error over and over again.
711 bool HasReachedMaxIncludeDepth = false;
712
713 /// The number of currently-active calls to Lex.
714 ///
715 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
716 /// require asking for multiple additional tokens. This counter makes it
717 /// possible for Lex to detect whether it's producing a token for the end
718 /// of phase 4 of translation or for some other situation.
719 unsigned LexLevel = 0;
720
721 /// The number of (LexLevel 0) preprocessor tokens.
722 unsigned TokenCount = 0;
723
724 /// Preprocess every token regardless of LexLevel.
725 bool PreprocessToken = false;
726
727 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
728 /// warning, or zero for unlimited.
729 unsigned MaxTokens = 0;
730 SourceLocation MaxTokensOverrideLoc;
731
732public:
747
748 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
749
750private:
751 friend class ASTReader;
752 friend class MacroArgs;
753
 // Holds a copy of a conditional-directive stack together with a mode flag
 // (off / recording / replaying), plus optional skip information. The single
 // instance is declared with the class as the member PreambleConditionalStack.
754 class PreambleConditionalStackStore {
755 enum State {
756 Off = 0,
757 Recording = 1,
758 Replaying = 2,
759 };
760
761 public:
762 PreambleConditionalStackStore() = default;
763
764 void startRecording() { ConditionalStackState = Recording; }
765 void startReplaying() { ConditionalStackState = Replaying; }
766 bool isRecording() const { return ConditionalStackState == Recording; }
767 bool isReplaying() const { return ConditionalStackState == Replaying; }
768
 // Read-only view of the stored stack; valid until the stack is next changed.
769 ArrayRef<PPConditionalInfo> getStack() const {
770 return ConditionalStack;
771 }
772
 // Drops the stored stack and switches the store off.
773 void doneReplaying() {
774 ConditionalStack.clear();
775 ConditionalStackState = Off;
776 }
777
 // Replaces the stored stack with a copy of s. Silently ignored while the
 // store is off (neither recording nor replaying).
778 void setStack(ArrayRef<PPConditionalInfo> s) {
779 if (!isRecording() && !isReplaying())
780 return;
781 ConditionalStack.clear();
782 ConditionalStack.append(s.begin(), s.end());
783 }
784
 // True when the stored stack is non-empty.
785 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
786
 // True when SkipInfo currently holds a value.
787 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
788
789 void clearSkipInfo() { SkipInfo.reset(); }
790
 // Public on purpose: written directly by users of the store.
 // NOTE(review): presumably set when the preamble ends inside a skipped
 // conditional block -- confirm against the code that assigns it.
791 std::optional<PreambleSkipInfo> SkipInfo;
792
793 private:
794 SmallVector<PPConditionalInfo, 4> ConditionalStack;
795 State ConditionalStackState = Off;
796 } PreambleConditionalStack;
797
798 /// The current top of the stack that we're lexing from if
799 /// not expanding a macro and we are lexing directly from source code.
800 ///
801 /// Only one of CurLexer, or CurTokenLexer will be non-null.
802 std::unique_ptr<Lexer> CurLexer;
803
804 /// Lexers that are pending destruction, deferred until the current
805 /// Stack of Lexer unwinds completely (LexLevel returns to 0).
806 /// This avoids use-after-free when HandleEndOfFile is called from
807 /// within a Lexer method that still needs to access its members.
808 SmallVector<std::unique_ptr<Lexer>, 2> PendingDestroyLexers;
809
810 /// The current top of the stack that we're lexing from
811 /// if not expanding a macro.
812 ///
813 /// This is an alias for CurLexer.
814 PreprocessorLexer *CurPPLexer = nullptr;
815
816 /// Used to find the current FileEntry, if CurLexer is non-null
817 /// and if applicable.
818 ///
819 /// This allows us to implement \#include_next and find directory-specific
820 /// properties.
821 ConstSearchDirIterator CurDirLookup = nullptr;
822
823 /// The current macro we are expanding, if we are expanding a macro.
824 ///
825 /// One of CurLexer and CurTokenLexer must be null.
826 std::unique_ptr<TokenLexer> CurTokenLexer;
827
828 /// The kind of lexer we're currently working with.
829 typedef bool (*LexerCallback)(Preprocessor &, Token &);
830 LexerCallback CurLexerCallback = &CLK_Lexer;
831
832 /// If the current lexer is for a submodule that is being built, this
833 /// is that submodule.
834 Module *CurLexerSubmodule = nullptr;
835
836 /// Keeps track of the stack of files currently
837 /// \#included, and macros currently being expanded from, not counting
838 /// CurLexer/CurTokenLexer.
 // One saved lexer context. Each member is the saved counterpart of the
 // Preprocessor member with the matching "Cur" name (CurLexerCallback,
 // CurLexerSubmodule, CurLexer, CurPPLexer, CurTokenLexer, CurDirLookup).
 // The two unique_ptr members own their lexers, so the struct is move-only.
839 struct IncludeStackInfo {
840 LexerCallback CurLexerCallback;
841 Module *TheSubmodule;
842 std::unique_ptr<Lexer> TheLexer;
843 PreprocessorLexer *ThePPLexer;
844 std::unique_ptr<TokenLexer> TheTokenLexer;
845 ConstSearchDirIterator TheDirLookup;
846
847 // The following constructors are completely useless copies of the default
848 // versions, only needed to pacify MSVC.
 // Takes ownership of TheLexer and TheTokenLexer; every argument is moved
 // into the member of the same name.
849 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
850 std::unique_ptr<Lexer> &&TheLexer,
851 PreprocessorLexer *ThePPLexer,
852 std::unique_ptr<TokenLexer> &&TheTokenLexer,
853 ConstSearchDirIterator TheDirLookup)
854 : CurLexerCallback(std::move(CurLexerCallback)),
855 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
856 ThePPLexer(std::move(ThePPLexer)),
857 TheTokenLexer(std::move(TheTokenLexer)),
858 TheDirLookup(std::move(TheDirLookup)) {}
859 };
860 std::vector<IncludeStackInfo> IncludeMacroStack;
861
862 /// Actions invoked when some preprocessor activity is
863 /// encountered (e.g. a file is \#included, etc).
864 std::unique_ptr<PPCallbacks> Callbacks;
865
 // Value bundle of a token, a macro definition and a source range; instances
 // are queued in DelayedMacroExpandsCallbacks.
 // NOTE(review): presumably the arguments of a deferred "macro expands"
 // callback -- confirm against the code that drains the queue.
866 struct MacroExpandsInfo {
867 Token Tok;
868 MacroDefinition MD;
869 SourceRange Range;
870
 // Stores copies of all three arguments.
871 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
872 : Tok(Tok), MD(MD), Range(Range) {}
873 };
874 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
875
876 /// Information about a name that has been used to define a module macro.
 // Extended per-identifier macro record. MacroState allocates one lazily and
 // stores a pointer to it in place of the bare MacroDirective pointer.
877 struct FullModuleMacroInfo {
878 /// The most recent macro directive for this identifier.
879 MacroDirective *MD;
880
881 /// The active module macros for this identifier.
882 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
883
884 /// The generation number at which we last updated ActiveModuleMacros.
885 /// \see Preprocessor::VisibleModules.
 // Zero is also written by MacroState::setOverriddenMacros to force a
 // refresh on the next module-info query.
886 unsigned ActiveModuleMacrosGeneration = 0;
887
888 /// Whether this macro name is ambiguous.
889 bool IsAmbiguous = false;
890
891 /// The module macros that are overridden by this macro.
892 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
893
 // Starts with the given latest directive and no module macros recorded.
894 FullModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
895 };
896
897 /// The state of a macro for an identifier.
 // Per-identifier macro state. It is stored as a pointer union: either just
 // the latest MacroDirective, or a FullModuleMacroInfo that additionally
 // carries module-macro bookkeeping. The full record is created lazily.
898 class MacroState {
 // mutable so that const queries can promote the state to the full record.
899 mutable llvm::PointerUnion<MacroDirective *, FullModuleMacroInfo *> State;
900
 // Returns the up-to-date full record for II, creating it on first use.
 // Returns nullptr when II has no macro definition, when neither the Modules
 // nor the ModulesLocalVisibility language option is set, or when the
 // visible-module generation of the current submodule state is zero.
901 FullModuleMacroInfo *getFullModuleInfo(Preprocessor &PP,
902 const IdentifierInfo *II) const {
903 if (II->isOutOfDate())
904 PP.updateOutOfDateIdentifier(*II);
905 // FIXME: Find a spare bit on IdentifierInfo and store a
906 // HasModuleMacros flag.
907 if (!II->hasMacroDefinition() ||
908 (!PP.getLangOpts().Modules &&
909 !PP.getLangOpts().ModulesLocalVisibility) ||
910 !PP.CurSubmoduleState->VisibleModules.getGeneration())
911 return nullptr;
912
913 auto *Info = dyn_cast_if_present<FullModuleMacroInfo *>(State);
914 if (!Info) {
 // Promote: placement-new the full record in storage obtained from the
 // preprocessor's allocator, seeded with the directive held so far.
915 Info = new (PP.getPreprocessorAllocator())
916 FullModuleMacroInfo(cast<MacroDirective *>(State));
917 State = Info;
918 }
919
 // Refresh the record when it was computed for a different generation.
920 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
921 Info->ActiveModuleMacrosGeneration)
922 PP.updateModuleMacroInfo(II, *Info);
923 return Info;
924 }
925
926 public:
927 MacroState() : MacroState(nullptr) {}
928 MacroState(MacroDirective *MD) : State(MD) {}
929
 // Move construction takes over O's state and leaves O holding a null
 // MacroDirective pointer, so O's destructor has nothing to destroy.
930 MacroState(MacroState &&O) noexcept : State(O.State) {
931 O.State = (MacroDirective *)nullptr;
932 }
933
 // NOTE(review): the previous value of State is overwritten without running
 // ~FullModuleMacroInfo on it -- confirm that assignment targets never hold
 // a full record, otherwise that record's vectors are never destroyed.
934 MacroState &operator=(MacroState &&O) noexcept {
935 auto S = O.State;
936 O.State = (MacroDirective *)nullptr;
937 State = S;
938 return *this;
939 }
940
 // Only the destructor is invoked on a full record; no delete is issued
 // because the storage came from placement new on the preprocessor's
 // allocator.
941 ~MacroState() {
942 if (auto *Info = dyn_cast_if_present<FullModuleMacroInfo *>(State))
943 Info->~FullModuleMacroInfo();
944 }
945
 // The latest directive, whichever representation is in use (may be null).
946 MacroDirective *getLatest() const {
947 if (auto *Info = dyn_cast_if_present<FullModuleMacroInfo *>(State))
948 return Info->MD;
949 return cast<MacroDirective *>(State);
950 }
951
 // Replaces the latest directive without changing the representation.
952 void setLatest(MacroDirective *MD) {
953 if (auto *Info = dyn_cast_if_present<FullModuleMacroInfo *>(State))
954 Info->MD = MD;
955 else
956 State = MD;
957 }
958
 // Active module macros and ambiguity flag for II, or a default-constructed
 // result when getFullModuleInfo() yields no record.
959 ModuleMacroInfo getModuleInfo(Preprocessor &PP,
960 const IdentifierInfo *II) const {
961 if (auto *Info = getFullModuleInfo(PP, II))
962 return ModuleMacroInfo{Info->ActiveModuleMacros, Info->IsAmbiguous};
963 return {};
964 }
965
 // Delegates to the latest directive; default-constructed result when there
 // is no directive at all.
966 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
967 SourceManager &SourceMgr) const {
968 // FIXME: Incorporate module macros into the result of this.
969 if (auto *Latest = getLatest())
970 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
971 return {};
972 }
973
 // Moves every currently active module macro to the overridden list and
 // clears the ambiguity flag. No-op when there is no full record.
974 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
975 if (auto *Info = getFullModuleInfo(PP, II)) {
976 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
977 Info->ActiveModuleMacros.begin(),
978 Info->ActiveModuleMacros.end());
979 Info->ActiveModuleMacros.clear();
980 Info->IsAmbiguous = false;
981 }
982 }
983
 // Overridden module macros; empty when no full record exists.
984 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
985 if (auto *Info = dyn_cast_if_present<FullModuleMacroInfo *>(State))
986 return Info->OverriddenMacros;
987 return {};
988 }
989
 // Replaces the overridden list with Overrides. A full record is created
 // only when one is needed (non-empty Overrides).
990 void setOverriddenMacros(Preprocessor &PP,
991 ArrayRef<ModuleMacro *> Overrides) {
992 auto *Info = dyn_cast_if_present<FullModuleMacroInfo *>(State);
993 if (!Info) {
994 if (Overrides.empty())
995 return;
996 Info = new (PP.getPreprocessorAllocator())
997 FullModuleMacroInfo(cast<MacroDirective *>(State));
998 State = Info;
999 }
1000 Info->OverriddenMacros.clear();
1001 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
1002 Overrides.begin(), Overrides.end());
 // getFullModuleInfo() only compares generations once the current one is
 // non-zero, so zero here guarantees a refresh on the next query.
1003 Info->ActiveModuleMacrosGeneration = 0;
1004 }
1005 };
1006
1007 /// For each IdentifierInfo that was associated with a macro, we
1008 /// keep a mapping to the history of all macro definitions and #undefs in
1009 /// the reverse order (the latest one is in the head of the list).
1010 ///
1011 /// This mapping lives within the \p CurSubmoduleState.
1012 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
1013
1014 struct SubmoduleState;
1015
1016 /// Information about a submodule that we're currently building.
1017 struct BuildingSubmoduleInfo {
1018 /// The module that we are building.
1019 Module *M;
1020
1021 /// The location at which the module was included.
1022 SourceLocation ImportLoc;
1023
1024 /// Whether we entered this submodule via a pragma.
1025 bool IsPragma;
1026
1027 /// The previous SubmoduleState.
1028 SubmoduleState *OuterSubmoduleState;
1029
1030 /// The number of pending module macro names when we started building this.
1031 unsigned OuterPendingModuleMacroNames;
1032
1033 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
1034 SubmoduleState *OuterSubmoduleState,
1035 unsigned OuterPendingModuleMacroNames)
1036 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
1037 OuterSubmoduleState(OuterSubmoduleState),
1038 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
1039 };
1040 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
1041
1042 /// Information about a submodule's preprocessor state.
1043 struct SubmoduleState {
1044 /// The macros for the submodule.
1045 MacroMap Macros;
1046
1047 /// The set of modules that are visible within the submodule.
1048 VisibleModuleSet VisibleModules;
1049
1050 // FIXME: CounterValue?
1051 // FIXME: PragmaPushMacroInfo?
1052 };
1053 std::map<Module *, SubmoduleState> Submodules;
1054
1055 /// The preprocessor state for preprocessing outside of any submodule.
1056 SubmoduleState NullSubmoduleState;
1057
1058 /// The current submodule state. Will be \p NullSubmoduleState if we're not
1059 /// in a submodule.
1060 SubmoduleState *CurSubmoduleState;
1061
1062 /// The files that have been included.
1063 IncludedFilesSet IncludedFiles;
1064
1065 /// The set of top-level modules that affected preprocessing, but were not
1066 /// imported.
1067 llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1068
1069 /// The set of known macros exported from modules.
1070 llvm::FoldingSet<ModuleMacro> ModuleMacros;
1071
1072 /// The names of potential module macros that we've not yet processed.
1073 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1074
1075 /// The list of module macros, for each identifier, that are not overridden by
1076 /// any other module macro.
1077 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1078 LeafModuleMacros;
1079
1080 /// Macros that we want to warn about because they are not used at the end
1081 /// of the translation unit.
1082 ///
1083 /// We store just their SourceLocations instead of
1084 /// something like MacroInfo*. The benefit of this is that when we are
1085 /// deserializing from PCH, we don't need to deserialize identifier & macros
1086 /// just so that we can report that they are unused, we just warn using
1087 /// the SourceLocations of this set (that will be filled by the ASTReader).
1088 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1089 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1090
1091 /// This is a pair of an optional message and source location used for pragmas
1092 /// that annotate macros like pragma clang restrict_expansion and pragma clang
1093 /// deprecated. This pair stores the optional message and the location of the
1094 /// annotation pragma for use producing diagnostics and notes.
1095 using MsgLocationPair = std::pair<std::string, SourceLocation>;
1096
1097 struct MacroAnnotationInfo {
1098 SourceLocation Location;
1099 std::string Message;
1100 };
1101
1102 struct MacroAnnotations {
1103 std::optional<MacroAnnotationInfo> DeprecationInfo;
1104 std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1105 std::optional<SourceLocation> FinalAnnotationLoc;
1106 };
1107
1108 /// Warning information for macro annotations.
1109 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1110
1111 /// A "freelist" of MacroArgs objects that can be
1112 /// reused for quick allocation.
1113 MacroArgs *MacroArgCache = nullptr;
1114
1115 /// For each IdentifierInfo used in a \#pragma push_macro directive,
1116 /// we keep a MacroInfo stack used to restore the previous macro value.
1117 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1118 PragmaPushMacroInfo;
1119
1120 // Various statistics we track for performance analysis.
1121 unsigned NumDirectives = 0;
1122 unsigned NumDefined = 0;
1123 unsigned NumUndefined = 0;
1124 unsigned NumPragma = 0;
1125 unsigned NumIf = 0;
1126 unsigned NumElse = 0;
1127 unsigned NumEndif = 0;
1128 unsigned NumEnteredSourceFiles = 0;
1129 unsigned MaxIncludeStackDepth = 0;
1130 unsigned NumMacroExpanded = 0;
1131 unsigned NumFnMacroExpanded = 0;
1132 unsigned NumBuiltinMacroExpanded = 0;
1133 unsigned NumFastMacroExpanded = 0;
1134 unsigned NumTokenPaste = 0;
1135 unsigned NumFastTokenPaste = 0;
1136 unsigned NumSkipped = 0;
1137
1138 /// The predefined macros that preprocessor should use from the
1139 /// command line etc.
1140 std::string Predefines;
1141
1142 /// The file ID for the preprocessor predefines.
1143 FileID PredefinesFileID;
1144
1145 /// The file ID for the PCH through header.
1146 FileID PCHThroughHeaderFileID;
1147
1148 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1149 bool SkippingUntilPragmaHdrStop = false;
1150
1151 /// Whether tokens are being skipped until the through header is seen.
1152 bool SkippingUntilPCHThroughHeader = false;
1153
1154 /// Whether the main file is preprocessed module file.
1155 bool MainFileIsPreprocessedModuleFile = false;
1156
1157 /// \{
1158 /// Cache of macro expanders to reduce malloc traffic.
1159 enum { TokenLexerCacheSize = 8 };
1160 unsigned NumCachedTokenLexers;
1161 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1162 /// \}
1163
1164 /// Keeps macro expanded tokens for TokenLexers.
1165 ///
1166 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1167 /// going to lex to the cache, and when it finishes, the tokens are removed
1168 /// from the end of the cache.
1169 SmallVector<Token, 16> MacroExpandedTokens;
1170 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1171
1172 /// A record of the macro definitions and expansions that
1173 /// occurred during preprocessing.
1174 ///
1175 /// This is an optional side structure that can be enabled with
1176 /// \c createPreprocessingRecord() prior to preprocessing.
1177 PreprocessingRecord *Record = nullptr;
1178
1179 /// Cached tokens state.
1180 using CachedTokensTy = SmallVector<Token, 1>;
1181
1182 /// Cached tokens are stored here when we do backtracking or
1183 /// lookahead. They are "lexed" by the CachingLex() method.
1184 CachedTokensTy CachedTokens;
1185
1186 /// The position of the cached token that CachingLex() should
1187 /// "lex" next.
1188 ///
1189 /// If it points beyond the CachedTokens vector, it means that a normal
1190 /// Lex() should be invoked.
1191 CachedTokensTy::size_type CachedLexPos = 0;
1192
1193 /// Stack of backtrack positions, allowing nested backtracks.
1194 ///
1195 /// The EnableBacktrackAtThisPos() method pushes a position to
1196 /// indicate where CachedLexPos should be set when the BackTrack() method is
1197 /// invoked (at which point the last position is popped).
1198 std::vector<CachedTokensTy::size_type> BacktrackPositions;
1199
1200 /// Stack of cached tokens/initial number of cached tokens pairs, allowing
1201 /// nested unannotated backtracks.
1202 std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1203 UnannotatedBacktrackTokens;
1204
1205 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1206 /// This is used to guard against calling this function recursively.
1207 ///
1208 /// See comments at the use-site for more context about why it is needed.
1209 bool SkippingExcludedConditionalBlock = false;
1210
1211 /// Keeps track of skipped range mappings that were recorded while skipping
1212 /// excluded conditional directives. It maps the source buffer pointer at
1213 /// the beginning of a skipped block, to the number of bytes that should be
1214 /// skipped.
1215 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1216
1217 void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1218
1219public:
1220 Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags,
1221 const LangOptions &LangOpts, SourceManager &SM,
1222 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
1223 IdentifierInfoLookup *IILookup = nullptr,
1224 bool OwnsHeaderSearch = false,
1226
1227 ~Preprocessor();
1228
1229 /// Initialize the preprocessor using information about the target.
1230 ///
1231 /// \param Target is owned by the caller and must remain valid for the
1232 /// lifetime of the preprocessor.
1233 /// \param AuxTarget is owned by the caller and must remain valid for
1234 /// the lifetime of the preprocessor.
1235 void Initialize(const TargetInfo &Target,
1236 const TargetInfo *AuxTarget = nullptr);
1237
1238 /// Initialize the preprocessor to parse a model file.
1239 ///
1240 /// To parse model files the preprocessor of the original source is reused to
1241 /// preserve the identifier table. However, to avoid some duplicate
1242 /// information in the preprocessor, some cleanup is needed before it is used
1243 /// to parse model files. This method does that cleanup.
1245
1246 /// Cleanup after model file parsing
1247 void FinalizeForModelFile();
1248
1249 /// Retrieve the preprocessor options used to initialize this preprocessor.
1250 const PreprocessorOptions &getPreprocessorOpts() const { return PPOpts; }
1251
1252 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
1253 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1254
1255 const LangOptions &getLangOpts() const { return LangOpts; }
1256 const TargetInfo &getTargetInfo() const { return *Target; }
1257 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
1258 FileManager &getFileManager() const { return FileMgr; }
1259 SourceManager &getSourceManager() const { return SourceMgr; }
1260 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1261
1262 IdentifierTable &getIdentifierTable() { return Identifiers; }
1263 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
1264 SelectorTable &getSelectorTable() { return Selectors; }
1265 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
1266 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1267
1269 ExternalSource = Source;
1270 }
1271
1273 return ExternalSource;
1274 }
1275
1276 /// Retrieve the module loader associated with this preprocessor.
1277 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1278
1280 return TheModuleLoader.HadFatalFailure;
1281 }
1282
1283 /// Retrieve the number of Directives that have been processed by the
1284 /// Preprocessor.
1285 unsigned getNumDirectives() const {
1286 return NumDirectives;
1287 }
1288
1289 /// True if we are currently preprocessing a #if or #elif directive
1291 return ParsingIfOrElifDirective;
1292 }
1293
1294 /// Control whether the preprocessor retains comments in output.
1295 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1296 this->KeepComments = KeepComments | KeepMacroComments;
1297 this->KeepMacroComments = KeepMacroComments;
1298 }
1299
1300 bool getCommentRetentionState() const { return KeepComments; }
1301
1302 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
1303 bool getPragmasEnabled() const { return PragmasEnabled; }
1304
1306 SuppressIncludeNotFoundError = Suppress;
1307 }
1308
1310 return SuppressIncludeNotFoundError;
1311 }
1312
1313 /// Sets whether the preprocessor is responsible for producing output or if
1314 /// it is producing tokens to be consumed by Parse and Sema.
1315 void setPreprocessedOutput(bool IsPreprocessedOutput) {
1316 PreprocessedOutput = IsPreprocessedOutput;
1317 }
1318
1319 /// Returns true if the preprocessor is responsible for generating output,
1320 /// false if it is producing tokens to be consumed by Parse and Sema.
1321 bool isPreprocessedOutput() const { return PreprocessedOutput; }
1322
1323 /// Return true if we are lexing directly from the specified lexer.
1324 bool isCurrentLexer(const PreprocessorLexer *L) const {
1325 return CurPPLexer == L;
1326 }
1327
1328 /// Return the current lexer being lexed from.
1329 ///
1330 /// Note that this ignores any potentially active macro expansions and _Pragma
1331 /// expansions going on at the time.
1332 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1333
1334 /// Return the current file lexer being lexed from.
1335 ///
1336 /// Note that this ignores any potentially active macro expansions and _Pragma
1337 /// expansions going on at the time.
1339
1340 /// Return the submodule owning the file being lexed. This may not be
1341 /// the current module if we have changed modules since entering the file.
1342 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1343
1344 /// Returns the FileID for the preprocessor predefines.
1345 FileID getPredefinesFileID() const { return PredefinesFileID; }
1346
1347 /// \{
1348 /// Accessors for preprocessor callbacks.
1349 ///
1350 /// Note that this class takes ownership of any PPCallbacks object given to
1351 /// it.
1352 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1353 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1354 if (Callbacks)
1355 C = std::make_unique<PPChainedCallbacks>(std::move(C),
1356 std::move(Callbacks));
1357 Callbacks = std::move(C);
1358 }
1359 void removePPCallbacks();
1360 /// \}
1361
1362 /// Get the number of tokens processed so far.
1363 unsigned getTokenCount() const { return TokenCount; }
1364
1365 /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1366 unsigned getMaxTokens() const { return MaxTokens; }
1367
1369 MaxTokens = Value;
1370 MaxTokensOverrideLoc = Loc;
1371 };
1372
1373 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1374
1375 /// Register a function that would be called on each token in the final
1376 /// expanded token stream.
1377 /// This also reports annotation tokens produced by the parser.
1378 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1379 OnToken = std::move(F);
1380 }
1381
1383 GetDependencyDirectives = &Get;
1384 }
1385
1386 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1387
1388 bool isMacroDefined(StringRef Id) {
1389 return isMacroDefined(&Identifiers.get(Id));
1390 }
1392 return II->hasMacroDefinition() &&
1393 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1394 }
1395
1396 /// Determine whether II is defined as a macro within the module M,
1397 /// if that is a module that we've already preprocessed. Does not check for
1398 /// macros imported into M.
1400 if (!II->hasMacroDefinition())
1401 return false;
1402 auto I = Submodules.find(M);
1403 if (I == Submodules.end())
1404 return false;
1405 auto J = I->second.Macros.find(II);
1406 if (J == I->second.Macros.end())
1407 return false;
1408 auto *MD = J->second.getLatest();
1409 return MD && MD->isDefined();
1410 }
1411
1413 if (!II->hasMacroDefinition())
1414 return {};
1415
1416 MacroState &S = CurSubmoduleState->Macros[II];
1417 auto *MD = S.getLatest();
1418 while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1419 MD = MD->getPrevious();
1420 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1421 S.getModuleInfo(*this, II));
1422 }
1423
1425 SourceLocation Loc) {
1426 if (!II->hadMacroDefinition())
1427 return {};
1428
1429 MacroState &S = CurSubmoduleState->Macros[II];
1431 if (auto *MD = S.getLatest())
1432 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1433 // FIXME: Compute the set of active module macros at the specified location.
1434 return MacroDefinition(DI.getDirective(), S.getModuleInfo(*this, II));
1435 }
1436
1437 /// Given an identifier, return its latest non-imported MacroDirective
1438 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1440 if (!II->hasMacroDefinition())
1441 return nullptr;
1442
1443 auto *MD = getLocalMacroDirectiveHistory(II);
1444 if (!MD || MD->getDefinition().isUndefined())
1445 return nullptr;
1446
1447 return MD;
1448 }
1449
1450 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1451 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1452 }
1453
1455 if (!II->hasMacroDefinition())
1456 return nullptr;
1457 if (auto MD = getMacroDefinition(II))
1458 return MD.getMacroInfo();
1459 return nullptr;
1460 }
1461
1462 /// Given an identifier, return the latest non-imported macro
1463 /// directive for that identifier.
1464 ///
1465 /// One can iterate over all previous macro directives from the most recent
1466 /// one.
1468
1469 /// Add a directive to the macro directive history for this identifier.
1472 SourceLocation Loc) {
1473 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1474 appendMacroDirective(II, MD);
1475 return MD;
1476 }
1481
1482 /// Set a MacroDirective that was loaded from a PCH file.
1484 MacroDirective *MD);
1485
1486 /// Register an exported macro for a module and identifier.
1489 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1491
1492 /// Get the list of leaf (non-overridden) module macros for a name.
1494 if (II->isOutOfDate())
1495 updateOutOfDateIdentifier(*II);
1496 auto I = LeafModuleMacros.find(II);
1497 if (I != LeafModuleMacros.end())
1498 return I->second;
1499 return {};
1500 }
1501
1502 /// Get the list of submodules that we're currently building.
1504 return BuildingSubmoduleStack;
1505 }
1506
1507 /// \{
1508 /// Iterators for the macro history table. Currently defined macros have
1509 /// IdentifierInfo::hasMacroDefinition() set and an empty
1510 /// MacroInfo::getUndefLoc() at the head of the list.
1511 using macro_iterator = MacroMap::const_iterator;
1512
1513 llvm::iterator_range<macro_iterator>
1514 macros(bool IncludeExternalMacros = true) const;
1515
1516 /// \}
1517
1518 /// Mark the given clang module as affecting the current clang module or translation unit.
1520 assert(M->isModuleMapModule());
1521 if (!BuildingSubmoduleStack.empty()) {
1522 if (M != BuildingSubmoduleStack.back().M)
1523 BuildingSubmoduleStack.back().M->AffectingClangModules.push_back(M);
1524 } else {
1525 AffectingClangModules.insert(M);
1526 }
1527 }
1528
1529 /// Get the set of top-level clang modules that affected preprocessing, but were not
1530 /// imported.
1532 return AffectingClangModules;
1533 }
1534
1535 /// Mark the file as included.
1536 /// Returns true if this is the first time the file was included.
1538 HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
1539 return IncludedFiles.insert(File).second;
1540 }
1541
1542 /// Return true if this header has already been included.
1544 HeaderInfo.getFileInfo(File);
1545 return IncludedFiles.count(File);
1546 }
1547
1548 /// Get the set of included files.
1549 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1550 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1551
1552 /// Return the name of the macro defined before \p Loc that has
1553 /// spelling \p Tokens. If there are multiple macros with same spelling,
1554 /// return the last one defined.
1556 ArrayRef<TokenValue> Tokens) const;
1557
1558 /// Get the predefines for this processor.
1559 /// Used by some third-party tools to inspect and add predefines (see
1560 /// https://github.com/llvm/llvm-project/issues/57483).
1561 const std::string &getPredefines() const { return Predefines; }
1562
1563 /// Set the predefines for this Preprocessor.
1564 ///
1565 /// These predefines are automatically injected when parsing the main file.
1566 void setPredefines(std::string P) { Predefines = std::move(P); }
1567
1568 /// Return information about the specified preprocessor
1569 /// identifier token.
1570 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1571 return &Identifiers.get(Name);
1572 }
1573
1574 /// Add the specified pragma handler to this preprocessor.
1575 ///
1576 /// If \p Namespace is non-null, then it is a token required to exist on the
1577 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1578 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1580 AddPragmaHandler(StringRef(), Handler);
1581 }
1582
1583 /// Remove the specific pragma handler from this preprocessor.
1584 ///
1585 /// If \p Namespace is non-null, then it should be the namespace that
1586 /// \p Handler was added to. It is an error to remove a handler that
1587 /// has not been registered.
1588 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1590 RemovePragmaHandler(StringRef(), Handler);
1591 }
1592
1593 /// Install empty handlers for all pragmas (making them ignored).
1594 void IgnorePragmas();
1595
1596 /// Set empty line handler.
1597 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1598
1599 EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1600
1601 /// Add the specified comment handler to the preprocessor.
1602 void addCommentHandler(CommentHandler *Handler);
1603
1604 /// Remove the specified comment handler.
1605 ///
1606 /// It is an error to remove a handler that has not been registered.
1607 void removeCommentHandler(CommentHandler *Handler);
1608
1609 /// Set the code completion handler to the given object.
1611 CodeComplete = &Handler;
1612 }
1613
1614 /// Retrieve the current code-completion handler.
1616 return CodeComplete;
1617 }
1618
1619 /// Clear out the code completion handler.
1621 CodeComplete = nullptr;
1622 }
1623
1624 /// Hook used by the lexer to invoke the "included file" code
1625 /// completion point.
1626 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1627
1628 /// Hook used by the lexer to invoke the "natural language" code
1629 /// completion point.
1631
1632 /// Set the code completion token for filtering purposes.
1634 CodeCompletionII = Filter;
1635 }
1636
1637 /// Set the code completion token range for detecting replacement range later
1638 /// on.
1640 const SourceLocation End) {
1641 CodeCompletionTokenRange = {Start, End};
1642 }
1644 return CodeCompletionTokenRange;
1645 }
1646
1647 /// Get the code completion token for filtering purposes.
1649 if (CodeCompletionII)
1650 return CodeCompletionII->getName();
1651 return {};
1652 }
1653
1654 /// Retrieve the preprocessing record, or NULL if there is no
1655 /// preprocessing record.
1657
1658 /// Create a new preprocessing record, which will keep track of
1659 /// all macro expansions, macro definitions, etc.
1661
1662 /// Returns true if the FileEntry is the PCH through header.
1663 bool isPCHThroughHeader(const FileEntry *FE);
1664
1665 /// True if creating a PCH with a through header.
1667
1668 /// True if using a PCH with a through header.
1670
1671 /// True if creating a PCH with a #pragma hdrstop.
1673
1674 /// True if using a PCH with a #pragma hdrstop.
1676
1677 /// Skip tokens until after the #include of the through header or
1678 /// until after a #pragma hdrstop.
1680
1681 /// Process directives while skipping until the through header or
1682 /// #pragma hdrstop is found.
1684 SourceLocation HashLoc);
1685
1686 /// Enter the specified FileID as the main source file,
1687 /// which implicitly adds the builtin defines etc.
1688 void EnterMainSourceFile();
1689
1690 /// Inform the preprocessor callbacks that processing is complete.
1691 void EndSourceFile();
1692
1693 /// Add a source file to the top of the include stack and
1694 /// start lexing tokens from it instead of the current buffer.
1695 ///
1696 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1698 SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1699
1700 /// Add a Macro to the top of the include stack and start lexing
1701 /// tokens from it instead of the current buffer.
1702 ///
1703 /// \param Args specifies the tokens input to a function-like macro.
1704 /// \param ILEnd specifies the location of the ')' for a function-like macro
1705 /// or the identifier for an object-like macro.
1707 MacroArgs *Args);
1708
1709private:
1710 /// Add a "macro" context to the top of the include stack,
1711 /// which will cause the lexer to start returning the specified tokens.
1712 ///
1713 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1714 /// will not be subject to further macro expansion. Otherwise, these tokens
1715 /// will be re-macro-expanded when/if expansion is enabled.
1716 ///
1717 /// If \p OwnsTokens is false, this method assumes that the specified stream
1718 /// of tokens has a permanent owner somewhere, so they do not need to be
1719 /// copied. If it is true, it assumes the array of tokens is allocated with
1720 /// \c new[] and the Preprocessor will delete[] it.
1721 ///
1722 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1723 /// set, see the flag documentation for details.
1724 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1725 bool DisableMacroExpansion, bool OwnsTokens,
1726 bool IsReinject);
1727
1728public:
1729 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1730 bool DisableMacroExpansion, bool IsReinject) {
1731 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1732 IsReinject);
1733 }
1734
1735 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1736 bool IsReinject) {
1737 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1738 IsReinject);
1739 }
1740
1741 /// Pop the current lexer/macro exp off the top of the lexer stack.
1742 ///
1743 /// This should only be used in situations where the current state of the
1744 /// top-of-stack lexer is known.
1745 void RemoveTopOfLexerStack();
1746
1747 /// From the point that this method is called, and until
1748 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1749 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1750 /// make the Preprocessor re-lex the same tokens.
1751 ///
1752 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1753 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1754 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1755 ///
1756 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1757 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1758 /// tokens will continue indefinitely.
1759 ///
1760 /// \param Unannotated Whether token annotations are reverted upon calling
1761 /// Backtrack().
1762 void EnableBacktrackAtThisPos(bool Unannotated = false);
1763
1764private:
1765 std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1766
1767 CachedTokensTy PopUnannotatedBacktrackTokens();
1768
1769public:
1770 /// Disable the last EnableBacktrackAtThisPos call.
1772
1773 /// Make Preprocessor re-lex the tokens that were lexed since
1774 /// EnableBacktrackAtThisPos() was previously called.
1775 void Backtrack();
1776
1777 /// True if EnableBacktrackAtThisPos() was called and
1778 /// caching of tokens is on.
1779 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1780
1781 /// True if EnableBacktrackAtThisPos() was called and
1782 /// caching of unannotated tokens is on.
1784 return !UnannotatedBacktrackTokens.empty();
1785 }
1786
1787 /// Lex the next token for this preprocessor.
1788 void Lex(Token &Result);
1789
1790 /// Lex all tokens for this preprocessor until (and excluding) end of file.
1791 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1792
1793 /// Lex a token, forming a header-name token if possible.
1794 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1795
1796 /// Lex the parameters for an #embed directive, returns nullopt on error.
1797 std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1798 bool ForHasEmbed);
1799
1800 /// Whether the main file is preprocessed module file.
1802 return MainFileIsPreprocessedModuleFile;
1803 }
1804
1805 /// Mark the main file as a preprocessed module file, then the 'module' and
1806 /// 'import' directive recognition will be suppressed. Only
1807 /// '__preprocessed_module' and '__preprocessed_import' are allowed.
1809 MainFileIsPreprocessedModuleFile = true;
1810 }
1811
1813 SmallVectorImpl<Token> &Suffix,
1815 bool AllowMacroExpansion, bool IsPartition);
1816 bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok,
1818 SmallVectorImpl<Token> &DirToks,
1819 bool AllowMacroExpansion, bool IsPartition);
1821 void HandleCXXImportDirective(Token Import);
1823
1824 /// Callback invoked when the lexer sees one of export, import or module token
1825 /// at the start of a line.
1826 ///
1827 /// This consumes the import/module directive, modifies the
1828 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1829 /// read is the correct one.
1831
1832 /// Get the start location of the first pp-token in main file.
1834 assert(FirstPPTokenLoc.isValid() &&
1835 "Did not see the first pp-token in the main file");
1836 return FirstPPTokenLoc;
1837 }
1838
1840 bool StopUntilEOD = false);
1842 bool StopUntilEOD = false);
1843
1845 bool IncludeExports = true);
1846
1848 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1849 }
1850
1851 /// Lex a string literal, which may be the concatenation of multiple
1852 /// string literals and may even come from macro expansion.
1853 /// \returns true on success, false if an error diagnostic has been generated.
1854 bool LexStringLiteral(Token &Result, std::string &String,
1855 const char *DiagnosticTag, bool AllowMacroExpansion) {
1856 if (AllowMacroExpansion)
1857 Lex(Result);
1858 else
1860 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1861 AllowMacroExpansion);
1862 }
1863
1864 /// Complete the lexing of a string literal where the first token has
1865 /// already been lexed (see LexStringLiteral).
1866 bool FinishLexStringLiteral(Token &Result, std::string &String,
1867 const char *DiagnosticTag,
1868 bool AllowMacroExpansion);
1869
1870 /// Lex a token. If it's a comment, keep lexing until we get
1871 /// something not a comment.
1872 ///
1873 /// This is useful in -E -C mode where comments would foul up preprocessor
1874 /// directive handling.
1876 do
1877 Lex(Result);
1878 while (Result.getKind() == tok::comment);
1879 }
1880
1881 /// Just like Lex, but disables macro expansion of identifier tokens.
1883 // Disable macro expansion.
1884 bool OldVal = DisableMacroExpansion;
1885 DisableMacroExpansion = true;
1886 // Lex the token.
1887 Lex(Result);
1888
1889 // Reenable it.
1890 DisableMacroExpansion = OldVal;
1891 }
1892
1893 /// Like LexNonComment, but this disables macro expansion of
1894 /// identifier tokens.
1896 do
1898 while (Result.getKind() == tok::comment);
1899 }
1900
1901 /// Parses a simple integer literal to get its numeric value. Floating
1902 /// point literals and user defined literals are rejected. Used primarily to
1903 /// handle pragmas that accept integer arguments.
1904 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1905
1906 /// Disables macro expansion everywhere except for preprocessor directives.
1908 DisableMacroExpansion = true;
1909 MacroExpansionInDirectivesOverride = true;
1910 }
1911
1913 DisableMacroExpansion = MacroExpansionInDirectivesOverride = false;
1914 }
1915
1916 /// Peeks ahead N tokens and returns that token without consuming any
1917 /// tokens.
1918 ///
1919 /// LookAhead(0) returns the next token that would be returned by Lex(),
1920 /// LookAhead(1) returns the token after it, etc. This returns normal
1921 /// tokens after phase 5. As such, it is equivalent to using
1922 /// 'Lex', not 'LexUnexpandedToken'.
1923 const Token &LookAhead(unsigned N) {
1924 assert(LexLevel == 0 && "cannot use lookahead while lexing");
1925 if (CachedLexPos + N < CachedTokens.size())
1926 return CachedTokens[CachedLexPos+N];
1927 else
1928 return PeekAhead(N+1);
1929 }
1930
1931 /// When backtracking is enabled and tokens are cached,
1932 /// this allows reverting a specific number of tokens.
1933 ///
1934 /// Note that the number of tokens being reverted should be up to the last
1935 /// backtrack position, not more.
1936 void RevertCachedTokens(unsigned N) {
1937 assert(isBacktrackEnabled() &&
1938 "Should only be called when tokens are cached for backtracking");
1939 assert(signed(CachedLexPos) - signed(N) >=
1940 signed(LastBacktrackPos().first) &&
1941 "Should revert tokens up to the last backtrack position, not more");
1942 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1943 "Corrupted backtrack positions ?");
1944 CachedLexPos -= N;
1945 }
1946
1947 /// Enters a token in the token stream to be lexed next.
1948 ///
1949 /// If BackTrack() is called afterwards, the token will remain at the
1950 /// insertion point.
1951 /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1952 /// flag set. See the flag documentation for details.
1953 void EnterToken(const Token &Tok, bool IsReinject) {
1954 if (LexLevel) {
1955 // It's not correct in general to enter caching lex mode while in the
1956 // middle of a nested lexing action.
1957 auto TokCopy = std::make_unique<Token[]>(1);
1958 TokCopy[0] = Tok;
1959 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1960 } else {
1961 EnterCachingLexMode();
1962 assert(IsReinject && "new tokens in the middle of cached stream");
1963 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1964 }
1965 }
1966
1967 /// We notify the Preprocessor that if it is caching tokens (because
1968 /// backtrack is enabled) it should replace the most recent cached tokens
1969 /// with the given annotation token. This function has no effect if
1970 /// backtracking is not enabled.
1971 ///
1972 /// Note that the use of this function is just for optimization, so that the
1973 /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1974 /// invoked.
1976 assert(Tok.isAnnotation() && "Expected annotation token");
1977 if (CachedLexPos != 0 && isBacktrackEnabled())
1978 AnnotatePreviousCachedTokens(Tok);
1979 }
1980
1981 /// Get the location of the last cached token, suitable for setting the end
1982 /// location of an annotation token.
1984 assert(CachedLexPos != 0);
1985 return CachedTokens[CachedLexPos-1].getLastLoc();
1986 }
1987
1988 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1989 /// CachedTokens.
1990 bool IsPreviousCachedToken(const Token &Tok) const;
1991
1992 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1993 /// in \p NewToks.
1994 ///
1995 /// Useful when a token needs to be split in smaller ones and CachedTokens
1996 /// most recent token must be updated to reflect that.
1998
1999 /// Replace the last token with an annotation token.
2000 ///
2001 /// Like AnnotateCachedTokens(), this routine replaces an
2002 /// already-parsed (and resolved) token with an annotation
2003 /// token. However, this routine only replaces the last token with
2004 /// the annotation token; it does not affect any other cached
2005 /// tokens. This function has no effect if backtracking is not
2006 /// enabled.
2008 assert(Tok.isAnnotation() && "Expected annotation token");
2009 if (CachedLexPos != 0 && isBacktrackEnabled())
2010 CachedTokens[CachedLexPos-1] = Tok;
2011 }
2012
2013 /// Enter an annotation token into the token stream.
2015 void *AnnotationVal);
2016
2017 /// Determine whether it's possible for a future call to Lex to produce an
2018 /// annotation token created by a previous call to EnterAnnotationToken.
2020 return CurLexerCallback != CLK_Lexer;
2021 }
2022
2023 /// Update the current token to represent the provided
2024 /// identifier, in order to cache an action performed by typo correction.
2026 assert(Tok.getIdentifierInfo() && "Expected identifier token");
2027 if (CachedLexPos != 0 && isBacktrackEnabled())
2028 CachedTokens[CachedLexPos-1] = Tok;
2029 }
2030
2031 /// Recompute the current lexer kind based on the CurLexer/
2032 /// CurTokenLexer pointers.
2033 void recomputeCurLexerKind();
2034
2035 /// Returns true if incremental processing is enabled
2036 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
2037
2038 /// Enables the incremental processing
2039 void enableIncrementalProcessing(bool value = true) {
2040 IncrementalProcessing = value;
2041 }
2042
2043 /// Specify the point at which code-completion will be performed.
2044 ///
2045 /// \param File the file in which code completion should occur. If
2046 /// this file is included multiple times, code-completion will
2047 /// perform completion the first time it is included. If NULL, this
2048 /// function clears out the code-completion point.
2049 ///
2050 /// \param Line the line at which code completion should occur
2051 /// (1-based).
2052 ///
2053 /// \param Column the column at which code completion should occur
2054 /// (1-based).
2055 ///
2056 /// \returns true if an error occurred, false otherwise.
2058 unsigned Column);
2059
2060 /// Determine if we are performing code completion.
2061 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
2062
2063 /// Returns the location of the code-completion point.
2064 ///
2065 /// Returns an invalid location if code-completion is not enabled or the file
2066 /// containing the code-completion point has not been lexed yet.
2067 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
2068
2069 /// Returns the start location of the file of code-completion point.
2070 ///
2071 /// Returns an invalid location if code-completion is not enabled or the file
2072 /// containing the code-completion point has not been lexed yet.
2074 return CodeCompletionFileLoc;
2075 }
2076
2077 /// Returns true if code-completion is enabled and we have hit the
2078 /// code-completion point.
2079 bool isCodeCompletionReached() const { return CodeCompletionReached; }
2080
2081 /// Note that we hit the code-completion point.
2083 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
2084 CodeCompletionReached = true;
2085 // Silence any diagnostics that occur after we hit the code-completion.
2087 }
2088
2089 /// The location of the currently-active \#pragma clang
2090 /// arc_cf_code_audited begin.
2091 ///
2092 /// Returns an invalid location if there is no such pragma active.
2094 return PragmaARCCFCodeAuditedInfo;
2095 }
2096
2097 /// Set the location of the currently-active \#pragma clang
2098 /// arc_cf_code_audited begin. An invalid location ends the pragma.
2100 SourceLocation Loc) {
2101 PragmaARCCFCodeAuditedInfo = IdentifierLoc(Loc, Ident);
2102 }
2103
2104 /// The location of the currently-active \#pragma clang
2105 /// assume_nonnull begin.
2106 ///
2107 /// Returns an invalid location if there is no such pragma active.
2109 return PragmaAssumeNonNullLoc;
2110 }
2111
2112 /// Set the location of the currently-active \#pragma clang
2113 /// assume_nonnull begin. An invalid location ends the pragma.
2115 PragmaAssumeNonNullLoc = Loc;
2116 }
2117
2118 /// Get the location of the recorded unterminated \#pragma clang
2119 /// assume_nonnull begin in the preamble, if one exists.
2120 ///
2121 /// Returns an invalid location if the preamble did not end with
2122 /// such a pragma active or if there is no recorded preamble.
2124 return PreambleRecordedPragmaAssumeNonNullLoc;
2125 }
2126
2127 /// Record the location of the unterminated \#pragma clang
2128 /// assume_nonnull begin in the preamble.
2130 PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2131 }
2132
2133 /// Set the directory in which the main file should be considered
2134 /// to have been found, if it is not a real file.
2135 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2136
2137 /// Instruct the preprocessor to skip part of the main source file.
2138 ///
2139 /// \param Bytes The number of bytes in the preamble to skip.
2140 ///
2141 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2142 /// start of a line.
2143 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2144 SkipMainFilePreamble.first = Bytes;
2145 SkipMainFilePreamble.second = StartOfLine;
2146 }
2147
2148 /// Forwarding function for diagnostics. This emits a diagnostic at
2149 /// the specified Token's location, translating the token's start
2150 /// position in the current buffer into a SourcePosition object for rendering.
2151 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2152 return Diags->Report(Loc, DiagID);
2153 }
2154
2155 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2156 return Diags->Report(Tok.getLocation(), DiagID);
2157 }
2158
2159 /// Return the 'spelling' of the token at the given
2160 /// location; does not go up to the spelling location or down to the
2161 /// expansion location.
2162 ///
2163 /// \param buffer A buffer which will be used only if the token requires
2164 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
2165 /// \param invalid If non-null, will be set \c true if an error occurs.
2167 SmallVectorImpl<char> &buffer,
2168 bool *invalid = nullptr) const {
2169 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2170 }
2171
2172 /// Return the 'spelling' of the Tok token.
2173 ///
2174 /// The spelling of a token is the characters used to represent the token in
2175 /// the source file after trigraph expansion and escaped-newline folding. In
2176 /// particular, this wants to get the true, uncanonicalized, spelling of
2177 /// things like digraphs, UCNs, etc.
2178 ///
2179 /// \param Invalid If non-null, will be set \c true if an error occurs.
2180 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2181 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2182 }
2183
2184 /// Get the spelling of a token into a preallocated buffer, instead
2185 /// of as an std::string.
2186 ///
2187 /// The caller is required to allocate enough space for the token, which is
2188 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2189 /// actual result is returned.
2190 ///
2191 /// Note that this method may do two possible things: it may either fill in
2192 /// the buffer specified with characters, or it may *change the input pointer*
2193 /// to point to a constant buffer with the data already in it (avoiding a
2194 /// copy). The caller is not allowed to modify the returned buffer pointer
2195 /// if an internal buffer is returned.
2196 unsigned getSpelling(const Token &Tok, const char *&Buffer,
2197 bool *Invalid = nullptr) const {
2198 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2199 }
2200
2201 /// Get the spelling of a token into a SmallVector.
2202 ///
2203 /// Note that the returned StringRef may not point to the
2204 /// supplied buffer if a copy can be avoided.
2205 StringRef getSpelling(const Token &Tok,
2206 SmallVectorImpl<char> &Buffer,
2207 bool *Invalid = nullptr) const;
2208
2209 /// Relex the token at the specified location.
2210 /// \returns true if there was a failure, false on success.
2212 bool IgnoreWhiteSpace = false) {
2213 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2214 }
2215
2216 /// Given a Token \p Tok that is a numeric constant with length 1,
2217 /// return the value of constant as an unsigned 8-bit integer.
2218 uint8_t
2220 bool *Invalid = nullptr) const {
2221 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2222 Tok.getLength() == 1 && "Called on unsupported token");
2223 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2224
2225 // If the token is carrying a literal data pointer, just use it.
2226 if (const char *D = Tok.getLiteralData())
2227 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2228
2229 assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2230 // Otherwise, fall back on getCharacterData, which is slower, but always
2231 // works.
2232 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2233 }
2234
2235 /// Retrieve the name of the immediate macro expansion.
2236 ///
2237 /// This routine starts from a source location, and finds the name of the
2238 /// macro responsible for its immediate expansion. It looks through any
2239 /// intervening macro argument expansions to compute this. It returns a
2240 /// StringRef that refers to the SourceManager-owned buffer of the source
2241 /// where that macro name is spelled. Thus, the result shouldn't out-live
2242 /// the SourceManager.
2244 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2245 }
2246
2247 /// Plop the specified string into a scratch buffer and set the
2248 /// specified token's location and length to it.
2249 ///
2250 /// If specified, the source location provides a location of the expansion
2251 /// point of the token.
2252 void CreateString(StringRef Str, Token &Tok,
2253 SourceLocation ExpansionLocStart = SourceLocation(),
2254 SourceLocation ExpansionLocEnd = SourceLocation());
2255
2256 /// Split the first Length characters out of the token starting at TokLoc
2257 /// and return a location pointing to the split token. Re-lexing from the
2258 /// split token will return the split token rather than the original.
2259 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2260
2261 /// Computes the source location just past the end of the
2262 /// token at this source location.
2263 ///
2264 /// This routine can be used to produce a source location that
2265 /// points just past the end of the token referenced by \p Loc, and
2266 /// is generally used when a diagnostic needs to point just after a
2267 /// token where it expected something different than it received. If
2268 /// the returned source location would not be meaningful (e.g., if
2269 /// it points into a macro), this routine returns an invalid
2270 /// source location.
2271 ///
2272 /// \param Offset an offset from the end of the token, where the source
2273 /// location should refer to. The default offset (0) produces a source
2274 /// location pointing just past the end of the token; an offset of 1 produces
2275 /// a source location pointing to the last character in the token, etc.
2277 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2278 }
2279
2280 /// Returns true if the given MacroID location points at the first
2281 /// token of the macro expansion.
2282 ///
2283 /// \param MacroBegin If non-null and function returns true, it is set to
2284 /// begin location of the macro.
2286 SourceLocation *MacroBegin = nullptr) const {
2287 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2288 MacroBegin);
2289 }
2290
2291 /// Returns true if the given MacroID location points at the last
2292 /// token of the macro expansion.
2293 ///
2294 /// \param MacroEnd If non-null and function returns true, it is set to
2295 /// end location of the macro.
2297 SourceLocation *MacroEnd = nullptr) const {
2298 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2299 }
2300
2301 /// Print the token to stderr, used for debugging.
2302 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2303 void DumpLocation(SourceLocation Loc) const;
2304 void DumpMacro(const MacroInfo &MI) const;
2305 void dumpMacroInfo(const IdentifierInfo *II);
2306
2307 /// Given a location that specifies the start of a
2308 /// token, return a new location that specifies a character within the token.
2310 unsigned Char) const {
2311 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2312 }
2313
2314 /// Increment the counters for the number of token paste operations
2315 /// performed.
2316 ///
2317 /// If fast was specified, this is a 'fast paste' case we handled.
2318 void IncrementPasteCounter(bool isFast) {
2319 if (isFast)
2320 ++NumFastTokenPaste;
2321 else
2322 ++NumTokenPaste;
2323 }
2324
2325 void PrintStats();
2326
2327 size_t getTotalMemory() const;
2328
2329 /// When the macro expander pastes together a comment (/##/) in Microsoft
2330 /// mode, this method handles updating the current state, returning the
2331 /// token on the next source line.
2333
2334 //===--------------------------------------------------------------------===//
2335 // Preprocessor callback methods. These are invoked by a lexer as various
2336 // directives and events are found.
2337
2338 /// Given a tok::raw_identifier token, look up the
2339 /// identifier information for the token and install it into the token,
2340 /// updating the token kind accordingly.
2341 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2342
2343private:
2344 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2345
2346public:
2347 /// Specifies the reason for poisoning an identifier.
2348 ///
2349 /// If that identifier is accessed while poisoned, then this reason will be
2350 /// used instead of the default "poisoned" diagnostic.
2351 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2352
2353 /// Display reason for poisoned identifier.
2354 void HandlePoisonedIdentifier(Token & Identifier);
2355
2357 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2358 if(II->isPoisoned()) {
2359 HandlePoisonedIdentifier(Identifier);
2360 }
2361 }
2362 }
2363
2364 /// isNextPPTokenOneOf - Check whether the next pp-token is one of the
2365 /// specified token kinds. This method should have no observable side-effect
2366 /// on the lexed tokens.
2367 template <typename... Ts> bool isNextPPTokenOneOf(Ts... Ks) const {
2368 static_assert(sizeof...(Ts) > 0,
2369 "requires at least one tok::TokenKind specified");
2370 auto NextTokOpt = peekNextPPToken();
2371 return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false;
2372 }
2373
2374private:
2375 /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
2376 /// buffer controlled by this lexer, otherwise return the next unexpanded
2377 /// token.
2378 std::optional<Token> peekNextPPToken() const;
2379
2380 /// Identifiers used for SEH handling in Borland. These are only
2381 /// allowed in particular circumstances
2382 // __except block
2383 IdentifierInfo *Ident__exception_code,
2384 *Ident___exception_code,
2385 *Ident_GetExceptionCode;
2386 // __except filter expression
2387 IdentifierInfo *Ident__exception_info,
2388 *Ident___exception_info,
2389 *Ident_GetExceptionInfo;
2390 // __finally
2391 IdentifierInfo *Ident__abnormal_termination,
2392 *Ident___abnormal_termination,
2393 *Ident_AbnormalTermination;
2394
2395 const char *getCurLexerEndPos();
2396 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2397
2398public:
2399 void PoisonSEHIdentifiers(bool Poison = true); // Borland
2400
2401 /// Callback invoked when the lexer reads an identifier and has
2402 /// filled in the tokens IdentifierInfo member.
2403 ///
2404 /// This callback potentially macro expands it or turns it into a named
2405 /// token (like 'for').
2406 ///
2407 /// \returns true if we actually computed a token, false if we need to
2408 /// lex again.
2409 bool HandleIdentifier(Token &Identifier);
2410
2411 /// Callback invoked when the lexer hits the end of the current file.
2412 ///
2413 /// This either returns the EOF token and returns true, or
2414 /// pops a level off the include stack and returns false, at which point the
2415 /// client should call lex again.
2416 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2417
2418 /// Callback invoked when the current TokenLexer hits the end of its
2419 /// token stream.
2421
2422 /// Callback invoked when the lexer sees a # token at the start of a
2423 /// line.
2424 ///
2425 /// This consumes the directive, modifies the lexer/preprocessor state, and
2426 /// advances the lexer(s) so that the next token read is the correct one.
2428
2429 /// Ensure that the next token is a tok::eod token.
2430 ///
2431 /// If not, emit a diagnostic and consume up until the eod.
2432 /// If \p EnableMacros is true, then we consider macros that expand to zero
2433 /// tokens as being ok.
2434 ///
2435 /// If \p ExtraToks not null, the extra tokens will be saved in this
2436 /// container.
2437 ///
2438 /// \return The location of the end of the directive (the terminating
2439 /// newline).
2441 CheckEndOfDirective(StringRef DirType, bool EnableMacros = false,
2442 SmallVectorImpl<Token> *ExtraToks = nullptr);
2443
2444 /// Read and discard all tokens remaining on the current line until
2445 /// the tok::eod token is found. Returns the range of the skipped tokens.
2448 Token Tmp;
2449 return DiscardUntilEndOfDirective(Tmp, DiscardedToks);
2450 }
2451
2452 /// Same as above except retains the token that was found.
2455 SmallVectorImpl<Token> *DiscardedToks = nullptr);
2456
2457 /// Returns true if the preprocessor has seen a use of
2458 /// __DATE__ or __TIME__ in the file so far.
2459 bool SawDateOrTime() const {
2460 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2461 }
2462 uint32_t getCounterValue() const { return CounterValue; }
2463 void setCounterValue(uint32_t V) { CounterValue = V; }
2464
2466 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2467 "FPEvalMethod should be set either from command line or from the "
2468 "target info");
2469 return CurrentFPEvalMethod;
2470 }
2471
2473 return TUFPEvalMethod;
2474 }
2475
2477 return LastFPEvalPragmaLocation;
2478 }
2479
2483 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2484 // This is the location of the '#pragma float_control' where the
2485 // execution state is modified.
2486 LastFPEvalPragmaLocation = PragmaLoc;
2487 CurrentFPEvalMethod = Val;
2488 TUFPEvalMethod = Val;
2489 }
2490
2493 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2494 TUFPEvalMethod = Val;
2495 }
2496
2497 /// Retrieves the module that we're currently building, if any.
2499
2500 /// Retrieves the module whose implementation we're currently compiling, if any.
2502
2503 /// If we are preprocessing a named module.
2504 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2505
2506 /// If we are preprocessing a named interface unit.
2507 /// Note that a module implementation partition is not considered as a
2508 /// named interface unit here although it is importable
2509 /// to ease the parsing.
2511 return ModuleDeclState.isNamedInterface();
2512 }
2513
2514 /// Get the named module name we're preprocessing.
2515 /// Requires we're preprocessing a named module.
2516 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2517
2518 /// If we are implementing an implementation module unit.
2519 /// Note that the module implementation partition is not considered as an
2520 /// implementation unit.
2522 return ModuleDeclState.isImplementationUnit();
2523 }
2524
2525 /// If we're importing a standard C++20 named module.
2527 assert(getLangOpts().CPlusPlusModules &&
2528 "Import C++ named modules are only valid for C++20 modules");
2529 return ImportingCXXNamedModules;
2530 }
2531
2532 /// Allocate a new MacroInfo object with the provided SourceLocation.
2534
2535 /// Turn the specified lexer token into a fully checked and spelled
2536 /// filename, e.g. as an operand of \#include.
2537 ///
2538 /// The caller is expected to provide a buffer that is large enough to hold
2539 /// the spelling of the filename, but is also expected to handle the case
2540 /// when this method decides to use a different buffer.
2541 ///
2542 /// \returns true if the input filename was in <>'s or false if it was
2543 /// in ""'s.
2544 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2545
2546 /// Given a "foo" or <foo> reference, look up the indicated file.
2547 ///
2548 /// Returns std::nullopt on failure. \p isAngled indicates whether the file
2549 /// reference is for system \#include's or not (i.e. using <> instead of "").
2551 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2552 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2553 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2554 SmallVectorImpl<char> *RelativePath,
2555 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2556 bool *IsFrameworkFound, bool SkipCache = false,
2557 bool OpenFile = true, bool CacheFailures = true);
2558
2559 /// Given a "Filename" or <Filename> reference, look up the indicated embed
2560 /// resource. \p isAngled indicates whether the file reference is for
2561 /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
2562 /// is true, the file looked up is opened for reading, otherwise it only
2563 /// validates that the file exists.
2564 ///
2565 /// Returns std::nullopt on failure.
2566 OptionalFileEntryRef LookupEmbedFile(StringRef Filename, bool isAngled,
2567 bool OpenFile);
2568
2569 /// Return true if we're in the top-level file, not in a \#include.
2570 bool isInPrimaryFile() const;
2571
2572 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2573 /// followed by EOD. Return true if the token is not a valid on-off-switch.
2575
2576 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2577 bool *ShadowFlag = nullptr);
2578
2579 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2580 Module *LeaveSubmodule(bool ForPragma);
2581
2582private:
2583 friend void TokenLexer::ExpandFunctionArguments();
2584
2585 void PushIncludeMacroStack() {
2586 assert(CurLexerCallback != CLK_CachingLexer &&
2587 "cannot push a caching lexer");
2588 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
2589 std::move(CurLexer), CurPPLexer,
2590 std::move(CurTokenLexer), CurDirLookup);
2591 CurPPLexer = nullptr;
2592 }
2593
2594 void PopIncludeMacroStack() {
2595 if (CurLexer)
2596 PendingDestroyLexers.push_back(std::move(CurLexer));
2597 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2598 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2599 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2600 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
2601 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2602 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2603 IncludeMacroStack.pop_back();
2604 }
2605
2606 void PropagateLineStartLeadingSpaceInfo(Token &Result);
2607
2608 /// Determine whether we need to create module macros for #defines in the
2609 /// current context.
2610 bool needModuleMacros() const;
2611
2612 /// Update the set of active module macros and ambiguity flag for a module
2613 /// macro name.
2614 void updateModuleMacroInfo(const IdentifierInfo *II,
2615 FullModuleMacroInfo &Info);
2616
2617 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2618 SourceLocation Loc);
2619 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2620 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2621 bool isPublic);
2622
2623 /// Lex and validate a macro name, which occurs after a
2624 /// \#define or \#undef.
2625 ///
2626 /// \param MacroNameTok Token that represents the name defined or undefined.
2627 /// \param IsDefineUndef Kind of preprocessor directive.
2628 /// \param ShadowFlag Points to flag that is set if macro name shadows
2629 /// a keyword.
2630 ///
2631 /// This emits a diagnostic, sets the token kind to eod,
2632 /// and discards the rest of the macro line if the macro name is invalid.
2633 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2634 bool *ShadowFlag = nullptr);
2635
2636 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2637 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2638 /// doing so performs certain validity checks including (but not limited to):
2639 /// - # (stringization) is followed by a macro parameter
2640 /// \param MacroNameTok - Token that represents the macro name
2641 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2642 ///
2643 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2644 /// returns a nullptr if an invalid sequence of tokens is encountered.
2645 MacroInfo *ReadOptionalMacroParameterListAndBody(
2646 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2647
2648 /// The ( starting an argument list of a macro definition has just been read.
2649 /// Lex the rest of the parameters and the closing ), updating \p MI with
2650 /// what we learn and saving in \p LastTok the last token read.
2651 /// Return true if an error occurs parsing the arg list.
2652 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2653
2654 /// Provide a suggestion for a typoed directive. If there is no typo, then
2655 /// just skip suggesting.
2656 ///
2657 /// \param Tok - Token that represents the directive
2658 /// \param Directive - String reference for the directive name
2659 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2660
2661 /// We just read a \#if or related directive and decided that the
2662 /// subsequent tokens are in the \#if'd out portion of the
2663 /// file. Lex the rest of the file, until we see an \#endif. If \p
2664 /// FoundNonSkipPortion is true, then we have already emitted code for part of
2665 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2666 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2667 /// already seen one so a \#else directive is a duplicate. When this returns,
2668 /// the caller can lex the first valid token.
2669 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2670 SourceLocation IfTokenLoc,
2671 bool FoundNonSkipPortion, bool FoundElse,
2672 SourceLocation ElseLoc = SourceLocation());
2673
2674 /// Information about the result for evaluating an expression for a
2675 /// preprocessor directive.
2676 struct DirectiveEvalResult {
2677 /// The integral value of the expression.
2678 std::optional<llvm::APSInt> Value;
2679
2680 /// Whether the expression was evaluated as true or not.
2681 bool Conditional;
2682
2683 /// True if the expression contained identifiers that were undefined.
2684 bool IncludedUndefinedIds;
2685
2686 /// The source range for the expression.
2687 SourceRange ExprRange;
2688 };
2689
2690 /// Evaluate an integer constant expression that may occur after a
2691 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2692 ///
2693 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2694 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2695 bool CheckForEoD = true);
2696
2697 /// Evaluate an integer constant expression that may occur after a
2698 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2699 ///
2700 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2701 /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2702 /// in the evaluated expression or not.
2703 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2704 Token &Tok,
2705 bool &EvaluatedDefined,
2706 bool CheckForEoD = true);
2707
2708 /// Process a '__has_embed("path" [, ...])' expression.
2709 ///
2710 /// Returns predefined `__STDC_EMBED_*` macro values if
2711 /// successful.
2712 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2713
2714 /// Process a '__has_include("path")' expression.
2715 ///
2716 /// Returns true if successful.
2717 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2718
2719 /// Process '__has_include_next("path")' expression.
2720 ///
2721 /// Returns true if successful.
2722 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2723
2724 /// Get the directory and file from which to start \#include_next lookup.
2725 std::pair<ConstSearchDirIterator, const FileEntry *>
2726 getIncludeNextStart(const Token &IncludeNextTok) const;
2727
2728 /// Install the standard preprocessor pragmas:
2729 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2730 void RegisterBuiltinPragmas();
2731
2732 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
2733 /// table, mark it as a builtin macro to be expanded, and return its entry.
2734 IdentifierInfo *RegisterBuiltinMacro(const char *Name) {
2735 // Get the identifier.
2736 IdentifierInfo *Id = getIdentifierInfo(Name);
2737
2738 // Mark it as being a macro that is builtin.
2739 MacroInfo *MI = AllocateMacroInfo(SourceLocation());
2740 MI->setIsBuiltinMacro();
2741 appendDefMacroDirective(Id, MI); // Bind the definition to Id; otherwise MI is never associated with the name.
2742 return Id;
2743 }
2744
2745 /// Register builtin macros such as __LINE__ with the identifier table.
2746 void RegisterBuiltinMacros();
2747
2748 /// If an identifier token is read that is to be expanded as a macro, handle
2749 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2750 /// otherwise the caller should lex again.
2751 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2752
2753 /// Cache macro expanded tokens for TokenLexers.
2754 ///
2755 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
2756 /// going to lex to the cache, and when it finishes the tokens are removed
2757 /// from the end of the cache.
2758 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2759 ArrayRef<Token> tokens);
2760
2761 void removeCachedMacroExpandedTokensOfLastLexer();
2762
2763 /// After reading "MACRO(", this method is invoked to read all of the formal
2764 /// arguments specified for the macro invocation. Returns null on error.
2765 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2766 SourceLocation &MacroEnd);
2767
2768 /// If an identifier token is read that is to be expanded
2769 /// as a builtin macro, handle it and return the next token as 'Tok'.
2770 void ExpandBuiltinMacro(Token &Tok);
2771
2772 /// Read a \c _Pragma directive, slice it up, process it, then
2773 /// return the first token after the directive.
2774 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2775 void Handle_Pragma(Token &Tok);
2776
2777 /// Like Handle_Pragma except the pragma text is not enclosed within
2778 /// a string literal.
2779 void HandleMicrosoft__pragma(Token &Tok);
2780
2781 /// Add a lexer to the top of the include stack and
2782 /// start lexing tokens from it instead of the current buffer.
2783 void EnterSourceFileWithLexer(std::unique_ptr<Lexer> TheLexer,
2784 ConstSearchDirIterator Dir);
2785
2786 /// Set the FileID for the preprocessor predefines. May only be set once: asserts that no valid PredefinesFileID has been recorded yet.
2787 void setPredefinesFileID(FileID FID) {
2788 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2789 PredefinesFileID = FID;
2790 }
2791
2792 /// Set the FileID for the PCH through header.
2793 void setPCHThroughHeaderFileID(FileID FID);
2794
2795 /// Returns true if we are lexing from a file and not a
2796 /// pragma or a macro.
2797 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2798 return L ? !L->isPragmaLexer() : P != nullptr;
2799 }
2800
2801 static bool IsFileLexer(const IncludeStackInfo& I) {
2802 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2803 }
2804
2805 bool IsFileLexer() const {
2806 return IsFileLexer(CurLexer.get(), CurPPLexer);
2807 }
2808
2809 //===--------------------------------------------------------------------===//
2810 // Standard Library Identification
2811 std::optional<CXXStandardLibraryVersionInfo> CXXStandardLibraryVersion;
2812
2813public:
2814 std::optional<std::uint64_t> getStdLibCxxVersion();
2815 bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion);
2816
2817private:
2818 //===--------------------------------------------------------------------===//
2819 // Caching stuff.
2820 void CachingLex(Token &Result);
2821
2822 bool InCachingLexMode() const {
2823 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2824 // that we are past EOF, not that we are in CachingLex mode.
2825 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2826 }
2827
2828 void EnterCachingLexMode();
2829 void EnterCachingLexModeUnchecked();
2830
2831 void ExitCachingLexMode() {
2832 if (InCachingLexMode()) // No-op unless we are currently in caching-lex mode.
2833 RemoveTopOfLexerStack();
2834 }
2835
2836 const Token &PeekAhead(unsigned N);
2837 void AnnotatePreviousCachedTokens(const Token &Tok);
2838
2839 //===--------------------------------------------------------------------===//
2840 /// Handle*Directive - implement the various preprocessor directives. These
2841 /// should side-effect the current preprocessor object so that the next call
2842 /// to Lex() will return the appropriate token next.
2843 void HandleLineDirective();
2844 void HandleDigitDirective(Token &Tok);
2845 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2846 void HandleIdentSCCSDirective(Token &Tok);
2847 void HandleMacroPublicDirective(Token &Tok);
2848 void HandleMacroPrivateDirective();
2849
2850 /// An additional notification that can be produced by a header inclusion or
2851 /// import to tell the parser what happened.
2852 struct ImportAction {
2853 enum ActionKind {
2854 None,
2855 ModuleBegin,
2856 ModuleImport,
2857 HeaderUnitImport,
2858 SkippedModuleImport,
2859 Failure,
2860 } Kind;
2861 Module *ModuleForHeader = nullptr;
2862
2863 ImportAction(ActionKind AK, Module *Mod = nullptr)
2864 : Kind(AK), ModuleForHeader(Mod) {
2865 assert((AK == None || Mod || AK == Failure) &&
2866 "no module for module action");
2867 }
2868 };
2869
2870 OptionalFileEntryRef LookupHeaderIncludeOrImport(
2871 ConstSearchDirIterator *CurDir, StringRef &Filename,
2872 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2873 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2874 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2875 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2876 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2877 ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2878 // Binary data inclusion
2879 void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok);
2880 void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2881 const LexEmbedParametersResult &Params,
2882 StringRef BinaryContents, StringRef FileName);
2883
2884 // File inclusion.
2885 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2886 ConstSearchDirIterator LookupFrom = nullptr,
2887 const FileEntry *LookupFromFile = nullptr);
2888 ImportAction
2889 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2890 Token &FilenameTok, SourceLocation EndLoc,
2891 ConstSearchDirIterator LookupFrom = nullptr,
2892 const FileEntry *LookupFromFile = nullptr);
2893 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2894 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2895 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2896 void HandleMicrosoftImportDirective(Token &Tok);
2897 void HandleObjCImportDirective(Token &AtTok, Token &ImportTok);
2898
2899public:
2900 /// Check that the given module is available, producing a diagnostic if not.
2901 /// \return \c true if the check failed (because the module is not available).
2902 /// \c false if the module appears to be usable.
2903 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2904 const TargetInfo &TargetInfo,
2905 const Module &M, DiagnosticsEngine &Diags);
2906
2907 // Module inclusion testing.
2908 /// Find the module that owns the source or header file that
2909 /// \p Loc points to. If the location is in a file that was included
2910 /// into a module, or is outside any module, returns nullptr.
2911 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2912
2913 /// We want to produce a diagnostic at location IncLoc concerning an
2914 /// unreachable effect at location MLoc (eg, where a desired entity was
2915 /// declared or defined). Determine whether the right way to make MLoc
2916 /// reachable is by #include, and if so, what header should be included.
2917 ///
2918 /// This is not necessarily fast, and might load unexpected module maps, so
2919 /// should only be called by code that intends to produce an error.
2920 ///
2921 /// \param IncLoc The location at which the missing effect was detected.
2922 /// \param MLoc A location within an unimported module at which the desired
2923 /// effect occurred.
2924 /// \return A file that can be #included to provide the desired effect. Null
2925 /// if no such file could be determined or if a #include is not
2926 /// appropriate (eg, if a module should be imported instead).
2927 OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2928 SourceLocation MLoc);
2929
2930 bool isRecordingPreamble() const {
2931 return PreambleConditionalStack.isRecording();
2932 }
2933
2934 bool hasRecordedPreamble() const {
2935 return PreambleConditionalStack.hasRecordedPreamble();
2936 }
2937
2938 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2939 return PreambleConditionalStack.getStack();
2940 }
2941
2942 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2943 PreambleConditionalStack.setStack(s);
2944 }
2945
2946 void setReplayablePreambleConditionalStack(
2947 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
2948 PreambleConditionalStack.startReplaying(); // Switch to replay mode before installing the recorded state.
2949 PreambleConditionalStack.setStack(s);
2950 PreambleConditionalStack.SkipInfo = SkipInfo;
2951 }
2952
2953 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2954 return PreambleConditionalStack.SkipInfo;
2955 }
2956
2957private:
2958 /// After processing predefined file, initialize the conditional stack from
2959 /// the preamble.
2960 void replayPreambleConditionalStack();
2961
2962 // Macro handling.
2963 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2964 void HandleUndefDirective();
2965
2966 // Conditional Inclusion.
2967 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2968 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2969 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2970 bool ReadAnyTokensBeforeDirective);
2971 void HandleEndifDirective(Token &EndifToken);
2972 void HandleElseDirective(Token &Result, const Token &HashToken);
2973 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2974 tok::PPKeywordKind Kind);
2975
2976 // Pragmas.
2977 void HandlePragmaDirective(PragmaIntroducer Introducer);
2978
2979public:
2980 void HandlePragmaOnce(Token &OnceTok);
2981 void HandlePragmaMark(Token &MarkTok);
2982 void HandlePragmaPoison();
2983 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2984 void HandlePragmaDependency(Token &DependencyTok);
2991
2992 // Return true and store the first token only if any CommentHandler
2993 // has inserted some tokens and getCommentRetentionState() is false.
2994 bool HandleComment(Token &result, SourceRange Comment);
2995
2996 /// A macro is used, update information about macros that need unused
2997 /// warnings.
2998 void markMacroAsUsed(MacroInfo *MI);
2999
3000 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
3001 SourceLocation AnnotationLoc) {
3002 AnnotationInfos[II].DeprecationInfo =
3003 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
3004 }
3005
3006 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
3007 SourceLocation AnnotationLoc) {
3008 AnnotationInfos[II].RestrictExpansionInfo =
3009 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
3010 }
3011
3012 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
3013 AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
3014 }
3015
3016 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
3017 return AnnotationInfos.find(II)->second; // Precondition: II already has an entry; the find() result is dereferenced unchecked.
3018 }
3019
3020 void emitMacroExpansionWarnings(const Token &Identifier,
3021 bool IsIfnDef = false) const {
3022 IdentifierInfo *Info = Identifier.getIdentifierInfo();
3023 if (Info->isDeprecatedMacro())
3024 emitMacroDeprecationWarning(Identifier);
3025
3026 if (Info->isRestrictExpansion() &&
3027 !SourceMgr.isInMainFile(Identifier.getLocation()))
3028 emitRestrictExpansionWarning(Identifier);
3029
3030 if (!IsIfnDef) {
3031 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
3032 emitRestrictInfNaNWarning(Identifier, 0);
3033 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
3034 emitRestrictInfNaNWarning(Identifier, 1);
3035 }
3036 }
3037
3038 static void processPathForFileMacro(SmallVectorImpl<char> &Path,
3039 const LangOptions &LangOpts,
3040 const TargetInfo &TI);
3041
3042 static void processPathToFileName(SmallVectorImpl<char> &FileName,
3043 const PresumedLoc &PLoc,
3044 const LangOptions &LangOpts,
3045 const TargetInfo &TI);
3046
3047private:
3048 void emitMacroDeprecationWarning(const Token &Identifier) const;
3049 void emitRestrictExpansionWarning(const Token &Identifier) const;
3050 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
3051 void emitRestrictInfNaNWarning(const Token &Identifier,
3052 unsigned DiagSelection) const;
3053
3054 /// This boolean state keeps track if the current scanned token (by this PP)
3055 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a
3056 /// translation unit in a linear order.
3057 bool InSafeBufferOptOutRegion = false;
3058
3059 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
3060 /// region if PP is currently in such a region. Hold undefined value
3061 /// otherwise.
3062 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of a never-closed region.
3063
3064 using SafeBufferOptOutRegionsTy =
3065 SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
3066 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
3067 // translation unit. Each region is represented by a pair of start and
3068 // end locations.
3069 SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
3070
3071 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
3072 // following structure to manage them by their ASTs.
3073 struct {
3074 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a
3075 // loaded AST. See `SourceManager::getUniqueLoadedASTFileID`.
3076 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
3077
3078 // Returns a reference to the safe buffer opt-out regions of the loaded
3079 // AST where `Loc` belongs to. (Construct if absent)
3080 SafeBufferOptOutRegionsTy &
3081 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
3082 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
3083 }
3084
3085 // Returns a reference to the safe buffer opt-out regions of the loaded
3086 // AST where `Loc` belongs to. (This const function returns nullptr if
3087 // absent.)
3088 const SafeBufferOptOutRegionsTy *
3089 lookupLoadedOptOutMap(SourceLocation Loc,
3090 const SourceManager &SrcMgr) const {
3091 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
3092 auto Iter = LoadedRegions.find(FID);
3093
3094 if (Iter == LoadedRegions.end())
3095 return nullptr;
3096 return &Iter->getSecond();
3097 }
3098 } LoadedSafeBufferOptOutMap;
3099
3100public:
3101 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
3102 /// region. This `Loc` must be a source location that has been pre-processed.
3103 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
3104
3105 /// Alter the state of whether this PP currently is in a
3106 /// "-Wunsafe-buffer-usage" opt-out region.
3107 ///
3108 /// \param isEnter true if this PP is entering a region; otherwise, this PP
3109 /// is exiting a region
3110 /// \param Loc the location of the entry or exit of a
3111 /// region
3112 /// \return true iff it is INVALID to enter or exit a region, i.e.,
3113 /// attempt to enter a region before exiting a previous region, or exiting a
3114 /// region that PP is not currently in.
3115 bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
3116 const SourceLocation &Loc);
3117
3118 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3119 /// opt-out region
3120 bool isPPInSafeBufferOptOutRegion();
3121
3122 /// \param StartLoc output argument. It will be set to the start location of
3123 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
3124 /// returns true.
3125 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3126 /// opt-out region
3127 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3128
3129 /// \return a sequence of SourceLocations representing ordered opt-out regions
3130 /// specified by
3131 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
3132 SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3133
3134 /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3135 /// record of code `PP_UNSAFE_BUFFER_USAGE`.
3136 /// \return true iff the `Preprocessor` has been updated; false if the
3137 /// `Preprocessor` is the same as it was before the call.
3138 bool setDeserializedSafeBufferOptOutMap(
3139 const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3140
3141 /// Whether we've seen pp-directives which may have changed the preprocessing
3142 /// state.
3143 bool hasSeenNoTrivialPPDirective() const;
3144
3145private:
3146 /// Helper functions to forward lexing to the actual lexer. They all share the
3147 /// same signature.
3148 static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3149 return P.CurLexer->Lex(Result);
3150 }
3151 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3152 return P.CurTokenLexer->Lex(Result);
3153 }
3154 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3155 P.CachingLex(Result);
3156 return true;
3157 }
3158 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3159 return P.CurLexer->LexDependencyDirectiveToken(Result);
3160 }
3161};
3162
3163 /// Abstract base class that describes a handler that will receive
3164 /// source ranges for each of the comments encountered in the source file.
3165 class CommentHandler {
3166 public:
3167 virtual ~CommentHandler();
3168
3169 // The handler shall return true if it has pushed any tokens
3170 // to be read using e.g. EnterToken or EnterTokenStream.
3171 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
3172 };
3173
3174 /// Abstract base class that describes a handler that will receive
3175 /// source ranges for empty lines encountered in the source file.
3176 class EmptylineHandler {
3177 public:
3178 virtual ~EmptylineHandler();
3179
3180 // The handler handles empty lines.
3181 virtual void HandleEmptyline(SourceRange Range) = 0;
3182 };
3183
3184 /// Helper class to shuttle information about #embed directives from the
3185 /// preprocessor to the parser through an annotation token.
3186 struct EmbedAnnotationData {
3187 StringRef BinaryData;
3188 StringRef FileName;
3189 };
3190
3191/// Registry of pragma handlers added by plugins
3192using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3193
3194} // namespace clang
3195
3196namespace llvm {
3197extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
3198} // namespace llvm
3199
3200#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
#define V(N, I)
Defines the Diagnostic-related interfaces.
Defines the Diagnostic IDs-related interfaces.
Token Tok
The Token.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Result
Implement __builtin_bit_cast and related operations.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Defines the PPCallbacks interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
VerifyDiagnosticConsumer::Directive Directive
__device__ __2f16 float __ockl_bool s
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
Definition Builtins.h:236
Callback handler that receives notifications when performing code completion within the preprocessor.
Abstract base class that describes a handler that will receive source ranges for each of the comments...
virtual bool HandleComment(Preprocessor &PP, SourceRange Comment)=0
A directive for a defined macro or a macro imported from a module.
Definition MacroInfo.h:433
Functor that returns the dependency directives for a given file.
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:233
void setSuppressAllDiagnostics(bool Val)
Suppress all diagnostics, to silence the front end when we know that we don't want any more diagnosti...
Definition Diagnostic.h:736
A reference to a DirectoryEntry that includes the name of the directory as it was accessed by the Fil...
Cached information about one directory (either on disk or in the virtual file system).
Abstract base class that describes a handler that will receive source ranges for empty lines encounte...
virtual void HandleEmptyline(SourceRange Range)=0
Abstract interface for external sources of preprocessor information.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition FileEntry.h:302
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements support for file system lookup, file system caching, and directory search management.
Definition FileManager.h:54
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
One of these records is kept for each identifier that is lexed.
bool hadMacroDefinition() const
Returns true if this identifier was #defined to some value at any moment.
bool hasMacroDefinition() const
Return true if this identifier is #defined to some other value.
bool isDeprecatedMacro() const
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
StringRef getName() const
Return the actual identifier string.
bool isRestrictExpansion() const
A simple pair of identifier info and location.
Implements an efficient mapping from strings to IdentifierInfo nodes.
FPEvalMethodKind
Possible float expression evaluation method choices.
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
Definition Lexer.cpp:1110
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
Definition Lexer.cpp:911
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition Lexer.h:407
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
Definition Lexer.cpp:933
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition Lexer.cpp:461
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition Lexer.cpp:542
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Definition Lexer.cpp:881
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:596
const DefMacroDirective * getDirective() const
Definition MacroInfo.h:376
Encapsulates changes to the "macros namespace" (the location where the macro name became active,...
Definition MacroInfo.h:314
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:40
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition MacroInfo.h:126
Abstract interface for a module loader.
static std::string getFlatNameFromPath(ModuleIdPath Path)
Represents a macro directive exported by a module.
Definition MacroInfo.h:515
A header that is known to reside within a given module, whether it was included or excluded.
Definition ModuleMap.h:158
unsigned getNumIdentifierLocs() const
std::string str() const
SourceLocation getBeginLoc() const
SourceLocation getEndLoc() const
SourceRange getRange() const
ModuleIdPath getModuleIdPath() const
Describes a module or submodule.
Definition Module.h:340
bool isModuleMapModule() const
Definition Module.h:450
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
PragmaHandler - Instances of this interface defined to handle the various pragmas that the language f...
Definition Pragma.h:65
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getLastFPEvalPragmaLocation() const
bool isMacroDefined(const IdentifierInfo *II)
MacroDirective * getLocalMacroDirective(const IdentifierInfo *II) const
Given an identifier, return its latest non-imported MacroDirective if it is #define'd and not #undef'...
bool markIncluded(FileEntryRef File)
Mark the file as included.
void HandlePragmaPushMacro(Token &Tok)
Handle #pragma push_macro.
Definition Pragma.cpp:634
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
void HandlePragmaPoison()
HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'.
Definition Pragma.cpp:439
void setCodeCompletionHandler(CodeCompletionHandler &Handler)
Set the code completion handler to the given object.
void dumpMacroInfo(const IdentifierInfo *II)
void HandlePragmaSystemHeader(Token &SysHeaderTok)
HandlePragmaSystemHeader - Implement #pragma GCC system_header.
Definition Pragma.cpp:481
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void MaybeHandlePoisonedIdentifier(Token &Identifier)
ModuleMacro * addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, ArrayRef< ModuleMacro * > Overrides, bool &IsNew)
Register an exported macro for a module and identifier.
void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, MacroDirective *MD)
Set a MacroDirective that was loaded from a PCH file.
MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, SourceLocation Loc)
void EnterModuleSuffixTokenStream(ArrayRef< Token > Toks)
void markClangModuleAsAffecting(Module *M)
Mark the given clang module as affecting the current clang module or translation unit.
void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, SourceLocation Loc)
Set the location of the currently-active #pragma clang arc_cf_code_audited begin.
void HandlePragmaModuleBuild(Token &Tok)
Definition Pragma.cpp:811
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
ArrayRef< ModuleMacro * > getLeafModuleMacros(const IdentifierInfo *II) const
Get the list of leaf (non-overridden) module macros for a name.
bool isIncrementalProcessingEnabled() const
Returns true if incremental processing is enabled.
void EnterToken(const Token &Tok, bool IsReinject)
Enters a token in the token stream to be lexed next.
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition Pragma.cpp:2219
void HandleCXXImportDirective(Token Import)
HandleCXXImportDirective - Handle the C++ modules import directives.
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI)
PPCallbacks * getPPCallbacks() const
bool isInNamedInterfaceUnit() const
If we are preprocessing a named interface unit.
ArrayRef< PPConditionalInfo > getPreambleConditionalStack() const
void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)
Record the location of the unterminated #pragma clang assume_nonnull begin in the preamble.
SourceRange DiscardUntilEndOfDirective(SmallVectorImpl< Token > *DiscardedToks=nullptr)
Read and discard all tokens remaining on the current line until the tok::eod token is found.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
ArrayRef< BuildingSubmoduleInfo > getBuildingSubmodules() const
Get the list of submodules that we're currently building.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const
SourceRange getCodeCompletionTokenRange() const
SourceLocation getModuleImportLoc(Module *M) const
void overrideMaxTokens(unsigned Value, SourceLocation Loc)
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void HandleSkippedDirectiveWhileUsingPCH(Token &Result, SourceLocation HashLoc)
Process directives while skipping until the through header or pragma hdrstop is found.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
void enableIncrementalProcessing(bool value=true)
Enables the incremental processing.
void TypoCorrectToken(const Token &Tok)
Update the current token to represent the provided identifier, in order to cache an action performed ...
bool GetSuppressIncludeNotFoundError()
bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M)
Determine whether II is defined as a macro within the module M, if that is a module that we've alread...
void setPragmaAssumeNonNullLoc(SourceLocation Loc)
Set the location of the currently-active #pragma clang assume_nonnull begin.
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void markMacroAsUsed(MacroInfo *MI)
A macro is used, update information about macros that need unused warnings.
LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const
void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma)
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, SourceLocation AnnotationLoc)
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, SourceLocation AnnotationLoc)
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
MacroDirective * getLocalMacroDirectiveHistory(const IdentifierInfo *II) const
Given an identifier, return the latest non-imported macro directive for that identifier.
void setPreprocessedOutput(bool IsPreprocessedOutput)
Sets whether the preprocessor is responsible for producing output or if it is producing tokens to be ...
void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc)
bool IsPreviousCachedToken(const Token &Tok) const
Whether Tok is the most recent token (CachedLexPos - 1) in CachedTokens.
bool SawDateOrTime() const
Returns true if the preprocessor has seen a use of __DATE__ or __TIME__ in the file so far.
const TargetInfo * getAuxTargetInfo() const
void CommitBacktrackedTokens()
Disable the last EnableBacktrackAtThisPos call.
Definition PPCaching.cpp:56
friend class MacroArgs
void DumpMacro(const MacroInfo &MI) const
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
void setDiagnostics(DiagnosticsEngine &D)
llvm::iterator_range< macro_iterator > macros(bool IncludeExternalMacros=true) const
IncludedFilesSet & getIncludedFiles()
Get the set of included files.
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void AnnotateCachedTokens(const Token &Tok)
We notify the Preprocessor that if it is caching tokens (because backtrack is enabled) it should repl...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
StringRef getNamedModuleName() const
Get the named module name we're preprocessing.
bool mightHavePendingAnnotationTokens()
Determine whether it's possible for a future call to Lex to produce an annotation token created by a ...
void Lex(Token &Result)
Lex the next token for this preprocessor.
void EnterTokenStream(ArrayRef< Token > Toks, bool DisableMacroExpansion, bool IsReinject)
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
bool isParsingIfOrElifDirective() const
True if we are currently preprocessing a #if or #elif directive.
unsigned getNumDirectives() const
Retrieve the number of Directives that have been processed by the Preprocessor.
bool isInImplementationUnit() const
If we are implementing an implementation module unit.
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
ModuleLoader & getModuleLoader() const
Retrieve the module loader associated with this preprocessor.
void LexNonComment(Token &Result)
Lex a token.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
PreprocessorLexer * getCurrentLexer() const
Return the current lexer being lexed from.
bool LexOnOffSwitch(tok::OnOffSwitch &Result)
Lex an on-off-switch (C99 6.10.6p2) and verify that it is followed by EOD.
Definition Pragma.cpp:972
StringRef getCodeCompletionFilter()
Get the code completion token for filtering purposes.
void setMainFileDir(DirectoryEntryRef Dir)
Set the directory in which the main file should be considered to have been found, if it is not a real...
const IdentifierTable & getIdentifierTable() const
void HandlePragmaDependency(Token &DependencyTok)
HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
Definition Pragma.cpp:513
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
friend class ASTReader
void Backtrack()
Make Preprocessor re-lex the tokens that were lexed since EnableBacktrackAtThisPos() was previously c...
Definition PPCaching.cpp:66
bool isCurrentLexer(const PreprocessorLexer *L) const
Return true if we are lexing directly from the specified lexer.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
IdentifierLoc getPragmaARCCFCodeAuditedInfo() const
The location of the currently-active #pragma clang arc_cf_code_audited begin.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
void setReplayablePreambleConditionalStack(ArrayRef< PPConditionalInfo > s, std::optional< PreambleSkipInfo > SkipInfo)
const Token & LookAhead(unsigned N)
Peeks ahead N tokens and returns that token without consuming any tokens.
friend class VAOptDefinitionContext
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
uint8_t getSpellingOfSingleCharacterNumericConstant(const Token &Tok, bool *Invalid=nullptr) const
Given a Token Tok that is a numeric constant with length 1, return the value of constant as an unsign...
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, bool *ShadowFlag=nullptr)
std::optional< PreambleSkipInfo > getPreambleSkipInfo() const
void setPreprocessToken(bool Preprocess)
bool isPreprocessedModuleFile() const
Whether the main file is preprocessed module file.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
void HandlePragmaOnce(Token &OnceTok)
HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'.
Definition Pragma.cpp:414
SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros=false, SmallVectorImpl< Token > *ExtraToks=nullptr)
Ensure that the next token is a tok::eod token.
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool isMacroDefined(StringRef Id)
static bool checkModuleIsAvailable(const LangOptions &LangOpts, const TargetInfo &TargetInfo, const Module &M, DiagnosticsEngine &Diags)
Check that the given module is available, producing a diagnostic if not.
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
void SetMacroExpansionOnlyInDirectives()
Disables macro expansion everywhere except for preprocessor directives.
bool hasRecordedPreamble() const
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Char) const
Given a location that specifies the start of a token, return a new location that specifies a characte...
SourceLocation getPragmaAssumeNonNullLoc() const
The location of the currently-active #pragma clang assume_nonnull begin.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
bool isUnannotatedBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of unannotated tokens is on.
void EnterTokenStream(std::unique_ptr< Token[]> Toks, unsigned NumToks, bool DisableMacroExpansion, bool IsReinject)
void RevertCachedTokens(unsigned N)
When backtracking is enabled and tokens are cached, this allows to revert a specific number of tokens...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
std::optional< std::uint64_t > getStdLibCxxVersion()
void RemovePragmaHandler(PragmaHandler *Handler)
unsigned getTokenCount() const
Get the number of tokens processed so far.
OptionalFileEntryRef LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile)
Given a "Filename" or <Filename> reference, look up the indicated embed resource.
unsigned getMaxTokens() const
Get the max number of tokens before issuing a -Wmax-tokens warning.
SourceLocation getMaxTokensOverrideLoc() const
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
bool hadModuleLoaderFatalFailure() const
static void processPathToFileName(SmallVectorImpl< char > &FileName, const PresumedLoc &PLoc, const LangOptions &LangOpts, const TargetInfo &TI)
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
bool HandleModuleContextualKeyword(Token &Result)
Callback invoked when the lexer sees one of the export, import or module tokens at the start of a line.
const TargetInfo & getTargetInfo() const
FileManager & getFileManager() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
std::string getSpelling(const Token &Tok, bool *Invalid=nullptr) const
Return the 'spelling' of the Tok token.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
friend class VariadicMacroScopeGuard
Module * getCurrentLexerSubmodule() const
Return the submodule owning the file being lexed.
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
MacroInfo * AllocateMacroInfo(SourceLocation L)
Allocate a new MacroInfo object with the provided SourceLocation.
void setDependencyDirectivesGetter(DependencyDirectivesGetter &Get)
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
StringRef getImmediateMacroName(SourceLocation Loc)
Retrieve the name of the immediate macro expansion.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a pragma hdrstop.
bool alreadyIncluded(FileEntryRef File) const
Return true if this header has already been included.
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
void LexUnexpandedNonComment(Token &Result)
Like LexNonComment, but this disables macro expansion of identifier tokens.
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition Pragma.cpp:919
llvm::BumpPtrAllocator & getPreprocessorAllocator()
ModuleMacro * getModuleMacro(Module *Mod, const IdentifierInfo *II)
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
bool GetIncludeFilenameSpelling(SourceLocation Loc, StringRef &Buffer)
Turn the specified lexer token into a fully checked and spelled filename, e.g.
PreprocessorLexer * getCurrentFileLexer() const
Return the current file lexer being lexed from.
HeaderSearch & getHeaderSearchInfo() const
void emitMacroExpansionWarnings(const Token &Identifier, bool IsIfnDef=false) const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
void HandlePragmaPopMacro(Token &Tok)
Handle #pragma pop_macro.
Definition Pragma.cpp:657
void ReplaceLastTokenWithAnnotation(const Token &Tok)
Replace the last token with an annotation token.
ExternalPreprocessorSource * getExternalSource() const
bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion)
Module * LeaveSubmodule(bool ForPragma)
const std::string & getPredefines() const
Get the predefines for this preprocessor.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
CodeCompletionHandler * getCodeCompletionHandler() const
Retrieve the current code-completion handler.
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, void *AnnotationVal)
Enter an annotation token into the token stream.
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream.
MacroInfo * getMacroInfo(const IdentifierInfo *II)
void setPredefines(std::string P)
Set the predefines for this Preprocessor.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, SmallVectorImpl< Token > &Suffix, SmallVectorImpl< IdentifierLoc > &Path, bool AllowMacroExpansion, bool IsPartition)
Builtin::Context & getBuiltinInfo()
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine)
Instruct the preprocessor to skip part of the main source file.
const PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
void ReplacePreviousCachedToken(ArrayRef< Token > NewToks)
Replace token in CachedLexPos - 1 in CachedTokens by the tokens in NewToks.
LangOptions::FPEvalMethodKind getTUFPEvalMethod() const
const LangOptions & getLangOpts() const
bool isImportingCXXNamedModules() const
If we're importing a standard C++20 named module.
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void SetSuppressIncludeNotFoundError(bool Suppress)
static void processPathForFileMacro(SmallVectorImpl< char > &Path, const LangOptions &LangOpts, const TargetInfo &TI)
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
bool isInNamedModule() const
If we are preprocessing a named module.
void EnableBacktrackAtThisPos(bool Unannotated=false)
From the point that this method is called, and until CommitBacktrackedTokens() or Backtrack() is call...
Definition PPCaching.cpp:34
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
void PoisonSEHIdentifiers(bool Poison=true)
bool isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the first token of the macro expansion.
size_t getTotalMemory() const
void setCounterValue(uint32_t V)
void setExternalSource(ExternalPreprocessorSource *Source)
void clearCodeCompletionHandler()
Clear out the code completion handler.
void AddPragmaHandler(PragmaHandler *Handler)
OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, SourceLocation MLoc)
We want to produce a diagnostic at location IncLoc concerning an unreachable effect at location MLoc ...
bool isCodeCompletionReached() const
Returns true if code-completion is enabled and we have hit the code-completion point.
IdentifierInfo * ParsePragmaPushOrPopMacro(Token &Tok)
ParsePragmaPushOrPopMacro - Handle parsing of pragma push_macro/pop_macro.
Definition Pragma.cpp:569
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool getRawToken(SourceLocation Loc, Token &Result, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
bool isNextPPTokenOneOf(Ts... Ks) const
isNextPPTokenOneOf - Check whether the next pp-token is one of the specified token kinds.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
void setPragmasEnabled(bool Enabled)
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, SourceLocation Loc)
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
bool isAtEndOfMacroExpansion(SourceLocation loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getMainFileFirstPPTokenLoc() const
Get the start location of the first pp-token in main file.
void HandlePragmaMark(Token &MarkTok)
Definition Pragma.cpp:429
void CollectPPImportSuffix(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Collect the tokens of a C++20 pp-import-suffix.
bool getPragmasEnabled() const
void HandlePragmaHdrstop(Token &Tok)
Definition Pragma.cpp:885
PreprocessingRecord * getPreprocessingRecord() const
Retrieve the preprocessing record, or NULL if there is no preprocessing record.
void setEmptylineHandler(EmptylineHandler *Handler)
Set empty line handler.
DiagnosticsEngine & getDiagnostics() const
void HandleCXXModuleDirective(Token Module)
HandleCXXModuleDirective - Handle C++ module declaration directives.
SourceLocation getLastCachedTokenLocation() const
Get the location of the last cached token, suitable for setting the end location of an annotation tok...
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
llvm::DenseSet< const FileEntry * > IncludedFilesSet
unsigned getSpelling(const Token &Tok, const char *&Buffer, bool *Invalid=nullptr) const
Get the spelling of a token into a preallocated buffer, instead of as an std::string.
SelectorTable & getSelectorTable()
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition Pragma.cpp:950
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset=0)
Computes the source location just past the end of the token at this source location.
const llvm::SmallSetVector< Module *, 2 > & getAffectingClangModules() const
Get the set of top-level clang modules that affected preprocessing, but were not imported.
std::optional< LexEmbedParametersResult > LexEmbedParameters(Token &Current, bool ForHasEmbed)
Lex the parameters for an embed directive, returns nullopt on error.
const IncludedFilesSet & getIncludedFiles() const
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void HandlePragmaIncludeAlias(Token &Tok)
Definition Pragma.cpp:692
Module * getModuleForLocation(SourceLocation Loc, bool AllowTextual)
Find the module that owns the source or header file that Loc points to.
uint32_t getCounterValue() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok, SmallVectorImpl< IdentifierLoc > &Path, SmallVectorImpl< Token > &DirToks, bool AllowMacroExpansion, bool IsPartition)
SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const
Get the location of the recorded unterminated #pragma clang assume_nonnull begin in the preamble,...
void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, MacroArgs *Args)
Add a Macro to the top of the include stack and start lexing tokens from it instead of the current bu...
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the include of the through header or until after a pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
void markMainFileAsPreprocessedModuleFile()
Mark the main file as a preprocessed module file, then the 'module' and 'import' directive recognitio...
bool LexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Lex a string literal, which may be the concatenation of multiple string literals and may even come fr...
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD)
Add a directive to the macro directive history for this identifier.
Represents an unpacked "presumed" location which can be presented to the user.
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
This table allows us to fully hide how we implement multi-keyword caching.
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Exposes information about the current target.
Definition TargetInfo.h:227
TokenValue(IdentifierInfo *II)
TokenValue(tok::TokenKind Kind)
bool operator==(const Token &Tok) const
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
Public enums and private classes that are part of the SourceManager implementation.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition TokenKinds.h:25
OnOffSwitch
Defines the possible values of an on-off-switch (C99 6.10.6p2).
Definition TokenKinds.h:56
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition TokenKinds.h:101
PPKeywordKind
Provides a namespace for preprocessor keywords which start with a '#' at the beginning of the line.
Definition TokenKinds.h:33
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
detail::SearchDirIteratorImpl< true > ConstSearchDirIterator
@ Create
'create' clause, allowed on Compute and Combined constructs, plus 'data', 'enter data',...
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
MacroUse
Context in which macro name is used.
@ Module
Module linkage, which indicates that the entity can be referred to from other translation units withi...
Definition Linkage.h:54
@ Result
The result type of a method or function.
Definition TypeBase.h:905
TranslationUnitKind
Describes the kind of translation unit being processed.
@ TU_Complete
The translation unit is a complete translation unit.
CustomizableOptional< DirectoryEntryRef > OptionalDirectoryEntryRef
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
Helper class to shuttle information about embed directives from the preprocessor to the parser throug...
Describes how and where the pragma was introduced.
Definition Pragma.h:51
PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, bool FoundNonSkipPortion, bool FoundElse, SourceLocation ElseLoc)