clang 23.0.0git
Preprocessor.h
Go to the documentation of this file.
1//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the clang::Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
20#include "clang/Basic/LLVM.h"
22#include "clang/Basic/Module.h"
27#include "clang/Lex/Lexer.h"
28#include "clang/Lex/MacroInfo.h"
30#include "clang/Lex/ModuleMap.h"
33#include "clang/Lex/Token.h"
36#include "llvm/ADT/APSInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/FoldingSet.h"
40#include "llvm/ADT/FunctionExtras.h"
41#include "llvm/ADT/PointerUnion.h"
42#include "llvm/ADT/STLExtras.h"
43#include "llvm/ADT/SmallPtrSet.h"
44#include "llvm/ADT/SmallVector.h"
45#include "llvm/ADT/StringRef.h"
46#include "llvm/ADT/TinyPtrVector.h"
47#include "llvm/ADT/iterator_range.h"
48#include "llvm/Support/Allocator.h"
49#include "llvm/Support/Casting.h"
50#include "llvm/Support/Registry.h"
51#include "llvm/Support/TrailingObjects.h"
52#include <cassert>
53#include <cstddef>
54#include <cstdint>
55#include <map>
56#include <memory>
57#include <optional>
58#include <string>
59#include <utility>
60#include <vector>
61
62namespace llvm {
63
64template<unsigned InternalLen> class SmallString;
65
66} // namespace llvm
67
68namespace clang {
69
71class CommentHandler;
72class DirectoryEntry;
75class FileEntry;
76class FileManager;
77class HeaderSearch;
78class MacroArgs;
79class PragmaHandler;
80class PragmaNamespace;
84class ScratchBuffer;
85class TargetInfo;
87
88namespace Builtin {
89class Context;
90}
91
92/// Stores token information for comparing actual tokens with
93/// predefined values. Only handles simple tokens and identifiers.
95 tok::TokenKind Kind;
97
98public:
99 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
100 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
101 assert(Kind != tok::identifier &&
102 "Identifiers should be created by TokenValue(IdentifierInfo *)");
103 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
104 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
105 }
106
107 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
108
109 bool operator==(const Token &Tok) const {
110 return Tok.getKind() == Kind &&
111 (!II || II == Tok.getIdentifierInfo());
112 }
113};
114
115/// Context in which macro name is used.
117 // other than #define or #undef
119
120 // macro name specified in #define
122
123 // macro name specified in #undef
125};
126
/// Result codes; each non-error enumerator mirrors one of the
/// __STDC_EMBED_* macros named in its comment.
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,
  /// Corresponds to __STDC_EMBED_NOT_FOUND__
  NotFound = 0,
  /// Corresponds to __STDC_EMBED_FOUND__
  Found = 1,
  /// Corresponds to __STDC_EMBED_EMPTY__
  Empty = 2,
};
133
139
140class ModuleNameLoc final
141 : llvm::TrailingObjects<ModuleNameLoc, IdentifierLoc> {
142 friend TrailingObjects;
143 unsigned NumIdentifierLocs;
144 unsigned numTrailingObjects(OverloadToken<IdentifierLoc>) const {
145 return getNumIdentifierLocs();
146 }
147
148 ModuleNameLoc(ModuleIdPath Path) : NumIdentifierLocs(Path.size()) {
149 (void)llvm::copy(Path, getTrailingObjectsNonStrict<IdentifierLoc>());
150 }
151
152public:
153 static ModuleNameLoc *Create(Preprocessor &PP, ModuleIdPath Path);
154 unsigned getNumIdentifierLocs() const { return NumIdentifierLocs; }
156 return {getTrailingObjectsNonStrict<IdentifierLoc>(),
158 }
159
161 return getModuleIdPath().front().getLoc();
162 }
164 auto &Last = getModuleIdPath().back();
165 return Last.getLoc().getLocWithOffset(
166 Last.getIdentifierInfo()->getLength());
167 }
168 SourceRange getRange() const { return {getBeginLoc(), getEndLoc()}; }
169 std::string str() const {
171 }
172};
173
174/// Engages in a tight little dance with the lexer to efficiently
175/// preprocess tokens.
176///
177/// Lexers know only about tokens within a single source file, and don't
178/// know anything about preprocessor-level issues like the \#include stack,
179/// token expansion, etc.
183
184 llvm::unique_function<void(const clang::Token &)> OnToken;
185 /// Functor for getting the dependency preprocessor directives of a file.
186 ///
187 /// These are directives derived from a special form of lexing where the
188 /// source input is scanned for the preprocessor directives that might have an
189 /// effect on the dependencies for a compilation unit.
190 DependencyDirectivesGetter *GetDependencyDirectives = nullptr;
191 const PreprocessorOptions &PPOpts;
192 DiagnosticsEngine *Diags;
193 const LangOptions &LangOpts;
194 const TargetInfo *Target = nullptr;
195 const TargetInfo *AuxTarget = nullptr;
196 FileManager &FileMgr;
197 SourceManager &SourceMgr;
198 std::unique_ptr<ScratchBuffer> ScratchBuf;
199 HeaderSearch &HeaderInfo;
200 ModuleLoader &TheModuleLoader;
201
202 /// External source of macros.
203 ExternalPreprocessorSource *ExternalSource;
204
205 /// A BumpPtrAllocator object used to quickly allocate and release
206 /// objects internal to the Preprocessor.
207 llvm::BumpPtrAllocator BP;
208
209 /// Identifiers for builtin macros and other builtins.
210 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
211 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
212 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
213 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
214 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
215 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
216 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
217 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
218 IdentifierInfo *Ident__identifier; // __identifier
219 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
220 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
221 IdentifierInfo *Ident__has_feature; // __has_feature
222 IdentifierInfo *Ident__has_extension; // __has_extension
223 IdentifierInfo *Ident__has_builtin; // __has_builtin
224 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin
225 IdentifierInfo *Ident__has_attribute; // __has_attribute
226 IdentifierInfo *Ident__has_embed; // __has_embed
227 IdentifierInfo *Ident__has_include; // __has_include
228 IdentifierInfo *Ident__has_include_next; // __has_include_next
229 IdentifierInfo *Ident__has_warning; // __has_warning
230 IdentifierInfo *Ident__is_identifier; // __is_identifier
231 IdentifierInfo *Ident__building_module; // __building_module
232 IdentifierInfo *Ident__MODULE__; // __MODULE__
233 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
234 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
235 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
236 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
237 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
238 IdentifierInfo *Ident__is_target_os; // __is_target_os
239 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
240 IdentifierInfo *Ident__is_target_variant_os;
241 IdentifierInfo *Ident__is_target_variant_environment;
242 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD
243
244 // Weak, only valid (and set) while InMacroArgs is true.
245 Token* ArgMacro;
246
247 SourceLocation DATELoc, TIMELoc;
248
249 // FEM_UnsetOnCommandLine means that an explicit evaluation method was
250 // not specified on the command line. The target is queried to set the
251 // default evaluation method.
252 LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
254
255 // The most recent pragma location where the floating point evaluation
256 // method was modified. This is used to determine whether the
257 // 'pragma clang fp eval_method' was used within the current scope.
258 SourceLocation LastFPEvalPragmaLocation;
259
260 LangOptions::FPEvalMethodKind TUFPEvalMethod =
262
263 // Next __COUNTER__ value, starts at 0.
264 uint32_t CounterValue = 0;
265
266 enum {
267 /// Maximum depth of \#includes.
268 MaxAllowedIncludeStackDepth = 200
269 };
270
271 // State that is set before the preprocessor begins.
272 bool KeepComments : 1;
273 bool KeepMacroComments : 1;
274 bool SuppressIncludeNotFoundError : 1;
275
276 // State that changes while the preprocessor runs:
277 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
278
279 /// Whether the preprocessor owns the header search object.
280 bool OwnsHeaderSearch : 1;
281
282 /// True if macro expansion is disabled.
283 bool DisableMacroExpansion : 1;
284
285 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
286 /// when parsing preprocessor directives.
287 bool MacroExpansionInDirectivesOverride : 1;
288
289 class ResetMacroExpansionHelper;
290
291 /// Whether we have already loaded macros from the external source.
292 mutable bool ReadMacrosFromExternalSource : 1;
293
294 /// True if pragmas are enabled.
295 bool PragmasEnabled : 1;
296
297 /// True if the current build action is a preprocessing action.
298 bool PreprocessedOutput : 1;
299
300 /// True if we are currently preprocessing a #if or #elif directive
301 bool ParsingIfOrElifDirective;
302
303 /// True if we are pre-expanding macro arguments.
304 bool InMacroArgPreExpansion;
305
306 /// Mapping/lookup information for all identifiers in
307 /// the program, including program keywords.
308 mutable IdentifierTable Identifiers;
309
310 /// This table contains all the selectors in the program.
311 ///
312 /// Unlike IdentifierTable above, this table *isn't* populated by the
313 /// preprocessor. It is declared/expanded here because its role/lifetime is
314 /// conceptually similar to the IdentifierTable. In addition, the current
315 /// control flow (in clang::ParseAST()), make it convenient to put here.
316 ///
317 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
318 /// the lifetime of the preprocessor.
319 SelectorTable Selectors;
320
321 /// Information about builtins.
322 std::unique_ptr<Builtin::Context> BuiltinInfo;
323
324 /// Tracks all of the pragmas that the client registered
325 /// with this preprocessor.
326 std::unique_ptr<PragmaNamespace> PragmaHandlers;
327
328 /// Pragma handlers of the original source is stored here during the
329 /// parsing of a model file.
330 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
331
332 /// Tracks all of the comment handlers that the client registered
333 /// with this preprocessor.
334 std::vector<CommentHandler *> CommentHandlers;
335
336 /// Empty line handler.
337 EmptylineHandler *Emptyline = nullptr;
338
339 /// True to avoid tearing down the lexer etc on EOF
340 bool IncrementalProcessing = false;
341
342public:
343 /// The kind of translation unit we are processing.
345
346 /// Returns a pointer into the given file's buffer that's guaranteed
347 /// to be between tokens. The returned pointer is always before \p Start.
348 /// The maximum distance between the returned pointer and \p Start is
349 /// limited by a constant value, but also an implementation detail.
350 /// If no such check point exists, \c nullptr is returned.
351 const char *getCheckPoint(FileID FID, const char *Start) const;
352
353private:
354 /// The code-completion handler.
355 CodeCompletionHandler *CodeComplete = nullptr;
356
357 /// The file that we're performing code-completion for, if any.
358 const FileEntry *CodeCompletionFile = nullptr;
359
360 /// The offset in file for the code-completion point.
361 unsigned CodeCompletionOffset = 0;
362
363 /// The location for the code-completion point. This gets instantiated
364 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
365 SourceLocation CodeCompletionLoc;
366
367 /// The start location for the file of the code-completion point.
368 ///
369 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
370 /// for preprocessing.
371 SourceLocation CodeCompletionFileLoc;
372
373 /// The source location of the \c import contextual keyword we just
374 /// lexed, if any.
375 SourceLocation ModuleImportLoc;
376
377 /// The source location of the \c module contextual keyword we just
378 /// lexed, if any.
379 SourceLocation ModuleDeclLoc;
380
381 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
382 unsigned CheckPointCounter = 0;
383
384 /// Whether the import is an `@import` or a standard c++ modules import.
385 bool IsAtImport = false;
386
387 /// Whether the last token we lexed was an '@'.
388 bool LastTokenWasAt = false;
389
390 /// Whether we're importing a standard C++20 named Modules.
391 bool ImportingCXXNamedModules = false;
392
393 /// Whether the last token we lexed was an 'export' keyword.
394 Token LastExportKeyword;
395
396 /// First pp-token source location in current translation unit.
397 SourceLocation FirstPPTokenLoc;
398
399 /// A preprocessor directive tracer to trace whether the preprocessing
400 /// state changed. These changes would mean most semantically observable
401 /// preprocessor state, particularly anything that is order dependent.
402 NoTrivialPPDirectiveTracer *DirTracer = nullptr;
403
/// A position within a C++20 import-seq.
///
/// The whole position is encoded in one integer: a positive value counts the
/// brackets that are currently open, zero means "at top level", and the
/// negative enumerators record which top-level construct was seen last.
class StdCXXImportSeq {
public:
  enum State : int {
    // Positive values represent a number of unclosed brackets.
    AtTopLevel = 0,
    AfterTopLevelTokenSeq = -1,
    AfterExport = -2,
    AfterImportSeq = -3,
  };

  /// Deliberately implicit so a tracker can be copy-initialized directly from
  /// a State enumerator.
  StdCXXImportSeq(State S) : S(S) {}

  /// Saw any kind of open bracket.
  ///
  /// Any non-positive state is first clamped to depth 0, so the result is
  /// always a bracket depth of at least 1.
  void handleOpenBracket() {
    S = static_cast<State>(std::max<int>(S, 0) + 1);
  }

  /// Saw any kind of close bracket other than '}'.
  ///
  /// Decrements a positive depth; any non-positive state collapses to
  /// AtTopLevel, so the depth never goes below zero.
  void handleCloseBracket() {
    S = static_cast<State>(std::max<int>(S, 1) - 1);
  }

  /// Saw a close brace.
  ///
  /// Behaves like any other close bracket, and additionally ends a top-level
  /// token sequence when it brings us back to depth 0 and we are not in the
  /// part of a pp-import that follows its header-name.
  void handleCloseBrace() {
    handleCloseBracket();
    if (S == AtTopLevel && !AfterHeaderName)
      S = AfterTopLevelTokenSeq;
  }

  /// Saw a semicolon.
  ///
  /// Only a semicolon outside all brackets has an effect: it ends the
  /// top-level token sequence and clears the header-name flag.
  void handleSemi() {
    if (atTopLevel()) {
      S = AfterTopLevelTokenSeq;
      AfterHeaderName = false;
    }
  }

  /// Saw an 'export' identifier.
  ///
  /// Directly after a top-level token sequence this moves to AfterExport; in
  /// any other non-bracketed state it is treated like an ordinary token.
  /// Inside brackets the depth is left untouched.
  void handleExport() {
    if (S == AfterTopLevelTokenSeq)
      S = AfterExport;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw an 'import' identifier.
  ///
  /// Directly after a top-level token sequence or after 'export' this moves
  /// to AfterImportSeq; in any other non-bracketed state it is treated like
  /// an ordinary token. Inside brackets the depth is left untouched.
  void handleImport() {
    if (S == AfterTopLevelTokenSeq || S == AfterExport)
      S = AfterImportSeq;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw a 'header-name' token; do not recognize any more 'import' tokens
  /// until we reach a top-level semicolon.
  void handleHeaderName() {
    if (S == AfterImportSeq)
      AfterHeaderName = true;
    handleMisc();
  }

  /// Saw any other token.
  ///
  /// Outside brackets this drops any "just saw ..." marker; inside brackets
  /// the depth is left untouched.
  void handleMisc() {
    if (S <= 0)
      S = AtTopLevel;
  }

  /// True when no bracket is currently open.
  bool atTopLevel() const { return S <= 0; }
  /// True when the last thing recorded was an import-seq introducer.
  bool afterImportSeq() const { return S == AfterImportSeq; }
  /// True when the last thing recorded was the end of a top-level token
  /// sequence.
  bool afterTopLevelSeq() const { return S == AfterTopLevelTokenSeq; }

private:
  State S;
  /// Whether we're in the pp-import-suffix following the header-name in a
  /// pp-import. If so, a close-brace is not sufficient to end the
  /// top-level-token-seq of an import-seq.
  bool AfterHeaderName = false;
};
479
480 /// Our current position within a C++20 import-seq.
481 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
482
/// Track whether we are in a Global Module Fragment
class TrackGMF {
public:
  enum GMFState : int {
    GMFActive = 1,
    MaybeGMF = 0,
    BeforeGMFIntroducer = -1,
    GMFAbsentOrEnded = -2,
  };

  /// Deliberately implicit so a tracker can be copy-initialized directly from
  /// a GMFState enumerator.
  TrackGMF(GMFState S) : S(S) {}

  /// Saw a semicolon.
  void handleSemi() {
    // If it is immediately after the first instance of the module keyword,
    // then that introduces the GMF.
    if (S == MaybeGMF)
      S = GMFActive;
  }

  /// Saw an 'export' identifier.
  void handleExport() {
    // The presence of an 'export' keyword always ends or excludes a GMF.
    S = GMFAbsentOrEnded;
  }

  /// Saw an 'import' identifier.
  ///
  /// \param AfterTopLevelTokenSeq whether the identifier directly follows a
  /// top-level token sequence; otherwise the state is left unchanged.
  void handleImport(bool AfterTopLevelTokenSeq) {
    // If we see this before any 'module' kw, then we have no GMF.
    if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
      S = GMFAbsentOrEnded;
  }

  /// Saw a 'module' identifier.
  ///
  /// \param AfterTopLevelTokenSeq whether the identifier directly follows a
  /// top-level token sequence.
  void handleModule(bool AfterTopLevelTokenSeq) {
    // This was the first module identifier and not preceded by any token
    // that would exclude a GMF. It could begin a GMF, but only if directly
    // followed by a semicolon.
    if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
      S = MaybeGMF;
    else
      S = GMFAbsentOrEnded;
  }

  /// Saw any other token.
  void handleMisc() {
    // We saw something other than ; after the 'module' kw, so not a GMF.
    if (S == MaybeGMF)
      S = GMFAbsentOrEnded;
  }

  /// True while the tracked state is GMFActive.
  bool inGMF() const { return S == GMFActive; }

private:
  /// Track the transitions into and out of a Global Module Fragment,
  /// if one is present.
  GMFState S;
};
541
542 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
543
544 /// Track the status of the c++20 module decl.
545 ///
546 /// module-declaration:
547 /// 'export'[opt] 'module' module-name module-partition[opt]
548 /// attribute-specifier-seq[opt] ';'
549 ///
550 /// module-name:
551 /// module-name-qualifier[opt] identifier
552 ///
553 /// module-partition:
554 /// ':' module-name-qualifier[opt] identifier
555 ///
556 /// module-name-qualifier:
557 /// identifier '.'
558 /// module-name-qualifier identifier '.'
559 ///
560 /// Transition state:
561 ///
562 /// NotAModuleDecl --- export ---> FoundExport
563 /// NotAModuleDecl --- module ---> ImplementationCandidate
564 /// FoundExport --- module ---> InterfaceCandidate
565 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate
566 /// ImplementationCandidate --- period ---> ImplementationCandidate
567 /// ImplementationCandidate --- colon ---> ImplementationCandidate
568 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate
569 /// InterfaceCandidate --- period ---> InterfaceCandidate
570 /// InterfaceCandidate --- colon ---> InterfaceCandidate
571 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation
572 /// InterfaceCandidate --- Semi ---> NamedModuleInterface
573 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation
574 /// NamedModuleInterface --- Anything ---> NamedModuleInterface
575 ///
576 /// FIXME: We don't handle attribute-specifier-seq here yet. This is unlikely
577 /// to matter soon, since we don't support any module attributes yet.
578 class ModuleDeclSeq {
579 enum ModuleDeclState : int {
580 NotAModuleDecl,
581 FoundExport,
582 InterfaceCandidate,
583 ImplementationCandidate,
584 NamedModuleInterface,
585 NamedModuleImplementation,
586 };
587
588 public:
589 ModuleDeclSeq() = default;
590
591 void handleExport() {
592 if (State == NotAModuleDecl)
593 State = FoundExport;
594 else if (!isNamedModule())
595 reset();
596 }
597
598 void handleModule() {
599 if (State == FoundExport)
600 State = InterfaceCandidate;
601 else if (State == NotAModuleDecl)
602 State = ImplementationCandidate;
603 else if (!isNamedModule())
604 reset();
605 }
606
607 void handleModuleName(ModuleNameLoc *NameLoc) {
608 if (isModuleCandidate() && NameLoc)
609 Name += NameLoc->str();
610 else if (!isNamedModule())
611 reset();
612 }
613
614 void handleColon() {
615 if (isModuleCandidate())
616 Name += ":";
617 else if (!isNamedModule())
618 reset();
619 }
620
621 void handleSemi() {
622 if (!Name.empty() && isModuleCandidate()) {
623 if (State == InterfaceCandidate)
624 State = NamedModuleInterface;
625 else if (State == ImplementationCandidate)
626 State = NamedModuleImplementation;
627 else
628 llvm_unreachable("Unimaged ModuleDeclState.");
629 } else if (!isNamedModule())
630 reset();
631 }
632
633 void handleMisc() {
634 if (!isNamedModule())
635 reset();
636 }
637
638 bool isModuleCandidate() const {
639 return State == InterfaceCandidate || State == ImplementationCandidate;
640 }
641
642 bool isNamedModule() const {
643 return State == NamedModuleInterface ||
644 State == NamedModuleImplementation;
645 }
646
647 bool isNamedInterface() const { return State == NamedModuleInterface; }
648
649 bool isImplementationUnit() const {
650 return State == NamedModuleImplementation && !getName().contains(':');
651 }
652
653 bool isNotAModuleDecl() const { return State == NotAModuleDecl; }
654
655 StringRef getName() const {
656 assert(isNamedModule() && "Can't get name from a non named module");
657 return Name;
658 }
659
660 StringRef getPrimaryName() const {
661 assert(isNamedModule() && "Can't get name from a non named module");
662 return getName().split(':').first;
663 }
664
665 void reset() {
666 Name.clear();
667 State = NotAModuleDecl;
668 }
669
670 private:
671 ModuleDeclState State = NotAModuleDecl;
672 std::string Name;
673 };
674
675 ModuleDeclSeq ModuleDeclState;
676
677 /// The identifier and source location of the currently-active
678 /// \#pragma clang arc_cf_code_audited begin.
679 IdentifierLoc PragmaARCCFCodeAuditedInfo;
680
681 /// The source location of the currently-active
682 /// \#pragma clang assume_nonnull begin.
683 SourceLocation PragmaAssumeNonNullLoc;
684
685 /// Set only for preambles which end with an active
686 /// \#pragma clang assume_nonnull begin.
687 ///
688 /// When the preamble is loaded into the main file,
689 /// `PragmaAssumeNonNullLoc` will be set to this to
690 /// replay the unterminated assume_nonnull.
691 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
692
693 /// True if we hit the code-completion point.
694 bool CodeCompletionReached = false;
695
696 /// The code completion token containing the information
697 /// on the stem that is to be code completed.
698 IdentifierInfo *CodeCompletionII = nullptr;
699
700 /// Range for the code completion token.
701 SourceRange CodeCompletionTokenRange;
702
703 /// The directory that the main file should be considered to occupy,
704 /// if it does not correspond to a real file (as happens when building a
705 /// module).
706 OptionalDirectoryEntryRef MainFileDir;
707
708 /// The number of bytes that we will initially skip when entering the
709 /// main file, along with a flag that indicates whether skipping this number
710 /// of bytes will place the lexer at the start of a line.
711 ///
712 /// This is used when loading a precompiled preamble.
713 std::pair<int, bool> SkipMainFilePreamble;
714
715 /// Whether we hit an error due to reaching max allowed include depth. Allows
716 /// to avoid hitting the same error over and over again.
717 bool HasReachedMaxIncludeDepth = false;
718
719 /// The number of currently-active calls to Lex.
720 ///
721 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
722 /// require asking for multiple additional tokens. This counter makes it
723 /// possible for Lex to detect whether it's producing a token for the end
724 /// of phase 4 of translation or for some other situation.
725 unsigned LexLevel = 0;
726
727 /// The number of (LexLevel 0) preprocessor tokens.
728 unsigned TokenCount = 0;
729
730 /// Preprocess every token regardless of LexLevel.
731 bool PreprocessToken = false;
732
733 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
734 /// warning, or zero for unlimited.
735 unsigned MaxTokens = 0;
736 SourceLocation MaxTokensOverrideLoc;
737
738public:
753
754 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
755
756private:
757 friend class ASTReader;
758 friend class MacroArgs;
759
760 class PreambleConditionalStackStore {
761 enum State {
762 Off = 0,
763 Recording = 1,
764 Replaying = 2,
765 };
766
767 public:
768 PreambleConditionalStackStore() = default;
769
770 void startRecording() { ConditionalStackState = Recording; }
771 void startReplaying() { ConditionalStackState = Replaying; }
772 bool isRecording() const { return ConditionalStackState == Recording; }
773 bool isReplaying() const { return ConditionalStackState == Replaying; }
774
775 ArrayRef<PPConditionalInfo> getStack() const {
776 return ConditionalStack;
777 }
778
779 void doneReplaying() {
780 ConditionalStack.clear();
781 ConditionalStackState = Off;
782 }
783
784 void setStack(ArrayRef<PPConditionalInfo> s) {
785 if (!isRecording() && !isReplaying())
786 return;
787 ConditionalStack.clear();
788 ConditionalStack.append(s.begin(), s.end());
789 }
790
791 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
792
793 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
794
795 void clearSkipInfo() { SkipInfo.reset(); }
796
797 std::optional<PreambleSkipInfo> SkipInfo;
798
799 private:
800 SmallVector<PPConditionalInfo, 4> ConditionalStack;
801 State ConditionalStackState = Off;
802 } PreambleConditionalStack;
803
804 /// The current top of the stack that we're lexing from if
805 /// not expanding a macro and we are lexing directly from source code.
806 ///
807 /// Only one of CurLexer, or CurTokenLexer will be non-null.
808 std::unique_ptr<Lexer> CurLexer;
809
810 /// Lexers that are pending destruction, deferred until the current
811 /// Stack of Lexer unwinds completely (LexLevel returns to 0).
812 /// This avoids use-after-free when HandleEndOfFile is called from
813 /// within a Lexer method that still needs to access its members.
814 SmallVector<std::unique_ptr<Lexer>, 2> PendingDestroyLexers;
815
816 /// The current top of the stack that we're lexing from
817 /// if not expanding a macro.
818 ///
819 /// This is an alias for CurLexer.
820 PreprocessorLexer *CurPPLexer = nullptr;
821
822 /// Used to find the current FileEntry, if CurLexer is non-null
823 /// and if applicable.
824 ///
825 /// This allows us to implement \#include_next and find directory-specific
826 /// properties.
827 ConstSearchDirIterator CurDirLookup = nullptr;
828
829 /// The current macro we are expanding, if we are expanding a macro.
830 ///
831 /// One of CurLexer and CurTokenLexer must be null.
832 std::unique_ptr<TokenLexer> CurTokenLexer;
833
834 /// The kind of lexer we're currently working with.
835 typedef bool (*LexerCallback)(Preprocessor &, Token &);
836 LexerCallback CurLexerCallback = &CLK_Lexer;
837
838 /// If the current lexer is for a submodule that is being built, this
839 /// is that submodule.
840 Module *CurLexerSubmodule = nullptr;
841
842 /// Keeps track of the stack of files currently
843 /// \#included, and macros currently being expanded from, not counting
844 /// CurLexer/CurTokenLexer.
845 struct IncludeStackInfo {
846 LexerCallback CurLexerCallback;
847 Module *TheSubmodule;
848 std::unique_ptr<Lexer> TheLexer;
849 PreprocessorLexer *ThePPLexer;
850 std::unique_ptr<TokenLexer> TheTokenLexer;
851 ConstSearchDirIterator TheDirLookup;
852
853 // The following constructors are completely useless copies of the default
854 // versions, only needed to pacify MSVC.
855 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
856 std::unique_ptr<Lexer> &&TheLexer,
857 PreprocessorLexer *ThePPLexer,
858 std::unique_ptr<TokenLexer> &&TheTokenLexer,
859 ConstSearchDirIterator TheDirLookup)
860 : CurLexerCallback(std::move(CurLexerCallback)),
861 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
862 ThePPLexer(std::move(ThePPLexer)),
863 TheTokenLexer(std::move(TheTokenLexer)),
864 TheDirLookup(std::move(TheDirLookup)) {}
865 };
866 std::vector<IncludeStackInfo> IncludeMacroStack;
867
868 /// Actions invoked when some preprocessor activity is
869 /// encountered (e.g. a file is \#included, etc).
870 std::unique_ptr<PPCallbacks> Callbacks;
871
872 struct MacroExpandsInfo {
873 Token Tok;
874 MacroDefinition MD;
875 SourceRange Range;
876
877 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
878 : Tok(Tok), MD(MD), Range(Range) {}
879 };
880 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
881
882 /// Information about a name that has been used to define a module macro.
883 struct ModuleMacroInfo {
884 /// The most recent macro directive for this identifier.
885 MacroDirective *MD;
886
887 /// The active module macros for this identifier.
888 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
889
890 /// The generation number at which we last updated ActiveModuleMacros.
891 /// \see Preprocessor::VisibleModules.
892 unsigned ActiveModuleMacrosGeneration = 0;
893
894 /// Whether this macro name is ambiguous.
895 bool IsAmbiguous = false;
896
897 /// The module macros that are overridden by this macro.
898 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
899
900 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
901 };
902
903 /// The state of a macro for an identifier.
904 class MacroState {
905 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
906
907 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
908 const IdentifierInfo *II) const {
909 if (II->isOutOfDate())
910 PP.updateOutOfDateIdentifier(*II);
911 // FIXME: Find a spare bit on IdentifierInfo and store a
912 // HasModuleMacros flag.
913 if (!II->hasMacroDefinition() ||
914 (!PP.getLangOpts().Modules &&
915 !PP.getLangOpts().ModulesLocalVisibility) ||
916 !PP.CurSubmoduleState->VisibleModules.getGeneration())
917 return nullptr;
918
919 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
920 if (!Info) {
921 Info = new (PP.getPreprocessorAllocator())
922 ModuleMacroInfo(cast<MacroDirective *>(State));
923 State = Info;
924 }
925
926 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
927 Info->ActiveModuleMacrosGeneration)
928 PP.updateModuleMacroInfo(II, *Info);
929 return Info;
930 }
931
932 public:
933 MacroState() : MacroState(nullptr) {}
934 MacroState(MacroDirective *MD) : State(MD) {}
935
936 MacroState(MacroState &&O) noexcept : State(O.State) {
937 O.State = (MacroDirective *)nullptr;
938 }
939
940 MacroState &operator=(MacroState &&O) noexcept {
941 auto S = O.State;
942 O.State = (MacroDirective *)nullptr;
943 State = S;
944 return *this;
945 }
946
947 ~MacroState() {
948 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
949 Info->~ModuleMacroInfo();
950 }
951
952 MacroDirective *getLatest() const {
953 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
954 return Info->MD;
955 return cast<MacroDirective *>(State);
956 }
957
958 void setLatest(MacroDirective *MD) {
959 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
960 Info->MD = MD;
961 else
962 State = MD;
963 }
964
965 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
966 auto *Info = getModuleInfo(PP, II);
967 return Info ? Info->IsAmbiguous : false;
968 }
969
970 ArrayRef<ModuleMacro *>
971 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
972 if (auto *Info = getModuleInfo(PP, II))
973 return Info->ActiveModuleMacros;
974 return {};
975 }
976
977 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
978 SourceManager &SourceMgr) const {
979 // FIXME: Incorporate module macros into the result of this.
980 if (auto *Latest = getLatest())
981 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
982 return {};
983 }
984
985 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
986 if (auto *Info = getModuleInfo(PP, II)) {
987 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
988 Info->ActiveModuleMacros.begin(),
989 Info->ActiveModuleMacros.end());
990 Info->ActiveModuleMacros.clear();
991 Info->IsAmbiguous = false;
992 }
993 }
994
995 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
996 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
997 return Info->OverriddenMacros;
998 return {};
999 }
1000
1001 void setOverriddenMacros(Preprocessor &PP,
1002 ArrayRef<ModuleMacro *> Overrides) {
1003 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
1004 if (!Info) {
1005 if (Overrides.empty())
1006 return;
1007 Info = new (PP.getPreprocessorAllocator())
1008 ModuleMacroInfo(cast<MacroDirective *>(State));
1009 State = Info;
1010 }
1011 Info->OverriddenMacros.clear();
1012 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
1013 Overrides.begin(), Overrides.end());
1014 Info->ActiveModuleMacrosGeneration = 0;
1015 }
1016 };
1017
1018 /// For each IdentifierInfo that was associated with a macro, we
1019 /// keep a mapping to the history of all macro definitions and #undefs in
1020 /// the reverse order (the latest one is in the head of the list).
1021 ///
1022 /// This mapping lives within the \p CurSubmoduleState.
1023 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
1024
1025 struct SubmoduleState;
1026
1027 /// Information about a submodule that we're currently building.
1028 struct BuildingSubmoduleInfo {
1029 /// The module that we are building.
1030 Module *M;
1031
1032 /// The location at which the module was included.
1033 SourceLocation ImportLoc;
1034
1035 /// Whether we entered this submodule via a pragma.
1036 bool IsPragma;
1037
1038 /// The previous SubmoduleState.
1039 SubmoduleState *OuterSubmoduleState;
1040
1041 /// The number of pending module macro names when we started building this.
1042 unsigned OuterPendingModuleMacroNames;
1043
1044 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
1045 SubmoduleState *OuterSubmoduleState,
1046 unsigned OuterPendingModuleMacroNames)
1047 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
1048 OuterSubmoduleState(OuterSubmoduleState),
1049 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
1050 };
1051 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
1052
1053 /// Information about a submodule's preprocessor state.
1054 struct SubmoduleState {
1055 /// The macros for the submodule.
1056 MacroMap Macros;
1057
1058 /// The set of modules that are visible within the submodule.
1059 VisibleModuleSet VisibleModules;
1060
1061 // FIXME: CounterValue?
1062 // FIXME: PragmaPushMacroInfo?
1063 };
1064 std::map<Module *, SubmoduleState> Submodules;
1065
1066 /// The preprocessor state for preprocessing outside of any submodule.
1067 SubmoduleState NullSubmoduleState;
1068
1069 /// The current submodule state. Will be \p NullSubmoduleState if we're not
1070 /// in a submodule.
1071 SubmoduleState *CurSubmoduleState;
1072
1073 /// The files that have been included.
1074 IncludedFilesSet IncludedFiles;
1075
1076 /// The set of top-level modules that affected preprocessing, but were not
1077 /// imported.
1078 llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1079
1080 /// The set of known macros exported from modules.
1081 llvm::FoldingSet<ModuleMacro> ModuleMacros;
1082
1083 /// The names of potential module macros that we've not yet processed.
1084 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1085
1086 /// The list of module macros, for each identifier, that are not overridden by
1087 /// any other module macro.
1088 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1089 LeafModuleMacros;
1090
1091 /// Macros that we want to warn about because they are not used at the end
1092 /// of the translation unit.
1093 ///
1094 /// We store just their SourceLocations instead of
1095 /// something like MacroInfo*. The benefit of this is that when we are
1096 /// deserializing from PCH, we don't need to deserialize identifier & macros
1097 /// just so that we can report that they are unused, we just warn using
1098 /// the SourceLocations of this set (that will be filled by the ASTReader).
1099 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1100 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1101
1102 /// This is a pair of an optional message and source location used for pragmas
1103 /// that annotate macros like pragma clang restrict_expansion and pragma clang
1104 /// deprecated. This pair stores the optional message and the location of the
1105 /// annotation pragma for use producing diagnostics and notes.
1106 using MsgLocationPair = std::pair<std::string, SourceLocation>;
1107
1108 struct MacroAnnotationInfo {
1109 SourceLocation Location;
1110 std::string Message;
1111 };
1112
1113 struct MacroAnnotations {
1114 std::optional<MacroAnnotationInfo> DeprecationInfo;
1115 std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1116 std::optional<SourceLocation> FinalAnnotationLoc;
1117 };
1118
1119 /// Warning information for macro annotations.
1120 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1121
1122 /// A "freelist" of MacroArgs objects that can be
1123 /// reused for quick allocation.
1124 MacroArgs *MacroArgCache = nullptr;
1125
1126 /// For each IdentifierInfo used in a \#pragma push_macro directive,
1127 /// we keep a MacroInfo stack used to restore the previous macro value.
1128 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1129 PragmaPushMacroInfo;
1130
1131 // Various statistics we track for performance analysis.
1132 unsigned NumDirectives = 0;
1133 unsigned NumDefined = 0;
1134 unsigned NumUndefined = 0;
1135 unsigned NumPragma = 0;
1136 unsigned NumIf = 0;
1137 unsigned NumElse = 0;
1138 unsigned NumEndif = 0;
1139 unsigned NumEnteredSourceFiles = 0;
1140 unsigned MaxIncludeStackDepth = 0;
1141 unsigned NumMacroExpanded = 0;
1142 unsigned NumFnMacroExpanded = 0;
1143 unsigned NumBuiltinMacroExpanded = 0;
1144 unsigned NumFastMacroExpanded = 0;
1145 unsigned NumTokenPaste = 0;
1146 unsigned NumFastTokenPaste = 0;
1147 unsigned NumSkipped = 0;
1148
1149 /// The predefined macros that preprocessor should use from the
1150 /// command line etc.
1151 std::string Predefines;
1152
1153 /// The file ID for the preprocessor predefines.
1154 FileID PredefinesFileID;
1155
1156 /// The file ID for the PCH through header.
1157 FileID PCHThroughHeaderFileID;
1158
1159 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1160 bool SkippingUntilPragmaHdrStop = false;
1161
1162 /// Whether tokens are being skipped until the through header is seen.
1163 bool SkippingUntilPCHThroughHeader = false;
1164
1165 /// Whether the main file is a preprocessed module file.
1166 bool MainFileIsPreprocessedModuleFile = false;
1167
1168 /// \{
1169 /// Cache of macro expanders to reduce malloc traffic.
1170 enum { TokenLexerCacheSize = 8 };
1171 unsigned NumCachedTokenLexers;
1172 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1173 /// \}
1174
1175 /// Keeps macro expanded tokens for TokenLexers.
1176 ///
1177 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1178 /// going to lex in the cache and when it finishes the tokens are removed
1179 /// from the end of the cache.
1180 SmallVector<Token, 16> MacroExpandedTokens;
1181 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1182
1183 /// A record of the macro definitions and expansions that
1184 /// occurred during preprocessing.
1185 ///
1186 /// This is an optional side structure that can be enabled with
1187 /// \c createPreprocessingRecord() prior to preprocessing.
1188 PreprocessingRecord *Record = nullptr;
1189
1190 /// Cached tokens state.
1191 using CachedTokensTy = SmallVector<Token, 1>;
1192
1193 /// Cached tokens are stored here when we do backtracking or
1194 /// lookahead. They are "lexed" by the CachingLex() method.
1195 CachedTokensTy CachedTokens;
1196
1197 /// The position of the cached token that CachingLex() should
1198 /// "lex" next.
1199 ///
1200 /// If it points beyond the CachedTokens vector, it means that a normal
1201 /// Lex() should be invoked.
1202 CachedTokensTy::size_type CachedLexPos = 0;
1203
1204 /// Stack of backtrack positions, allowing nested backtracks.
1205 ///
1206 /// The EnableBacktrackAtThisPos() method pushes a position to
1207 /// indicate where CachedLexPos should be set when the BackTrack() method is
1208 /// invoked (at which point the last position is popped).
1209 std::vector<CachedTokensTy::size_type> BacktrackPositions;
1210
1211 /// Stack of cached tokens/initial number of cached tokens pairs, allowing
1212 /// nested unannotated backtracks.
1213 std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1214 UnannotatedBacktrackTokens;
1215
1216 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1217 /// This is used to guard against calling this function recursively.
1218 ///
1219 /// See comments at the use-site for more context about why it is needed.
1220 bool SkippingExcludedConditionalBlock = false;
1221
1222 /// Keeps track of skipped range mappings that were recorded while skipping
1223 /// excluded conditional directives. It maps the source buffer pointer at
1224 /// the beginning of a skipped block, to the number of bytes that should be
1225 /// skipped.
1226 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1227
1228 void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1229
1230public:
1231 Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags,
1232 const LangOptions &LangOpts, SourceManager &SM,
1233 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
1234 IdentifierInfoLookup *IILookup = nullptr,
1235 bool OwnsHeaderSearch = false,
1237
1238 ~Preprocessor();
1239
1240 /// Initialize the preprocessor using information about the target.
1241 ///
1242 /// \param Target is owned by the caller and must remain valid for the
1243 /// lifetime of the preprocessor.
1244 /// \param AuxTarget is owned by the caller and must remain valid for
1245 /// the lifetime of the preprocessor.
1246 void Initialize(const TargetInfo &Target,
1247 const TargetInfo *AuxTarget = nullptr);
1248
1249 /// Initialize the preprocessor to parse a model file
1250 ///
1251 /// To parse model files the preprocessor of the original source is reused to
1252 /// preserve the identifier table. However, to avoid some duplicate
1253 /// information in the preprocessor some cleanup is needed before it is used
1254 /// to parse model files. This method does that cleanup.
1256
1257 /// Cleanup after model file parsing
1258 void FinalizeForModelFile();
1259
1260 /// Retrieve the preprocessor options used to initialize this preprocessor.
1261 const PreprocessorOptions &getPreprocessorOpts() const { return PPOpts; }
1262
1263 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
1264 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1265
1266 const LangOptions &getLangOpts() const { return LangOpts; }
1267 const TargetInfo &getTargetInfo() const { return *Target; }
1268 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
1269 FileManager &getFileManager() const { return FileMgr; }
1270 SourceManager &getSourceManager() const { return SourceMgr; }
1271 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1272
1273 IdentifierTable &getIdentifierTable() { return Identifiers; }
1274 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
1275 SelectorTable &getSelectorTable() { return Selectors; }
1276 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
1277 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1278
1280 ExternalSource = Source;
1281 }
1282
1284 return ExternalSource;
1285 }
1286
1287 /// Retrieve the module loader associated with this preprocessor.
1288 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1289
1291 return TheModuleLoader.HadFatalFailure;
1292 }
1293
1294 /// Retrieve the number of Directives that have been processed by the
1295 /// Preprocessor.
1296 unsigned getNumDirectives() const {
1297 return NumDirectives;
1298 }
1299
1300 /// True if we are currently preprocessing a #if or #elif directive
1302 return ParsingIfOrElifDirective;
1303 }
1304
1305 /// Control whether the preprocessor retains comments in output.
1306 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1307 this->KeepComments = KeepComments | KeepMacroComments;
1308 this->KeepMacroComments = KeepMacroComments;
1309 }
1310
1311 bool getCommentRetentionState() const { return KeepComments; }
1312
1313 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
1314 bool getPragmasEnabled() const { return PragmasEnabled; }
1315
1317 SuppressIncludeNotFoundError = Suppress;
1318 }
1319
1321 return SuppressIncludeNotFoundError;
1322 }
1323
1324 /// Sets whether the preprocessor is responsible for producing output or if
1325 /// it is producing tokens to be consumed by Parse and Sema.
1326 void setPreprocessedOutput(bool IsPreprocessedOutput) {
1327 PreprocessedOutput = IsPreprocessedOutput;
1328 }
1329
1330 /// Returns true if the preprocessor is responsible for generating output,
1331 /// false if it is producing tokens to be consumed by Parse and Sema.
1332 bool isPreprocessedOutput() const { return PreprocessedOutput; }
1333
1334 /// Return true if we are lexing directly from the specified lexer.
1335 bool isCurrentLexer(const PreprocessorLexer *L) const {
1336 return CurPPLexer == L;
1337 }
1338
1339 /// Return the current lexer being lexed from.
1340 ///
1341 /// Note that this ignores any potentially active macro expansions and _Pragma
1342 /// expansions going on at the time.
1343 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1344
1345 /// Return the current file lexer being lexed from.
1346 ///
1347 /// Note that this ignores any potentially active macro expansions and _Pragma
1348 /// expansions going on at the time.
1350
1351 /// Return the submodule owning the file being lexed. This may not be
1352 /// the current module if we have changed modules since entering the file.
1353 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1354
1355 /// Returns the FileID for the preprocessor predefines.
1356 FileID getPredefinesFileID() const { return PredefinesFileID; }
1357
1358 /// \{
1359 /// Accessors for preprocessor callbacks.
1360 ///
1361 /// Note that this class takes ownership of any PPCallbacks object given to
1362 /// it.
1363 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1364 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1365 if (Callbacks)
1366 C = std::make_unique<PPChainedCallbacks>(std::move(C),
1367 std::move(Callbacks));
1368 Callbacks = std::move(C);
1369 }
1370 void removePPCallbacks() { Callbacks.reset(); }
1371 /// \}
1372
1373 /// Get the number of tokens processed so far.
1374 unsigned getTokenCount() const { return TokenCount; }
1375
1376 /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1377 unsigned getMaxTokens() const { return MaxTokens; }
1378
1380 MaxTokens = Value;
1381 MaxTokensOverrideLoc = Loc;
1382 };
1383
1384 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1385
1386 /// Register a function that would be called on each token in the final
1387 /// expanded token stream.
1388 /// This also reports annotation tokens produced by the parser.
1389 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1390 OnToken = std::move(F);
1391 }
1392
1394 GetDependencyDirectives = &Get;
1395 }
1396
1397 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1398
1399 bool isMacroDefined(StringRef Id) {
1400 return isMacroDefined(&Identifiers.get(Id));
1401 }
1403 return II->hasMacroDefinition() &&
1404 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1405 }
1406
1407 /// Determine whether II is defined as a macro within the module M,
1408 /// if that is a module that we've already preprocessed. Does not check for
1409 /// macros imported into M.
1411 if (!II->hasMacroDefinition())
1412 return false;
1413 auto I = Submodules.find(M);
1414 if (I == Submodules.end())
1415 return false;
1416 auto J = I->second.Macros.find(II);
1417 if (J == I->second.Macros.end())
1418 return false;
1419 auto *MD = J->second.getLatest();
1420 return MD && MD->isDefined();
1421 }
1422
1424 if (!II->hasMacroDefinition())
1425 return {};
1426
1427 MacroState &S = CurSubmoduleState->Macros[II];
1428 auto *MD = S.getLatest();
1429 while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1430 MD = MD->getPrevious();
1431 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1432 S.getActiveModuleMacros(*this, II),
1433 S.isAmbiguous(*this, II));
1434 }
1435
1437 SourceLocation Loc) {
1438 if (!II->hadMacroDefinition())
1439 return {};
1440
1441 MacroState &S = CurSubmoduleState->Macros[II];
1443 if (auto *MD = S.getLatest())
1444 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1445 // FIXME: Compute the set of active module macros at the specified location.
1446 return MacroDefinition(DI.getDirective(),
1447 S.getActiveModuleMacros(*this, II),
1448 S.isAmbiguous(*this, II));
1449 }
1450
1451 /// Given an identifier, return its latest non-imported MacroDirective
1452 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1454 if (!II->hasMacroDefinition())
1455 return nullptr;
1456
1457 auto *MD = getLocalMacroDirectiveHistory(II);
1458 if (!MD || MD->getDefinition().isUndefined())
1459 return nullptr;
1460
1461 return MD;
1462 }
1463
1464 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1465 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1466 }
1467
1469 if (!II->hasMacroDefinition())
1470 return nullptr;
1471 if (auto MD = getMacroDefinition(II))
1472 return MD.getMacroInfo();
1473 return nullptr;
1474 }
1475
1476 /// Given an identifier, return the latest non-imported macro
1477 /// directive for that identifier.
1478 ///
1479 /// One can iterate over all previous macro directives from the most recent
1480 /// one.
1482
1483 /// Add a directive to the macro directive history for this identifier.
1486 SourceLocation Loc) {
1487 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1488 appendMacroDirective(II, MD);
1489 return MD;
1490 }
1495
1496 /// Set a MacroDirective that was loaded from a PCH file.
1498 MacroDirective *MD);
1499
1500 /// Register an exported macro for a module and identifier.
1503 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1505
1506 /// Get the list of leaf (non-overridden) module macros for a name.
1508 if (II->isOutOfDate())
1509 updateOutOfDateIdentifier(*II);
1510 auto I = LeafModuleMacros.find(II);
1511 if (I != LeafModuleMacros.end())
1512 return I->second;
1513 return {};
1514 }
1515
1516 /// Get the list of submodules that we're currently building.
1518 return BuildingSubmoduleStack;
1519 }
1520
1521 /// \{
1522 /// Iterators for the macro history table. Currently defined macros have
1523 /// IdentifierInfo::hasMacroDefinition() set and an empty
1524 /// MacroInfo::getUndefLoc() at the head of the list.
1525 using macro_iterator = MacroMap::const_iterator;
1526
1527 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1528 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1529
1530 llvm::iterator_range<macro_iterator>
1531 macros(bool IncludeExternalMacros = true) const {
1532 macro_iterator begin = macro_begin(IncludeExternalMacros);
1533 macro_iterator end = macro_end(IncludeExternalMacros);
1534 return llvm::make_range(begin, end);
1535 }
1536
1537 /// \}
1538
1539 /// Mark the given clang module as affecting the current clang module or translation unit.
1541 assert(M->isModuleMapModule());
1542 if (!BuildingSubmoduleStack.empty()) {
1543 if (M != BuildingSubmoduleStack.back().M)
1544 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
1545 } else {
1546 AffectingClangModules.insert(M);
1547 }
1548 }
1549
1550 /// Get the set of top-level clang modules that affected preprocessing, but were not
1551 /// imported.
1553 return AffectingClangModules;
1554 }
1555
1556 /// Mark the file as included.
1557 /// Returns true if this is the first time the file was included.
1559 HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
1560 return IncludedFiles.insert(File).second;
1561 }
1562
1563 /// Return true if this header has already been included.
1565 HeaderInfo.getFileInfo(File);
1566 return IncludedFiles.count(File);
1567 }
1568
1569 /// Get the set of included files.
1570 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1571 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1572
1573 /// Return the name of the macro defined before \p Loc that has
1574 /// spelling \p Tokens. If there are multiple macros with same spelling,
1575 /// return the last one defined.
1577 ArrayRef<TokenValue> Tokens) const;
1578
1579 /// Get the predefines for this processor.
1580 /// Used by some third-party tools to inspect and add predefines (see
1581 /// https://github.com/llvm/llvm-project/issues/57483).
1582 const std::string &getPredefines() const { return Predefines; }
1583
1584 /// Set the predefines for this Preprocessor.
1585 ///
1586 /// These predefines are automatically injected when parsing the main file.
1587 void setPredefines(std::string P) { Predefines = std::move(P); }
1588
1589 /// Return information about the specified preprocessor
1590 /// identifier token.
1591 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1592 return &Identifiers.get(Name);
1593 }
1594
1595 /// Add the specified pragma handler to this preprocessor.
1596 ///
1597 /// If \p Namespace is non-null, then it is a token required to exist on the
1598 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1599 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1601 AddPragmaHandler(StringRef(), Handler);
1602 }
1603
1604 /// Remove the specific pragma handler from this preprocessor.
1605 ///
1606 /// If \p Namespace is non-null, then it should be the namespace that
1607 /// \p Handler was added to. It is an error to remove a handler that
1608 /// has not been registered.
1609 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1611 RemovePragmaHandler(StringRef(), Handler);
1612 }
1613
1614 /// Install empty handlers for all pragmas (making them ignored).
1615 void IgnorePragmas();
1616
1617 /// Set empty line handler.
1618 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1619
1620 EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1621
1622 /// Add the specified comment handler to the preprocessor.
1623 void addCommentHandler(CommentHandler *Handler);
1624
1625 /// Remove the specified comment handler.
1626 ///
1627 /// It is an error to remove a handler that has not been registered.
1628 void removeCommentHandler(CommentHandler *Handler);
1629
1630 /// Set the code completion handler to the given object.
1632 CodeComplete = &Handler;
1633 }
1634
1635 /// Retrieve the current code-completion handler.
1637 return CodeComplete;
1638 }
1639
1640 /// Clear out the code completion handler.
1642 CodeComplete = nullptr;
1643 }
1644
1645 /// Hook used by the lexer to invoke the "included file" code
1646 /// completion point.
1647 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1648
1649 /// Hook used by the lexer to invoke the "natural language" code
1650 /// completion point.
1652
1653 /// Set the code completion token for filtering purposes.
1655 CodeCompletionII = Filter;
1656 }
1657
1658 /// Set the code completion token range for detecting replacement range later
1659 /// on.
1661 const SourceLocation End) {
1662 CodeCompletionTokenRange = {Start, End};
1663 }
1665 return CodeCompletionTokenRange;
1666 }
1667
1668 /// Get the code completion token for filtering purposes.
1670 if (CodeCompletionII)
1671 return CodeCompletionII->getName();
1672 return {};
1673 }
1674
1675 /// Retrieve the preprocessing record, or NULL if there is no
1676 /// preprocessing record.
1678
1679 /// Create a new preprocessing record, which will keep track of
1680 /// all macro expansions, macro definitions, etc.
1682
1683 /// Returns true if the FileEntry is the PCH through header.
1684 bool isPCHThroughHeader(const FileEntry *FE);
1685
1686 /// True if creating a PCH with a through header.
1688
1689 /// True if using a PCH with a through header.
1691
1692 /// True if creating a PCH with a #pragma hdrstop.
1694
1695 /// True if using a PCH with a #pragma hdrstop.
1697
1698 /// Skip tokens until after the #include of the through header or
1699 /// until after a #pragma hdrstop.
1701
1702 /// Process directives while skipping until the through header or
1703 /// #pragma hdrstop is found.
1705 SourceLocation HashLoc);
1706
1707 /// Enter the specified FileID as the main source file,
1708 /// which implicitly adds the builtin defines etc.
1709 void EnterMainSourceFile();
1710
1711 /// Inform the preprocessor callbacks that processing is complete.
1712 void EndSourceFile();
1713
1714 /// Add a source file to the top of the include stack and
1715 /// start lexing tokens from it instead of the current buffer.
1716 ///
1717 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1719 SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1720
1721 /// Add a Macro to the top of the include stack and start lexing
1722 /// tokens from it instead of the current buffer.
1723 ///
1724 /// \param Args specifies the tokens input to a function-like macro.
1725 /// \param ILEnd specifies the location of the ')' for a function-like macro
1726 /// or the identifier for an object-like macro.
1728 MacroArgs *Args);
1729
1730private:
1731 /// Add a "macro" context to the top of the include stack,
1732 /// which will cause the lexer to start returning the specified tokens.
1733 ///
1734 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1735 /// will not be subject to further macro expansion. Otherwise, these tokens
1736 /// will be re-macro-expanded when/if expansion is enabled.
1737 ///
1738 /// If \p OwnsTokens is false, this method assumes that the specified stream
1739 /// of tokens has a permanent owner somewhere, so they do not need to be
1740 /// copied. If it is true, it assumes the array of tokens is allocated with
1741 /// \c new[] and the Preprocessor will delete[] it.
1742 ///
1743 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1744 /// set, see the flag documentation for details.
1745 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1746 bool DisableMacroExpansion, bool OwnsTokens,
1747 bool IsReinject);
1748
1749public:
1750 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1751 bool DisableMacroExpansion, bool IsReinject) {
1752 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1753 IsReinject);
1754 }
1755
1756 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1757 bool IsReinject) {
1758 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1759 IsReinject);
1760 }
1761
1762 /// Pop the current lexer/macro exp off the top of the lexer stack.
1763 ///
1764 /// This should only be used in situations where the current state of the
1765 /// top-of-stack lexer is known.
1766 void RemoveTopOfLexerStack();
1767
1768 /// From the point that this method is called, and until
1769 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1770 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1771 /// make the Preprocessor re-lex the same tokens.
1772 ///
1773 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1774 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1775 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1776 ///
1777 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1778 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1779 /// tokens will continue indefinitely.
1780 ///
1781 /// \param Unannotated Whether token annotations are reverted upon calling
1782 /// Backtrack().
1783 void EnableBacktrackAtThisPos(bool Unannotated = false);
1784
1785private:
1786 std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1787
1788 CachedTokensTy PopUnannotatedBacktrackTokens();
1789
1790public:
1791 /// Disable the last EnableBacktrackAtThisPos call.
1793
1794 /// Make Preprocessor re-lex the tokens that were lexed since
1795 /// EnableBacktrackAtThisPos() was previously called.
1796 void Backtrack();
1797
1798 /// True if EnableBacktrackAtThisPos() was called and
1799 /// caching of tokens is on.
1800 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1801
1802 /// True if EnableBacktrackAtThisPos() was called and
1803 /// caching of unannotated tokens is on.
1805 return !UnannotatedBacktrackTokens.empty();
1806 }
1807
1808 /// Lex the next token for this preprocessor.
1809 void Lex(Token &Result);
1810
1811 /// Lex all tokens for this preprocessor until (and excluding) end of file.
1812 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1813
1814 /// Lex a token, forming a header-name token if possible.
1815 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1816
1817 /// Lex the parameters for an #embed directive, returns nullopt on error.
1818 std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1819 bool ForHasEmbed);
1820
1821 /// Whether the main file is a preprocessed module file.
1823 return MainFileIsPreprocessedModuleFile;
1824 }
1825
1826 /// Mark the main file as a preprocessed module file, then the 'module' and
1827 /// 'import' directive recognition will be suppressed. Only
1828 /// '__preprocessed_module' and '__preprocessed_import' are allowed.
1830 MainFileIsPreprocessedModuleFile = true;
1831 }
1832
1834 SmallVectorImpl<Token> &Suffix,
1836 bool AllowMacroExpansion, bool IsPartition);
1837 bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok,
1839 SmallVectorImpl<Token> &DirToks,
1840 bool AllowMacroExpansion, bool IsPartition);
1842 void HandleCXXImportDirective(Token Import);
1844
1845 /// Callback invoked when the lexer sees one of export, import or module token
1846 /// at the start of a line.
1847 ///
1848 /// This consumes the import/module directive, modifies the
1849 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1850 /// read is the correct one.
1852
1853 /// Get the start location of the first pp-token in main file.
1855 assert(FirstPPTokenLoc.isValid() &&
1856 "Did not see the first pp-token in the main file");
1857 return FirstPPTokenLoc;
1858 }
1859
1862 bool StopUntilEOD = false);
1864 bool StopUntilEOD = false);
1865
1867 bool IncludeExports = true);
1868
1870 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1871 }
1872
1873 /// Lex a string literal, which may be the concatenation of multiple
1874 /// string literals and may even come from macro expansion.
1875 /// \returns true on success, false if a error diagnostic has been generated.
1876 bool LexStringLiteral(Token &Result, std::string &String,
1877 const char *DiagnosticTag, bool AllowMacroExpansion) {
1878 if (AllowMacroExpansion)
1879 Lex(Result);
1880 else
1882 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1883 AllowMacroExpansion);
1884 }
1885
1886 /// Complete the lexing of a string literal where the first token has
1887 /// already been lexed (see LexStringLiteral).
1888 bool FinishLexStringLiteral(Token &Result, std::string &String,
1889 const char *DiagnosticTag,
1890 bool AllowMacroExpansion);
1891
1892 /// Lex a token. If it's a comment, keep lexing until we get
1893 /// something not a comment.
1894 ///
1895 /// This is useful in -E -C mode where comments would foul up preprocessor
1896 /// directive handling.
1898 do
1899 Lex(Result);
1900 while (Result.getKind() == tok::comment);
1901 }
1902
1903 /// Just like Lex, but disables macro expansion of identifier tokens.
1905 // Disable macro expansion.
1906 bool OldVal = DisableMacroExpansion;
1907 DisableMacroExpansion = true;
1908 // Lex the token.
1909 Lex(Result);
1910
1911 // Reenable it.
1912 DisableMacroExpansion = OldVal;
1913 }
1914
1915 /// Like LexNonComment, but this disables macro expansion of
1916 /// identifier tokens.
1918 do
1920 while (Result.getKind() == tok::comment);
1921 }
1922
1923 /// Parses a simple integer literal to get its numeric value. Floating
1924 /// point literals and user defined literals are rejected. Used primarily to
1925 /// handle pragmas that accept integer arguments.
1926 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1927
1928 /// Disables macro expansion everywhere except for preprocessor directives.
1930 DisableMacroExpansion = true;
1931 MacroExpansionInDirectivesOverride = true;
1932 }
1933
1935 DisableMacroExpansion = MacroExpansionInDirectivesOverride = false;
1936 }
1937
1938 /// Peeks ahead N tokens and returns that token without consuming any
1939 /// tokens.
1940 ///
1941 /// LookAhead(0) returns the next token that would be returned by Lex(),
1942 /// LookAhead(1) returns the token after it, etc. This returns normal
1943 /// tokens after phase 5. As such, it is equivalent to using
1944 /// 'Lex', not 'LexUnexpandedToken'.
1945 const Token &LookAhead(unsigned N) {
1946 assert(LexLevel == 0 && "cannot use lookahead while lexing");
1947 if (CachedLexPos + N < CachedTokens.size())
1948 return CachedTokens[CachedLexPos+N];
1949 else
1950 return PeekAhead(N+1);
1951 }
1952
1953 /// When backtracking is enabled and tokens are cached,
1954 /// this allows to revert a specific number of tokens.
1955 ///
1956 /// Note that the number of tokens being reverted should be up to the last
1957 /// backtrack position, not more.
1958 void RevertCachedTokens(unsigned N) {
1959 assert(isBacktrackEnabled() &&
1960 "Should only be called when tokens are cached for backtracking");
1961 assert(signed(CachedLexPos) - signed(N) >=
1962 signed(LastBacktrackPos().first) &&
1963 "Should revert tokens up to the last backtrack position, not more");
1964 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1965 "Corrupted backtrack positions ?");
1966 CachedLexPos -= N;
1967 }
1968
1969 /// Enters a token in the token stream to be lexed next.
1970 ///
1971 /// If BackTrack() is called afterwards, the token will remain at the
1972 /// insertion point.
1973 /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1974 /// flag set. See the flag documentation for details.
1975 void EnterToken(const Token &Tok, bool IsReinject) {
1976 if (LexLevel) {
1977 // It's not correct in general to enter caching lex mode while in the
1978 // middle of a nested lexing action.
1979 auto TokCopy = std::make_unique<Token[]>(1);
1980 TokCopy[0] = Tok;
1981 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1982 } else {
1983 EnterCachingLexMode();
1984 assert(IsReinject && "new tokens in the middle of cached stream");
1985 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1986 }
1987 }
1988
1989 /// We notify the Preprocessor that if it is caching tokens (because
1990 /// backtrack is enabled) it should replace the most recent cached tokens
1991 /// with the given annotation token. This function has no effect if
1992 /// backtracking is not enabled.
1993 ///
1994 /// Note that the use of this function is just for optimization, so that the
1995 /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1996 /// invoked.
1998 assert(Tok.isAnnotation() && "Expected annotation token");
1999 if (CachedLexPos != 0 && isBacktrackEnabled())
2000 AnnotatePreviousCachedTokens(Tok);
2001 }
2002
2003 /// Get the location of the last cached token, suitable for setting the end
2004 /// location of an annotation token.
2006 assert(CachedLexPos != 0);
2007 return CachedTokens[CachedLexPos-1].getLastLoc();
2008 }
2009
2010 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
2011 /// CachedTokens.
2012 bool IsPreviousCachedToken(const Token &Tok) const;
2013
2014 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
2015 /// in \p NewToks.
2016 ///
2017 /// Useful when a token needs to be split into smaller ones and CachedTokens'
2018 /// most recent token must be updated to reflect that.
2020
2021 /// Replace the last token with an annotation token.
2022 ///
2023 /// Like AnnotateCachedTokens(), this routine replaces an
2024 /// already-parsed (and resolved) token with an annotation
2025 /// token. However, this routine only replaces the last token with
2026 /// the annotation token; it does not affect any other cached
2027 /// tokens. This function has no effect if backtracking is not
2028 /// enabled.
2030 assert(Tok.isAnnotation() && "Expected annotation token");
2031 if (CachedLexPos != 0 && isBacktrackEnabled())
2032 CachedTokens[CachedLexPos-1] = Tok;
2033 }
2034
2035 /// Enter an annotation token into the token stream.
2037 void *AnnotationVal);
2038
2039 /// Determine whether it's possible for a future call to Lex to produce an
2040 /// annotation token created by a previous call to EnterAnnotationToken.
2042 return CurLexerCallback != CLK_Lexer;
2043 }
2044
2045 /// Update the current token to represent the provided
2046 /// identifier, in order to cache an action performed by typo correction.
2048 assert(Tok.getIdentifierInfo() && "Expected identifier token");
2049 if (CachedLexPos != 0 && isBacktrackEnabled())
2050 CachedTokens[CachedLexPos-1] = Tok;
2051 }
2052
2053 /// Recompute the current lexer kind based on the CurLexer/
2054 /// CurTokenLexer pointers.
2055 void recomputeCurLexerKind();
2056
2057 /// Returns true if incremental processing is enabled
2058 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
2059
2060 /// Enables the incremental processing
2061 void enableIncrementalProcessing(bool value = true) {
2062 IncrementalProcessing = value;
2063 }
2064
2065 /// Specify the point at which code-completion will be performed.
2066 ///
2067 /// \param File the file in which code completion should occur. If
2068 /// this file is included multiple times, code-completion will
2069 /// perform completion the first time it is included. If NULL, this
2070 /// function clears out the code-completion point.
2071 ///
2072 /// \param Line the line at which code completion should occur
2073 /// (1-based).
2074 ///
2075 /// \param Column the column at which code completion should occur
2076 /// (1-based).
2077 ///
2078 /// \returns true if an error occurred, false otherwise.
2080 unsigned Column);
2081
2082 /// Determine if we are performing code completion.
2083 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
2084
2085 /// Returns the location of the code-completion point.
2086 ///
2087 /// Returns an invalid location if code-completion is not enabled or the file
2088 /// containing the code-completion point has not been lexed yet.
2089 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
2090
2091 /// Returns the start location of the file of code-completion point.
2092 ///
2093 /// Returns an invalid location if code-completion is not enabled or the file
2094 /// containing the code-completion point has not been lexed yet.
2096 return CodeCompletionFileLoc;
2097 }
2098
2099 /// Returns true if code-completion is enabled and we have hit the
2100 /// code-completion point.
2101 bool isCodeCompletionReached() const { return CodeCompletionReached; }
2102
2103 /// Note that we hit the code-completion point.
2105 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
2106 CodeCompletionReached = true;
2107 // Silence any diagnostics that occur after we hit the code-completion.
2109 }
2110
2111 /// The location of the currently-active \#pragma clang
2112 /// arc_cf_code_audited begin.
2113 ///
2114 /// Returns an invalid location if there is no such pragma active.
2116 return PragmaARCCFCodeAuditedInfo;
2117 }
2118
2119 /// Set the location of the currently-active \#pragma clang
2120 /// arc_cf_code_audited begin. An invalid location ends the pragma.
2122 SourceLocation Loc) {
2123 PragmaARCCFCodeAuditedInfo = IdentifierLoc(Loc, Ident);
2124 }
2125
2126 /// The location of the currently-active \#pragma clang
2127 /// assume_nonnull begin.
2128 ///
2129 /// Returns an invalid location if there is no such pragma active.
2131 return PragmaAssumeNonNullLoc;
2132 }
2133
2134 /// Set the location of the currently-active \#pragma clang
2135 /// assume_nonnull begin. An invalid location ends the pragma.
2137 PragmaAssumeNonNullLoc = Loc;
2138 }
2139
2140 /// Get the location of the recorded unterminated \#pragma clang
2141 /// assume_nonnull begin in the preamble, if one exists.
2142 ///
2143 /// Returns an invalid location if the preamble did not end with
2144 /// such a pragma active or if there is no recorded preamble.
2146 return PreambleRecordedPragmaAssumeNonNullLoc;
2147 }
2148
2149 /// Record the location of the unterminated \#pragma clang
2150 /// assume_nonnull begin in the preamble.
2152 PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2153 }
2154
2155 /// Set the directory in which the main file should be considered
2156 /// to have been found, if it is not a real file.
2157 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2158
2159 /// Instruct the preprocessor to skip part of the main source file.
2160 ///
2161 /// \param Bytes The number of bytes in the preamble to skip.
2162 ///
2163 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2164 /// start of a line.
2165 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2166 SkipMainFilePreamble.first = Bytes;
2167 SkipMainFilePreamble.second = StartOfLine;
2168 }
2169
2170 /// Forwarding function for diagnostics. This emits a diagnostic at
2171 /// the specified Token's location, translating the token's start
2172 /// position in the current buffer into a SourcePosition object for rendering.
2173 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2174 return Diags->Report(Loc, DiagID);
2175 }
2176
2177 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2178 return Diags->Report(Tok.getLocation(), DiagID);
2179 }
2180
2181 /// Return the 'spelling' of the token at the given
2182 /// location; does not go up to the spelling location or down to the
2183 /// expansion location.
2184 ///
2185 /// \param buffer A buffer which will be used only if the token requires
2186 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
2187 /// \param invalid If non-null, will be set \c true if an error occurs.
2189 SmallVectorImpl<char> &buffer,
2190 bool *invalid = nullptr) const {
2191 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2192 }
2193
2194 /// Return the 'spelling' of the Tok token.
2195 ///
2196 /// The spelling of a token is the characters used to represent the token in
2197 /// the source file after trigraph expansion and escaped-newline folding. In
2198 /// particular, this wants to get the true, uncanonicalized, spelling of
2199 /// things like digraphs, UCNs, etc.
2200 ///
2201 /// \param Invalid If non-null, will be set \c true if an error occurs.
2202 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2203 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2204 }
2205
2206 /// Get the spelling of a token into a preallocated buffer, instead
2207 /// of as an std::string.
2208 ///
2209 /// The caller is required to allocate enough space for the token, which is
2210 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2211 /// actual result is returned.
2212 ///
2213 /// Note that this method may do two possible things: it may either fill in
2214 /// the buffer specified with characters, or it may *change the input pointer*
2215 /// to point to a constant buffer with the data already in it (avoiding a
2216 /// copy). The caller is not allowed to modify the returned buffer pointer
2217 /// if an internal buffer is returned.
2218 unsigned getSpelling(const Token &Tok, const char *&Buffer,
2219 bool *Invalid = nullptr) const {
2220 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2221 }
2222
2223 /// Get the spelling of a token into a SmallVector.
2224 ///
2225 /// Note that the returned StringRef may not point to the
2226 /// supplied buffer if a copy can be avoided.
2227 StringRef getSpelling(const Token &Tok,
2228 SmallVectorImpl<char> &Buffer,
2229 bool *Invalid = nullptr) const;
2230
2231 /// Relex the token at the specified location.
2232 /// \returns true if there was a failure, false on success.
2234 bool IgnoreWhiteSpace = false) {
2235 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2236 }
2237
2238 /// Given a Token \p Tok that is a numeric constant with length 1,
2239 /// return the value of constant as an unsigned 8-bit integer.
2240 uint8_t
2242 bool *Invalid = nullptr) const {
2243 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2244 Tok.getLength() == 1 && "Called on unsupported token");
2245 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2246
2247 // If the token is carrying a literal data pointer, just use it.
2248 if (const char *D = Tok.getLiteralData())
2249 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2250
2251 assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2252 // Otherwise, fall back on getCharacterData, which is slower, but always
2253 // works.
2254 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2255 }
2256
2257 /// Retrieve the name of the immediate macro expansion.
2258 ///
2259 /// This routine starts from a source location, and finds the name of the
2260 /// macro responsible for its immediate expansion. It looks through any
2261 /// intervening macro argument expansions to compute this. It returns a
2262 /// StringRef that refers to the SourceManager-owned buffer of the source
2263 /// where that macro name is spelled. Thus, the result shouldn't out-live
2264 /// the SourceManager.
2266 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2267 }
2268
2269 /// Plop the specified string into a scratch buffer and set the
2270 /// specified token's location and length to it.
2271 ///
2272 /// If specified, the source location provides a location of the expansion
2273 /// point of the token.
2274 void CreateString(StringRef Str, Token &Tok,
2275 SourceLocation ExpansionLocStart = SourceLocation(),
2276 SourceLocation ExpansionLocEnd = SourceLocation());
2277
2278 /// Split the first Length characters out of the token starting at TokLoc
2279 /// and return a location pointing to the split token. Re-lexing from the
2280 /// split token will return the split token rather than the original.
2281 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2282
2283 /// Computes the source location just past the end of the
2284 /// token at this source location.
2285 ///
2286 /// This routine can be used to produce a source location that
2287 /// points just past the end of the token referenced by \p Loc, and
2288 /// is generally used when a diagnostic needs to point just after a
2289 /// token where it expected something different than it received. If
2290 /// the returned source location would not be meaningful (e.g., if
2291 /// it points into a macro), this routine returns an invalid
2292 /// source location.
2293 ///
2294 /// \param Offset an offset from the end of the token, where the source
2295 /// location should refer to. The default offset (0) produces a source
2296 /// location pointing just past the end of the token; an offset of 1 produces
2297 /// a source location pointing to the last character in the token, etc.
2299 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2300 }
2301
2302 /// Returns true if the given MacroID location points at the first
2303 /// token of the macro expansion.
2304 ///
2305 /// \param MacroBegin If non-null and function returns true, it is set to
2306 /// begin location of the macro.
2308 SourceLocation *MacroBegin = nullptr) const {
2309 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2310 MacroBegin);
2311 }
2312
2313 /// Returns true if the given MacroID location points at the last
2314 /// token of the macro expansion.
2315 ///
2316 /// \param MacroEnd If non-null and function returns true, it is set to
2317 /// end location of the macro.
2319 SourceLocation *MacroEnd = nullptr) const {
2320 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2321 }
2322
2323 /// Print the token to stderr, used for debugging.
2324 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2325 void DumpLocation(SourceLocation Loc) const;
2326 void DumpMacro(const MacroInfo &MI) const;
2327 void dumpMacroInfo(const IdentifierInfo *II);
2328
2329 /// Given a location that specifies the start of a
2330 /// token, return a new location that specifies a character within the token.
2332 unsigned Char) const {
2333 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2334 }
2335
2336 /// Increment the counters for the number of token paste operations
2337 /// performed.
2338 ///
2339 /// If fast was specified, this is a 'fast paste' case we handled.
2340 void IncrementPasteCounter(bool isFast) {
2341 if (isFast)
2342 ++NumFastTokenPaste;
2343 else
2344 ++NumTokenPaste;
2345 }
2346
2347 void PrintStats();
2348
2349 size_t getTotalMemory() const;
2350
2351 /// When the macro expander pastes together a comment (/##/) in Microsoft
2352 /// mode, this method handles updating the current state, returning the
2353 /// token on the next source line.
2355
2356 //===--------------------------------------------------------------------===//
2357 // Preprocessor callback methods. These are invoked by a lexer as various
2358 // directives and events are found.
2359
2360 /// Given a tok::raw_identifier token, look up the
2361 /// identifier information for the token and install it into the token,
2362 /// updating the token kind accordingly.
2363 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2364
2365private:
2366 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2367
2368public:
2369 /// Specifies the reason for poisoning an identifier.
2370 ///
2371 /// If that identifier is accessed while poisoned, then this reason will be
2372 /// used instead of the default "poisoned" diagnostic.
2373 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2374
2375 /// Display reason for poisoned identifier.
2376 void HandlePoisonedIdentifier(Token & Identifier);
2377
2379 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2380 if(II->isPoisoned()) {
2381 HandlePoisonedIdentifier(Identifier);
2382 }
2383 }
2384 }
2385
2386 /// isNextPPTokenOneOf - Check whether the next pp-token is one of the
2387 /// specificed token kind. this method should have no observable side-effect
2388 /// on the lexed tokens.
2389 template <typename... Ts> bool isNextPPTokenOneOf(Ts... Ks) const {
2390 static_assert(sizeof...(Ts) > 0,
2391 "requires at least one tok::TokenKind specified");
2392 auto NextTokOpt = peekNextPPToken();
2393 return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false;
2394 }
2395
2396private:
2397 /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
2398 /// buffer controlled by this lexer, otherwise return the next unexpanded
2399 /// token.
2400 std::optional<Token> peekNextPPToken() const;
2401
2402 /// Identifiers used for SEH handling in Borland. These are only
2403 /// allowed in particular circumstances
2404 // __except block
2405 IdentifierInfo *Ident__exception_code,
2406 *Ident___exception_code,
2407 *Ident_GetExceptionCode;
2408 // __except filter expression
2409 IdentifierInfo *Ident__exception_info,
2410 *Ident___exception_info,
2411 *Ident_GetExceptionInfo;
2412 // __finally
2413 IdentifierInfo *Ident__abnormal_termination,
2414 *Ident___abnormal_termination,
2415 *Ident_AbnormalTermination;
2416
2417 const char *getCurLexerEndPos();
2418 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2419
2420public:
2421 void PoisonSEHIdentifiers(bool Poison = true); // Borland
2422
2423 /// Callback invoked when the lexer reads an identifier and has
2424 /// filled in the tokens IdentifierInfo member.
2425 ///
2426 /// This callback potentially macro expands it or turns it into a named
2427 /// token (like 'for').
2428 ///
2429 /// \returns true if we actually computed a token, false if we need to
2430 /// lex again.
2431 bool HandleIdentifier(Token &Identifier);
2432
2433 /// Callback invoked when the lexer hits the end of the current file.
2434 ///
2435 /// This either returns the EOF token and returns true, or
2436 /// pops a level off the include stack and returns false, at which point the
2437 /// client should call lex again.
2438 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2439
2440 /// Callback invoked when the current TokenLexer hits the end of its
2441 /// token stream.
2443
2444 /// Callback invoked when the lexer sees a # token at the start of a
2445 /// line.
2446 ///
2447 /// This consumes the directive, modifies the lexer/preprocessor state, and
2448 /// advances the lexer(s) so that the next token read is the correct one.
2450
2451 /// Ensure that the next token is a tok::eod token.
2452 ///
2453 /// If not, emit a diagnostic and consume up until the eod.
2454 /// If \p EnableMacros is true, then we consider macros that expand to zero
2455 /// tokens as being ok.
2456 ///
2457 /// If \p ExtraToks is not null, the extra tokens will be saved in this
2458 /// container.
2459 ///
2460 /// \return The location of the end of the directive (the terminating
2461 /// newline).
2463 CheckEndOfDirective(StringRef DirType, bool EnableMacros = false,
2464 SmallVectorImpl<Token> *ExtraToks = nullptr);
2465
2466 /// Read and discard all tokens remaining on the current line until
2467 /// the tok::eod token is found. Returns the range of the skipped tokens.
2470 Token Tmp;
2471 return DiscardUntilEndOfDirective(Tmp, DiscardedToks);
2472 }
2473
2474 /// Same as above except retains the token that was found.
2477 SmallVectorImpl<Token> *DiscardedToks = nullptr);
2478
2479 /// Returns true if the preprocessor has seen a use of
2480 /// __DATE__ or __TIME__ in the file so far.
2481 bool SawDateOrTime() const {
2482 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2483 }
2484 uint32_t getCounterValue() const { return CounterValue; }
2485 void setCounterValue(uint32_t V) { CounterValue = V; }
2486
2488 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2489 "FPEvalMethod should be set either from command line or from the "
2490 "target info");
2491 return CurrentFPEvalMethod;
2492 }
2493
2495 return TUFPEvalMethod;
2496 }
2497
2499 return LastFPEvalPragmaLocation;
2500 }
2501
2505 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2506 // This is the location of the '#pragma float_control' where the
2507 // execution state is modified.
2508 LastFPEvalPragmaLocation = PragmaLoc;
2509 CurrentFPEvalMethod = Val;
2510 TUFPEvalMethod = Val;
2511 }
2512
2515 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2516 TUFPEvalMethod = Val;
2517 }
2518
2519 /// Retrieves the module that we're currently building, if any.
2521
2522 /// Retrieves the module whose implementation we're currently compiling, if any.
2524
2525 /// If we are preprocessing a named module.
2526 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2527
2528 /// If we are preprocessing a named interface unit.
2529 /// Note that a module implementation partition is not considered a
2530 /// named interface unit here, although it is importable,
2531 /// to ease the parsing.
2533 return ModuleDeclState.isNamedInterface();
2534 }
2535
2536 /// Get the named module name we're preprocessing.
2537 /// Requires we're preprocessing a named module.
2538 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2539
2540 /// If we are implementing an implementation module unit.
2541 /// Note that the module implementation partition is not considered as an
2542 /// implementation unit.
2544 return ModuleDeclState.isImplementationUnit();
2545 }
2546
2547 /// If we're importing a standard C++20 named module.
2549 assert(getLangOpts().CPlusPlusModules &&
2550 "Import C++ named modules are only valid for C++20 modules");
2551 return ImportingCXXNamedModules;
2552 }
2553
2554 /// Allocate a new MacroInfo object with the provided SourceLocation.
2556
2557 /// Turn the specified lexer token into a fully checked and spelled
2558 /// filename, e.g. as an operand of \#include.
2559 ///
2560 /// The caller is expected to provide a buffer that is large enough to hold
2561 /// the spelling of the filename, but is also expected to handle the case
2562 /// when this method decides to use a different buffer.
2563 ///
2564 /// \returns true if the input filename was in <>'s or false if it was
2565 /// in ""'s.
2566 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2567
2568 /// Given a "foo" or <foo> reference, look up the indicated file.
2569 ///
2570 /// Returns std::nullopt on failure. \p isAngled indicates whether the file
2571 /// reference is for system \#include's or not (i.e. using <> instead of "").
2573 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2574 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2575 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2576 SmallVectorImpl<char> *RelativePath,
2577 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2578 bool *IsFrameworkFound, bool SkipCache = false,
2579 bool OpenFile = true, bool CacheFailures = true);
2580
2581 /// Given a "Filename" or <Filename> reference, look up the indicated embed
2582 /// resource. \p isAngled indicates whether the file reference is for
2583 /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
2584 /// is true, the file looked up is opened for reading, otherwise it only
2585 /// validates that the file exists.
2586 ///
2587 /// Returns std::nullopt on failure.
2588 OptionalFileEntryRef LookupEmbedFile(StringRef Filename, bool isAngled,
2589 bool OpenFile);
2590
2591 /// Return true if we're in the top-level file, not in a \#include.
2592 bool isInPrimaryFile() const;
2593
2594 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2595 /// followed by EOD. Return true if the token is not a valid on-off-switch.
2597
2598 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2599 bool *ShadowFlag = nullptr);
2600
2601 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2602 Module *LeaveSubmodule(bool ForPragma);
2603
2604private:
2605 friend void TokenLexer::ExpandFunctionArguments();
2606
2607 void PushIncludeMacroStack() {
2608 assert(CurLexerCallback != CLK_CachingLexer &&
2609 "cannot push a caching lexer");
2610 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
2611 std::move(CurLexer), CurPPLexer,
2612 std::move(CurTokenLexer), CurDirLookup);
2613 CurPPLexer = nullptr;
2614 }
2615
2616 void PopIncludeMacroStack() {
2617 if (CurLexer)
2618 PendingDestroyLexers.push_back(std::move(CurLexer));
2619 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2620 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2621 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2622 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
2623 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2624 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2625 IncludeMacroStack.pop_back();
2626 }
2627
2628 void PropagateLineStartLeadingSpaceInfo(Token &Result);
2629
2630 /// Determine whether we need to create module macros for #defines in the
2631 /// current context.
2632 bool needModuleMacros() const;
2633
2634 /// Update the set of active module macros and ambiguity flag for a module
2635 /// macro name.
2636 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2637
2638 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2639 SourceLocation Loc);
2640 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2641 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2642 bool isPublic);
2643
2644 /// Lex and validate a macro name, which occurs after a
2645 /// \#define or \#undef.
2646 ///
2647 /// \param MacroNameTok Token that represents the name defined or undefined.
2648 /// \param IsDefineUndef Kind of preprocessor directive.
2649 /// \param ShadowFlag Points to flag that is set if macro name shadows
2650 /// a keyword.
2651 ///
2652 /// This emits a diagnostic, sets the token kind to eod,
2653 /// and discards the rest of the macro line if the macro name is invalid.
2654 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2655 bool *ShadowFlag = nullptr);
2656
2657 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2658 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2659 /// doing so performs certain validity checks including (but not limited to):
2660 /// - # (stringization) is followed by a macro parameter
2661 /// \param MacroNameTok - Token that represents the macro name
2662 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2663 ///
2664 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2665 /// returns a nullptr if an invalid sequence of tokens is encountered.
2666 MacroInfo *ReadOptionalMacroParameterListAndBody(
2667 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2668
2669 /// The ( starting an argument list of a macro definition has just been read.
2670 /// Lex the rest of the parameters and the closing ), updating \p MI with
2671 /// what we learn and saving in \p LastTok the last token read.
2672 /// Return true if an error occurs parsing the arg list.
2673 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2674
2675 /// Provide a suggestion for a typoed directive. If there is no typo, then
2676 /// just skip suggesting.
2677 ///
2678 /// \param Tok - Token that represents the directive
2679 /// \param Directive - String reference for the directive name
2680 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2681
2682 /// We just read a \#if or related directive and decided that the
2683 /// subsequent tokens are in the \#if'd out portion of the
2684 /// file. Lex the rest of the file, until we see an \#endif. If \p
2685 /// FoundNonSkipPortion is true, then we have already emitted code for part of
2686 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2687 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2688 /// already seen one so a \#else directive is a duplicate. When this returns,
2689 /// the caller can lex the first valid token.
2690 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2691 SourceLocation IfTokenLoc,
2692 bool FoundNonSkipPortion, bool FoundElse,
2693 SourceLocation ElseLoc = SourceLocation());
2694
2695 /// Information about the result for evaluating an expression for a
2696 /// preprocessor directive.
2697 struct DirectiveEvalResult {
2698 /// The integral value of the expression.
2699 std::optional<llvm::APSInt> Value;
2700
2701 /// Whether the expression was evaluated as true or not.
2702 bool Conditional;
2703
2704 /// True if the expression contained identifiers that were undefined.
2705 bool IncludedUndefinedIds;
2706
2707 /// The source range for the expression.
2708 SourceRange ExprRange;
2709 };
2710
2711 /// Evaluate an integer constant expression that may occur after a
2712 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2713 ///
2714 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2715 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2716 bool CheckForEoD = true);
2717
2718 /// Evaluate an integer constant expression that may occur after a
2719 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2720 ///
2721 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2722 /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2723 /// in the evaluated expression or not.
2724 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2725 Token &Tok,
2726 bool &EvaluatedDefined,
2727 bool CheckForEoD = true);
2728
2729 /// Process a '__has_embed("path" [, ...])' expression.
2730 ///
2731 /// Returns predefined `__STDC_EMBED_*` macro values if
2732 /// successful.
2733 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2734
2735 /// Process a '__has_include("path")' expression.
2736 ///
2737 /// Returns true if successful.
2738 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2739
2740 /// Process '__has_include_next("path")' expression.
2741 ///
2742 /// Returns true if successful.
2743 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2744
2745 /// Get the directory and file from which to start \#include_next lookup.
2746 std::pair<ConstSearchDirIterator, const FileEntry *>
2747 getIncludeNextStart(const Token &IncludeNextTok) const;
2748
2749 /// Install the standard preprocessor pragmas:
2750 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2751 void RegisterBuiltinPragmas();
2752
2753 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
2754 /// table and mark it as a builtin macro to be expanded.
2755 IdentifierInfo *RegisterBuiltinMacro(const char *Name) {
2756 // Get the identifier.
2757 IdentifierInfo *Id = getIdentifierInfo(Name);
2758
2759 // Mark it as being a macro that is builtin.
2760 MacroInfo *MI = AllocateMacroInfo(SourceLocation());
2761 MI->setIsBuiltinMacro();
2763 return Id;
2764 }
2765
2766 /// Register builtin macros such as __LINE__ with the identifier table.
2767 void RegisterBuiltinMacros();
2768
2769 /// If an identifier token is read that is to be expanded as a macro, handle
2770 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2771 /// otherwise the caller should lex again.
2772 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2773
2774 /// Cache macro expanded tokens for TokenLexers.
2775 ///
2776 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
2777 /// going to lex to the cache, and when it finishes the tokens are removed
2778 /// from the end of the cache.
2779 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2780 ArrayRef<Token> tokens);
2781
2782 void removeCachedMacroExpandedTokensOfLastLexer();
2783
2784 /// After reading "MACRO(", this method is invoked to read all of the formal
2785 /// arguments specified for the macro invocation. Returns null on error.
2786 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2787 SourceLocation &MacroEnd);
2788
2789 /// If an identifier token is read that is to be expanded
2790 /// as a builtin macro, handle it and return the next token as 'Tok'.
2791 void ExpandBuiltinMacro(Token &Tok);
2792
2793 /// Read a \c _Pragma directive, slice it up, process it, then
2794 /// return the first token after the directive.
2795 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2796 void Handle_Pragma(Token &Tok);
2797
2798 /// Like Handle_Pragma except the pragma text is not enclosed within
2799 /// a string literal.
2800 void HandleMicrosoft__pragma(Token &Tok);
2801
2802 /// Add a lexer to the top of the include stack and
2803 /// start lexing tokens from it instead of the current buffer.
2804 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2805
2806 /// Set the FileID for the preprocessor predefines.
2807 void setPredefinesFileID(FileID FID) {
2808 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2809 PredefinesFileID = FID;
2810 }
2811
2812 /// Set the FileID for the PCH through header.
2813 void setPCHThroughHeaderFileID(FileID FID);
2814
2815 /// Returns true if we are lexing from a file and not a
2816 /// pragma or a macro.
2817 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2818 return L ? !L->isPragmaLexer() : P != nullptr;
2819 }
2820
2821 static bool IsFileLexer(const IncludeStackInfo& I) {
2822 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2823 }
2824
2825 bool IsFileLexer() const {
2826 return IsFileLexer(CurLexer.get(), CurPPLexer);
2827 }
2828
2829 //===--------------------------------------------------------------------===//
2830 // Standard Library Identification
2831 std::optional<CXXStandardLibraryVersionInfo> CXXStandardLibraryVersion;
2832
2833public:
2834 std::optional<std::uint64_t> getStdLibCxxVersion();
2835 bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion);
2836
2837private:
2838 //===--------------------------------------------------------------------===//
2839 // Caching stuff.
2840 void CachingLex(Token &Result);
2841
2842 bool InCachingLexMode() const {
2843 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2844 // that we are past EOF, not that we are in CachingLex mode.
2845 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2846 }
2847
2848 void EnterCachingLexMode();
2849 void EnterCachingLexModeUnchecked();
2850
2851 void ExitCachingLexMode() {
2852 if (InCachingLexMode())
2854 }
2855
2856 const Token &PeekAhead(unsigned N);
2857 void AnnotatePreviousCachedTokens(const Token &Tok);
2858
2859 //===--------------------------------------------------------------------===//
2860 /// Handle*Directive - implement the various preprocessor directives. These
2861 /// should side-effect the current preprocessor object so that the next call
2862 /// to Lex() will return the appropriate token next.
2863 void HandleLineDirective();
2864 void HandleDigitDirective(Token &Tok);
2865 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2866 void HandleIdentSCCSDirective(Token &Tok);
2867 void HandleMacroPublicDirective(Token &Tok);
2868 void HandleMacroPrivateDirective();
2869
2870 /// An additional notification that can be produced by a header inclusion or
2871 /// import to tell the parser what happened.
2872 struct ImportAction {
2873 enum ActionKind {
2874 None,
2875 ModuleBegin,
2876 ModuleImport,
2877 HeaderUnitImport,
2878 SkippedModuleImport,
2879 Failure,
2880 } Kind;
2881 Module *ModuleForHeader = nullptr;
2882
2883 ImportAction(ActionKind AK, Module *Mod = nullptr)
2884 : Kind(AK), ModuleForHeader(Mod) {
2885 assert((AK == None || Mod || AK == Failure) &&
2886 "no module for module action");
2887 }
2888 };
2889
2890 OptionalFileEntryRef LookupHeaderIncludeOrImport(
2891 ConstSearchDirIterator *CurDir, StringRef &Filename,
2892 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2893 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2894 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2895 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2896 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2897 ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2898 // Binary data inclusion
2899 void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok);
2900 void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2901 const LexEmbedParametersResult &Params,
2902 StringRef BinaryContents, StringRef FileName);
2903
2904 // File inclusion.
2905 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2906 ConstSearchDirIterator LookupFrom = nullptr,
2907 const FileEntry *LookupFromFile = nullptr);
2908 ImportAction
2909 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2910 Token &FilenameTok, SourceLocation EndLoc,
2911 ConstSearchDirIterator LookupFrom = nullptr,
2912 const FileEntry *LookupFromFile = nullptr);
2913 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2914 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2915 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2916 void HandleMicrosoftImportDirective(Token &Tok);
2917
2918public:
2919 /// Check that the given module is available, producing a diagnostic if not.
2920 /// \return \c true if the check failed (because the module is not available).
2921 /// \c false if the module appears to be usable.
2922 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2923 const TargetInfo &TargetInfo,
2924 const Module &M, DiagnosticsEngine &Diags);
2925
2926 // Module inclusion testing.
2927 /// Find the module that owns the source or header file that
2928 /// \p Loc points to. If the location is in a file that was included
2929 /// into a module, or is outside any module, returns nullptr.
2930 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2931
2932 /// We want to produce a diagnostic at location IncLoc concerning an
2933 /// unreachable effect at location MLoc (eg, where a desired entity was
2934 /// declared or defined). Determine whether the right way to make MLoc
2935 /// reachable is by #include, and if so, what header should be included.
2936 ///
2937 /// This is not necessarily fast, and might load unexpected module maps, so
2938 /// should only be called by code that intends to produce an error.
2939 ///
2940 /// \param IncLoc The location at which the missing effect was detected.
2941 /// \param MLoc A location within an unimported module at which the desired
2942 /// effect occurred.
2943 /// \return A file that can be #included to provide the desired effect. Null
2944 /// if no such file could be determined or if a #include is not
2945 /// appropriate (eg, if a module should be imported instead).
2947 SourceLocation MLoc);
2948
2949 bool isRecordingPreamble() const {
2950 return PreambleConditionalStack.isRecording();
2951 }
2952
2953 bool hasRecordedPreamble() const {
2954 return PreambleConditionalStack.hasRecordedPreamble();
2955 }
2956
2958 return PreambleConditionalStack.getStack();
2959 }
2960
2962 PreambleConditionalStack.setStack(s);
2963 }
2964
2966 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
2967 PreambleConditionalStack.startReplaying();
2968 PreambleConditionalStack.setStack(s);
2969 PreambleConditionalStack.SkipInfo = SkipInfo;
2970 }
2971
2972 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2973 return PreambleConditionalStack.SkipInfo;
2974 }
2975
2976private:
2977 /// After processing predefined file, initialize the conditional stack from
2978 /// the preamble.
2979 void replayPreambleConditionalStack();
2980
2981 // Macro handling.
2982 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2983 void HandleUndefDirective();
2984
2985 // Conditional Inclusion.
2986 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2987 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2988 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2989 bool ReadAnyTokensBeforeDirective);
2990 void HandleEndifDirective(Token &EndifToken);
2991 void HandleElseDirective(Token &Result, const Token &HashToken);
2992 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2993 tok::PPKeywordKind Kind);
2994
2995 // Pragmas.
2996 void HandlePragmaDirective(PragmaIntroducer Introducer);
2997
2998public:
2999 void HandlePragmaOnce(Token &OnceTok);
3000 void HandlePragmaMark(Token &MarkTok);
3001 void HandlePragmaPoison();
3002 void HandlePragmaSystemHeader(Token &SysHeaderTok);
3003 void HandlePragmaDependency(Token &DependencyTok);
3010
3011 // Return true and store the first token only if any CommentHandler
3012 // has inserted some tokens and getCommentRetentionState() is false.
3013 bool HandleComment(Token &result, SourceRange Comment);
3014
3015 /// A macro is used, update information about macros that need unused
3016 /// warnings.
3017 void markMacroAsUsed(MacroInfo *MI);
3018
3019 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
3020 SourceLocation AnnotationLoc) {
3021 AnnotationInfos[II].DeprecationInfo =
3022 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
3023 }
3024
3025 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
3026 SourceLocation AnnotationLoc) {
3027 AnnotationInfos[II].RestrictExpansionInfo =
3028 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
3029 }
3030
3031 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
3032 AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
3033 }
3034
3035 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
3036 return AnnotationInfos.find(II)->second;
3037 }
3038
3039 void emitMacroExpansionWarnings(const Token &Identifier,
3040 bool IsIfnDef = false) const {
3041 IdentifierInfo *Info = Identifier.getIdentifierInfo();
3042 if (Info->isDeprecatedMacro())
3043 emitMacroDeprecationWarning(Identifier);
3044
3045 if (Info->isRestrictExpansion() &&
3046 !SourceMgr.isInMainFile(Identifier.getLocation()))
3047 emitRestrictExpansionWarning(Identifier);
3048
3049 if (!IsIfnDef) {
3050 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
3051 emitRestrictInfNaNWarning(Identifier, 0);
3052 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
3053 emitRestrictInfNaNWarning(Identifier, 1);
3054 }
3055 }
3056
3058 const LangOptions &LangOpts,
3059 const TargetInfo &TI);
3060
3062 const PresumedLoc &PLoc,
3063 const LangOptions &LangOpts,
3064 const TargetInfo &TI);
3065
3066private:
3067 void emitMacroDeprecationWarning(const Token &Identifier) const;
3068 void emitRestrictExpansionWarning(const Token &Identifier) const;
3069 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
3070 void emitRestrictInfNaNWarning(const Token &Identifier,
3071 unsigned DiagSelection) const;
3072
3073 /// This boolean state keeps track if the current scanned token (by this PP)
3074 /// is in an "-Wunsafe-buffer-usage" opt-out region. This assumes that PP
3075 /// scans a translation unit in a linear order.
3076 bool InSafeBufferOptOutRegion = false;
3077
3078 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
3079 /// region if PP is currently in such a region. Hold undefined value
3080 /// otherwise.
3081 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of a never-closed region.
3082
3083 using SafeBufferOptOutRegionsTy =
3085 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
3086 // translation unit. Each region is represented by a pair of start and
3087 // end locations.
3088 SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
3089
3090 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
3091 // following structure to manage them by their ASTs.
3092 struct {
3093 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a
3094 // loaded AST. See `SourceManager::getUniqueLoadedASTID`.
3095 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
3096
3097 // Returns a reference to the safe buffer opt-out regions of the loaded
3098 // AST where `Loc` belongs to. (Construct if absent)
3099 SafeBufferOptOutRegionsTy &
3100 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
3101 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
3102 }
3103
3104 // Returns a reference to the safe buffer opt-out regions of the loaded
3105 // AST where `Loc` belongs to. (This const function returns nullptr if
3106 // absent.)
3107 const SafeBufferOptOutRegionsTy *
3108 lookupLoadedOptOutMap(SourceLocation Loc,
3109 const SourceManager &SrcMgr) const {
3110 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
3111 auto Iter = LoadedRegions.find(FID);
3112
3113 if (Iter == LoadedRegions.end())
3114 return nullptr;
3115 return &Iter->getSecond();
3116 }
3117 } LoadedSafeBufferOptOutMap;
3118
3119public:
3120 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
3121 /// region. This `Loc` must be a source location that has been pre-processed.
3122 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
3123
3124 /// Alter the state of whether this PP currently is in a
3125 /// "-Wunsafe-buffer-usage" opt-out region.
3126 ///
3127 /// \param isEnter true if this PP is entering a region; otherwise, this PP
3128 /// is exiting a region
3129 /// \param Loc the location of the entry or exit of a
3130 /// region
3131 /// \return true iff it is INVALID to enter or exit a region, i.e.,
3132 /// attempt to enter a region before exiting a previous region, or exiting a
3133 /// region that PP is not currently in.
3134 bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
3135 const SourceLocation &Loc);
3136
3137 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3138 /// opt-out region
3140
3141 /// \param StartLoc output argument. It will be set to the start location of
3142 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
3143 /// returns true.
3144 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3145 /// opt-out region
3146 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3147
3148 /// \return a sequence of SourceLocations representing ordered opt-out regions
3149 /// specified by
3150 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
3151 SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3152
3153 /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3154 /// record of code `PP_UNSAFE_BUFFER_USAGE`.
3155 /// \return true iff the `Preprocessor` has been updated; false if the
3156 /// `Preprocessor` is the same as it was before the call.
3158 const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3159
3160 /// Whether we've seen pp-directives which may have changed the preprocessing
3161 /// state.
3162 bool hasSeenNoTrivialPPDirective() const;
3163
3164private:
3165 /// Helper functions to forward lexing to the actual lexer. They all share the
3166 /// same signature.
3167 static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3168 return P.CurLexer->Lex(Result);
3169 }
3170 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3171 return P.CurTokenLexer->Lex(Result);
3172 }
3173 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3174 P.CachingLex(Result);
3175 return true;
3176 }
3177 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3178 return P.CurLexer->LexDependencyDirectiveToken(Result);
3179 }
3180 static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
3181 return P.LexAfterModuleImport(Result);
3182 }
3183};
3184
3185/// Abstract base class that describes a handler that will receive
3186/// source ranges for each of the comments encountered in the source file.
3188public:
3190
3191 // The handler shall return true if it has pushed any tokens
3192 // to be read using e.g. EnterToken or EnterTokenStream.
3193 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
3194};
3195
3196/// Abstract base class that describes a handler that will receive
3197/// source ranges for empty lines encountered in the source file.
3199public:
3201
3202 // The handler handles empty lines.
3203 virtual void HandleEmptyline(SourceRange Range) = 0;
3204};
3205
3206/// Helper class to shuttle information about #embed directives from the
3207/// preprocessor to the parser through an annotation token.
3209 StringRef BinaryData;
3210 StringRef FileName;
3211};
3212
3213/// Registry of pragma handlers added by plugins
3214using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3215
3216} // namespace clang
3217
3218namespace llvm {
3219extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
3220} // namespace llvm
3221
3222#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
#define V(N, I)
Defines the Diagnostic-related interfaces.
Defines the Diagnostic IDs-related interfaces.
Token Tok
The Token.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Defines the PPCallbacks interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
VerifyDiagnosticConsumer::Directive Directive
__device__ __2f16 float __ockl_bool s
Holds information about both target-independent and target-specific builtins, allowing easy queries b...
Definition Builtins.h:235
Callback handler that receives notifications when performing code completion within the preprocessor.
Abstract base class that describes a handler that will receive source ranges for each of the comments...
virtual bool HandleComment(Preprocessor &PP, SourceRange Comment)=0
A directive for a defined macro or a macro imported from a module.
Definition MacroInfo.h:432
Functor that returns the dependency directives for a given file.
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:232
void setSuppressAllDiagnostics(bool Val)
Suppress all diagnostics, to silence the front end when we know that we don't want any more diagnosti...
Definition Diagnostic.h:735
A reference to a DirectoryEntry that includes the name of the directory as it was accessed by the Fil...
Cached information about one directory (either on disk or in the virtual file system).
Abstract base class that describes a handler that will receive source ranges for empty lines encounte...
virtual void HandleEmptyline(SourceRange Range)=0
Abstract interface for external sources of preprocessor information.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition FileEntry.h:302
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements support for file system lookup, file system caching, and directory search management.
Definition FileManager.h:53
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
One of these records is kept for each identifier that is lexed.
bool hadMacroDefinition() const
Returns true if this identifier was #defined to some value at any moment.
bool hasMacroDefinition() const
Return true if this identifier is #defined to some other value.
bool isDeprecatedMacro() const
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
StringRef getName() const
Return the actual identifier string.
bool isRestrictExpansion() const
A simple pair of identifier info and location.
Implements an efficient mapping from strings to IdentifierInfo nodes.
FPEvalMethodKind
Possible float expression evaluation method choices.
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
Definition Lexer.cpp:1074
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
Definition Lexer.cpp:888
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition Lexer.h:399
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
Definition Lexer.cpp:910
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition Lexer.cpp:461
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition Lexer.cpp:519
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Definition Lexer.cpp:858
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:590
const DefMacroDirective * getDirective() const
Definition MacroInfo.h:375
Encapsulates changes to the "macros namespace" (the location where the macro name became active,...
Definition MacroInfo.h:313
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:39
SourceLocation getDefinitionLoc() const
Return the location that the macro was defined at.
Definition MacroInfo.h:125
Abstract interface for a module loader.
static std::string getFlatNameFromPath(ModuleIdPath Path)
Represents a macro directive exported by a module.
Definition MacroInfo.h:514
A header that is known to reside within a given module, whether it was included or excluded.
Definition ModuleMap.h:158
unsigned getNumIdentifierLocs() const
std::string str() const
SourceLocation getBeginLoc() const
SourceLocation getEndLoc() const
SourceRange getRange() const
ModuleIdPath getModuleIdPath() const
Describes a module or submodule.
Definition Module.h:246
bool isModuleMapModule() const
Definition Module.h:353
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
PragmaHandler - Instances of this interface defined to handle the various pragmas that the language f...
Definition Pragma.h:65
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getLastFPEvalPragmaLocation() const
bool isMacroDefined(const IdentifierInfo *II)
MacroDirective * getLocalMacroDirective(const IdentifierInfo *II) const
Given an identifier, return its latest non-imported MacroDirective if it is #define'd and not #undef'...
bool markIncluded(FileEntryRef File)
Mark the file as included.
void HandlePragmaPushMacro(Token &Tok)
Handle #pragma push_macro.
Definition Pragma.cpp:634
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
void HandlePragmaPoison()
HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'.
Definition Pragma.cpp:439
void setCodeCompletionHandler(CodeCompletionHandler &Handler)
Set the code completion handler to the given object.
void dumpMacroInfo(const IdentifierInfo *II)
void HandlePragmaSystemHeader(Token &SysHeaderTok)
HandlePragmaSystemHeader - Implement #pragma GCC system_header.
Definition Pragma.cpp:481
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void MaybeHandlePoisonedIdentifier(Token &Identifier)
ModuleMacro * addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, ArrayRef< ModuleMacro * > Overrides, bool &IsNew)
Register an exported macro for a module and identifier.
void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, MacroDirective *MD)
Set a MacroDirective that was loaded from a PCH file.
MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, SourceLocation Loc)
void EnterModuleSuffixTokenStream(ArrayRef< Token > Toks)
void markClangModuleAsAffecting(Module *M)
Mark the given clang module as affecting the current clang module or translation unit.
void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, SourceLocation Loc)
Set the location of the currently-active #pragma clang arc_cf_code_audited begin.
void HandlePragmaModuleBuild(Token &Tok)
Definition Pragma.cpp:811
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
ArrayRef< ModuleMacro * > getLeafModuleMacros(const IdentifierInfo *II) const
Get the list of leaf (non-overridden) module macros for a name.
bool isIncrementalProcessingEnabled() const
Returns true if incremental processing is enabled.
void EnterToken(const Token &Tok, bool IsReinject)
Enters a token in the token stream to be lexed next.
void IgnorePragmas()
Install empty handlers for all pragmas (making them ignored).
Definition Pragma.cpp:2219
void HandleCXXImportDirective(Token Import)
HandleCXXImportDirective - Handle the C++ modules import directives.
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI)
PPCallbacks * getPPCallbacks() const
bool isInNamedInterfaceUnit() const
If we are preprocessing a named interface unit.
ArrayRef< PPConditionalInfo > getPreambleConditionalStack() const
void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)
Record the location of the unterminated #pragma clang assume_nonnull begin in the preamble.
SourceRange DiscardUntilEndOfDirective(SmallVectorImpl< Token > *DiscardedToks=nullptr)
Read and discard all tokens remaining on the current line until the tok::eod token is found.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
ArrayRef< BuildingSubmoduleInfo > getBuildingSubmodules() const
Get the list of submodules that we're currently building.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const
SourceRange getCodeCompletionTokenRange() const
SourceLocation getModuleImportLoc(Module *M) const
void overrideMaxTokens(unsigned Value, SourceLocation Loc)
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void HandleSkippedDirectiveWhileUsingPCH(Token &Result, SourceLocation HashLoc)
Process directives while skipping until the through header or pragma hdrstop is found.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
void enableIncrementalProcessing(bool value=true)
Enables the incremental processing.
bool LexAfterModuleImport(Token &Result)
Lex a token following the 'import' contextual keyword.
void TypoCorrectToken(const Token &Tok)
Update the current token to represent the provided identifier, in order to cache an action performed ...
bool GetSuppressIncludeNotFoundError()
bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M)
Determine whether II is defined as a macro within the module M, if that is a module that we've alread...
void setPragmaAssumeNonNullLoc(SourceLocation Loc)
Set the location of the currently-active #pragma clang assume_nonnull begin.
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void markMacroAsUsed(MacroInfo *MI)
A macro is used, update information about macros that need unused warnings.
LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const
void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma)
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, SourceLocation AnnotationLoc)
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, SourceLocation AnnotationLoc)
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
MacroDirective * getLocalMacroDirectiveHistory(const IdentifierInfo *II) const
Given an identifier, return the latest non-imported macro directive for that identifier.
void setPreprocessedOutput(bool IsPreprocessedOutput)
Sets whether the preprocessor is responsible for producing output or if it is producing tokens to be ...
void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc)
bool IsPreviousCachedToken(const Token &Tok) const
Whether Tok is the most recent token (CachedLexPos - 1) in CachedTokens.
bool SawDateOrTime() const
Returns true if the preprocessor has seen a use of __DATE__ or __TIME__ in the file so far.
const TargetInfo * getAuxTargetInfo() const
void CommitBacktrackedTokens()
Disable the last EnableBacktrackAtThisPos call.
Definition PPCaching.cpp:56
friend class MacroArgs
void DumpMacro(const MacroInfo &MI) const
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
void setDiagnostics(DiagnosticsEngine &D)
IncludedFilesSet & getIncludedFiles()
Get the set of included files.
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void AnnotateCachedTokens(const Token &Tok)
We notify the Preprocessor that if it is caching tokens (because backtrack is enabled) it should repl...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
StringRef getNamedModuleName() const
Get the named module name we're preprocessing.
bool mightHavePendingAnnotationTokens()
Determine whether it's possible for a future call to Lex to produce an annotation token created by a ...
void Lex(Token &Result)
Lex the next token for this preprocessor.
void EnterTokenStream(ArrayRef< Token > Toks, bool DisableMacroExpansion, bool IsReinject)
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
bool isParsingIfOrElifDirective() const
True if we are currently preprocessing a #if or #elif directive.
unsigned getNumDirectives() const
Retrieve the number of Directives that have been processed by the Preprocessor.
bool isInImplementationUnit() const
If we are implementing an implementation module unit.
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
ModuleLoader & getModuleLoader() const
Retrieve the module loader associated with this preprocessor.
void LexNonComment(Token &Result)
Lex a token.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
PreprocessorLexer * getCurrentLexer() const
Return the current lexer being lexed from.
bool LexOnOffSwitch(tok::OnOffSwitch &Result)
Lex an on-off-switch (C99 6.10.6p2) and verify that it is followed by EOD.
Definition Pragma.cpp:972
StringRef getCodeCompletionFilter()
Get the code completion token for filtering purposes.
void setMainFileDir(DirectoryEntryRef Dir)
Set the directory in which the main file should be considered to have been found, if it is not a real...
const IdentifierTable & getIdentifierTable() const
void HandlePragmaDependency(Token &DependencyTok)
HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
Definition Pragma.cpp:513
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
friend class ASTReader
void Backtrack()
Make Preprocessor re-lex the tokens that were lexed since EnableBacktrackAtThisPos() was previously c...
Definition PPCaching.cpp:66
bool isCurrentLexer(const PreprocessorLexer *L) const
Return true if we are lexing directly from the specified lexer.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
IdentifierLoc getPragmaARCCFCodeAuditedInfo() const
The location of the currently-active #pragma clang arc_cf_code_audited begin.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
void setReplayablePreambleConditionalStack(ArrayRef< PPConditionalInfo > s, std::optional< PreambleSkipInfo > SkipInfo)
const Token & LookAhead(unsigned N)
Peeks ahead N tokens and returns that token without consuming any tokens.
friend class VAOptDefinitionContext
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
uint8_t getSpellingOfSingleCharacterNumericConstant(const Token &Tok, bool *Invalid=nullptr) const
Given a Token Tok that is a numeric constant with length 1, return the value of constant as an unsign...
macro_iterator macro_end(bool IncludeExternalMacros=true) const
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, bool *ShadowFlag=nullptr)
std::optional< PreambleSkipInfo > getPreambleSkipInfo() const
void setPreprocessToken(bool Preprocess)
bool isPreprocessedModuleFile() const
Whether the main file is preprocessed module file.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
void HandlePragmaOnce(Token &OnceTok)
HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'.
Definition Pragma.cpp:414
SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros=false, SmallVectorImpl< Token > *ExtraToks=nullptr)
Ensure that the next token is a tok::eod token.
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool isMacroDefined(StringRef Id)
static bool checkModuleIsAvailable(const LangOptions &LangOpts, const TargetInfo &TargetInfo, const Module &M, DiagnosticsEngine &Diags)
Check that the given module is available, producing a diagnostic if not.
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
void SetMacroExpansionOnlyInDirectives()
Disables macro expansion everywhere except for preprocessor directives.
bool hasRecordedPreamble() const
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Char) const
Given a location that specifies the start of a token, return a new location that specifies a characte...
SourceLocation getPragmaAssumeNonNullLoc() const
The location of the currently-active #pragma clang assume_nonnull begin.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
bool isUnannotatedBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of unannotated tokens is on.
void EnterTokenStream(std::unique_ptr< Token[]> Toks, unsigned NumToks, bool DisableMacroExpansion, bool IsReinject)
void RevertCachedTokens(unsigned N)
When backtracking is enabled and tokens are cached, this allows to revert a specific number of tokens...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
std::optional< std::uint64_t > getStdLibCxxVersion()
void RemovePragmaHandler(PragmaHandler *Handler)
unsigned getTokenCount() const
Get the number of tokens processed so far.
OptionalFileEntryRef LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile)
Given a "Filename" or <Filename> reference, look up the indicated embed resource.
unsigned getMaxTokens() const
Get the max number of tokens before issuing a -Wmax-tokens warning.
SourceLocation getMaxTokensOverrideLoc() const
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
bool hadModuleLoaderFatalFailure() const
static void processPathToFileName(SmallVectorImpl< char > &FileName, const PresumedLoc &PLoc, const LangOptions &LangOpts, const TargetInfo &TI)
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
bool HandleModuleContextualKeyword(Token &Result)
Callback invoked when the lexer sees one of export, import or module token at the start of a line.
const TargetInfo & getTargetInfo() const
FileManager & getFileManager() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
std::string getSpelling(const Token &Tok, bool *Invalid=nullptr) const
Return the 'spelling' of the Tok token.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
friend class VariadicMacroScopeGuard
Module * getCurrentLexerSubmodule() const
Return the submodule owning the file being lexed.
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
MacroInfo * AllocateMacroInfo(SourceLocation L)
Allocate a new MacroInfo object with the provided SourceLocation.
void setDependencyDirectivesGetter(DependencyDirectivesGetter &Get)
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
StringRef getImmediateMacroName(SourceLocation Loc)
Retrieve the name of the immediate macro expansion.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a pragma hdrstop.
bool alreadyIncluded(FileEntryRef File) const
Return true if this header has already been included.
llvm::iterator_range< macro_iterator > macros(bool IncludeExternalMacros=true) const
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
void LexUnexpandedNonComment(Token &Result)
Like LexNonComment, but this disables macro expansion of identifier tokens.
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Add the specified pragma handler to this preprocessor.
Definition Pragma.cpp:919
llvm::BumpPtrAllocator & getPreprocessorAllocator()
ModuleMacro * getModuleMacro(Module *Mod, const IdentifierInfo *II)
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
bool GetIncludeFilenameSpelling(SourceLocation Loc, StringRef &Buffer)
Turn the specified lexer token into a fully checked and spelled filename, e.g.
PreprocessorLexer * getCurrentFileLexer() const
Return the current file lexer being lexed from.
HeaderSearch & getHeaderSearchInfo() const
void emitMacroExpansionWarnings(const Token &Identifier, bool IsIfnDef=false) const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
void HandlePragmaPopMacro(Token &Tok)
Handle #pragma pop_macro.
Definition Pragma.cpp:657
void ReplaceLastTokenWithAnnotation(const Token &Tok)
Replace the last token with an annotation token.
ExternalPreprocessorSource * getExternalSource() const
bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion)
Module * LeaveSubmodule(bool ForPragma)
const std::string & getPredefines() const
Get the predefines for this preprocessor.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
CodeCompletionHandler * getCodeCompletionHandler() const
Retrieve the current code-completion handler.
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, void *AnnotationVal)
Enter an annotation token into the token stream.
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream.
MacroInfo * getMacroInfo(const IdentifierInfo *II)
void setPredefines(std::string P)
Set the predefines for this Preprocessor.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, SmallVectorImpl< Token > &Suffix, SmallVectorImpl< IdentifierLoc > &Path, bool AllowMacroExpansion, bool IsPartition)
Builtin::Context & getBuiltinInfo()
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine)
Instruct the preprocessor to skip part of the main source file.
const PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
void ReplacePreviousCachedToken(ArrayRef< Token > NewToks)
Replace token in CachedLexPos - 1 in CachedTokens by the tokens in NewToks.
LangOptions::FPEvalMethodKind getTUFPEvalMethod() const
const LangOptions & getLangOpts() const
bool isImportingCXXNamedModules() const
If we're importing a standard C++20 named module.
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void SetSuppressIncludeNotFoundError(bool Suppress)
static void processPathForFileMacro(SmallVectorImpl< char > &Path, const LangOptions &LangOpts, const TargetInfo &TI)
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
bool isInNamedModule() const
If we are preprocessing a named module.
void EnableBacktrackAtThisPos(bool Unannotated=false)
From the point that this method is called, and until CommitBacktrackedTokens() or Backtrack() is call...
Definition PPCaching.cpp:34
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
void PoisonSEHIdentifiers(bool Poison=true)
bool isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the first token of the macro expansion.
size_t getTotalMemory() const
void setCounterValue(uint32_t V)
void setExternalSource(ExternalPreprocessorSource *Source)
void clearCodeCompletionHandler()
Clear out the code completion handler.
void AddPragmaHandler(PragmaHandler *Handler)
OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, SourceLocation MLoc)
We want to produce a diagnostic at location IncLoc concerning an unreachable effect at location MLoc ...
bool isCodeCompletionReached() const
Returns true if code-completion is enabled and we have hit the code-completion point.
IdentifierInfo * ParsePragmaPushOrPopMacro(Token &Tok)
ParsePragmaPushOrPopMacro - Handle parsing of pragma push_macro/pop_macro.
Definition Pragma.cpp:569
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool getRawToken(SourceLocation Loc, Token &Result, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
bool isNextPPTokenOneOf(Ts... Ks) const
isNextPPTokenOneOf - Check whether the next pp-token is one of the specified token kinds.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
void setPragmasEnabled(bool Enabled)
DefMacroDirective * appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, SourceLocation Loc)
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments)
Control whether the preprocessor retains comments in output.
bool isAtEndOfMacroExpansion(SourceLocation loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getMainFileFirstPPTokenLoc() const
Get the start location of the first pp-token in main file.
void HandlePragmaMark(Token &MarkTok)
Definition Pragma.cpp:429
void CollectPPImportSuffix(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Collect the tokens of a C++20 pp-import-suffix.
bool getPragmasEnabled() const
void HandlePragmaHdrstop(Token &Tok)
Definition Pragma.cpp:885
PreprocessingRecord * getPreprocessingRecord() const
Retrieve the preprocessing record, or NULL if there is no preprocessing record.
void setEmptylineHandler(EmptylineHandler *Handler)
Set empty line handler.
DiagnosticsEngine & getDiagnostics() const
void HandleCXXModuleDirective(Token Module)
HandleCXXModuleDirective - Handle C++ module declaration directives.
SourceLocation getLastCachedTokenLocation() const
Get the location of the last cached token, suitable for setting the end location of an annotation tok...
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
llvm::DenseSet< const FileEntry * > IncludedFilesSet
unsigned getSpelling(const Token &Tok, const char *&Buffer, bool *Invalid=nullptr) const
Get the spelling of a token into a preallocated buffer, instead of as an std::string.
SelectorTable & getSelectorTable()
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler)
Remove the specific pragma handler from this preprocessor.
Definition Pragma.cpp:950
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset=0)
Computes the source location just past the end of the token at this source location.
const llvm::SmallSetVector< Module *, 2 > & getAffectingClangModules() const
Get the set of top-level clang modules that affected preprocessing, but were not imported.
std::optional< LexEmbedParametersResult > LexEmbedParameters(Token &Current, bool ForHasEmbed)
Lex the parameters for an embed directive, returns nullopt on error.
const IncludedFilesSet & getIncludedFiles() const
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void HandlePragmaIncludeAlias(Token &Tok)
Definition Pragma.cpp:692
Module * getModuleForLocation(SourceLocation Loc, bool AllowTextual)
Find the module that owns the source or header file that Loc points to.
uint32_t getCounterValue() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok, SmallVectorImpl< IdentifierLoc > &Path, SmallVectorImpl< Token > &DirToks, bool AllowMacroExpansion, bool IsPartition)
SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const
Get the location of the recorded unterminated #pragma clang assume_nonnull begin in the preamble,...
void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, MacroArgs *Args)
Add a Macro to the top of the include stack and start lexing tokens from it instead of the current bu...
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the include of the through header or until after a pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
void markMainFileAsPreprocessedModuleFile()
Mark the main file as a preprocessed module file, then the 'module' and 'import' directive recognitio...
bool LexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Lex a string literal, which may be the concatenation of multiple string literals and may even come fr...
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD)
Add a directive to the macro directive history for this identifier.
Represents an unpacked "presumed" location which can be presented to the user.
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
This table allows us to fully hide how we implement multi-keyword caching.
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Exposes information about the current target.
Definition TargetInfo.h:227
TokenValue(IdentifierInfo *II)
TokenValue(tok::TokenKind Kind)
bool operator==(const Token &Tok) const
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
Public enums and private classes that are part of the SourceManager implementation.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition TokenKinds.h:25
OnOffSwitch
Defines the possible values of an on-off-switch (C99 6.10.6p2).
Definition TokenKinds.h:56
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition TokenKinds.h:101
PPKeywordKind
Provides a namespace for preprocessor keywords which start with a '#' at the beginning of the line.
Definition TokenKinds.h:33
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
@ Conditional
A conditional (?:) operator.
Definition Sema.h:669
detail::SearchDirIteratorImpl< true > ConstSearchDirIterator
@ Create
'create' clause, allowed on Compute and Combined constructs, plus 'data', 'enter data',...
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
MacroUse
Context in which macro name is used.
@ Module
Module linkage, which indicates that the entity can be referred to from other translation units withi...
Definition Linkage.h:54
@ Result
The result type of a method or function.
Definition TypeBase.h:905
TranslationUnitKind
Describes the kind of translation unit being processed.
@ TU_Complete
The translation unit is a complete translation unit.
CustomizableOptional< DirectoryEntryRef > OptionalDirectoryEntryRef
U cast(CodeGen::Address addr)
Definition Address.h:327
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
Helper class to shuttle information about embed directives from the preprocessor to the parser throug...
Describes how and where the pragma was introduced.
Definition Pragma.h:51
PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, bool FoundNonSkipPortion, bool FoundElse, SourceLocation ElseLoc)