clang 20.0.0git
Preprocessor.cpp
Go to the documentation of this file.
1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
31#include "clang/Basic/LLVM.h"
33#include "clang/Basic/Module.h"
41#include "clang/Lex/Lexer.h"
43#include "clang/Lex/MacroArgs.h"
44#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/Pragma.h"
51#include "clang/Lex/Token.h"
53#include "llvm/ADT/APInt.h"
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/DenseMap.h"
56#include "llvm/ADT/STLExtras.h"
57#include "llvm/ADT/SmallVector.h"
58#include "llvm/ADT/StringRef.h"
59#include "llvm/Support/Capacity.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/MemoryBuffer.h"
62#include "llvm/Support/raw_ostream.h"
63#include <algorithm>
64#include <cassert>
65#include <memory>
66#include <optional>
67#include <string>
68#include <utility>
69#include <vector>
70
71using namespace clang;
72
73/// Minimum distance between two check points, in tokens.
74static constexpr unsigned CheckPointStepSize = 1024;
75
76LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
77
79
80Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
81 DiagnosticsEngine &diags, const LangOptions &opts,
82 SourceManager &SM, HeaderSearch &Headers,
83 ModuleLoader &TheModuleLoader,
84 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
86 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
87 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
88 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
89 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
90 // As the language options may have not been loaded yet (when
91 // deserializing an ASTUnit), adding keywords to the identifier table is
92 // deferred to Preprocessor::Initialize().
93 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
94 TUKind(TUKind), SkipMainFilePreamble(0, true),
95 CurSubmoduleState(&NullSubmoduleState) {
96 OwnsHeaderSearch = OwnsHeaders;
97
98 // Default to discarding comments.
99 KeepComments = false;
100 KeepMacroComments = false;
101 SuppressIncludeNotFoundError = false;
102
103 // Macro expansion is enabled.
104 DisableMacroExpansion = false;
105 MacroExpansionInDirectivesOverride = false;
106 InMacroArgs = false;
107 ArgMacro = nullptr;
108 InMacroArgPreExpansion = false;
109 NumCachedTokenLexers = 0;
110 PragmasEnabled = true;
111 ParsingIfOrElifDirective = false;
112 PreprocessedOutput = false;
113
114 // We haven't read anything from the external source.
115 ReadMacrosFromExternalSource = false;
116
117 BuiltinInfo = std::make_unique<Builtin::Context>();
118
119 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
120 // a macro. They get unpoisoned where it is allowed.
121 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
122 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
123 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
124 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
125
126 // Initialize the pragma handlers.
127 RegisterBuiltinPragmas();
128
129 // Initialize builtin macros like __LINE__ and friends.
130 RegisterBuiltinMacros();
131
132 if(LangOpts.Borland) {
133 Ident__exception_info = getIdentifierInfo("_exception_info");
134 Ident___exception_info = getIdentifierInfo("__exception_info");
135 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
136 Ident__exception_code = getIdentifierInfo("_exception_code");
137 Ident___exception_code = getIdentifierInfo("__exception_code");
138 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
139 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
140 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
141 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
142 } else {
143 Ident__exception_info = Ident__exception_code = nullptr;
144 Ident__abnormal_termination = Ident___exception_info = nullptr;
145 Ident___exception_code = Ident___abnormal_termination = nullptr;
146 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
147 Ident_AbnormalTermination = nullptr;
148 }
149
150 // Default incremental processing to -fincremental-extensions, clients can
151 // override with `enableIncrementalProcessing` if desired.
152 IncrementalProcessing = LangOpts.IncrementalExtensions;
153
154 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
156 SkippingUntilPragmaHdrStop = true;
157
158 // If using a PCH with a through header, start skipping tokens.
159 if (!this->PPOpts->PCHThroughHeader.empty() &&
160 !this->PPOpts->ImplicitPCHInclude.empty())
161 SkippingUntilPCHThroughHeader = true;
162
163 if (this->PPOpts->GeneratePreamble)
164 PreambleConditionalStack.startRecording();
165
166 MaxTokens = LangOpts.MaxTokens;
167}
168
170 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
171
172 IncludeMacroStack.clear();
173
174 // Free any cached macro expanders.
175 // This populates MacroArgCache, so all TokenLexers need to be destroyed
176 // before the code below that frees up the MacroArgCache list.
177 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
178 CurTokenLexer.reset();
179
180 // Free any cached MacroArgs.
181 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
182 ArgList = ArgList->deallocate();
183
184 // Delete the header search info, if we own it.
185 if (OwnsHeaderSearch)
186 delete &HeaderInfo;
187}
188
190 const TargetInfo *AuxTarget) {
191 assert((!this->Target || this->Target == &Target) &&
192 "Invalid override of target information");
193 this->Target = &Target;
194
195 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
196 "Invalid override of aux target information.");
197 this->AuxTarget = AuxTarget;
198
199 // Initialize information about built-ins.
200 BuiltinInfo->InitializeTarget(Target, AuxTarget);
201 HeaderInfo.setTarget(Target);
202
203 // Populate the identifier table with info about keywords for the current language.
204 Identifiers.AddKeywords(LangOpts);
205
206 // Initialize the __FLT_EVAL_METHOD__ macro to the TargetInfo.
207 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
208
209 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
210 // Use setting from TargetInfo.
211 setCurrentFPEvalMethod(SourceLocation(), Target.getFPEvalMethod());
212 else
213 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
214 setCurrentFPEvalMethod(SourceLocation(), getLangOpts().getFPEvalMethod());
215}
216
218 NumEnteredSourceFiles = 0;
219
220 // Reset pragmas
221 PragmaHandlersBackup = std::move(PragmaHandlers);
222 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
223 RegisterBuiltinPragmas();
224
225 // Reset PredefinesFileID
226 PredefinesFileID = FileID();
227}
228
230 NumEnteredSourceFiles = 1;
231
232 PragmaHandlers = std::move(PragmaHandlersBackup);
233}
234
235void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
236 llvm::errs() << tok::getTokenName(Tok.getKind());
237
238 if (!Tok.isAnnotation())
239 llvm::errs() << " '" << getSpelling(Tok) << "'";
240
241 if (!DumpFlags) return;
242
243 llvm::errs() << "\t";
244 if (Tok.isAtStartOfLine())
245 llvm::errs() << " [StartOfLine]";
246 if (Tok.hasLeadingSpace())
247 llvm::errs() << " [LeadingSpace]";
248 if (Tok.isExpandDisabled())
249 llvm::errs() << " [ExpandDisabled]";
250 if (Tok.needsCleaning()) {
251 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
252 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
253 << "']";
254 }
255
256 llvm::errs() << "\tLoc=<";
258 llvm::errs() << ">";
259}
260
262 Loc.print(llvm::errs(), SourceMgr);
263}
264
265void Preprocessor::DumpMacro(const MacroInfo &MI) const {
266 llvm::errs() << "MACRO: ";
267 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
269 llvm::errs() << " ";
270 }
271 llvm::errs() << "\n";
272}
273
275 llvm::errs() << "\n*** Preprocessor Stats:\n";
276 llvm::errs() << NumDirectives << " directives found:\n";
277 llvm::errs() << " " << NumDefined << " #define.\n";
278 llvm::errs() << " " << NumUndefined << " #undef.\n";
279 llvm::errs() << " #include/#include_next/#import:\n";
280 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
281 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
282 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
283 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
284 llvm::errs() << " " << NumEndif << " #endif.\n";
285 llvm::errs() << " " << NumPragma << " #pragma.\n";
286 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
287
288 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
289 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
290 << NumFastMacroExpanded << " on the fast path.\n";
291 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
292 << " token paste (##) operations performed, "
293 << NumFastTokenPaste << " on the fast path.\n";
294
295 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
296
297 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
298 llvm::errs() << "\n Macro Expanded Tokens: "
299 << llvm::capacity_in_bytes(MacroExpandedTokens);
300 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
301 // FIXME: List information for all submodules.
302 llvm::errs() << "\n Macros: "
303 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
304 llvm::errs() << "\n #pragma push_macro Info: "
305 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
306 llvm::errs() << "\n Poison Reasons: "
307 << llvm::capacity_in_bytes(PoisonReasons);
308 llvm::errs() << "\n Comment Handlers: "
309 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
310}
311
313Preprocessor::macro_begin(bool IncludeExternalMacros) const {
314 if (IncludeExternalMacros && ExternalSource &&
315 !ReadMacrosFromExternalSource) {
316 ReadMacrosFromExternalSource = true;
317 ExternalSource->ReadDefinedMacros();
318 }
319
320 // Make sure we cover all macros in visible modules.
321 for (const ModuleMacro &Macro : ModuleMacros)
322 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
323
324 return CurSubmoduleState->Macros.begin();
325}
326
328 return BP.getTotalMemory()
329 + llvm::capacity_in_bytes(MacroExpandedTokens)
330 + Predefines.capacity() /* Predefines buffer. */
331 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
332 // and ModuleMacros.
333 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
334 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
335 + llvm::capacity_in_bytes(PoisonReasons)
336 + llvm::capacity_in_bytes(CommentHandlers);
337}
338
340Preprocessor::macro_end(bool IncludeExternalMacros) const {
341 if (IncludeExternalMacros && ExternalSource &&
342 !ReadMacrosFromExternalSource) {
343 ReadMacrosFromExternalSource = true;
344 ExternalSource->ReadDefinedMacros();
345 }
346
347 return CurSubmoduleState->Macros.end();
348}
349
350/// Compares macro tokens with a specified token value sequence.
351static bool MacroDefinitionEquals(const MacroInfo *MI,
352 ArrayRef<TokenValue> Tokens) {
353 return Tokens.size() == MI->getNumTokens() &&
354 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
355}
356
359 ArrayRef<TokenValue> Tokens) const {
360 SourceLocation BestLocation;
361 StringRef BestSpelling;
363 I != E; ++I) {
365 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
366 if (!Def || !Def.getMacroInfo())
367 continue;
368 if (!Def.getMacroInfo()->isObjectLike())
369 continue;
370 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
371 continue;
372 SourceLocation Location = Def.getLocation();
373 // Choose the macro defined latest.
374 if (BestLocation.isInvalid() ||
375 (Location.isValid() &&
376 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
377 BestLocation = Location;
378 BestSpelling = I->first->getName();
379 }
380 }
381 return BestSpelling;
382}
383
385 if (CurLexer)
386 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
387 ? CLK_DependencyDirectivesLexer
388 : CLK_Lexer;
389 else if (CurTokenLexer)
390 CurLexerCallback = CLK_TokenLexer;
391 else
392 CurLexerCallback = CLK_CachingLexer;
393}
394
396 unsigned CompleteLine,
397 unsigned CompleteColumn) {
398 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
399 assert(!CodeCompletionFile && "Already set");
400
401 // Load the actual file's contents.
402 std::optional<llvm::MemoryBufferRef> Buffer =
404 if (!Buffer)
405 return true;
406
407 // Find the byte position of the truncation point.
408 const char *Position = Buffer->getBufferStart();
409 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
410 for (; *Position; ++Position) {
411 if (*Position != '\r' && *Position != '\n')
412 continue;
413
414 // Eat \r\n or \n\r as a single line.
415 if ((Position[1] == '\r' || Position[1] == '\n') &&
416 Position[0] != Position[1])
417 ++Position;
418 ++Position;
419 break;
420 }
421 }
422
423 Position += CompleteColumn - 1;
424
425 // If pointing inside the preamble, adjust the position at the beginning of
426 // the file after the preamble.
427 if (SkipMainFilePreamble.first &&
428 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
429 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
430 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
431 }
432
433 if (Position > Buffer->getBufferEnd())
434 Position = Buffer->getBufferEnd();
435
436 CodeCompletionFile = File;
437 CodeCompletionOffset = Position - Buffer->getBufferStart();
438
439 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
440 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
441 char *NewBuf = NewBuffer->getBufferStart();
442 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
443 *NewPos = '\0';
444 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
445 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
446
447 return false;
448}
449
451 bool IsAngled) {
453 if (CodeComplete)
454 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
455}
456
459 if (CodeComplete)
460 CodeComplete->CodeCompleteNaturalLanguage();
461}
462
463/// getSpelling - This method is used to get the spelling of a token into a
464/// SmallVector. Note that the returned StringRef may not point to the
465/// supplied buffer if a copy can be avoided.
466StringRef Preprocessor::getSpelling(const Token &Tok,
467 SmallVectorImpl<char> &Buffer,
468 bool *Invalid) const {
469 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
470 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
471 // Try the fast path.
472 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
473 return II->getName();
474 }
475
476 // Resize the buffer if we need to copy into it.
477 if (Tok.needsCleaning())
478 Buffer.resize(Tok.getLength());
479
480 const char *Ptr = Buffer.data();
481 unsigned Len = getSpelling(Tok, Ptr, Invalid);
482 return StringRef(Ptr, Len);
483}
484
485/// CreateString - Plop the specified string into a scratch buffer and return a
486/// location for it. If specified, the source location provides a source
487/// location for the token.
488void Preprocessor::CreateString(StringRef Str, Token &Tok,
489 SourceLocation ExpansionLocStart,
490 SourceLocation ExpansionLocEnd) {
491 Tok.setLength(Str.size());
492
493 const char *DestPtr;
494 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
495
496 if (ExpansionLocStart.isValid())
497 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
498 ExpansionLocEnd, Str.size());
499 Tok.setLocation(Loc);
500
501 // If this is a raw identifier or a literal token, set the pointer data.
502 if (Tok.is(tok::raw_identifier))
503 Tok.setRawIdentifierData(DestPtr);
504 else if (Tok.isLiteral())
505 Tok.setLiteralData(DestPtr);
506}
507
509 auto &SM = getSourceManager();
510 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
511 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
512 bool Invalid = false;
513 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
514 if (Invalid)
515 return SourceLocation();
516
517 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
518 const char *DestPtr;
519 SourceLocation Spelling =
520 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
521 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
522}
523
525 if (!getLangOpts().isCompilingModule())
526 return nullptr;
527
528 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
529}
530
532 if (!getLangOpts().isCompilingModuleImplementation())
533 return nullptr;
534
535 return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName);
536}
537
538//===----------------------------------------------------------------------===//
539// Preprocessor Initialization Methods
540//===----------------------------------------------------------------------===//
541
542/// EnterMainSourceFile - Enter the specified FileID as the main source file,
543/// which implicitly adds the builtin defines etc.
545 // We do not allow the preprocessor to reenter the main file. Doing so will
546 // cause FileID's to accumulate information from both runs (e.g. #line
547 // information) and predefined macros aren't guaranteed to be set properly.
548 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
549 FileID MainFileID = SourceMgr.getMainFileID();
550
551 // If MainFileID is loaded it means we loaded an AST file, no need to enter
552 // a main file.
553 if (!SourceMgr.isLoadedFileID(MainFileID)) {
554 // Enter the main file source buffer.
555 EnterSourceFile(MainFileID, nullptr, SourceLocation());
556
557 // If we've been asked to skip bytes in the main file (e.g., as part of a
558 // precompiled preamble), do so now.
559 if (SkipMainFilePreamble.first > 0)
560 CurLexer->SetByteOffset(SkipMainFilePreamble.first,
561 SkipMainFilePreamble.second);
562
563 // Tell the header info that the main file was entered. If the file is later
564 // #imported, it won't be re-entered.
565 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID))
566 markIncluded(*FE);
567 }
568
569 // Preprocess Predefines to populate the initial preprocessor state.
570 std::unique_ptr<llvm::MemoryBuffer> SB =
571 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
572 assert(SB && "Cannot create predefined source buffer");
573 FileID FID = SourceMgr.createFileID(std::move(SB));
574 assert(FID.isValid() && "Could not create FileID for predefines?");
575 setPredefinesFileID(FID);
576
577 // Start parsing the predefines.
578 EnterSourceFile(FID, nullptr, SourceLocation());
579
580 if (!PPOpts->PCHThroughHeader.empty()) {
581 // Lookup and save the FileID for the through header. If it isn't found
582 // in the search path, it's a fatal error.
584 SourceLocation(), PPOpts->PCHThroughHeader,
585 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
586 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
587 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
588 /*IsFrameworkFound=*/nullptr);
589 if (!File) {
590 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
591 << PPOpts->PCHThroughHeader;
592 return;
593 }
594 setPCHThroughHeaderFileID(
596 }
597
598 // Skip tokens from the Predefines and if needed the main file.
599 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
600 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
602}
603
604void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
605 assert(PCHThroughHeaderFileID.isInvalid() &&
606 "PCHThroughHeaderFileID already set!");
607 PCHThroughHeaderFileID = FID;
608}
609
611 assert(PCHThroughHeaderFileID.isValid() &&
612 "Invalid PCH through header FileID");
613 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
614}
615
617 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
618 PCHThroughHeaderFileID.isValid();
619}
620
622 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
623 PCHThroughHeaderFileID.isValid();
624}
625
627 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
628}
629
631 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
632}
633
634/// Skip tokens until after the #include of the through header or
635/// until after a #pragma hdrstop is seen. Tokens in the predefines file
636/// and the main file may be skipped. If the end of the predefines file
637/// is reached, skipping continues into the main file. If the end of the
638/// main file is reached, it's a fatal error.
640 bool ReachedMainFileEOF = false;
641 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
642 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
643 Token Tok;
644 while (true) {
645 bool InPredefines =
646 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
647 CurLexerCallback(*this, Tok);
648 if (Tok.is(tok::eof) && !InPredefines) {
649 ReachedMainFileEOF = true;
650 break;
651 }
652 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
653 break;
654 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
655 break;
656 }
657 if (ReachedMainFileEOF) {
658 if (UsingPCHThroughHeader)
659 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
660 << PPOpts->PCHThroughHeader << 1;
661 else if (!PPOpts->PCHWithHdrStopCreate)
662 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
663 }
664}
665
666void Preprocessor::replayPreambleConditionalStack() {
667 // Restore the conditional stack from the preamble, if there is one.
668 if (PreambleConditionalStack.isReplaying()) {
669 assert(CurPPLexer &&
670 "CurPPLexer is null when calling replayPreambleConditionalStack.");
671 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
672 PreambleConditionalStack.doneReplaying();
673 if (PreambleConditionalStack.reachedEOFWhileSkipping())
674 SkipExcludedConditionalBlock(
675 PreambleConditionalStack.SkipInfo->HashTokenLoc,
676 PreambleConditionalStack.SkipInfo->IfTokenLoc,
677 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
678 PreambleConditionalStack.SkipInfo->FoundElse,
679 PreambleConditionalStack.SkipInfo->ElseLoc);
680 }
681}
682
684 // Notify the client that we reached the end of the source file.
685 if (Callbacks)
686 Callbacks->EndOfMainFile();
687}
688
689//===----------------------------------------------------------------------===//
690// Lexer Event Handling.
691//===----------------------------------------------------------------------===//
692
693/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
694/// identifier information for the token and install it into the token,
695/// updating the token kind accordingly.
697 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
698
699 // Look up this token, see if it is a macro, or if it is a language keyword.
700 IdentifierInfo *II;
701 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
702 // No cleaning needed, just use the characters from the lexed buffer.
703 II = getIdentifierInfo(Identifier.getRawIdentifier());
704 } else {
705 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
706 SmallString<64> IdentifierBuffer;
707 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
708
709 if (Identifier.hasUCN()) {
710 SmallString<64> UCNIdentifierBuffer;
711 expandUCNs(UCNIdentifierBuffer, CleanedStr);
712 II = getIdentifierInfo(UCNIdentifierBuffer);
713 } else {
714 II = getIdentifierInfo(CleanedStr);
715 }
716 }
717
718 // Update the token info (identifier info and appropriate token kind).
719 // FIXME: the raw_identifier may contain leading whitespace which is removed
720 // from the cleaned identifier token. The SourceLocation should be updated to
721 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
722 // line continuation before 'B') is parsed as a single tok::raw_identifier and
723 // is cleaned to tok::identifier "B". After cleaning the token's length is
724 // still 3 and the SourceLocation refers to the location of the backslash.
725 Identifier.setIdentifierInfo(II);
726 Identifier.setKind(II->getTokenID());
727
728 return II;
729}
730
732 PoisonReasons[II] = DiagID;
733}
734
736 assert(Ident__exception_code && Ident__exception_info);
737 assert(Ident___exception_code && Ident___exception_info);
738 Ident__exception_code->setIsPoisoned(Poison);
739 Ident___exception_code->setIsPoisoned(Poison);
740 Ident_GetExceptionCode->setIsPoisoned(Poison);
741 Ident__exception_info->setIsPoisoned(Poison);
742 Ident___exception_info->setIsPoisoned(Poison);
743 Ident_GetExceptionInfo->setIsPoisoned(Poison);
744 Ident__abnormal_termination->setIsPoisoned(Poison);
745 Ident___abnormal_termination->setIsPoisoned(Poison);
746 Ident_AbnormalTermination->setIsPoisoned(Poison);
747}
748
750 assert(Identifier.getIdentifierInfo() &&
751 "Can't handle identifiers without identifier info!");
752 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
753 PoisonReasons.find(Identifier.getIdentifierInfo());
754 if(it == PoisonReasons.end())
755 Diag(Identifier, diag::err_pp_used_poisoned_id);
756 else
757 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
758}
759
760void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
761 assert(II.isOutOfDate() && "not out of date");
763}
764
765/// HandleIdentifier - This callback is invoked when the lexer reads an
766/// identifier. This callback looks up the identifier in the map and/or
767/// potentially macro expands it or turns it into a named token (like 'for').
768///
769/// Note that callers of this method are guarded by checking the
770/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
771/// IdentifierInfo methods that compute these properties will need to change to
772/// match.
774 assert(Identifier.getIdentifierInfo() &&
775 "Can't handle identifiers without identifier info!");
776
777 IdentifierInfo &II = *Identifier.getIdentifierInfo();
778
779 // If the information about this identifier is out of date, update it from
780 // the external source.
781 // We have to treat __VA_ARGS__ in a special way, since it gets
782 // serialized with isPoisoned = true, but our preprocessor may have
783 // unpoisoned it if we're defining a C99 macro.
784 if (II.isOutOfDate()) {
785 bool CurrentIsPoisoned = false;
786 const bool IsSpecialVariadicMacro =
787 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
788 if (IsSpecialVariadicMacro)
789 CurrentIsPoisoned = II.isPoisoned();
790
791 updateOutOfDateIdentifier(II);
792 Identifier.setKind(II.getTokenID());
793
794 if (IsSpecialVariadicMacro)
795 II.setIsPoisoned(CurrentIsPoisoned);
796 }
797
798 // If this identifier was poisoned, and if it was not produced from a macro
799 // expansion, emit an error.
800 if (II.isPoisoned() && CurPPLexer) {
802 }
803
804 // If this is a macro to be expanded, do it.
805 if (const MacroDefinition MD = getMacroDefinition(&II)) {
806 const auto *MI = MD.getMacroInfo();
807 assert(MI && "macro definition with no macro info?");
808 if (!DisableMacroExpansion) {
809 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
810 // C99 6.10.3p10: If the preprocessing token immediately after the
811 // macro name isn't a '(', this macro should not be expanded.
812 if (!MI->isFunctionLike() || isNextPPTokenLParen())
813 return HandleMacroExpandedIdentifier(Identifier, MD);
814 } else {
815 // C99 6.10.3.4p2 says that a disabled macro may never again be
816 // expanded, even if it's in a context where it could be expanded in the
817 // future.
819 if (MI->isObjectLike() || isNextPPTokenLParen())
820 Diag(Identifier, diag::pp_disabled_macro_expansion);
821 }
822 }
823 }
824
825 // If this identifier is a keyword in a newer Standard or proposed Standard,
826 // produce a warning. Don't warn if we're not considering macro expansion,
827 // since this identifier might be the name of a macro.
828 // FIXME: This warning is disabled in cases where it shouldn't be, like
829 // "#define constexpr constexpr", "int constexpr;"
830 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
831 Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts()))
832 << II.getName();
833 // Don't diagnose this keyword again in this translation unit.
834 II.setIsFutureCompatKeyword(false);
835 }
836
837 // If this is an extension token, diagnose its use.
838 // We avoid diagnosing tokens that originate from macro definitions.
839 // FIXME: This warning is disabled in cases where it shouldn't be,
840 // like "#define TY typeof", "TY(1) x".
841 if (II.isExtensionToken() && !DisableMacroExpansion)
842 Diag(Identifier, diag::ext_token_used);
843
844 // If this is the 'import' contextual keyword following an '@', note
845 // that the next token indicates a module name.
846 //
847 // Note that we do not treat 'import' as a contextual
848 // keyword when we're in a caching lexer, because caching lexers only get
849 // used in contexts where import declarations are disallowed.
850 //
851 // Likewise if this is the standard C++ import keyword.
852 if (((LastTokenWasAt && II.isModulesImport()) ||
853 Identifier.is(tok::kw_import)) &&
854 !InMacroArgs && !DisableMacroExpansion &&
855 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
856 CurLexerCallback != CLK_CachingLexer) {
857 ModuleImportLoc = Identifier.getLocation();
858 NamedModuleImportPath.clear();
859 IsAtImport = true;
860 ModuleImportExpectsIdentifier = true;
861 CurLexerCallback = CLK_LexAfterModuleImport;
862 }
863 return true;
864}
865
867 ++LexLevel;
868
869 // We loop here until a lex function returns a token; this avoids recursion.
870 while (!CurLexerCallback(*this, Result))
871 ;
872
873 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
874 return;
875
876 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
877 // Remember the identifier before code completion token.
878 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
879 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
880 // Set IdentifierInfo to null to avoid confusing code that handles both
881 // identifiers and completion tokens.
882 Result.setIdentifierInfo(nullptr);
883 }
884
885 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
886 // if this token is being produced as a result of phase 4 of translation.
887 // Update TrackGMFState to decide if we are currently in a Global Module
888 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
889 // depends on the prevailing StdCXXImportSeq state in two cases.
890 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
891 !Result.getFlag(Token::IsReinjected)) {
892 switch (Result.getKind()) {
893 case tok::l_paren: case tok::l_square: case tok::l_brace:
894 StdCXXImportSeqState.handleOpenBracket();
895 break;
896 case tok::r_paren: case tok::r_square:
897 StdCXXImportSeqState.handleCloseBracket();
898 break;
899 case tok::r_brace:
900 StdCXXImportSeqState.handleCloseBrace();
901 break;
902#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
903// For `#pragma ...` mimic ';'.
904#include "clang/Basic/TokenKinds.def"
905#undef PRAGMA_ANNOTATION
906 // This token is injected to represent the translation of '#include "a.h"'
907 // into "import a.h;". Mimic the notional ';'.
908 case tok::annot_module_include:
909 case tok::semi:
910 TrackGMFState.handleSemi();
911 StdCXXImportSeqState.handleSemi();
912 ModuleDeclState.handleSemi();
913 break;
914 case tok::header_name:
915 case tok::annot_header_unit:
916 StdCXXImportSeqState.handleHeaderName();
917 break;
918 case tok::kw_export:
919 TrackGMFState.handleExport();
920 StdCXXImportSeqState.handleExport();
921 ModuleDeclState.handleExport();
922 break;
923 case tok::colon:
924 ModuleDeclState.handleColon();
925 break;
926 case tok::period:
927 ModuleDeclState.handlePeriod();
928 break;
929 case tok::identifier:
930 // Check "import" and "module" when there is no open bracket. The two
931 // identifiers are not meaningful with open brackets.
932 if (StdCXXImportSeqState.atTopLevel()) {
933 if (Result.getIdentifierInfo()->isModulesImport()) {
934 TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
935 StdCXXImportSeqState.handleImport();
936 if (StdCXXImportSeqState.afterImportSeq()) {
937 ModuleImportLoc = Result.getLocation();
938 NamedModuleImportPath.clear();
939 IsAtImport = false;
940 ModuleImportExpectsIdentifier = true;
941 CurLexerCallback = CLK_LexAfterModuleImport;
942 }
943 break;
944 } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
945 TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
946 ModuleDeclState.handleModule();
947 break;
948 }
949 }
950 ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
951 if (ModuleDeclState.isModuleCandidate())
952 break;
953 [[fallthrough]];
954 default:
955 TrackGMFState.handleMisc();
956 StdCXXImportSeqState.handleMisc();
957 ModuleDeclState.handleMisc();
958 break;
959 }
960 }
961
962 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
963 CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr);
964 CheckPointCounter = 0;
965 }
966
967 LastTokenWasAt = Result.is(tok::at);
968 --LexLevel;
969
970 if ((LexLevel == 0 || PreprocessToken) &&
971 !Result.getFlag(Token::IsReinjected)) {
972 if (LexLevel == 0)
973 ++TokenCount;
974 if (OnToken)
975 OnToken(Result);
976 }
977}
978
979void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
980 while (1) {
981 Token Tok;
982 Lex(Tok);
983 if (Tok.isOneOf(tok::unknown, tok::eof, tok::eod,
984 tok::annot_repl_input_end))
985 break;
986 if (Tokens != nullptr)
987 Tokens->push_back(Tok);
988 }
989}
990
991/// Lex a header-name token (including one formed from header-name-tokens if
992/// \p AllowMacroExpansion is \c true).
993///
994/// \param FilenameTok Filled in with the next token. On success, this will
995/// be a header_name token. On failure, it will be whatever other token was
996/// found instead.
997/// \param AllowMacroExpansion If \c true, allow the header name to be formed
998/// by macro expansion (concatenating tokens as necessary if the first
999/// token is a '<').
1000/// \return \c true if we reached EOD or EOF while looking for a > token in
1001/// a concatenated header name and diagnosed it. \c false otherwise.
1002bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1003 // Lex using header-name tokenization rules if tokens are being lexed from
1004 // a file. Just grab a token normally if we're in a macro expansion.
1005 if (CurPPLexer)
1006 CurPPLexer->LexIncludeFilename(FilenameTok);
1007 else
1008 Lex(FilenameTok);
1009
1010 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1011 // case, glue the tokens together into a single header_name token.
1012 SmallString<128> FilenameBuffer;
1013 if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
// Save the flags of the opening '<' token; they are re-applied to the
// header_name token that is rebuilt once the closing '>' has been found.
1014 bool StartOfLine = FilenameTok.isAtStartOfLine();
1015 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1016 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1017
1018 SourceLocation Start = FilenameTok.getLocation();
1019 SourceLocation End;
1020 FilenameBuffer.push_back('<');
1021
1022 // Consume tokens until we find a '>'.
1023 // FIXME: A header-name could be formed starting or ending with an
1024 // alternative token. It's not clear whether that's ill-formed in all
1025 // cases.
1026 while (FilenameTok.isNot(tok::greater)) {
1027 Lex(FilenameTok);
// Running out of tokens before the '>' is an error: diagnose the missing
// '>' and point back at the unmatched '<'.
1028 if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1029 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1030 Diag(Start, diag::note_matching) << tok::less;
1031 return true;
1032 }
1033
// End tracks the location of the most recently lexed token, so after the
// loop it is the location of the closing '>'.
1034 End = FilenameTok.getLocation();
1035
1036 // FIXME: Provide code completion for #includes.
1037 if (FilenameTok.is(tok::code_completion)) {
1039 Lex(FilenameTok);
1040 continue;
1041 }
1042
1043 // Append the spelling of this token to the buffer. If there was a space
1044 // before it, add it now.
1045 if (FilenameTok.hasLeadingSpace())
1046 FilenameBuffer.push_back(' ');
1047
1048 // Get the spelling of the token, directly into FilenameBuffer if
1049 // possible.
1050 size_t PreAppendSize = FilenameBuffer.size();
1051 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1052
1053 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1054 unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1055
1056 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1057 if (BufPtr != &FilenameBuffer[PreAppendSize])
1058 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1059
1060 // Resize FilenameBuffer to the correct size.
1061 if (FilenameTok.getLength() != ActualLen)
1062 FilenameBuffer.resize(PreAppendSize + ActualLen);
1063 }
1064
// Rebuild FilenameTok from scratch as one header_name token whose text is
// the accumulated buffer, restoring the flags saved from the '<' token.
1065 FilenameTok.startToken();
1066 FilenameTok.setKind(tok::header_name);
1067 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1068 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1069 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1070 CreateString(FilenameBuffer, FilenameTok, Start, End);
1071 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1072 // Convert a string-literal token of the form " h-char-sequence "
1073 // (produced by macro expansion) into a header-name token.
1074 //
1075 // The rules for header-names don't quite match the rules for
1076 // string-literals, but all the places where they differ result in
1077 // undefined behavior, so we can and do treat them the same.
1078 //
1079 // A string-literal with a prefix or suffix is not translated into a
1080 // header-name. This could theoretically be observable via the C++20
1081 // context-sensitive header-name formation rules.
1082 StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
// Only a spelling that both begins and ends with '"' is re-tagged; any
// other string_literal token is left unchanged.
1083 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1084 FilenameTok.setKind(tok::header_name);
1085 }
1086
1087 return false;
1088}
1089
1090/// Collect the tokens of a C++20 pp-import-suffix.
1092 // FIXME: For error recovery, consider recognizing attribute syntax here
1093 // and terminating / diagnosing a missing semicolon if we find anything
1094 // else? (Can we leave that to the parser?)
1095 unsigned BracketDepth = 0;
1096 while (true) {
1097 Toks.emplace_back();
1098 Lex(Toks.back());
1099
1100 switch (Toks.back().getKind()) {
1101 case tok::l_paren: case tok::l_square: case tok::l_brace:
1102 ++BracketDepth;
1103 break;
1104
1105 case tok::r_paren: case tok::r_square: case tok::r_brace:
1106 if (BracketDepth == 0)
1107 return;
1108 --BracketDepth;
1109 break;
1110
1111 case tok::semi:
1112 if (BracketDepth == 0)
1113 return;
1114 break;
1115
1116 case tok::eof:
1117 return;
1118
1119 default:
1120 break;
1121 }
1122 }
1123}
1124
1125
1126/// Lex a token following the 'import' contextual keyword.
1127///
1128/// pp-import: [C++20]
1129/// import header-name pp-import-suffix[opt] ;
1130/// import header-name-tokens pp-import-suffix[opt] ;
1131/// [ObjC] @ import module-name ;
1132/// [Clang] import module-name ;
1133///
1134/// header-name-tokens:
1135/// string-literal
1136/// < [any sequence of preprocessing-tokens other than >] >
1137///
1138/// module-name:
1139/// module-name-qualifier[opt] identifier
1140///
1141/// module-name-qualifier
1142/// module-name-qualifier[opt] identifier .
1143///
1144/// We respond to a pp-import by importing macros from the named module.
1146 // Figure out what kind of lexer we actually have.
1148
1149 // Lex the next token. The header-name lexing rules are used at the start of
1150 // a pp-import.
1151 //
1152 // For now, we only support header-name imports in C++20 mode.
1153 // FIXME: Should we allow this in all language modes that support an import
1154 // declaration as an extension?
1155 if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1156 if (LexHeaderName(Result))
1157 return true;
1158
1159 if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
1160 std::string Name = ModuleDeclState.getPrimaryName().str();
1161 Name += ":";
1162 NamedModuleImportPath.push_back(
1163 {getIdentifierInfo(Name), Result.getLocation()});
1164 CurLexerCallback = CLK_LexAfterModuleImport;
1165 return true;
1166 }
1167 } else {
1168 Lex(Result);
1169 }
1170
1171 // Allocate a holding buffer for a sequence of tokens and introduce it into
1172 // the token stream.
1173 auto EnterTokens = [this](ArrayRef<Token> Toks) {
1174 auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1175 std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1176 EnterTokenStream(std::move(ToksCopy), Toks.size(),
1177 /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1178 };
1179
1180 bool ImportingHeader = Result.is(tok::header_name);
1181 // Check for a header-name.
1183 if (ImportingHeader) {
1184 // Enter the header-name token into the token stream; a Lex action cannot
1185 // both return a token and cache tokens (doing so would corrupt the token
1186 // cache if the call to Lex comes from CachingLex / PeekAhead).
1187 Suffix.push_back(Result);
1188
1189 // Consume the pp-import-suffix and expand any macros in it now. We'll add
1190 // it back into the token stream later.
1191 CollectPpImportSuffix(Suffix);
1192 if (Suffix.back().isNot(tok::semi)) {
1193 // This is not a pp-import after all.
1194 EnterTokens(Suffix);
1195 return false;
1196 }
1197
1198 // C++2a [cpp.module]p1:
1199 // The ';' preprocessing-token terminating a pp-import shall not have
1200 // been produced by macro replacement.
1201 SourceLocation SemiLoc = Suffix.back().getLocation();
1202 if (SemiLoc.isMacroID())
1203 Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1204
1205 // Reconstitute the import token.
1206 Token ImportTok;
1207 ImportTok.startToken();
1208 ImportTok.setKind(tok::kw_import);
1209 ImportTok.setLocation(ModuleImportLoc);
1210 ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
1211 ImportTok.setLength(6);
1212
1213 auto Action = HandleHeaderIncludeOrImport(
1214 /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
1215 switch (Action.Kind) {
1216 case ImportAction::None:
1217 break;
1218
1219 case ImportAction::ModuleBegin:
1220 // Let the parser know we're textually entering the module.
1221 Suffix.emplace_back();
1222 Suffix.back().startToken();
1223 Suffix.back().setKind(tok::annot_module_begin);
1224 Suffix.back().setLocation(SemiLoc);
1225 Suffix.back().setAnnotationEndLoc(SemiLoc);
1226 Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1227 [[fallthrough]];
1228
1229 case ImportAction::ModuleImport:
1230 case ImportAction::HeaderUnitImport:
1231 case ImportAction::SkippedModuleImport:
1232 // We chose to import (or textually enter) the file. Convert the
1233 // header-name token into a header unit annotation token.
1234 Suffix[0].setKind(tok::annot_header_unit);
1235 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1236 Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1237 // FIXME: Call the moduleImport callback?
1238 break;
1239 case ImportAction::Failure:
1240 assert(TheModuleLoader.HadFatalFailure &&
1241 "This should be an early exit only to a fatal error");
1242 Result.setKind(tok::eof);
1243 CurLexer->cutOffLexing();
1244 EnterTokens(Suffix);
1245 return true;
1246 }
1247
1248 EnterTokens(Suffix);
1249 return false;
1250 }
1251
1252 // The token sequence
1253 //
1254 // import identifier (. identifier)*
1255 //
1256 // indicates a module import directive. We already saw the 'import'
1257 // contextual keyword, so now we're looking for the identifiers.
1258 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1259 // We expected to see an identifier here, and we did; continue handling
1260 // identifiers.
1261 NamedModuleImportPath.push_back(
1262 std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
1263 ModuleImportExpectsIdentifier = false;
1264 CurLexerCallback = CLK_LexAfterModuleImport;
1265 return true;
1266 }
1267
1268 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1269 // see the next identifier. (We can also see a '[[' that begins an
1270 // attribute-specifier-seq here under the Standard C++ Modules.)
1271 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1272 ModuleImportExpectsIdentifier = true;
1273 CurLexerCallback = CLK_LexAfterModuleImport;
1274 return true;
1275 }
1276
1277 // If we didn't recognize a module name at all, this is not a (valid) import.
1278 if (NamedModuleImportPath.empty() || Result.is(tok::eof))
1279 return true;
1280
1281 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1282 // at the semicolon already.
1283 SourceLocation SemiLoc = Result.getLocation();
1284 if (Result.isNot(tok::semi)) {
1285 Suffix.push_back(Result);
1286 CollectPpImportSuffix(Suffix);
1287 if (Suffix.back().isNot(tok::semi)) {
1288 // This is not an import after all.
1289 EnterTokens(Suffix);
1290 return false;
1291 }
1292 SemiLoc = Suffix.back().getLocation();
1293 }
1294
1295 // Under the standard C++ Modules, the dot is just part of the module name,
1296 // and not a real hierarchy separator. Flatten such module names now.
1297 //
1298 // FIXME: Is this the right level to be performing this transformation?
1299 std::string FlatModuleName;
1300 if (getLangOpts().CPlusPlusModules) {
1301 for (auto &Piece : NamedModuleImportPath) {
1302 // If the FlatModuleName ends with colon, it implies it is a partition.
1303 if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
1304 FlatModuleName += ".";
1305 FlatModuleName += Piece.first->getName();
1306 }
1307 SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
1308 NamedModuleImportPath.clear();
1309 NamedModuleImportPath.push_back(
1310 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
1311 }
1312
1313 Module *Imported = nullptr;
1314 // We don't/shouldn't load the standard c++20 modules when preprocessing.
1315 if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
1316 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
1317 NamedModuleImportPath,
1319 /*IsInclusionDirective=*/false);
1320 if (Imported)
1321 makeModuleVisible(Imported, SemiLoc);
1322 }
1323
1324 if (Callbacks)
1325 Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
1326
1327 if (!Suffix.empty()) {
1328 EnterTokens(Suffix);
1329 return false;
1330 }
1331 return true;
1332}
1333
1335 CurSubmoduleState->VisibleModules.setVisible(
1336 M, Loc, [](Module *) {},
1337 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1338 // FIXME: Include the path in the diagnostic.
1339 // FIXME: Include the import location for the conflicting module.
1340 Diag(ModuleImportLoc, diag::warn_module_conflict)
1341 << Path[0]->getFullModuleName()
1342 << Conflict->getFullModuleName()
1343 << Message;
1344 });
1345
1346 // Add this module to the imports list of the currently-built submodule.
1347 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1348 BuildingSubmoduleStack.back().M->Imports.insert(M);
1349}
1350
1352 const char *DiagnosticTag,
1353 bool AllowMacroExpansion) {
1354 // We need at least one string literal.
1355 if (Result.isNot(tok::string_literal)) {
1356 Diag(Result, diag::err_expected_string_literal)
1357 << /*Source='in...'*/0 << DiagnosticTag;
1358 return false;
1359 }
1360
1361 // Lex string literal tokens, optionally with macro expansion.
1362 SmallVector<Token, 4> StrToks;
1363 do {
1364 StrToks.push_back(Result);
1365
1366 if (Result.hasUDSuffix())
1367 Diag(Result, diag::err_invalid_string_udl);
1368
1369 if (AllowMacroExpansion)
1370 Lex(Result);
1371 else
1373 } while (Result.is(tok::string_literal));
1374
1375 // Concatenate and parse the strings.
1376 StringLiteralParser Literal(StrToks, *this);
1377 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1378
1379 if (Literal.hadError)
1380 return false;
1381
1382 if (Literal.Pascal) {
1383 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1384 << /*Source='in...'*/0 << DiagnosticTag;
1385 return false;
1386 }
1387
1388 String = std::string(Literal.GetString());
1389 return true;
1390}
1391
1393 assert(Tok.is(tok::numeric_constant));
1394 SmallString<8> IntegerBuffer;
1395 bool NumberInvalid = false;
1396 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1397 if (NumberInvalid)
1398 return false;
1399 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1401 getDiagnostics());
1402 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1403 return false;
1404 llvm::APInt APVal(64, 0);
1405 if (Literal.GetIntegerValue(APVal))
1406 return false;
1407 Lex(Tok);
1408 Value = APVal.getLimitedValue();
1409 return true;
1410}
1411
1413 assert(Handler && "NULL comment handler");
1414 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1415 "Comment handler already registered");
1416 CommentHandlers.push_back(Handler);
1417}
1418
1420 std::vector<CommentHandler *>::iterator Pos =
1421 llvm::find(CommentHandlers, Handler);
1422 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1423 CommentHandlers.erase(Pos);
1424}
1425
1427 bool AnyPendingTokens = false;
1428 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1429 HEnd = CommentHandlers.end();
1430 H != HEnd; ++H) {
1431 if ((*H)->HandleComment(*this, Comment))
1432 AnyPendingTokens = true;
1433 }
1434 if (!AnyPendingTokens || getCommentRetentionState())
1435 return false;
1436 Lex(result);
1437 return true;
1438}
1439
1440void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1441 const MacroAnnotations &A =
1442 getMacroAnnotations(Identifier.getIdentifierInfo());
1443 assert(A.DeprecationInfo &&
1444 "Macro deprecation warning without recorded annotation!");
1445 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1446 if (Info.Message.empty())
1447 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1448 << Identifier.getIdentifierInfo() << 0;
1449 else
1450 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1451 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1452 Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
1453}
1454
1455void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1456 const MacroAnnotations &A =
1457 getMacroAnnotations(Identifier.getIdentifierInfo());
1458 assert(A.RestrictExpansionInfo &&
1459 "Macro restricted expansion warning without recorded annotation!");
1460 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1461 if (Info.Message.empty())
1462 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1463 << Identifier.getIdentifierInfo() << 0;
1464 else
1465 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1466 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1467 Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
1468}
1469
1470void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1471 unsigned DiagSelection) const {
1472 Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1473}
1474
1475void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1476 bool IsUndef) const {
1477 const MacroAnnotations &A =
1478 getMacroAnnotations(Identifier.getIdentifierInfo());
1479 assert(A.FinalAnnotationLoc &&
1480 "Final macro warning without recorded annotation!");
1481
1482 Diag(Identifier, diag::warn_pragma_final_macro)
1483 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1484 Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
1485}
1486
1488 const SourceLocation &Loc) const {
1489 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1490 // region map:
1491 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1492 const SourceLocation &Loc) -> bool {
1493 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1494 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1495 Map, [&SourceMgr,
1496 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1497 return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1498 });
1499
1500 if (FirstRegionEndingAfterLoc != Map.end()) {
1501 // To test if the start location of the found region precedes `Loc`:
1502 return SourceMgr.isBeforeInTranslationUnit(
1503 FirstRegionEndingAfterLoc->first, Loc);
1504 }
1505 // If we do not find a region whose end location passes `Loc`, we want to
1506 // check if the current region is still open:
1507 if (!Map.empty() && Map.back().first == Map.back().second)
1508 return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
1509 return false;
1510 };
1511
1512 // What the following does:
1513 //
1514 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1515 // Otherwise, `Loc` is from a loaded AST. We look up the
1516 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1517 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1518 // region w.r.t. the region map. If the region map is absent, it means there
1519 // is no opt-out pragma in that loaded AST.
1520 //
1521 // Opt-out pragmas in the local TU or a loaded AST is not visible to another
1522 // one of them. That means if you put the pragmas around a `#include
1523 // "module.h"`, where module.h is a module, it is not actually suppressing
1524 // warnings in module.h. This is fine because warnings in module.h will be
1525 // reported when module.h is compiled in isolation and nothing in module.h
1526 // will be analyzed ever again. So you will not see warnings from the file
1527 // that imports module.h anyway. And you can't even do the same thing for PCHs
1528 // because they can only be included from the command line.
1529
1530 if (SourceMgr.isLocalSourceLocation(Loc))
1531 return TestInMap(SafeBufferOptOutMap, Loc);
1532
1534 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
1535
1536 if (LoadedRegions)
1537 return TestInMap(*LoadedRegions, Loc);
1538 return false;
1539}
1540
1542 bool isEnter, const SourceLocation &Loc) {
// Records the start (isEnter == true) or the end (isEnter == false) of a
// region in SafeBufferOptOutMap at Loc and updates InSafeBufferOptOutRegion.
// The visible paths return true for a rejected action and false otherwise.
// NOTE(review): the first line of this signature and the statements that
// should precede the two early `return true;` lines (listing numbers 1544
// and 1562) are absent from this listing; presumably they test
// InSafeBufferOptOutRegion — confirm against the upstream file.
1543 if (isEnter) {
1545 return true; // invalid enter action
1546 InSafeBufferOptOutRegion = true;
1547 CurrentSafeBufferOptOutStart = Loc;
1548
1549 // To set the start location of a new region:
1550
1551 if (!SafeBufferOptOutMap.empty()) {
1552 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1553 assert(PrevRegion->first != PrevRegion->second &&
1554 "Shall not begin a safe buffer opt-out region before closing the "
1555 "previous one.");
1556 }
1557 // If the start location equals the end location, we call the region an
1558 // open region or an unclosed region (i.e., end location has not been set
1559 // yet).
1560 SafeBufferOptOutMap.emplace_back(Loc, Loc);
1561 } else {
1563 return true; // invalid exit action
1564 InSafeBufferOptOutRegion = false;
1565
1566 // To set the end location of the current open region:
1567
1568 assert(!SafeBufferOptOutMap.empty() &&
1569 "Misordered safe buffer opt-out regions");
1570 auto *CurrRegion = &SafeBufferOptOutMap.back();
1571 assert(CurrRegion->first == CurrRegion->second &&
1572 "Set end location to a closed safe buffer opt-out region");
1573 CurrRegion->second = Loc;
1574 }
1575 return false;
1576}
1577
1579 return InSafeBufferOptOutRegion;
1580}
1582 StartLoc = CurrentSafeBufferOptOutStart;
1583 return InSafeBufferOptOutRegion;
1584}
1585
1588 assert(!InSafeBufferOptOutRegion &&
1589 "Attempt to serialize safe buffer opt-out regions before file being "
1590 "completely preprocessed");
1591
1593
1594 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1595 SrcSeq.push_back(begin);
1596 SrcSeq.push_back(end);
1597 }
1598 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1599 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1600 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1601 // It means that for each loading pch/module m, it just needs to load m's own
1602 // `SafeBufferOptOutMap`.
1603 return SrcSeq;
1604}
1605
1607 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1608 if (SourceLocations.size() == 0)
1609 return false;
1610
1611 assert(SourceLocations.size() % 2 == 0 &&
1612 "ill-formed SourceLocation sequence");
1613
1614 auto It = SourceLocations.begin();
1615 SafeBufferOptOutRegionsTy &Regions =
1616 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
1617
1618 do {
1619 SourceLocation Begin = *It++;
1620 SourceLocation End = *It++;
1621
1622 Regions.emplace_back(Begin, End);
1623 } while (It != SourceLocations.end());
1624 return true;
1625}
1626
// Out-of-line definition of the ModuleLoader destructor, using the
// compiler-generated (defaulted) implementation.
1627ModuleLoader::~ModuleLoader() = default;
1628
1630
1632
1634
1636 if (Record)
1637 return;
1638
1640 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1641}
1642
1643const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1644 if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) {
1645 const SmallVector<const char *> &FileCheckPoints = It->second;
1646 const char *Last = nullptr;
1647 // FIXME: Do better than a linear search.
1648 for (const char *P : FileCheckPoints) {
1649 if (P > Start)
1650 break;
1651 Last = P;
1652 }
1653 return Last;
1654 }
1655
1656 return nullptr;
1657}
StringRef P
#define SM(sm)
Definition: Cuda.cpp:84
Defines enum values for all the target-independent builtin functions.
IndirectLocalPath & Path
Expr * E
Defines the clang::FileManager interface and associated types.
StringRef Identifier
Definition: Format.cpp:3040
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
llvm::MachO::Target Target
Definition: MachO.h:51
llvm::MachO::Record Record
Definition: MachO.h:31
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
Defines the PreprocessorLexer interface.
static bool MacroDefinitionEquals(const MacroInfo *MI, ArrayRef< TokenValue > Tokens)
Compares macro tokens with a specified token value sequence.
static constexpr unsigned CheckPointStepSize
Minimum distance between two check points, in tokens.
Defines the clang::Preprocessor interface.
SourceLocation Loc
Definition: SemaObjC.cpp:759
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
SourceLocation Begin
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
virtual void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Callback invoked when performing code completion inside the filename part of an #include directive.
virtual void CodeCompleteNaturalLanguage()
Callback invoked when performing code completion in a part of the file where we expect natural langua...
Abstract base class that describes a handler that will receive source ranges for each of the comments...
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
virtual void updateOutOfDateIdentifier(const IdentifierInfo &II)=0
Update an out-of-date identifier.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition: FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition: FileEntry.h:305
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isValid() const
bool isInvalid() const
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
Definition: HeaderSearch.h:237
Module * lookupModule(StringRef ModuleName, SourceLocation ImportLoc=SourceLocation(), bool AllowSearch=true, bool AllowExtraModuleMapSearch=false)
Lookup a module Search for a module with the given name.
void setTarget(const TargetInfo &Target)
Set the target information for the header search, if not already known.
Provides lookups to, and iteration over, IdentiferInfo objects.
One of these records is kept for each identifier that is lexed.
bool isModulesImport() const
Determine whether this is the contextual keyword import.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
void setIsPoisoned(bool Value=true)
setIsPoisoned - Mark this identifier as poisoned.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
void setIsFutureCompatKeyword(bool Val)
StringRef getName() const
Return the actual identifier string.
bool isFutureCompatKeyword() const
is/setIsFutureCompatKeyword - Initialize information about whether or not this language token is a ke...
bool isExtensionToken() const
get/setExtension - Initialize information about whether or not this language token is an extension.
void AddKeywords(const LangOptions &LangOpts)
Populate the identifier table with info about the language keywords for the language specified by Lan...
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Definition: LangOptions.h:313
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:499
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
A description of the current definition of a macro.
Definition: MacroInfo.h:590
SourceLocation getLocation() const
Definition: MacroInfo.h:488
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
const_tokens_iterator tokens_begin() const
Definition: MacroInfo.h:244
unsigned getNumTokens() const
Return the number of tokens that this macro expands to.
Definition: MacroInfo.h:235
const Token & getReplacementToken(unsigned Tok) const
Definition: MacroInfo.h:237
bool isObjectLike() const
Definition: MacroInfo.h:202
Abstract interface for a module loader.
Definition: ModuleLoader.h:82
virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path, Module::NameVisibilityKind Visibility, bool IsInclusionDirective)=0
Attempt to load the given module.
virtual ~ModuleLoader()
Represents a macro directive exported by a module.
Definition: MacroInfo.h:514
Describes a module or submodule.
Definition: Module.h:115
@ Hidden
All of the names in this module are hidden.
Definition: Module.h:416
NumericLiteralParser - This performs strict semantic analysis of the content of a ppnumber,...
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition: Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
void setConditionalLevels(ArrayRef< PPConditionalInfo > CL)
void LexIncludeFilename(Token &FilenameTok)
Lex a token, producing a header-name token if possible.
bool markIncluded(FileEntryRef File)
Mark the file as included.
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
void CollectPpImportSuffix(SmallVectorImpl< Token > &Toks)
Collect the tokens of a C++20 pp-import-suffix.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool LexAfterModuleImport(Token &Result)
Lex a token following the 'import' contextual keyword.
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
void DumpMacro(const MacroInfo &MI) const
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void makeModuleVisible(Module *M, SourceLocation Loc)
bool isInImportingCXXNamedModules() const
Whether we're importing a standard C++20 named module.
void Lex(Token &Result)
Lex the next token for this preprocessor.
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
Definition: Preprocessor.h:296
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
macro_iterator macro_end(bool IncludeExternalMacros=true) const
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
bool getCommentRetentionState() const
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
bool isPPInSafeBufferOptOutRegion()
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
const TargetInfo & getTargetInfo() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a #pragma hdrstop.
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
HeaderSearch & getHeaderSearchInfo() const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
ExternalPreprocessorSource * getExternalSource() const
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
Preprocessor(std::shared_ptr< PreprocessorOptions > PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
void PoisonSEHIdentifiers(bool Poison=true)
size_t getTotalMemory() const
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a #pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
DiagnosticsEngine & getDiagnostics() const
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the #include of the through header or until after a #pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
Definition: ScratchBuffer.h:24
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
bool isLocalSourceLocation(SourceLocation Loc) const
Returns true if Loc did not come from a PCH/Module.
OptionalFileEntryRef getFileEntryRefForID(FileID FID) const
Returns the FileEntryRef for the provided FileID.
FileID createFileID(FileEntryRef SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
FileID getMainFileID() const
Returns the FileID of the main source file.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
void overrideFileContents(FileEntryRef SourceFile, const llvm::MemoryBufferRef &Buffer)
Override the contents of the given source file by providing an already-allocated buffer.
bool isLoadedFileID(FileID FID) const
Returns true if FID came from a PCH/Module.
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
SourceLocation createExpansionLoc(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned Length, bool ExpansionIsTokenRange=true, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Creates an expansion SLocEntry for a macro use.
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
std::optional< llvm::MemoryBufferRef > getMemoryBufferForFileOrNone(FileEntryRef File)
Retrieve the memory buffer associated with the given file.
A trivial tuple used to represent a source range.
StringLiteralParser - This decodes string escape characters and performs wide string analysis and Tra...
Exposes information about the current target.
Definition: TargetInfo.h:220
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
void setLiteralData(const char *Ptr)
Definition: Token.h:229
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:306
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
unsigned getLength() const
Definition: Token.h:135
void setLength(unsigned Len)
Definition: Token.h:141
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition: Token.h:284
void setKind(tok::TokenKind K)
Definition: Token.h:95
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
tok::TokenKind getKind() const
Definition: Token.h:94
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276
@ DisableExpand
Definition: Token.h:79
@ IsReinjected
Definition: Token.h:89
@ LeadingEmptyMacro
Definition: Token.h:81
@ LeadingSpace
Definition: Token.h:77
@ StartOfLine
Definition: Token.h:75
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280
void setLocation(SourceLocation L)
Definition: Token.h:140
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:299
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:217
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:121
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:295
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:196
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:267
Defines the clang::TargetInfo interface.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:24
The JSON file list parser is used to communicate input to InstallAPI.
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
@ Result
The result type of a method or function.
TranslationUnitKind
Describes the kind of translation unit being processed.
Definition: LangOptions.h:1096
@ TU_Prefix
The translation unit is a prefix to a translation unit, and is not complete.
Definition: LangOptions.h:1102
#define true
Definition: stdbool.h:25