clang 23.0.0git
Preprocessor.cpp
Go to the documentation of this file.
1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
31#include "clang/Basic/LLVM.h"
33#include "clang/Basic/Module.h"
42#include "clang/Lex/Lexer.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
48#include "clang/Lex/Pragma.h"
53#include "clang/Lex/Token.h"
55#include "llvm/ADT/APInt.h"
56#include "llvm/ADT/ArrayRef.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/ScopeExit.h"
60#include "llvm/ADT/SmallVector.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/MemoryBuffer.h"
65#include "llvm/Support/MemoryBufferRef.h"
66#include "llvm/Support/SaveAndRestore.h"
67#include "llvm/Support/raw_ostream.h"
68#include <algorithm>
69#include <cassert>
70#include <memory>
71#include <optional>
72#include <string>
73#include <utility>
74#include <vector>
75
76using namespace clang;
77
78/// Minimum distance between two check points, in tokens.
79static constexpr unsigned CheckPointStepSize = 1024;
80
81LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
82
84
86 DiagnosticsEngine &diags, const LangOptions &opts,
87 SourceManager &SM, HeaderSearch &Headers,
88 ModuleLoader &TheModuleLoader,
89 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
91 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
92 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
93 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
94 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
95 // As the language options may have not been loaded yet (when
96 // deserializing an ASTUnit), adding keywords to the identifier table is
97 // deferred to Preprocessor::Initialize().
98 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
99 TUKind(TUKind), SkipMainFilePreamble(0, true),
100 CurSubmoduleState(&NullSubmoduleState) {
101 OwnsHeaderSearch = OwnsHeaders;
102
103 // Default to discarding comments.
104 KeepComments = false;
105 KeepMacroComments = false;
106 SuppressIncludeNotFoundError = false;
107
108 // Macro expansion is enabled.
109 DisableMacroExpansion = false;
110 MacroExpansionInDirectivesOverride = false;
111 InMacroArgs = false;
112 ArgMacro = nullptr;
113 InMacroArgPreExpansion = false;
114 NumCachedTokenLexers = 0;
115 PragmasEnabled = true;
116 ParsingIfOrElifDirective = false;
117 PreprocessedOutput = false;
118
119 // We haven't read anything from the external source.
120 ReadMacrosFromExternalSource = false;
121
122 LastExportKeyword.startToken();
123
124 BuiltinInfo = std::make_unique<Builtin::Context>();
125
126 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
127 // a macro. They get unpoisoned where it is allowed.
128 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
129 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
130 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
131 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
132
133 // Initialize the pragma handlers.
134 RegisterBuiltinPragmas();
135
136 // Initialize builtin macros like __LINE__ and friends.
137 RegisterBuiltinMacros();
138
139 if(LangOpts.Borland) {
140 Ident__exception_info = getIdentifierInfo("_exception_info");
141 Ident___exception_info = getIdentifierInfo("__exception_info");
142 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
143 Ident__exception_code = getIdentifierInfo("_exception_code");
144 Ident___exception_code = getIdentifierInfo("__exception_code");
145 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
146 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
147 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
148 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
149 } else {
150 Ident__exception_info = Ident__exception_code = nullptr;
151 Ident__abnormal_termination = Ident___exception_info = nullptr;
152 Ident___exception_code = Ident___abnormal_termination = nullptr;
153 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
154 Ident_AbnormalTermination = nullptr;
155 }
156
157 // Default incremental processing to -fincremental-extensions, clients can
158 // override with `enableIncrementalProcessing` if desired.
159 IncrementalProcessing = LangOpts.IncrementalExtensions;
160
161 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
163 SkippingUntilPragmaHdrStop = true;
164
165 // If using a PCH with a through header, start skipping tokens.
166 if (!this->PPOpts.PCHThroughHeader.empty() &&
167 !this->PPOpts.ImplicitPCHInclude.empty())
168 SkippingUntilPCHThroughHeader = true;
169
170 if (this->PPOpts.GeneratePreamble)
171 PreambleConditionalStack.startRecording();
172
173 MaxTokens = LangOpts.MaxTokens;
174}
175
177 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
178
179 IncludeMacroStack.clear();
180
181 // Free any cached macro expanders.
182 // This populates MacroArgCache, so all TokenLexers need to be destroyed
183 // before the code below that frees up the MacroArgCache list.
184 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
185 CurTokenLexer.reset();
186
187 // Free any cached MacroArgs.
188 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
189 ArgList = ArgList->deallocate();
190
191 // Delete the header search info, if we own it.
192 if (OwnsHeaderSearch)
193 delete &HeaderInfo;
194}
195
197 const TargetInfo *AuxTarget) {
198 assert((!this->Target || this->Target == &Target) &&
199 "Invalid override of target information");
200 this->Target = &Target;
201
202 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
203 "Invalid override of aux target information.");
204 this->AuxTarget = AuxTarget;
205
206 // Initialize information about built-ins.
207 BuiltinInfo->InitializeTarget(Target, AuxTarget);
208 HeaderInfo.setTarget(Target);
209
210 // Populate the identifier table with info about keywords for the current language.
211 Identifiers.AddKeywords(LangOpts);
212
213 // Initialize the __FLT_EVAL_METHOD__ macro to the TargetInfo.
214 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
215
216 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
217 // Use setting from TargetInfo.
218 setCurrentFPEvalMethod(SourceLocation(), Target.getFPEvalMethod());
219 else
220 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
221 setCurrentFPEvalMethod(SourceLocation(), getLangOpts().getFPEvalMethod());
222}
223
225 NumEnteredSourceFiles = 0;
226
227 // Reset pragmas
228 PragmaHandlersBackup = std::move(PragmaHandlers);
229 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
230 RegisterBuiltinPragmas();
231
232 // Reset PredefinesFileID
233 PredefinesFileID = FileID();
234}
235
237 NumEnteredSourceFiles = 1;
238
239 PragmaHandlers = std::move(PragmaHandlersBackup);
240}
241
242void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
243 llvm::errs() << tok::getTokenName(Tok.getKind());
244
245 if (!Tok.isAnnotation())
246 llvm::errs() << " '" << getSpelling(Tok) << "'";
247
248 if (!DumpFlags) return;
249
250 llvm::errs() << "\t";
251 if (Tok.isAtStartOfLine())
252 llvm::errs() << " [StartOfLine]";
253 if (Tok.hasLeadingSpace())
254 llvm::errs() << " [LeadingSpace]";
255 if (Tok.isExpandDisabled())
256 llvm::errs() << " [ExpandDisabled]";
257 if (Tok.needsCleaning()) {
258 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
259 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
260 << "']";
261 }
262
263 llvm::errs() << "\tLoc=<";
264 DumpLocation(Tok.getLocation());
265 llvm::errs() << ">";
266}
267
269 Loc.print(llvm::errs(), SourceMgr);
270}
271
272void Preprocessor::DumpMacro(const MacroInfo &MI) const {
273 llvm::errs() << "MACRO: ";
274 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
276 llvm::errs() << " ";
277 }
278 llvm::errs() << "\n";
279}
280
282 llvm::errs() << "\n*** Preprocessor Stats:\n";
283 llvm::errs() << NumDirectives << " directives found:\n";
284 llvm::errs() << " " << NumDefined << " #define.\n";
285 llvm::errs() << " " << NumUndefined << " #undef.\n";
286 llvm::errs() << " #include/#include_next/#import:\n";
287 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
288 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
289 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
290 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
291 llvm::errs() << " " << NumEndif << " #endif.\n";
292 llvm::errs() << " " << NumPragma << " #pragma.\n";
293 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
294
295 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
296 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
297 << NumFastMacroExpanded << " on the fast path.\n";
298 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
299 << " token paste (##) operations performed, "
300 << NumFastTokenPaste << " on the fast path.\n";
301
302 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
303
304 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
305 llvm::errs() << "\n Macro Expanded Tokens: "
306 << llvm::capacity_in_bytes(MacroExpandedTokens);
307 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
308 // FIXME: List information for all submodules.
309 llvm::errs() << "\n Macros: "
310 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
311 llvm::errs() << "\n #pragma push_macro Info: "
312 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
313 llvm::errs() << "\n Poison Reasons: "
314 << llvm::capacity_in_bytes(PoisonReasons);
315 llvm::errs() << "\n Comment Handlers: "
316 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
317}
318
320Preprocessor::macro_begin(bool IncludeExternalMacros) const {
321 if (IncludeExternalMacros && ExternalSource &&
322 !ReadMacrosFromExternalSource) {
323 ReadMacrosFromExternalSource = true;
324 ExternalSource->ReadDefinedMacros();
325 }
326
327 // Make sure we cover all macros in visible modules.
328 for (const ModuleMacro &Macro : ModuleMacros)
329 CurSubmoduleState->Macros.try_emplace(Macro.II);
330
331 return CurSubmoduleState->Macros.begin();
332}
333
335 return BP.getTotalMemory()
336 + llvm::capacity_in_bytes(MacroExpandedTokens)
337 + Predefines.capacity() /* Predefines buffer. */
338 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
339 // and ModuleMacros.
340 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
341 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
342 + llvm::capacity_in_bytes(PoisonReasons)
343 + llvm::capacity_in_bytes(CommentHandlers);
344}
345
347Preprocessor::macro_end(bool IncludeExternalMacros) const {
348 if (IncludeExternalMacros && ExternalSource &&
349 !ReadMacrosFromExternalSource) {
350 ReadMacrosFromExternalSource = true;
351 ExternalSource->ReadDefinedMacros();
352 }
353
354 return CurSubmoduleState->Macros.end();
355}
356
357/// Compares macro tokens with a specified token value sequence.
358static bool MacroDefinitionEquals(const MacroInfo *MI,
359 ArrayRef<TokenValue> Tokens) {
360 return Tokens.size() == MI->getNumTokens() &&
361 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
362}
363
365 SourceLocation Loc,
366 ArrayRef<TokenValue> Tokens) const {
367 SourceLocation BestLocation;
368 StringRef BestSpelling;
370 I != E; ++I) {
372 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
373 if (!Def || !Def.getMacroInfo())
374 continue;
375 if (!Def.getMacroInfo()->isObjectLike())
376 continue;
377 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
378 continue;
379 SourceLocation Location = Def.getLocation();
380 // Choose the macro defined latest.
381 if (BestLocation.isInvalid() ||
382 (Location.isValid() &&
383 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
384 BestLocation = Location;
385 BestSpelling = I->first->getName();
386 }
387 }
388 return BestSpelling;
389}
390
392 if (CurLexer)
393 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
394 ? CLK_DependencyDirectivesLexer
395 : CLK_Lexer;
396 else if (CurTokenLexer)
397 CurLexerCallback = CLK_TokenLexer;
398 else
399 CurLexerCallback = CLK_CachingLexer;
400}
401
403 unsigned CompleteLine,
404 unsigned CompleteColumn) {
405 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
406 assert(!CodeCompletionFile && "Already set");
407
408 // Load the actual file's contents.
409 std::optional<llvm::MemoryBufferRef> Buffer =
410 SourceMgr.getMemoryBufferForFileOrNone(File);
411 if (!Buffer)
412 return true;
413
414 // Find the byte position of the truncation point.
415 const char *Position = Buffer->getBufferStart();
416 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
417 for (; *Position; ++Position) {
418 if (*Position != '\r' && *Position != '\n')
419 continue;
420
421 // Eat \r\n or \n\r as a single line.
422 if ((Position[1] == '\r' || Position[1] == '\n') &&
423 Position[0] != Position[1])
424 ++Position;
425 ++Position;
426 break;
427 }
428 }
429
430 Position += CompleteColumn - 1;
431
432 // If pointing inside the preamble, adjust the position at the beginning of
433 // the file after the preamble.
434 if (SkipMainFilePreamble.first &&
435 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
436 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
437 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
438 }
439
440 if (Position > Buffer->getBufferEnd())
441 Position = Buffer->getBufferEnd();
442
443 CodeCompletionFile = File;
444 CodeCompletionOffset = Position - Buffer->getBufferStart();
445
446 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
447 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
448 char *NewBuf = NewBuffer->getBufferStart();
449 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
450 *NewPos = '\0';
451 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
452 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
453
454 return false;
455}
456
458 bool IsAngled) {
460 if (CodeComplete)
461 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
462}
463
466 if (CodeComplete)
467 CodeComplete->CodeCompleteNaturalLanguage();
468}
469
470/// getSpelling - This method is used to get the spelling of a token into a
471/// SmallVector. Note that the returned StringRef may not point to the
472/// supplied buffer if a copy can be avoided.
474 SmallVectorImpl<char> &Buffer,
475 bool *Invalid) const {
476 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
477 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
478 // Try the fast path.
479 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
480 return II->getName();
481 }
482
483 // Resize the buffer if we need to copy into it.
484 if (Tok.needsCleaning())
485 Buffer.resize(Tok.getLength());
486
487 const char *Ptr = Buffer.data();
488 unsigned Len = getSpelling(Tok, Ptr, Invalid);
489 return StringRef(Ptr, Len);
490}
491
492/// CreateString - Plop the specified string into a scratch buffer and return a
493/// location for it. If specified, the source location provides a source
494/// location for the token.
496 SourceLocation ExpansionLocStart,
497 SourceLocation ExpansionLocEnd) {
498 Tok.setLength(Str.size());
499
500 const char *DestPtr;
501 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
502
503 if (ExpansionLocStart.isValid())
504 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
505 ExpansionLocEnd, Str.size());
506 Tok.setLocation(Loc);
507
508 // If this is a raw identifier or a literal token, set the pointer data.
509 if (Tok.is(tok::raw_identifier))
510 Tok.setRawIdentifierData(DestPtr);
511 else if (Tok.isLiteral())
512 Tok.setLiteralData(DestPtr);
513}
514
516 auto &SM = getSourceManager();
517 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
518 FileIDAndOffset LocInfo = SM.getDecomposedLoc(SpellingLoc);
519 bool Invalid = false;
520 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
521 if (Invalid)
522 return SourceLocation();
523
524 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
525 const char *DestPtr;
526 SourceLocation Spelling =
527 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
528 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
529}
530
532 if (!getLangOpts().isCompilingModule())
533 return nullptr;
534
535 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
536}
537
539 if (!getLangOpts().isCompilingModuleImplementation())
540 return nullptr;
541
542 return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName);
543}
544
545//===----------------------------------------------------------------------===//
546// Preprocessor Initialization Methods
547//===----------------------------------------------------------------------===//
548
549/// EnterMainSourceFile - Enter the specified FileID as the main source file,
550/// which implicitly adds the builtin defines etc.
552 // We do not allow the preprocessor to reenter the main file. Doing so will
553 // cause FileID's to accumulate information from both runs (e.g. #line
554 // information) and predefined macros aren't guaranteed to be set properly.
555 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
556 FileID MainFileID = SourceMgr.getMainFileID();
557
558 // If MainFileID is loaded it means we loaded an AST file, no need to enter
559 // a main file.
560 if (!SourceMgr.isLoadedFileID(MainFileID)) {
561 // Enter the main file source buffer.
562 EnterSourceFile(MainFileID, nullptr, SourceLocation());
563
564 // If we've been asked to skip bytes in the main file (e.g., as part of a
565 // precompiled preamble), do so now.
566 if (SkipMainFilePreamble.first > 0)
567 CurLexer->SetByteOffset(SkipMainFilePreamble.first,
568 SkipMainFilePreamble.second);
569
570 // Tell the header info that the main file was entered. If the file is later
571 // #imported, it won't be re-entered.
572 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID))
573 markIncluded(*FE);
574
575 // Record the first PP token in the main file. This is used to generate
576 // better diagnostics for C++ modules.
577 //
578 // // This is a comment.
579 // #define FOO int // note: add 'module;' to the start of the file
580 // ^ FirstPPToken // to introduce a global module fragment.
581 //
582 // export module M; // error: module declaration must occur
583 // // at the start of the translation unit.
584 if (getLangOpts().CPlusPlusModules) {
585 std::optional<StringRef> Input =
587 if (!isPreprocessedModuleFile() && Input)
588 MainFileIsPreprocessedModuleFile =
590 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(*this);
591 DirTracer = Tracer.get();
592 addPPCallbacks(std::move(Tracer));
593 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
594 if (FirstPPTok)
595 FirstPPTokenLoc = FirstPPTok->getLocation();
596 }
597 }
598
599 // Preprocess Predefines to populate the initial preprocessor state.
600 std::unique_ptr<llvm::MemoryBuffer> SB =
601 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
602 assert(SB && "Cannot create predefined source buffer");
603 FileID FID = SourceMgr.createFileID(std::move(SB));
604 assert(FID.isValid() && "Could not create FileID for predefines?");
605 setPredefinesFileID(FID);
606
607 // Start parsing the predefines.
608 EnterSourceFile(FID, nullptr, SourceLocation());
609
610 if (!PPOpts.PCHThroughHeader.empty()) {
611 // Lookup and save the FileID for the through header. If it isn't found
612 // in the search path, it's a fatal error.
614 SourceLocation(), PPOpts.PCHThroughHeader,
615 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
616 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
617 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
618 /*IsFrameworkFound=*/nullptr);
619 if (!File) {
620 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
621 << PPOpts.PCHThroughHeader;
622 return;
623 }
624 setPCHThroughHeaderFileID(
625 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
626 }
627
628 // Skip tokens from the Predefines and if needed the main file.
629 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
630 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
632}
633
634void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
635 assert(PCHThroughHeaderFileID.isInvalid() &&
636 "PCHThroughHeaderFileID already set!");
637 PCHThroughHeaderFileID = FID;
638}
639
641 assert(PCHThroughHeaderFileID.isValid() &&
642 "Invalid PCH through header FileID");
643 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
644}
645
647 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
648 PCHThroughHeaderFileID.isValid();
649}
650
652 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
653 PCHThroughHeaderFileID.isValid();
654}
655
657 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
658}
659
661 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
662}
663
664/// Skip tokens until after the #include of the through header or
665/// until after a #pragma hdrstop is seen. Tokens in the predefines file
666/// and the main file may be skipped. If the end of the predefines file
667/// is reached, skipping continues into the main file. If the end of the
668/// main file is reached, it's a fatal error.
670 bool ReachedMainFileEOF = false;
671 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
672 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
673 Token Tok;
674 while (true) {
675 bool InPredefines =
676 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
677 CurLexerCallback(*this, Tok);
678 if (Tok.is(tok::eof) && !InPredefines) {
679 ReachedMainFileEOF = true;
680 break;
681 }
682 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
683 break;
684 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
685 break;
686 }
687 if (ReachedMainFileEOF) {
688 if (UsingPCHThroughHeader)
689 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
690 << PPOpts.PCHThroughHeader << 1;
691 else if (!PPOpts.PCHWithHdrStopCreate)
692 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
693 }
694}
695
696void Preprocessor::replayPreambleConditionalStack() {
697 // Restore the conditional stack from the preamble, if there is one.
698 if (PreambleConditionalStack.isReplaying()) {
699 assert(CurPPLexer &&
700 "CurPPLexer is null when calling replayPreambleConditionalStack.");
701 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
702 PreambleConditionalStack.doneReplaying();
703 if (PreambleConditionalStack.reachedEOFWhileSkipping())
704 SkipExcludedConditionalBlock(
705 PreambleConditionalStack.SkipInfo->HashTokenLoc,
706 PreambleConditionalStack.SkipInfo->IfTokenLoc,
707 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
708 PreambleConditionalStack.SkipInfo->FoundElse,
709 PreambleConditionalStack.SkipInfo->ElseLoc);
710 }
711}
712
714 // Notify the client that we reached the end of the source file.
715 if (Callbacks)
716 Callbacks->EndOfMainFile();
717}
718
719//===----------------------------------------------------------------------===//
720// Lexer Event Handling.
721//===----------------------------------------------------------------------===//
722
723/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
724/// identifier information for the token and install it into the token,
725/// updating the token kind accordingly.
727 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
728
729 // Look up this token, see if it is a macro, or if it is a language keyword.
730 IdentifierInfo *II;
731 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
732 // No cleaning needed, just use the characters from the lexed buffer.
733 II = getIdentifierInfo(Identifier.getRawIdentifier());
734 } else {
735 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
736 SmallString<64> IdentifierBuffer;
737 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
738
739 if (Identifier.hasUCN()) {
740 SmallString<64> UCNIdentifierBuffer;
741 expandUCNs(UCNIdentifierBuffer, CleanedStr);
742 II = getIdentifierInfo(UCNIdentifierBuffer);
743 } else {
744 II = getIdentifierInfo(CleanedStr);
745 }
746 }
747
748 // Update the token info (identifier info and appropriate token kind).
749 // FIXME: the raw_identifier may contain leading whitespace which is removed
750 // from the cleaned identifier token. The SourceLocation should be updated to
751 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
752 // line continuation before 'B') is parsed as a single tok::raw_identifier and
753 // is cleaned to tok::identifier "B". After cleaning the token's length is
754 // still 3 and the SourceLocation refers to the location of the backslash.
755 Identifier.setIdentifierInfo(II);
756 Identifier.setKind(II->getTokenID());
757
758 return II;
759}
760
762 PoisonReasons[II] = DiagID;
763}
764
766 assert(Ident__exception_code && Ident__exception_info);
767 assert(Ident___exception_code && Ident___exception_info);
768 Ident__exception_code->setIsPoisoned(Poison);
769 Ident___exception_code->setIsPoisoned(Poison);
770 Ident_GetExceptionCode->setIsPoisoned(Poison);
771 Ident__exception_info->setIsPoisoned(Poison);
772 Ident___exception_info->setIsPoisoned(Poison);
773 Ident_GetExceptionInfo->setIsPoisoned(Poison);
774 Ident__abnormal_termination->setIsPoisoned(Poison);
775 Ident___abnormal_termination->setIsPoisoned(Poison);
776 Ident_AbnormalTermination->setIsPoisoned(Poison);
777}
778
780 assert(Identifier.getIdentifierInfo() &&
781 "Can't handle identifiers without identifier info!");
782 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
783 PoisonReasons.find(Identifier.getIdentifierInfo());
784 if(it == PoisonReasons.end())
785 Diag(Identifier, diag::err_pp_used_poisoned_id);
786 else
787 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
788}
789
790void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
791 assert(II.isOutOfDate() && "not out of date");
792 assert(getExternalSource() &&
793 "getExternalSource() should not return nullptr");
795}
796
797/// HandleIdentifier - This callback is invoked when the lexer reads an
798/// identifier. This callback looks up the identifier in the map and/or
799/// potentially macro expands it or turns it into a named token (like 'for').
800///
801/// Note that callers of this method are guarded by checking the
802/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
803/// IdentifierInfo methods that compute these properties will need to change to
804/// match.
806 assert(Identifier.getIdentifierInfo() &&
807 "Can't handle identifiers without identifier info!");
808
809 IdentifierInfo &II = *Identifier.getIdentifierInfo();
810
811 // If the information about this identifier is out of date, update it from
812 // the external source.
813 // We have to treat __VA_ARGS__ in a special way, since it gets
814 // serialized with isPoisoned = true, but our preprocessor may have
815 // unpoisoned it if we're defining a C99 macro.
816 if (II.isOutOfDate()) {
817 bool CurrentIsPoisoned = false;
818 const bool IsSpecialVariadicMacro =
819 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
820 if (IsSpecialVariadicMacro)
821 CurrentIsPoisoned = II.isPoisoned();
822
823 updateOutOfDateIdentifier(II);
824 Identifier.setKind(II.getTokenID());
825
826 if (IsSpecialVariadicMacro)
827 II.setIsPoisoned(CurrentIsPoisoned);
828 }
829
830 // If this identifier was poisoned, and if it was not produced from a macro
831 // expansion, emit an error.
832 if (II.isPoisoned() && CurPPLexer) {
833 HandlePoisonedIdentifier(Identifier);
834 }
835
836 // If this is a macro to be expanded, do it.
837 if (const MacroDefinition MD = getMacroDefinition(&II)) {
838 const auto *MI = MD.getMacroInfo();
839 assert(MI && "macro definition with no macro info?");
840 if (!DisableMacroExpansion) {
841 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
842 // C99 6.10.3p10: If the preprocessing token immediately after the
843 // macro name isn't a '(', this macro should not be expanded.
844 if (!MI->isFunctionLike() || isNextPPTokenOneOf(tok::l_paren))
845 return HandleMacroExpandedIdentifier(Identifier, MD);
846 } else {
847 // C99 6.10.3.4p2 says that a disabled macro may never again be
848 // expanded, even if it's in a context where it could be expanded in the
849 // future.
850 Identifier.setFlag(Token::DisableExpand);
851 if (MI->isObjectLike() || isNextPPTokenOneOf(tok::l_paren))
852 Diag(Identifier, diag::pp_disabled_macro_expansion);
853 }
854 }
855 }
856
857 // If this identifier is a keyword in a newer Standard or proposed Standard,
858 // produce a warning. Don't warn if we're not considering macro expansion,
859 // since this identifier might be the name of a macro.
860 // FIXME: This warning is disabled in cases where it shouldn't be, like
861 // "#define constexpr constexpr", "int constexpr;"
862 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
863 Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts()))
864 << II.getName();
865 // Don't diagnose this keyword again in this translation unit.
866 II.setIsFutureCompatKeyword(false);
867 }
868
869 // If this identifier would be a keyword in C++, diagnose as a compatibility
870 // issue.
871 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
872 Diag(Identifier, diag::warn_pp_identifier_is_cpp_keyword) << &II;
873
874 // If this is an extension token, diagnose its use.
875 // We avoid diagnosing tokens that originate from macro definitions.
876 // FIXME: This warning is disabled in cases where it shouldn't be,
877 // like "#define TY typeof", "TY(1) x".
878 if (II.isExtensionToken() && !DisableMacroExpansion)
879 Diag(Identifier, diag::ext_token_used);
880
881 // Handle module contextual keywords.
882 if (getLangOpts().CPlusPlusModules && CurLexer &&
883 !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() &&
884 !CurLexer->ParsingPreprocessorDirective &&
885 Identifier.isModuleContextualKeyword() &&
886 HandleModuleContextualKeyword(Identifier)) {
887 HandleDirective(Identifier);
888 // With a fatal failure in the module loader, we abort parsing.
890 }
891
892 // If this is the 'import' contextual keyword following an '@', note
893 // that the next token indicates a module name.
894 //
895 // Note that we do not treat 'import' as a contextual
896 // keyword when we're in a caching lexer, because caching lexers only get
897 // used in contexts where import declarations are disallowed.
898 //
899 // Likewise if this is the standard C++ import keyword.
900 if (((LastTokenWasAt && II.isImportKeyword()) ||
901 Identifier.is(tok::kw_import)) &&
902 !InMacroArgs &&
903 (!DisableMacroExpansion || MacroExpansionInDirectivesOverride) &&
904 CurLexerCallback != CLK_CachingLexer) {
905 ModuleImportLoc = Identifier.getLocation();
906 IsAtImport = true;
907 CurLexerCallback = CLK_LexAfterModuleImport;
908 }
909 return true;
910}
911
// Body of the main token-producing routine (its signature line is not shown in
// this listing). It raises LexLevel for the duration of the call, runs the
// current lexer callback until a token is produced in Result, updates the
// C++20 module tracking state machines, records lexer check points, and
// finally reports the token through OnToken.
913 ++LexLevel;
914
915 // We loop here until a lex function returns a token; this avoids recursion.
916 while (!CurLexerCallback(*this, Result))
917 ;
918
// NOTE(review): this early return happens after ++LexLevel but before the
// matching --LexLevel below — confirm that leaving the level raised is intended
// after a fatal module-loader failure.
919 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
920 return;
921
922 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
923 // Remember the identifier before code completion token.
924 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
925 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
926 // Set IdentifierInfo to null to avoid confusing code that handles both
927 // identifiers and completion tokens.
928 Result.setIdentifierInfo(nullptr);
929 }
930
931 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
932 // if this token is being produced as a result of phase 4 of translation.
933 // Update TrackGMFState to decide if we are currently in a Global Module
934 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
935 // depends on the prevailing StdCXXImportSeq state in two cases.
// Only outermost (LexLevel == 1), non-reinjected tokens drive the state
// machines.
936 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
937 !Result.getFlag(Token::IsReinjected)) {
938 switch (Result.getKind()) {
939 case tok::l_paren: case tok::l_square: case tok::l_brace:
940 StdCXXImportSeqState.handleOpenBracket();
941 break;
942 case tok::r_paren: case tok::r_square:
943 StdCXXImportSeqState.handleCloseBracket();
944 break;
945 case tok::r_brace:
946 StdCXXImportSeqState.handleCloseBrace();
947 break;
// The X-macro below expands to one case label per pragma annotation token, so
// all of them share the ';' handling that follows.
948#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
949// For `#pragma ...` mimic ';'.
950#include "clang/Basic/TokenKinds.def"
951#undef PRAGMA_ANNOTATION
952 // This token is injected to represent the translation of '#include "a.h"'
953 // into "import a.h;". Mimic the notional ';'.
954 case tok::annot_module_include:
955 case tok::annot_repl_input_end:
956 case tok::semi:
957 TrackGMFState.handleSemi();
958 StdCXXImportSeqState.handleSemi();
959 ModuleDeclState.handleSemi();
960 break;
961 case tok::header_name:
962 case tok::annot_header_unit:
963 StdCXXImportSeqState.handleHeaderName();
964 break;
965 case tok::kw_export:
968 TrackGMFState.handleExport();
969 StdCXXImportSeqState.handleExport();
970 ModuleDeclState.handleExport();
971 break;
972 case tok::colon:
973 ModuleDeclState.handleColon();
974 break;
// 'import' and 'module' only affect the state machines at the top level of the
// import sequence; elsewhere they are ignored here.
975 case tok::kw_import:
976 if (StdCXXImportSeqState.atTopLevel()) {
977 TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
978 StdCXXImportSeqState.handleImport();
979 }
980 break;
981 case tok::kw_module:
982 if (StdCXXImportSeqState.atTopLevel()) {
985 TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
986 ModuleDeclState.handleModule();
987 }
988 break;
// A module-name annotation that is not part of a module-declaration candidate
// is treated like any other token (falls through to the default case).
989 case tok::annot_module_name:
990 ModuleDeclState.handleModuleName(
991 static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
992 if (ModuleDeclState.isModuleCandidate())
993 break;
994 [[fallthrough]];
995 default:
996 TrackGMFState.handleMisc();
997 StdCXXImportSeqState.handleMisc();
998 ModuleDeclState.handleMisc();
999 break;
1000 }
1001 }
1002
// Every CheckPointStepSize tokens lexed from a file lexer, remember the
// lexer's current buffer position for that file.
1003 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
1004 CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr);
1005 CheckPointCounter = 0;
1006 }
1007
1008 LastTokenWasAt = Result.is(tok::at);
// Any token other than 'export' resets the remembered export keyword.
1009 if (Result.isNot(tok::kw_export))
1010 LastExportKeyword.startToken();
1011
1012 --LexLevel;
1013
1014 // Destroy any lexers that were deferred while we were in nested Lex calls.
1015 // This must happen after decrementing LexLevel but before any other
1016 // processing that might re-enter Lex.
1017 if (LexLevel == 0 && !PendingDestroyLexers.empty())
1018 PendingDestroyLexers.clear();
1019
// TokenCount only counts outermost tokens; OnToken is also invoked for nested
// tokens when PreprocessToken is set. Reinjected tokens are never reported.
1020 if ((LexLevel == 0 || PreprocessToken) &&
1021 !Result.getFlag(Token::IsReinjected)) {
1022 if (LexLevel == 0)
1023 ++TokenCount;
1024 if (OnToken)
1025 OnToken(Result);
1026 }
1027}
1028
1029void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1030 while (1) {
1031 Token Tok;
1032 Lex(Tok);
1033 if (Tok.isOneOf(tok::unknown, tok::eof, tok::eod,
1034 tok::annot_repl_input_end))
1035 break;
1036 if (Tokens != nullptr)
1037 Tokens->push_back(Tok);
1038 }
1039}
1040
1041/// Lex a header-name token (including one formed from header-name-tokens if
1042/// \p AllowMacroExpansion is \c true).
1043///
1044/// \param FilenameTok Filled in with the next token. On success, this will
1045/// be a header_name token. On failure, it will be whatever other
1046/// token was found instead.
1047/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1048/// by macro expansion (concatenating tokens as necessary if the first
1049/// token is a '<').
1050/// \return \c true if we reached EOD or EOF while looking for a > token in
1051/// a concatenated header name and diagnosed it. \c false otherwise.
1052bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1053 // Lex using header-name tokenization rules if tokens are being lexed from
1054 // a file. Just grab a token normally if we're in a macro expansion.
1055 if (CurPPLexer) {
1056 // Avoid nested header-name lexing when macro expansion recurses, e.g.
1057 // __has_include(__has_include).
1058 if (CurPPLexer->ParsingFilename)
1059 LexUnexpandedToken(FilenameTok);
1060 else
1061 CurPPLexer->LexIncludeFilename(FilenameTok);
1062 } else {
1063 Lex(FilenameTok);
1064 }
1065
1066 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1067 // case, glue the tokens together into a single header_name token.
1068 SmallString<128> FilenameBuffer;
1069 if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
// Remember the flags of the opening '<' so they can be transferred to the
// synthesized header_name token.
1070 bool StartOfLine = FilenameTok.isAtStartOfLine();
1071 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1072 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1073
1074 SourceLocation Start = FilenameTok.getLocation();
1075 SourceLocation End;
1076 FilenameBuffer.push_back('<');
1077
1078 // Consume tokens until we find a '>'.
1079 // FIXME: A header-name could be formed starting or ending with an
1080 // alternative token. It's not clear whether that's ill-formed in all
1081 // cases.
1082 while (FilenameTok.isNot(tok::greater)) {
1083 Lex(FilenameTok);
1084 if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1085 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1086 Diag(Start, diag::note_matching) << tok::less;
1087 return true;
1088 }
1089
1090 End = FilenameTok.getLocation();
1091
1092 // FIXME: Provide code completion for #includes.
// A code-completion token contributes nothing to the header name: lex past
// it and re-test the loop condition.
1093 if (FilenameTok.is(tok::code_completion)) {
1095 Lex(FilenameTok);
1096 continue;
1097 }
1098
1099 // Append the spelling of this token to the buffer. If there was a space
1100 // before it, add it now.
1101 if (FilenameTok.hasLeadingSpace())
1102 FilenameBuffer.push_back(' ');
1103
1104 // Get the spelling of the token, directly into FilenameBuffer if
1105 // possible.
1106 size_t PreAppendSize = FilenameBuffer.size();
1107 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1108
1109 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1110 unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1111
1112 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1113 if (BufPtr != &FilenameBuffer[PreAppendSize])
1114 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1115
1116 // Resize FilenameBuffer to the correct size.
1117 if (FilenameTok.getLength() != ActualLen)
1118 FilenameBuffer.resize(PreAppendSize + ActualLen);
1119 }
1120
// Replace the '>' token with a fresh header_name token whose spelling is the
// accumulated buffer and whose range is [Start, End].
1121 FilenameTok.startToken();
1122 FilenameTok.setKind(tok::header_name);
1123 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1124 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1125 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1126 CreateString(FilenameBuffer, FilenameTok, Start, End);
1127 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1128 // Convert a string-literal token of the form " h-char-sequence "
1129 // (produced by macro expansion) into a header-name token.
1130 //
1131 // The rules for header-names don't quite match the rules for
1132 // string-literals, but all the places where they differ result in
1133 // undefined behavior, so we can and do treat them the same.
1134 //
1135 // A string-literal with a prefix or suffix is not translated into a
1136 // header-name. This could theoretically be observable via the C++20
1137 // context-sensitive header-name formation rules.
1138 StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1139 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1140 FilenameTok.setKind(tok::header_name);
1141 }
1142
1143 return false;
1144}
1145
1146std::optional<Token> Preprocessor::peekNextPPToken() const {
1147 // Do some quick tests for rejection cases.
1148 std::optional<Token> Val;
1149 if (CurLexer)
1150 Val = CurLexer->peekNextPPToken();
1151 else
1152 Val = CurTokenLexer->peekNextPPToken();
1153
1154 if (!Val) {
1155 // We have run off the end. If it's a source file we don't
1156 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1157 // macro stack.
1158 if (CurPPLexer)
1159 return std::nullopt;
1160 for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
1161 if (Entry.TheLexer)
1162 Val = Entry.TheLexer->peekNextPPToken();
1163 else
1164 Val = Entry.TheTokenLexer->peekNextPPToken();
1165
1166 if (Val)
1167 break;
1168
1169 // Ran off the end of a source file?
1170 if (Entry.ThePPLexer)
1171 return std::nullopt;
1172 }
1173 }
1174
1175 // Okay, we found the token and return. Otherwise we found the end of the
1176 // translation unit.
1177 return Val;
1178}
1179
1180// We represent the primary and partition names as 'Paths' which are sections
1181// of the hierarchical access path for a clang module. However for C++20
1182// the periods in a name are just another character, and we will need to
1183// flatten them into a string.
// Builds a single dotted string from the components of Path (the signature
// line is not shown in this listing). An empty Path yields an empty string;
// otherwise the identifier names are joined with '.'.
1185 std::string Name;
1186 if (Path.empty())
1187 return Name;
1188
1189 for (auto &Piece : Path) {
// Every component is expected to carry an identifier and a valid location.
1190 assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1191 if (!Name.empty())
1192 Name += ".";
1193 Name += Piece.getIdentifierInfo()->getName();
1194 }
1195 return Name;
1196}
1197
// Creates a ModuleNameLoc for Path (the signature line is not shown in this
// listing). The storage comes from PP's preprocessor allocator and is sized
// with totalSizeToAlloc<IdentifierLoc>(Path.size()); the object is then
// constructed in place. Path must contain at least one identifier.
1199 assert(!Path.empty() && "expect at least one identifier in a module name");
1200 void *Mem = PP.getPreprocessorAllocator().Allocate(
1201 totalSizeToAlloc<IdentifierLoc>(Path.size()), alignof(ModuleNameLoc));
1202 return new (Mem) ModuleNameLoc(Path);
1203}
1204
// Lexes a dotted module name starting at Tok, appending one (location,
// identifier) entry to Path per component. Parts of the signature are not
// shown in this listing.
//
// Returns true on failure (a non-identifier token where a name component was
// expected; for a code-completion token, completion is triggered instead of a
// diagnostic). Returns false once an identifier is not followed by a '.'; Tok
// then holds that following token.
1206 SmallVectorImpl<Token> &Suffix,
1208 bool AllowMacroExpansion,
1209 bool IsPartition) {
// Advances Tok; the consumed token is also recorded in Suffix.
// NOTE(review): the statement of the 'else' branch is missing from this
// listing, so which statements are conditional cannot be confirmed here.
1210 auto ConsumeToken = [&]() {
1211 if (AllowMacroExpansion)
1212 Lex(Tok);
1213 else
1215 Suffix.push_back(Tok);
1216 };
1217
1218 while (true) {
1219 if (Tok.isNot(tok::identifier)) {
1220 if (Tok.is(tok::code_completion)) {
1221 CurLexer->cutOffLexing();
1222 CodeComplete->CodeCompleteModuleImport(UseLoc, Path);
1223 return true;
1224 }
1225
// The diagnostic argument distinguishes the first component (Path still
// empty) from later ones.
1226 Diag(Tok, diag::err_pp_module_expected_ident) << Path.empty();
1227 return true;
1228 }
1229
1230 // [cpp.pre]/p2:
1231 // No identifier in the pp-module-name or pp-module-partition shall
1232 // currently be defined as an object-like macro.
// This is diagnosed but does not stop lexing: the identifier is still recorded
// below.
1233 if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo());
1234 MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1235 !AllowMacroExpansion) {
1236 Diag(Tok, diag::err_pp_module_name_is_macro)
1237 << IsPartition << Tok.getIdentifierInfo();
1238 Diag(MI->getDefinitionLoc(), diag::note_macro_here)
1239 << Tok.getIdentifierInfo();
1240 }
1241
1242 // Record this part of the module path.
1243 Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo());
1244 ConsumeToken();
1245
1246 if (Tok.isNot(tok::period))
1247 return false;
1248
1249 ConsumeToken();
1250 }
1251}
1252
// Lexes a module name that starts at Tok and, on success, replaces the name's
// tokens in DirToks with a single annot_module_name token followed by the
// token that ended the name.
//
// Returns true if the name could not be lexed, false on success. (One
// parameter line and one statement of the failure path are missing from this
// listing.)
1253bool Preprocessor::HandleModuleName(StringRef DirType, SourceLocation UseLoc,
1254 Token &Tok,
1256 SmallVectorImpl<Token> &DirToks,
1257 bool AllowMacroExpansion,
1258 bool IsPartition) {
// Captured up front because Tok is advanced while the name is lexed.
1259 bool LeadingSpace = Tok.hasLeadingSpace();
// Remember how many tokens precede the module name so they can be kept when
// the name tokens are dropped below.
1260 unsigned NumToksInDirective = DirToks.size();
1261 if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path, AllowMacroExpansion,
1262 IsPartition)) {
1263 if (Tok.isNot(tok::eod))
1264 CheckEndOfDirective(DirType,
1265 /*EnableMacros=*/false, &DirToks);
1267 return true;
1268 }
1269
1270 // Clean the module-name tokens and replace these tokens with
1271 // annot_module_name.
1272 DirToks.resize(NumToksInDirective);
1273 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path);
1274 DirToks.emplace_back();
1275 DirToks.back().setKind(tok::annot_module_name);
1276 DirToks.back().setAnnotationRange(NameLoc->getRange());
1277 DirToks.back().setAnnotationValue(static_cast<void *>(NameLoc));
1278 DirToks.back().setFlagValue(Token::LeadingSpace, LeadingSpace);
// Tok is the first token after the module name; keep it in the directive.
1279 DirToks.push_back(Tok);
1280 return false;
1281}
1282
1283/// [cpp.pre]/p2:
1284/// A preprocessing directive consists of a sequence of preprocessing tokens
1285/// that satisfies the following constraints: At the start of translation phase
1286/// 4, the first preprocessing token in the sequence, referred to as a
1287/// directive-introducing token, begins with the first character in the source
1288/// file (optionally after whitespace containing no new-line characters) or
1289/// follows whitespace containing at least one new-line character, and is:
1290/// - a # preprocessing token, or
1291/// - an import preprocessing token immediately followed on the same logical
1292/// source line by a header-name, <, identifier, or : preprocessing token, or
1293/// - a module preprocessing token immediately followed on the same logical
1294/// source line by an identifier, :, or ; preprocessing token, or
1295/// - an export preprocessing token immediately followed on the same logical
1296/// source line by one of the two preceding forms.
1297///
1298///
1299/// At the start of phase 4 an import or module token is treated as starting a
1300/// directive and is converted to its respective keyword iff:
1301/// - After skipping horizontal whitespace are
1302/// - at the start of a logical line, or
1303/// - preceded by an 'export' at the start of the logical line.
1304/// - Are followed by an identifier pp token (before macro expansion), or
1305/// - <, ", or : (but not ::) pp tokens for 'import', or
1306/// - ; for 'module'
1307/// Otherwise the token is treated as an identifier.
// Decides whether the contextual keyword in Result ('export', 'import' or
// 'module') introduces a C++ module directive. The signature line is not shown
// in this listing.
//
// Returns true, after rewriting Result's kind to kw_import or kw_module, when
// the token starts a directive. Returns false otherwise; an 'export' token is
// only remembered in LastExportKeyword for the next call.
1309 if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
1310 return false;
1311
1312 if (Result.is(tok::kw_export)) {
1313 LastExportKeyword = Result;
1314 return false;
1315 }
1316
1317 /// Treat 'module' and 'import' as an identifier when the main file is a
1318 /// preprocessed module file. We only allow '__preprocessed_module' and
1319 /// '__preprocessed_import' in this context.
// NOTE(review): the first line of the condition below is missing from this
// listing.
1320 IdentifierInfo *II = Result.getIdentifierInfo();
1322 (II->isStr(tok::getKeywordSpelling(tok::kw_import)) ||
1323 II->isStr(tok::getKeywordSpelling(tok::kw_module))))
1324 return false;
1325
1326 if (LastExportKeyword.is(tok::kw_export)) {
1327 // The export keyword was not at the start of line, it's not a
1328 // directive-introducing token.
1329 if (!LastExportKeyword.isAtPhysicalStartOfLine())
1330 return false;
1331 // [cpp.pre]/1.4
1332 // export // not a preprocessing directive
1333 // import foo; // preprocessing directive (ill-formed at phase7)
1334 if (Result.isAtPhysicalStartOfLine())
1335 return false;
1336 } else if (!Result.isAtPhysicalStartOfLine())
1337 return false;
1338
// Both flags are set only for the peek below and restored when this function
// returns.
1339 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1340 CurPPLexer->ParsingPreprocessorDirective, true);
1341
1342 // The next token may be an angled string literal after import keyword.
1343 llvm::SaveAndRestore<bool> SavedParsingFilemame(
1344 CurPPLexer->ParsingFilename,
1345 Result.getIdentifierInfo()->isImportKeyword());
1346
1347 std::optional<Token> NextTok =
1348 CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken();
1349 if (!NextTok)
1350 return false;
1351
// The peeked token may still be a raw identifier; resolve it so the kind
// checks below see tok::identifier.
1352 if (NextTok->is(tok::raw_identifier))
1353 LookUpIdentifierInfo(*NextTok);
1354
1355 if (Result.getIdentifierInfo()->isImportKeyword()) {
1356 if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon,
1357 tok::header_name)) {
1358 Result.setKind(tok::kw_import);
1359 ModuleImportLoc = Result.getLocation();
1360 IsAtImport = false;
1361 return true;
1362 }
1363 }
1364
1365 if (Result.getIdentifierInfo()->isModuleKeyword() &&
1366 NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
1367 Result.setKind(tok::kw_module);
1368 ModuleDeclLoc = Result.getLocation();
1369 return true;
1370 }
1371
1372 // Ok, it's an identifier.
1373 return false;
1374}
1375
1377 SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1380 return false;
1381}
1382
1383/// Collect the tokens of a C++20 pp-import-suffix.
///
/// Tokens are lexed and appended to \p Toks until a ';', eod or eof token has
/// been appended; that terminating token is the last element of \p Toks.
///
/// NOTE(review): as written, a ';' ends the collection whether or not
/// \p StopUntilEOD is set (the ';' case falls through to the eod/eof return),
/// so the flag has no visible effect here — confirm whether ';' was meant to
/// be skipped when collecting up to the end of the directive.
1385 bool StopUntilEOD) {
1386 while (true) {
// Lex directly into a freshly appended slot.
1387 Toks.emplace_back();
1388 Lex(Toks.back());
1389
1390 switch (Toks.back().getKind()) {
1391 case tok::semi:
1392 if (!StopUntilEOD)
1393 return;
1394 [[fallthrough]];
1395 case tok::eod:
1396 case tok::eof:
1397 return;
1398 default:
1399 break;
1400 }
1401 }
1402}
1403
1404// Allocate a holding buffer for a sequence of tokens and introduce it into
1405// the token stream.
// Does nothing when Toks is empty. The tokens are copied into a newly
// allocated array whose ownership is handed to EnterTokenStream; the resulting
// token lexer is then marked as lexing a C++ module directive. (The signature
// line is not shown in this listing.)
1407 if (Toks.empty())
1408 return;
1409 auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1410 std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1411 EnterTokenStream(std::move(ToksCopy), Toks.size(),
1412 /*DisableMacroExpansion*/ false, /*IsReinject*/ false);
1413 assert(CurTokenLexer && "Must have a TokenLexer");
1414 CurTokenLexer->setLexingCXXModuleDirective();
1415}
1416
1417/// Lex a token following the 'import' contextual keyword.
1418///
1419/// pp-import: [C++20]
1420/// import header-name pp-import-suffix[opt] ;
1421/// import header-name-tokens pp-import-suffix[opt] ;
1422/// [ObjC] @ import module-name ;
1423/// [Clang] import module-name ;
1424///
1425/// header-name-tokens:
1426/// string-literal
1427/// < [any sequence of preprocessing-tokens other than >] >
1428///
1429/// module-name:
1430/// module-name-qualifier[opt] identifier
1431///
1432/// module-name-qualifier
1433/// module-name-qualifier[opt] identifier .
1434///
1435/// We respond to a pp-import by importing macros from the named module.
// Body of the routine that handles the tokens following an 'import' keyword.
// Several lines (the signature, local declarations and a few statements) are
// missing from this listing, so only the visible steps are described: lex the
// module name with macro expansion enabled, wrap it in an annot_module_name
// token, collect the remaining tokens up to ';', then load the module, make it
// visible and notify the callbacks.
1437 // Figure out what kind of lexer we actually have.
1439
1442 Lex(Result);
1443 if (LexModuleNameContinue(Result, ModuleImportLoc, Suffix, Path,
1444 /*AllowMacroExpansion=*/true,
1445 /*IsPartition=*/false))
1447
// Replace whatever was collected so far with the module-name annotation
// followed by the token that ended the name.
1448 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path);
1449 Suffix.clear();
1450 Suffix.emplace_back();
1451 Suffix.back().setKind(tok::annot_module_name);
1452 Suffix.back().setAnnotationRange(NameLoc->getRange());
1453 Suffix.back().setAnnotationValue(static_cast<void *>(NameLoc));
1454 Suffix.push_back(Result);
1455
1456 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1457 // at the semicolon already.
1458 SourceLocation SemiLoc = Result.getLocation();
1459 if (Suffix.back().isNot(tok::semi)) {
1460 if (Suffix.back().isNot(tok::eof))
1461 CollectPPImportSuffix(Suffix);
1462 if (Suffix.back().isNot(tok::semi)) {
1463 // This is not an import after all.
1465 return false;
1466 }
1467 SemiLoc = Suffix.back().getLocation();
1468 }
1469
// The module is only loaded when Modules is enabled; the callback below is
// invoked either way, with a null Imported if nothing was loaded.
1470 Module *Imported = nullptr;
1471 if (getLangOpts().Modules) {
1472 Imported = TheModuleLoader.loadModule(ModuleImportLoc, Path, Module::Hidden,
1473 /*IsInclusionDirective=*/false);
1474 if (Imported)
1475 makeModuleVisible(Imported, SemiLoc);
1476 }
1477
1478 if (Callbacks)
1479 Callbacks->moduleImport(ModuleImportLoc, Path, Imported);
1480
1481 if (!Suffix.empty()) {
1483 return false;
1484 }
1485 return true;
1486}
1487
// Marks module M as visible at Loc in the current submodule state and records
// it as an import of the submodule currently being built, if any. The first
// line of the signature is not shown in this listing.
1489 bool IncludeExports) {
// The first callback (invoked per module by setVisible) is a no-op; the second
// reports a conflict between the imported module and an already visible one.
1490 CurSubmoduleState->VisibleModules.setVisible(
1491 M, Loc, IncludeExports, [](Module *) {},
1492 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1493 // FIXME: Include the path in the diagnostic.
1494 // FIXME: Include the import location for the conflicting module.
1495 Diag(ModuleImportLoc, diag::warn_module_conflict)
1496 << Path[0]->getFullModuleName()
1497 << Conflict->getFullModuleName()
1498 << Message;
1499 });
1500
1501 // Add this module to the imports list of the currently-built submodule.
// A module is never recorded as an import of itself.
1502 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1503 BuildingSubmoduleStack.back().M->Imports.insert(M);
1504}
1505
// Completes lexing of a string literal that starts at Result: consumes all
// adjacent string_literal tokens, concatenates them and stores the text in
// String. The first line of the signature is not shown in this listing.
//
// Returns false (after a diagnostic where one is emitted here) if Result is
// not a string literal, if the literal parser reports an error, or if the
// literal is a Pascal string; returns true on success. On return Result holds
// the first token after the string literal(s).
1507 const char *DiagnosticTag,
1508 bool AllowMacroExpansion) {
1509 // We need at least one string literal.
1510 if (Result.isNot(tok::string_literal)) {
1511 Diag(Result, diag::err_expected_string_literal)
1512 << /*Source='in...'*/0 << DiagnosticTag;
1513 return false;
1514 }
1515
1516 // Lex string literal tokens, optionally with macro expansion.
1517 SmallVector<Token, 4> StrToks;
1518 do {
1519 StrToks.push_back(Result);
1520
// User-defined-literal suffixes are diagnosed, but the token is still used.
1521 if (Result.hasUDSuffix())
1522 Diag(Result, diag::err_invalid_string_udl);
1523
// NOTE(review): the statement of the 'else' branch is missing from this
// listing.
1524 if (AllowMacroExpansion)
1525 Lex(Result);
1526 else
1528 } while (Result.is(tok::string_literal));
1529
1530 // Concatenate and parse the strings.
1531 StringLiteralParser Literal(StrToks, *this);
1532 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1533
1534 if (Literal.hadError)
1535 return false;
1536
1537 if (Literal.Pascal) {
1538 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1539 << /*Source='in...'*/0 << DiagnosticTag;
1540 return false;
1541 }
1542
1543 String = std::string(Literal.GetString());
1544 return true;
1545}
1546
// Parses the numeric_constant token Tok as a plain integer literal and stores
// its value in Value. The signature line is not shown in this listing.
//
// Returns false, leaving Tok untouched, if the spelling cannot be obtained, if
// the literal is malformed, is not an integer literal, carries a user-defined
// suffix, or if GetIntegerValue reports failure for a 64-bit value
// (presumably overflow — confirm against NumericLiteralParser). On success the
// next token is lexed into Tok and true is returned.
1548 assert(Tok.is(tok::numeric_constant));
1549 SmallString<8> IntegerBuffer;
1550 bool NumberInvalid = false;
1551 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1552 if (NumberInvalid)
1553 return false;
1554 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1556 getDiagnostics());
1557 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1558 return false;
1559 llvm::APInt APVal(64, 0);
1560 if (Literal.GetIntegerValue(APVal))
1561 return false;
// Only consume the literal once it is known to be valid.
1562 Lex(Tok);
1563 Value = APVal.getLimitedValue();
1564 return true;
1565}
1566
// Registers Handler at the end of CommentHandlers. Handler must be non-null
// and must not already be registered. (Signature line not shown in this
// listing.)
1568 assert(Handler && "NULL comment handler");
1569 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1570 "Comment handler already registered");
1571 CommentHandlers.push_back(Handler);
1572}
1573
// Removes Handler from CommentHandlers. Handler must currently be registered.
// (Signature line not shown in this listing.)
1575 std::vector<CommentHandler *>::iterator Pos =
1576 llvm::find(CommentHandlers, Handler);
1577 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1578 CommentHandlers.erase(Pos);
1579}
1580
// Passes Comment to every registered comment handler. (Signature line not
// shown in this listing.)
//
// Returns true, after lexing the next token into result, if at least one
// handler returned true and comments are not being retained; returns false
// otherwise.
1582 bool AnyPendingTokens = false;
// Every handler is invoked, even after one has already returned true.
1583 for (CommentHandler *H : CommentHandlers) {
1584 if (H->HandleComment(*this, Comment))
1585 AnyPendingTokens = true;
1586 }
1587 if (!AnyPendingTokens || getCommentRetentionState())
1588 return false;
1589 Lex(result);
1590 return true;
1591}
1592
// Emits warn_pragma_deprecated_macro_use for the macro named by Identifier,
// followed by a note at the location of the recorded annotation. The
// deprecation annotation must have been recorded for this macro.
// (The initializer of A is missing from this listing.)
1593void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1594 const MacroAnnotations &A =
1596 assert(A.DeprecationInfo &&
1597 "Macro deprecation warning without recorded annotation!");
1598 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
// The 0/1 argument tells the diagnostic whether a message follows.
1599 if (Info.Message.empty())
1600 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1601 << Identifier.getIdentifierInfo() << 0;
1602 else
1603 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1604 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1605 Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
1606}
1607
// Emits warn_pragma_restrict_expansion_macro_use for the macro named by
// Identifier, followed by a note at the location of the recorded annotation.
// The restrict-expansion annotation must have been recorded for this macro.
// (The initializer of A is missing from this listing.)
1608void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1609 const MacroAnnotations &A =
1611 assert(A.RestrictExpansionInfo &&
1612 "Macro restricted expansion warning without recorded annotation!");
1613 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
// The 0/1 argument tells the diagnostic whether a message follows.
1614 if (Info.Message.empty())
1615 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1616 << Identifier.getIdentifierInfo() << 0;
1617 else
1618 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1619 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1620 Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
1621}
1622
1623void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1624 unsigned DiagSelection) const {
1625 Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1626}
1627
// Emits warn_pragma_final_macro for the macro named by Identifier, followed by
// a note at the recorded annotation location. IsUndef selects the diagnostic's
// first alternative (0) instead of the second (1). The final annotation must
// have been recorded for this macro.
// (The initializer of A is missing from this listing.)
1628void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1629 bool IsUndef) const {
1630 const MacroAnnotations &A =
1632 assert(A.FinalAnnotationLoc &&
1633 "Final macro warning without recorded annotation!");
1634
1635 Diag(Identifier, diag::warn_pragma_final_macro)
1636 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1637 Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
1638}
1639
// Returns whether Loc lies inside a safe-buffer opt-out region. Local
// locations are checked against SafeBufferOptOutMap; other locations are
// checked against the region map of the loaded AST they belong to, if one
// exists. (The first line of the signature is not shown in this listing.)
1641 const SourceLocation &Loc) const {
1642 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1643 // region map:
1644 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1645 const SourceLocation &Loc) -> bool {
1646 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
// partition_point requires the regions whose end precedes `Loc` to form a
// prefix of Map — presumably regions are stored in source order; confirm.
1647 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1648 Map, [&SourceMgr,
1649 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1650 return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1651 });
1652
1653 if (FirstRegionEndingAfterLoc != Map.end()) {
1654 // To test if the start location of the found region precedes `Loc`:
1655 return SourceMgr.isBeforeInTranslationUnit(
1656 FirstRegionEndingAfterLoc->first, Loc);
1657 }
1658 // If we do not find a region whose end location passes `Loc`, we want to
1659 // check if the current region is still open:
// An open region is stored with identical start and end locations.
1660 if (!Map.empty() && Map.back().first == Map.back().second)
1661 return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
1662 return false;
1663 };
1664
1665 // What the following does:
1666 //
1667 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1668 // Otherwise, `Loc` is from a loaded AST. We look up the
1669 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1670 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1671 // region w.r.t. the region map. If the region map is absent, it means there
1672 // is no opt-out pragma in that loaded AST.
1673 //
1674 // Opt-out pragmas in the local TU or a loaded AST are not visible to one
1675 // another. That means if you put the pragmas around a `#include
1676 // "module.h"`, where module.h is a module, it is not actually suppressing
1677 // warnings in module.h. This is fine because warnings in module.h will be
1678 // reported when module.h is compiled in isolation and nothing in module.h
1679 // will be analyzed ever again. So you will not see warnings from the file
1680 // that imports module.h anyway. And you can't even do the same thing for PCHs
1681 // because they can only be included from the command line.
1682
1683 if (SourceMgr.isLocalSourceLocation(Loc))
1684 return TestInMap(SafeBufferOptOutMap, Loc);
1685
1686 const SafeBufferOptOutRegionsTy *LoadedRegions =
1687 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
1688
1689 if (LoadedRegions)
1690 return TestInMap(*LoadedRegions, Loc);
1691 return false;
1692}
1693
// Opens (isEnter) or closes (!isEnter) a safe-buffer opt-out region at Loc.
// Returns true if the action is invalid and false once the region has been
// recorded. (The first line of the signature and the two guard conditions are
// missing from this listing.)
1695 bool isEnter, const SourceLocation &Loc) {
1696 if (isEnter) {
1698 return true; // invalid enter action
1699 InSafeBufferOptOutRegion = true;
1700 CurrentSafeBufferOptOutStart = Loc;
1701
1702 // To set the start location of a new region:
1703
1704 if (!SafeBufferOptOutMap.empty()) {
1705 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1706 assert(PrevRegion->first != PrevRegion->second &&
1707 "Shall not begin a safe buffer opt-out region before closing the "
1708 "previous one.");
1709 }
1710 // If the start location equals the end location, we call the region an
1711 // open region or an unclosed region (i.e., end location has not been set
1712 // yet).
1713 SafeBufferOptOutMap.emplace_back(Loc, Loc);
1714 } else {
1716 return true; // invalid exit action
1717 InSafeBufferOptOutRegion = false;
1718
1719 // To set the end location of the current open region:
1720
1721 assert(!SafeBufferOptOutMap.empty() &&
1722 "Misordered safe buffer opt-out regions");
1723 auto *CurrRegion = &SafeBufferOptOutMap.back();
1724 assert(CurrRegion->first == CurrRegion->second &&
1725 "Set end location to a closed safe buffer opt-out region");
1726 CurrRegion->second = Loc;
1727 }
1728 return false;
1729}
1730
// Returns whether an opt-out region is currently open. (Signature line not
// shown in this listing.)
1732 return InSafeBufferOptOutRegion;
1733}
// Returns whether an opt-out region is currently open and stores the most
// recently recorded region start in StartLoc. StartLoc is written even when
// the result is false. (Signature line not shown in this listing.)
1735 StartLoc = CurrentSafeBufferOptOutStart;
1736 return InSafeBufferOptOutRegion;
1737}
1738
// Flattens SafeBufferOptOutMap into a sequence of source locations, two per
// region (begin, then end), and returns it. Must not be called while an
// opt-out region is still open. (The signature line and the declaration of
// SrcSeq are missing from this listing.)
1741 assert(!InSafeBufferOptOutRegion &&
1742 "Attempt to serialize safe buffer opt-out regions before file being "
1743 "completely preprocessed");
1744
1746
1747 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1748 SrcSeq.push_back(begin);
1749 SrcSeq.push_back(end);
1750 }
1751 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1752 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1753 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1754 // It means that for each loading pch/module m, it just needs to load m's own
1755 // `SafeBufferOptOutMap`.
1756 return SrcSeq;
1757}
1758
// Rebuilds the opt-out regions of a loaded AST from a flat sequence of
// (begin, end) location pairs. Returns false if the sequence is empty and true
// once the regions have been added. (The first line of the signature is not
// shown in this listing.)
1760 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1761 if (SourceLocations.size() == 0)
1762 return false;
1763
1764 assert(SourceLocations.size() % 2 == 0 &&
1765 "ill-formed SourceLocation sequence");
1766
// The first location selects (or creates) the region map the pairs are
// appended to.
1767 auto It = SourceLocations.begin();
1768 SafeBufferOptOutRegionsTy &Regions =
1769 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
1770
// Two elements are consumed per iteration; this relies on the even length
// asserted above.
1771 do {
1772 SourceLocation Begin = *It++;
1773 SourceLocation End = *It++;
1774
1775 Regions.emplace_back(Begin, End);
1776 } while (It != SourceLocations.end());
1777 return true;
1778}
1779
// Out-of-line defaulted destructor of ModuleLoader.
1780ModuleLoader::~ModuleLoader() = default;
1781
1783
1785
1787
// Creates the PreprocessingRecord on first use and registers it as a
// preprocessor callback; later calls are no-ops. Ownership of the new record
// is handed to addPPCallbacks via unique_ptr, while Record keeps a raw
// pointer to it. (Signature line not shown in this listing.)
1789 if (Record)
1790 return;
1791
1792 Record = new PreprocessingRecord(getSourceManager());
1793 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1794}
1795
1796const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1797 if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) {
1798 const SmallVector<const char *> &FileCheckPoints = It->second;
1799 const char *Last = nullptr;
1800 // FIXME: Do better than a linear search.
1801 for (const char *P : FileCheckPoints) {
1802 if (P > Start)
1803 break;
1804 Last = P;
1805 }
1806 return Last;
1807 }
1808
1809 return nullptr;
1810}
1811
// Returns true only if a directive tracer is installed and it reports having
// seen a non-trivial preprocessor directive. (Signature line not shown in this
// listing.)
1813 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1814}
1815
// Returns the tracer's SeenNoTrivialPPDirective flag. (Signature line not
// shown in this listing.)
1817 return SeenNoTrivialPPDirective;
1818}
1819
1820void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1821 if (InMainFile && !SeenNoTrivialPPDirective)
1822 SeenNoTrivialPPDirective = true;
1823}
1824
// File-change callback: records whether the file now being lexed (FID) is the
// main file. The other parameters are unused. (The first line of the signature
// is not shown in this listing.)
1826 FileID FID, LexedFileChangeReason Reason,
1827 SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1828 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1829}
1830
// Macro-expansion callback: expanding any macro that is not a builtin macro
// counts as a non-trivial preprocessor directive. (The first line of the
// signature is not shown in this listing.)
1832 const MacroDefinition &MD,
1833 SourceRange Range,
1834 const MacroArgs *Args) {
1835 // FIXME: Does only enable builtin macro expansion make sense?
1836 if (!MD.getMacroInfo()->isBuiltinMacro())
1837 setSeenNoTrivialPPDirective();
1838}
Defines enum values for all the target-independent builtin functions.
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::FileManager interface and associated types.
Token Tok
The Token.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Defines the PreprocessorLexer interface.
static bool MacroDefinitionEquals(const MacroInfo *MI, ArrayRef< TokenValue > Tokens)
Compares macro tokens with a specified token value sequence.
static constexpr unsigned CheckPointStepSize
Minimum distance between two check points, in tokens.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Abstract base class that describes a handler that will receive source ranges for each of the comments...
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:232
virtual void updateOutOfDateIdentifier(const IdentifierInfo &II)=0
Update an out-of-date identifier.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition FileEntry.h:302
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isValid() const
bool isInvalid() const
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
Module * lookupModule(StringRef ModuleName, SourceLocation ImportLoc=SourceLocation(), bool AllowSearch=true, bool AllowExtraModuleMapSearch=false)
Look up a module: search for a module with the given name.
Provides lookups to, and iteration over, IdentifierInfo objects.
One of these records is kept for each identifier that is lexed.
bool IsKeywordInCPlusPlus() const
Return true if this identifier would be a keyword in C++ mode.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
void setIsPoisoned(bool Value=true)
setIsPoisoned - Mark this identifier as poisoned.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isImportKeyword() const
Determine whether this is the contextual keyword import.
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
void setIsFutureCompatKeyword(bool Val)
StringRef getName() const
Return the actual identifier string.
bool isFutureCompatKeyword() const
is/setIsFutureCompatKeyword - Initialize information about whether or not this language token is a ke...
bool isExtensionToken() const
get/setExtension - Initialize information about whether or not this language token is an extension.
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:590
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition MacroInfo.h:606
SourceLocation getLocation() const
Definition MacroInfo.h:488
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:39
const_tokens_iterator tokens_begin() const
Definition MacroInfo.h:244
unsigned getNumTokens() const
Return the number of tokens that this macro expands to.
Definition MacroInfo.h:235
const Token & getReplacementToken(unsigned Tok) const
Definition MacroInfo.h:237
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition MacroInfo.h:217
bool isObjectLike() const
Definition MacroInfo.h:202
Abstract interface for a module loader.
virtual ~ModuleLoader()
static std::string getFlatNameFromPath(ModuleIdPath Path)
Represents a macro directive exported by a module.
Definition MacroInfo.h:514
static ModuleNameLoc * Create(Preprocessor &PP, ModuleIdPath Path)
SourceRange getRange() const
Describes a module or submodule.
Definition Module.h:246
@ Hidden
All of the names in this module are hidden.
Definition Module.h:548
llvm::SmallSetVector< Module *, 2 > Imports
The set of modules imported by this module, and on which this module depends.
Definition Module.h:561
void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
NumericLiteralParser - This performs strict semantic analysis of the content of a ppnumber,...
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
void setConditionalLevels(ArrayRef< PPConditionalInfo > CL)
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
std::string PCHThroughHeader
If non-empty, the filename used in an include directive in the primary source file (or command-line p...
bool GeneratePreamble
True indicates that a preamble is being generated.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool markIncluded(FileEntryRef File)
Mark the file as included.
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void EnterModuleSuffixTokenStream(ArrayRef< Token > Toks)
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool LexAfterModuleImport(Token &Result)
Lex a token following the 'import' contextual keyword.
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
friend class MacroArgs
void DumpMacro(const MacroInfo &MI) const
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void Lex(Token &Result)
Lex the next token for this preprocessor.
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
macro_iterator macro_end(bool IncludeExternalMacros=true) const
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
bool isPreprocessedModuleFile() const
Whether the main file is preprocessed module file.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros=false, SmallVectorImpl< Token > *ExtraToks=nullptr)
Ensure that the next token is a tok::eod token.
bool getCommentRetentionState() const
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
bool hadModuleLoaderFatalFailure() const
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
bool HandleModuleContextualKeyword(Token &Result)
Callback invoked when the lexer sees one of export, import or module token at the start of a line.
const TargetInfo & getTargetInfo() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a pragma hdrstop.
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
llvm::BumpPtrAllocator & getPreprocessorAllocator()
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
HeaderSearch & getHeaderSearchInfo() const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
ExternalPreprocessorSource * getExternalSource() const
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, SmallVectorImpl< Token > &Suffix, SmallVectorImpl< IdentifierLoc > &Path, bool AllowMacroExpansion, bool IsPartition)
const LangOptions & getLangOpts() const
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
void PoisonSEHIdentifiers(bool Poison=true)
size_t getTotalMemory() const
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool isNextPPTokenOneOf(Ts... Ks) const
isNextPPTokenOneOf - Check whether the next pp-token is one of the specified token kinds.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
void CollectPPImportSuffix(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Collect the tokens of a C++20 pp-import-suffix.
DiagnosticsEngine & getDiagnostics() const
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok, SmallVectorImpl< IdentifierLoc > &Path, SmallVectorImpl< Token > &DirToks, bool AllowMacroExpansion, bool IsPartition)
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the include of the through header or until after a pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
std::optional< StringRef > getBufferDataOrNone(FileID FID) const
Return a StringRef to the source buffer data for the specified FileID, returning std::nullopt if inva...
A trivial tuple used to represent a source range.
StringLiteralParser - This decodes string escape characters and performs wide string analysis and Tra...
Exposes information about the current target.
Definition TargetInfo.h:227
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition Token.h:324
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
unsigned getLength() const
Definition Token.h:145
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition Token.h:298
void setKind(tok::TokenKind K)
Definition Token.h:100
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:104
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition Token.h:286
bool isOneOf(Ts... Ks) const
Definition Token.h:105
@ DisableExpand
Definition Token.h:79
@ HasSeenNoTrivialPPDirective
Definition Token.h:92
@ IsReinjected
Definition Token.h:89
@ LeadingEmptyMacro
Definition Token.h:81
@ LeadingSpace
Definition Token.h:77
@ StartOfLine
Definition Token.h:75
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
Definition Lexer.cpp:75
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition Token.h:294
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition Token.h:317
bool isNot(tok::TokenKind K) const
Definition Token.h:111
void startToken()
Reset all flags to cleared.
Definition Token.h:187
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition Token.h:313
void setIdentifierInfo(IdentifierInfo *II)
Definition Token.h:206
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition Token.h:277
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition Token.h:223
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition Token.h:254
Defines the clang::TargetInfo interface.
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
const char * getKeywordSpelling(TokenKind Kind) LLVM_READNONE
Determines the spelling of simple keyword and contextual keyword tokens like 'int' and 'dynamic_cast'...
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
@ CPlusPlus20
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
std::pair< FileID, unsigned > FileIDAndOffset
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
bool isPreprocessedModuleFile(StringRef Source)
Scan an input source buffer, and check whether the input source is a preprocessed output.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
TranslationUnitKind
Describes the kind of translation unit being processed.
@ TU_Prefix
The translation unit is a prefix to a translation unit, and is not complete.
#define true
Definition stdbool.h:25