clang 23.0.0git
Preprocessor.cpp
Go to the documentation of this file.
1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
31#include "clang/Basic/LLVM.h"
33#include "clang/Basic/Module.h"
42#include "clang/Lex/Lexer.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
48#include "clang/Lex/Pragma.h"
53#include "clang/Lex/Token.h"
55#include "llvm/ADT/APInt.h"
56#include "llvm/ADT/ArrayRef.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/ScopeExit.h"
60#include "llvm/ADT/SmallVector.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/FormatVariadic.h"
65#include "llvm/Support/MemoryBuffer.h"
66#include "llvm/Support/MemoryBufferRef.h"
67#include "llvm/Support/SaveAndRestore.h"
68#include "llvm/Support/raw_ostream.h"
69#include <algorithm>
70#include <cassert>
71#include <memory>
72#include <optional>
73#include <string>
74#include <utility>
75#include <vector>
76
77using namespace clang;
78
79/// Minimum distance between two check points, in tokens.
80static constexpr unsigned CheckPointStepSize = 1024;
81
82LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
83
85
87 DiagnosticsEngine &diags, const LangOptions &opts,
88 SourceManager &SM, HeaderSearch &Headers,
89 ModuleLoader &TheModuleLoader,
90 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
92 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
93 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
94 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
95 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
96 // As the language options may have not been loaded yet (when
97 // deserializing an ASTUnit), adding keywords to the identifier table is
98 // deferred to Preprocessor::Initialize().
99 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
100 TUKind(TUKind), SkipMainFilePreamble(0, true),
101 CurSubmoduleState(&NullSubmoduleState) {
102 OwnsHeaderSearch = OwnsHeaders;
103
104 // Default to discarding comments.
105 KeepComments = false;
106 KeepMacroComments = false;
107 SuppressIncludeNotFoundError = false;
108
109 // Macro expansion is enabled.
110 DisableMacroExpansion = false;
111 MacroExpansionInDirectivesOverride = false;
112 InMacroArgs = false;
113 ArgMacro = nullptr;
114 InMacroArgPreExpansion = false;
115 NumCachedTokenLexers = 0;
116 PragmasEnabled = true;
117 ParsingIfOrElifDirective = false;
118 PreprocessedOutput = false;
119
120 // We haven't read anything from the external source.
121 ReadMacrosFromExternalSource = false;
122
123 LastExportKeyword.startToken();
124
125 BuiltinInfo = std::make_unique<Builtin::Context>();
126
127 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
128 // a macro. They get unpoisoned where it is allowed.
129 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
130 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
131 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
132 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
133
134 // Initialize the pragma handlers.
135 RegisterBuiltinPragmas();
136
137 // Initialize builtin macros like __LINE__ and friends.
138 RegisterBuiltinMacros();
139
140 if(LangOpts.Borland) {
141 Ident__exception_info = getIdentifierInfo("_exception_info");
142 Ident___exception_info = getIdentifierInfo("__exception_info");
143 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
144 Ident__exception_code = getIdentifierInfo("_exception_code");
145 Ident___exception_code = getIdentifierInfo("__exception_code");
146 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
147 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
148 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
149 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
150 } else {
151 Ident__exception_info = Ident__exception_code = nullptr;
152 Ident__abnormal_termination = Ident___exception_info = nullptr;
153 Ident___exception_code = Ident___abnormal_termination = nullptr;
154 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
155 Ident_AbnormalTermination = nullptr;
156 }
157
158 // Default incremental processing to -fincremental-extensions, clients can
159 // override with `enableIncrementalProcessing` if desired.
160 IncrementalProcessing = LangOpts.IncrementalExtensions;
161
162 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
164 SkippingUntilPragmaHdrStop = true;
165
166 // If using a PCH with a through header, start skipping tokens.
167 if (!this->PPOpts.PCHThroughHeader.empty() &&
168 !this->PPOpts.ImplicitPCHInclude.empty())
169 SkippingUntilPCHThroughHeader = true;
170
171 if (this->PPOpts.GeneratePreamble)
172 PreambleConditionalStack.startRecording();
173
174 MaxTokens = LangOpts.MaxTokens;
175}
176
178 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
179
180 IncludeMacroStack.clear();
181
182 // Free any cached macro expanders.
183 // This populates MacroArgCache, so all TokenLexers need to be destroyed
184 // before the code below that frees up the MacroArgCache list.
185 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
186 CurTokenLexer.reset();
187
188 // Free any cached MacroArgs.
189 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
190 ArgList = ArgList->deallocate();
191
192 // Delete the header search info, if we own it.
193 if (OwnsHeaderSearch)
194 delete &HeaderInfo;
195}
196
198 const TargetInfo *AuxTarget) {
199 assert((!this->Target || this->Target == &Target) &&
200 "Invalid override of target information");
201 this->Target = &Target;
202
203 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
204 "Invalid override of aux target information.");
205 this->AuxTarget = AuxTarget;
206
207 // Initialize information about built-ins.
208 BuiltinInfo->InitializeTarget(Target, AuxTarget);
209 HeaderInfo.setTarget(Target);
210
211 // Populate the identifier table with info about keywords for the current language.
212 Identifiers.AddKeywords(LangOpts);
213
214 // Initialize the __FLT_EVAL_METHOD__ macro to the TargetInfo.
215 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
216
217 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
218 // Use setting from TargetInfo.
219 setCurrentFPEvalMethod(SourceLocation(), Target.getFPEvalMethod());
220 else
221 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
222 setCurrentFPEvalMethod(SourceLocation(), getLangOpts().getFPEvalMethod());
223}
224
226 NumEnteredSourceFiles = 0;
227
228 // Reset pragmas
229 PragmaHandlersBackup = std::move(PragmaHandlers);
230 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
231 RegisterBuiltinPragmas();
232
233 // Reset PredefinesFileID
234 PredefinesFileID = FileID();
235}
236
238 NumEnteredSourceFiles = 1;
239
240 PragmaHandlers = std::move(PragmaHandlersBackup);
241}
242
243void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
244 std::string TokenStr;
245 llvm::raw_string_ostream OS(TokenStr);
246
247 // The alignment of 16 is chosen to comfortably fit most identifiers.
248 OS << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
249
250 // Annotation tokens are just markers that don't have a spelling -- they
251 // indicate where something expanded.
252 if (!Tok.isAnnotation()) {
253 OS << "'";
254 // Escape string to prevent token spelling from spanning multiple lines.
255 OS.write_escaped(getSpelling(Tok));
256 OS << "'";
257 }
258
259 // The alignment of 48 (32 characters for the spelling + the 16 for
260 // the identifier name) fits most variable names, keywords and annotations.
261 llvm::errs() << llvm::formatv("{0,-48} ", OS.str());
262
263 if (!DumpFlags) return;
264
265 auto Loc = Tok.getLocation();
266 llvm::errs() << "Loc=<";
267 DumpLocation(Loc);
268 llvm::errs() << ">";
269
270 // If the token points directly to a file location (i.e. not a macro
271 // expansion), then add additional padding so that trailing markers
272 // align, provided the line/column numbers are reasonably sized.
273 //
274 // Otherwise, if it's a macro expansion, don't bother with alignment,
275 // as the line will include multiple locations and be very long.
276 //
277 // NOTE: To keep this stateless, it doesn't account for filename
278 // length, so when a header starts markers will be temporarily misaligned.
279 if (Loc.isFileID()) {
280 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
281
282 if (!PLoc.isInvalid()) {
283 int LineWidth = llvm::utostr(PLoc.getLine()).size();
284 int ColumnWidth = llvm::utostr(PLoc.getColumn()).size();
285
286 // Reserve space for lines up to 9999 and columns up to 99,
287 // which is 4 + 2 = 6 characters in total.
288 const int ReservedSpace = 6;
289
290 int LeftSpace = ReservedSpace - LineWidth - ColumnWidth;
291 int Padding = std::max<int>(0, LeftSpace);
292
293 llvm::errs().indent(Padding);
294 }
295 }
296
297 if (Tok.isAtStartOfLine())
298 llvm::errs() << " [StartOfLine]";
299 if (Tok.hasLeadingSpace())
300 llvm::errs() << " [LeadingSpace]";
301 if (Tok.isExpandDisabled())
302 llvm::errs() << " [ExpandDisabled]";
303 if (Tok.needsCleaning()) {
304 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
305 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']";
306 }
307}
308
310 Loc.print(llvm::errs(), SourceMgr);
311}
312
313void Preprocessor::DumpMacro(const MacroInfo &MI) const {
314 llvm::errs() << "MACRO: ";
315 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
317 llvm::errs() << " ";
318 }
319 llvm::errs() << "\n";
320}
321
323 llvm::errs() << "\n*** Preprocessor Stats:\n";
324 llvm::errs() << NumDirectives << " directives found:\n";
325 llvm::errs() << " " << NumDefined << " #define.\n";
326 llvm::errs() << " " << NumUndefined << " #undef.\n";
327 llvm::errs() << " #include/#include_next/#import:\n";
328 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
329 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
330 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
331 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
332 llvm::errs() << " " << NumEndif << " #endif.\n";
333 llvm::errs() << " " << NumPragma << " #pragma.\n";
334 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
335
336 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
337 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
338 << NumFastMacroExpanded << " on the fast path.\n";
339 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
340 << " token paste (##) operations performed, "
341 << NumFastTokenPaste << " on the fast path.\n";
342
343 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
344
345 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
346 llvm::errs() << "\n Macro Expanded Tokens: "
347 << llvm::capacity_in_bytes(MacroExpandedTokens);
348 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
349 // FIXME: List information for all submodules.
350 llvm::errs() << "\n Macros: "
351 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
352 llvm::errs() << "\n #pragma push_macro Info: "
353 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
354 llvm::errs() << "\n Poison Reasons: "
355 << llvm::capacity_in_bytes(PoisonReasons);
356 llvm::errs() << "\n Comment Handlers: "
357 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
358}
359
360llvm::iterator_range<Preprocessor::macro_iterator>
361Preprocessor::macros(bool IncludeExternalMacros) const {
362 if (IncludeExternalMacros && ExternalSource &&
363 !ReadMacrosFromExternalSource) {
364 ReadMacrosFromExternalSource = true;
365 ExternalSource->ReadDefinedMacros();
366 }
367 // Make sure we cover all macros in visible modules.
368 for (const ModuleMacro &Macro : ModuleMacros)
369 CurSubmoduleState->Macros.try_emplace(Macro.II);
370
371 return CurSubmoduleState->Macros;
372}
373
375 return BP.getTotalMemory()
376 + llvm::capacity_in_bytes(MacroExpandedTokens)
377 + Predefines.capacity() /* Predefines buffer. */
378 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
379 // and ModuleMacros.
380 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
381 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
382 + llvm::capacity_in_bytes(PoisonReasons)
383 + llvm::capacity_in_bytes(CommentHandlers);
384}
385
386/// Compares macro tokens with a specified token value sequence.
387static bool MacroDefinitionEquals(const MacroInfo *MI,
388 ArrayRef<TokenValue> Tokens) {
389 return Tokens.size() == MI->getNumTokens() &&
390 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
391}
392
394 SourceLocation Loc,
395 ArrayRef<TokenValue> Tokens) const {
396 SourceLocation BestLocation;
397 StringRef BestSpelling;
398 for (const auto &M : macros()) {
399 const MacroDirective::DefInfo Def =
400 M.second.findDirectiveAtLoc(Loc, SourceMgr);
401 if (!Def || !Def.getMacroInfo())
402 continue;
403 if (!Def.getMacroInfo()->isObjectLike())
404 continue;
405 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
406 continue;
407 SourceLocation Location = Def.getLocation();
408 // Choose the macro defined latest.
409 if (BestLocation.isInvalid() ||
410 (Location.isValid() &&
411 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
412 BestLocation = Location;
413 BestSpelling = M.first->getName();
414 }
415 }
416 return BestSpelling;
417}
418
420 if (InCachingLexMode())
421 CurLexerCallback = CLK_CachingLexer;
422 else if (CurLexer)
423 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
424 ? CLK_DependencyDirectivesLexer
425 : CLK_Lexer;
426 else if (CurTokenLexer)
427 CurLexerCallback = CLK_TokenLexer;
428 else
429 CurLexerCallback = CLK_Lexer;
430}
431
433 unsigned CompleteLine,
434 unsigned CompleteColumn) {
435 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
436 assert(!CodeCompletionFile && "Already set");
437
438 // Load the actual file's contents.
439 std::optional<llvm::MemoryBufferRef> Buffer =
440 SourceMgr.getMemoryBufferForFileOrNone(File);
441 if (!Buffer)
442 return true;
443
444 // Find the byte position of the truncation point.
445 const char *Position = Buffer->getBufferStart();
446 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
447 for (; *Position; ++Position) {
448 if (*Position != '\r' && *Position != '\n')
449 continue;
450
451 // Eat \r\n or \n\r as a single line.
452 if ((Position[1] == '\r' || Position[1] == '\n') &&
453 Position[0] != Position[1])
454 ++Position;
455 ++Position;
456 break;
457 }
458 }
459
460 Position += CompleteColumn - 1;
461
462 // If pointing inside the preamble, adjust the position at the beginning of
463 // the file after the preamble.
464 if (SkipMainFilePreamble.first &&
465 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
466 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
467 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
468 }
469
470 if (Position > Buffer->getBufferEnd())
471 Position = Buffer->getBufferEnd();
472
473 CodeCompletionFile = File;
474 CodeCompletionOffset = Position - Buffer->getBufferStart();
475
476 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
477 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
478 char *NewBuf = NewBuffer->getBufferStart();
479 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
480 *NewPos = '\0';
481 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
482 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
483
484 return false;
485}
486
488 bool IsAngled) {
490 if (CodeComplete)
491 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
492}
493
496 if (CodeComplete)
497 CodeComplete->CodeCompleteNaturalLanguage();
498}
499
500/// getSpelling - This method is used to get the spelling of a token into a
501/// SmallVector. Note that the returned StringRef may not point to the
502/// supplied buffer if a copy can be avoided.
504 SmallVectorImpl<char> &Buffer,
505 bool *Invalid) const {
506 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
507 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
508 // Try the fast path.
509 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
510 return II->getName();
511 }
512
513 // Resize the buffer if we need to copy into it.
514 if (Tok.needsCleaning())
515 Buffer.resize(Tok.getLength());
516
517 const char *Ptr = Buffer.data();
518 unsigned Len = getSpelling(Tok, Ptr, Invalid);
519 return StringRef(Ptr, Len);
520}
521
522/// CreateString - Plop the specified string into a scratch buffer and return a
523/// location for it. If specified, the source location provides a source
524/// location for the token.
526 SourceLocation ExpansionLocStart,
527 SourceLocation ExpansionLocEnd) {
528 Tok.setLength(Str.size());
529
530 const char *DestPtr;
531 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
532
533 if (ExpansionLocStart.isValid())
534 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
535 ExpansionLocEnd, Str.size());
536 Tok.setLocation(Loc);
537
538 // If this is a raw identifier or a literal token, set the pointer data.
539 if (Tok.is(tok::raw_identifier))
540 Tok.setRawIdentifierData(DestPtr);
541 else if (Tok.isLiteral())
542 Tok.setLiteralData(DestPtr);
543}
544
546 auto &SM = getSourceManager();
547 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
548 FileIDAndOffset LocInfo = SM.getDecomposedLoc(SpellingLoc);
549 bool Invalid = false;
550 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
551 if (Invalid)
552 return SourceLocation();
553
554 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
555 const char *DestPtr;
556 SourceLocation Spelling =
557 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
558 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
559}
560
562 if (!getLangOpts().isCompilingModule())
563 return nullptr;
564
565 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
566}
567
569 if (!getLangOpts().isCompilingModuleImplementation())
570 return nullptr;
571
572 return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName);
573}
574
575//===----------------------------------------------------------------------===//
576// Preprocessor Initialization Methods
577//===----------------------------------------------------------------------===//
578
579/// EnterMainSourceFile - Enter the specified FileID as the main source file,
580/// which implicitly adds the builtin defines etc.
582 // We do not allow the preprocessor to reenter the main file. Doing so will
583 // cause FileID's to accumulate information from both runs (e.g. #line
584 // information) and predefined macros aren't guaranteed to be set properly.
585 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
586 FileID MainFileID = SourceMgr.getMainFileID();
587
588 // If MainFileID is loaded it means we loaded an AST file, no need to enter
589 // a main file.
590 if (!SourceMgr.isLoadedFileID(MainFileID)) {
591 // Enter the main file source buffer.
592 EnterSourceFile(MainFileID, nullptr, SourceLocation());
593
594 // If we've been asked to skip bytes in the main file (e.g., as part of a
595 // precompiled preamble), do so now.
596 if (SkipMainFilePreamble.first > 0)
597 CurLexer->SetByteOffset(SkipMainFilePreamble.first,
598 SkipMainFilePreamble.second);
599
600 // Tell the header info that the main file was entered. If the file is later
601 // #imported, it won't be re-entered.
602 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID))
603 markIncluded(*FE);
604
605 // Record the first PP token in the main file. This is used to generate
606 // better diagnostics for C++ modules.
607 //
608 // // This is a comment.
609 // #define FOO int // note: add 'module;' to the start of the file
610 // ^ FirstPPToken // to introduce a global module fragment.
611 //
612 // export module M; // error: module declaration must occur
613 // // at the start of the translation unit.
614 if (getLangOpts().CPlusPlusModules) {
615 std::optional<StringRef> Input =
617 if (!isPreprocessedModuleFile() && Input)
618 MainFileIsPreprocessedModuleFile =
620 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(*this);
621 DirTracer = Tracer.get();
622 addPPCallbacks(std::move(Tracer));
623 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
624 if (FirstPPTok)
625 FirstPPTokenLoc = FirstPPTok->getLocation();
626 }
627 }
628
629 // Preprocess Predefines to populate the initial preprocessor state.
630 std::unique_ptr<llvm::MemoryBuffer> SB =
631 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
632 assert(SB && "Cannot create predefined source buffer");
633 FileID FID = SourceMgr.createFileID(std::move(SB));
634 assert(FID.isValid() && "Could not create FileID for predefines?");
635 setPredefinesFileID(FID);
636
637 // Start parsing the predefines.
638 EnterSourceFile(FID, nullptr, SourceLocation());
639
640 if (!PPOpts.PCHThroughHeader.empty()) {
641 // Lookup and save the FileID for the through header. If it isn't found
642 // in the search path, it's a fatal error.
644 SourceLocation(), PPOpts.PCHThroughHeader,
645 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
646 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
647 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
648 /*IsFrameworkFound=*/nullptr);
649 if (!File) {
650 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
651 << PPOpts.PCHThroughHeader;
652 return;
653 }
654 setPCHThroughHeaderFileID(
655 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
656 }
657
658 // Skip tokens from the Predefines and if needed the main file.
659 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
660 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
662}
663
664void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
665 assert(PCHThroughHeaderFileID.isInvalid() &&
666 "PCHThroughHeaderFileID already set!");
667 PCHThroughHeaderFileID = FID;
668}
669
671 assert(PCHThroughHeaderFileID.isValid() &&
672 "Invalid PCH through header FileID");
673 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
674}
675
677 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
678 PCHThroughHeaderFileID.isValid();
679}
680
682 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
683 PCHThroughHeaderFileID.isValid();
684}
685
687 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
688}
689
691 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
692}
693
694/// Skip tokens until after the #include of the through header or
695/// until after a #pragma hdrstop is seen. Tokens in the predefines file
696/// and the main file may be skipped. If the end of the predefines file
697/// is reached, skipping continues into the main file. If the end of the
698/// main file is reached, it's a fatal error.
700 bool ReachedMainFileEOF = false;
701 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
702 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
703 Token Tok;
704 while (true) {
705 bool InPredefines =
706 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
707 CurLexerCallback(*this, Tok);
708 if (Tok.is(tok::eof) && !InPredefines) {
709 ReachedMainFileEOF = true;
710 break;
711 }
712 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
713 break;
714 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
715 break;
716 }
717 if (ReachedMainFileEOF) {
718 if (UsingPCHThroughHeader)
719 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
720 << PPOpts.PCHThroughHeader << 1;
721 else if (!PPOpts.PCHWithHdrStopCreate)
722 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
723 }
724}
725
726void Preprocessor::replayPreambleConditionalStack() {
727 // Restore the conditional stack from the preamble, if there is one.
728 if (PreambleConditionalStack.isReplaying()) {
729 assert(CurPPLexer &&
730 "CurPPLexer is null when calling replayPreambleConditionalStack.");
731 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
732 PreambleConditionalStack.doneReplaying();
733 if (PreambleConditionalStack.reachedEOFWhileSkipping())
734 SkipExcludedConditionalBlock(
735 PreambleConditionalStack.SkipInfo->HashTokenLoc,
736 PreambleConditionalStack.SkipInfo->IfTokenLoc,
737 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
738 PreambleConditionalStack.SkipInfo->FoundElse,
739 PreambleConditionalStack.SkipInfo->ElseLoc);
740 }
741}
742
744 // Notify the client that we reached the end of the source file.
745 if (Callbacks)
746 Callbacks->EndOfMainFile();
747}
748
749//===----------------------------------------------------------------------===//
750// Lexer Event Handling.
751//===----------------------------------------------------------------------===//
752
753/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
754/// identifier information for the token and install it into the token,
755/// updating the token kind accordingly.
757 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
758
759 // Look up this token, see if it is a macro, or if it is a language keyword.
760 IdentifierInfo *II;
761 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
762 // No cleaning needed, just use the characters from the lexed buffer.
763 II = getIdentifierInfo(Identifier.getRawIdentifier());
764 } else {
765 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
766 SmallString<64> IdentifierBuffer;
767 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
768
769 if (Identifier.hasUCN()) {
770 SmallString<64> UCNIdentifierBuffer;
771 expandUCNs(UCNIdentifierBuffer, CleanedStr);
772 II = getIdentifierInfo(UCNIdentifierBuffer);
773 } else {
774 II = getIdentifierInfo(CleanedStr);
775 }
776 }
777
778 // Update the token info (identifier info and appropriate token kind).
779 // FIXME: the raw_identifier may contain leading whitespace which is removed
780 // from the cleaned identifier token. The SourceLocation should be updated to
781 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
782 // line continuation before 'B') is parsed as a single tok::raw_identifier and
783 // is cleaned to tok::identifier "B". After cleaning the token's length is
784 // still 3 and the SourceLocation refers to the location of the backslash.
785 Identifier.setIdentifierInfo(II);
786 Identifier.setKind(II->getTokenID());
787
788 return II;
789}
790
792 PoisonReasons[II] = DiagID;
793}
794
796 assert(Ident__exception_code && Ident__exception_info);
797 assert(Ident___exception_code && Ident___exception_info);
798 Ident__exception_code->setIsPoisoned(Poison);
799 Ident___exception_code->setIsPoisoned(Poison);
800 Ident_GetExceptionCode->setIsPoisoned(Poison);
801 Ident__exception_info->setIsPoisoned(Poison);
802 Ident___exception_info->setIsPoisoned(Poison);
803 Ident_GetExceptionInfo->setIsPoisoned(Poison);
804 Ident__abnormal_termination->setIsPoisoned(Poison);
805 Ident___abnormal_termination->setIsPoisoned(Poison);
806 Ident_AbnormalTermination->setIsPoisoned(Poison);
807}
808
810 assert(Identifier.getIdentifierInfo() &&
811 "Can't handle identifiers without identifier info!");
812 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
813 PoisonReasons.find(Identifier.getIdentifierInfo());
814 if(it == PoisonReasons.end())
815 Diag(Identifier, diag::err_pp_used_poisoned_id);
816 else
817 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
818}
819
820void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
821 assert(II.isOutOfDate() && "not out of date");
822 assert(getExternalSource() &&
823 "getExternalSource() should not return nullptr");
825}
826
827/// HandleIdentifier - This callback is invoked when the lexer reads an
828/// identifier. This callback looks up the identifier in the map and/or
829/// potentially macro expands it or turns it into a named token (like 'for').
830///
831/// Note that callers of this method are guarded by checking the
832/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
833/// IdentifierInfo methods that compute these properties will need to change to
834/// match.
836 assert(Identifier.getIdentifierInfo() &&
837 "Can't handle identifiers without identifier info!");
838
839 IdentifierInfo &II = *Identifier.getIdentifierInfo();
840
841 // If the information about this identifier is out of date, update it from
842 // the external source.
843 // We have to treat __VA_ARGS__ in a special way, since it gets
844 // serialized with isPoisoned = true, but our preprocessor may have
845 // unpoisoned it if we're defining a C99 macro.
846 if (II.isOutOfDate()) {
847 bool CurrentIsPoisoned = false;
848 const bool IsSpecialVariadicMacro =
849 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
850 if (IsSpecialVariadicMacro)
851 CurrentIsPoisoned = II.isPoisoned();
852
853 updateOutOfDateIdentifier(II);
854 Identifier.setKind(II.getTokenID());
855
856 if (IsSpecialVariadicMacro)
857 II.setIsPoisoned(CurrentIsPoisoned);
858 }
859
860 // If this identifier was poisoned, and if it was not produced from a macro
861 // expansion, emit an error.
862 if (II.isPoisoned() && CurPPLexer) {
863 HandlePoisonedIdentifier(Identifier);
864 }
865
866 // If this is a macro to be expanded, do it.
867 if (const MacroDefinition MD = getMacroDefinition(&II)) {
868 const auto *MI = MD.getMacroInfo();
869 assert(MI && "macro definition with no macro info?");
870 if (!DisableMacroExpansion) {
871 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
872 // C99 6.10.3p10: If the preprocessing token immediately after the
873 // macro name isn't a '(', this macro should not be expanded.
874 if (!MI->isFunctionLike() || isNextPPTokenOneOf(tok::l_paren))
875 return HandleMacroExpandedIdentifier(Identifier, MD);
876 } else {
877 // C99 6.10.3.4p2 says that a disabled macro may never again be
878 // expanded, even if it's in a context where it could be expanded in the
879 // future.
880 Identifier.setFlag(Token::DisableExpand);
881 if (MI->isObjectLike() || isNextPPTokenOneOf(tok::l_paren))
882 Diag(Identifier, diag::pp_disabled_macro_expansion);
883 }
884 }
885 }
886
887 // If this identifier is a keyword in a newer Standard or proposed Standard,
888 // produce a warning. Don't warn if we're not considering macro expansion,
889 // since this identifier might be the name of a macro.
890 // FIXME: This warning is disabled in cases where it shouldn't be, like
891 // "#define constexpr constexpr", "int constexpr;"
892 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
893 Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts()))
894 << II.getName();
895 // Don't diagnose this keyword again in this translation unit.
896 II.setIsFutureCompatKeyword(false);
897 }
898
899 // If this identifier would be a keyword in C++, diagnose as a compatibility
900 // issue.
901 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
902 Diag(Identifier, diag::warn_pp_identifier_is_cpp_keyword) << &II;
903
904 // If this is an extension token, diagnose its use.
905 // We avoid diagnosing tokens that originate from macro definitions.
906 // FIXME: This warning is disabled in cases where it shouldn't be,
907 // like "#define TY typeof", "TY(1) x".
908 if (II.isExtensionToken() && !DisableMacroExpansion)
909 Diag(Identifier, diag::ext_token_used);
910
911 // Handle module contextual keywords.
912 if (getLangOpts().CPlusPlusModules && CurLexer &&
913 !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() &&
914 !CurLexer->ParsingPreprocessorDirective &&
915 Identifier.isModuleContextualKeyword() &&
916 HandleModuleContextualKeyword(Identifier)) {
917 HandleDirective(Identifier);
918 // With a fatal failure in the module loader, we abort parsing.
920 }
921
922 return true;
923}
924
926 ++LexLevel;
927
928 // We loop here until a lex function returns a token; this avoids recursion.
929 while (!CurLexerCallback(*this, Result))
930 ;
931
932 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
933 return;
934
935 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
936 // Remember the identifier before code completion token.
937 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
938 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
939 // Set IdentifierInfo to null to avoid confusing code that handles both
940 // identifiers and completion tokens.
941 Result.setIdentifierInfo(nullptr);
942 }
943
944 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
945 // if this token is being produced as a result of phase 4 of translation.
946 // Update TrackGMFState to decide if we are currently in a Global Module
947 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
948 // depends on the prevailing StdCXXImportSeq state in two cases.
949 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
950 !Result.getFlag(Token::IsReinjected)) {
951 switch (Result.getKind()) {
952 case tok::l_paren: case tok::l_square: case tok::l_brace:
953 StdCXXImportSeqState.handleOpenBracket();
954 break;
955 case tok::r_paren: case tok::r_square:
956 StdCXXImportSeqState.handleCloseBracket();
957 break;
958 case tok::r_brace:
959 StdCXXImportSeqState.handleCloseBrace();
960 break;
961#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
962// For `#pragma ...` mimic ';'.
963#include "clang/Basic/TokenKinds.def"
964#undef PRAGMA_ANNOTATION
965 // This token is injected to represent the translation of '#include "a.h"'
966 // into "import a.h;". Mimic the notional ';'.
967 case tok::annot_module_include:
968 case tok::annot_repl_input_end:
969 case tok::semi:
970 TrackGMFState.handleSemi();
971 StdCXXImportSeqState.handleSemi();
972 ModuleDeclState.handleSemi();
973 break;
974 case tok::header_name:
975 case tok::annot_header_unit:
976 StdCXXImportSeqState.handleHeaderName();
977 break;
978 case tok::kw_export:
981 TrackGMFState.handleExport();
982 StdCXXImportSeqState.handleExport();
983 ModuleDeclState.handleExport();
984 break;
985 case tok::colon:
986 ModuleDeclState.handleColon();
987 break;
988 case tok::kw_import:
989 if (StdCXXImportSeqState.atTopLevel()) {
990 TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
991 StdCXXImportSeqState.handleImport();
992 }
993 break;
994 case tok::kw_module:
995 if (StdCXXImportSeqState.atTopLevel()) {
998 TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
999 ModuleDeclState.handleModule();
1000 }
1001 break;
1002 case tok::annot_module_name:
1003 ModuleDeclState.handleModuleName(
1004 static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
1005 if (ModuleDeclState.isModuleCandidate())
1006 break;
1007 [[fallthrough]];
1008 default:
1009 TrackGMFState.handleMisc();
1010 StdCXXImportSeqState.handleMisc();
1011 ModuleDeclState.handleMisc();
1012 break;
1013 }
1014 }
1015
1016 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
1017 CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr);
1018 CheckPointCounter = 0;
1019 }
1020
1021 if (Result.isNot(tok::kw_export))
1022 LastExportKeyword.startToken();
1023
1024 --LexLevel;
1025
1026 // Destroy any lexers that were deferred while we were in nested Lex calls.
1027 // This must happen after decrementing LexLevel but before any other
1028 // processing that might re-enter Lex.
1029 if (LexLevel == 0 && !PendingDestroyLexers.empty())
1030 PendingDestroyLexers.clear();
1031
1032 if ((LexLevel == 0 || PreprocessToken) &&
1033 !Result.getFlag(Token::IsReinjected)) {
1034 if (LexLevel == 0)
1035 ++TokenCount;
1036 if (OnToken)
1037 OnToken(Result);
1038 }
1039}
1040
1041void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1042 while (1) {
1043 Token Tok;
1044 Lex(Tok);
1045 if (Tok.isOneOf(tok::unknown, tok::eof, tok::eod,
1046 tok::annot_repl_input_end))
1047 break;
1048 if (Tokens != nullptr)
1049 Tokens->push_back(Tok);
1050 }
1051}
1052
1053/// Lex a header-name token (including one formed from header-name-tokens if
1054/// \p AllowMacroExpansion is \c true).
1055///
1056/// \param FilenameTok Filled in with the next token. On success, this will
1057/// be a header_name token. On failure, it will be whatever other
1058/// token was found instead.
1059/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1060/// by macro expansion (concatenating tokens as necessary if the first
1061/// token is a '<').
1062/// \return \c true if we reached EOD or EOF while looking for a > token in
1063/// a concatenated header name and diagnosed it. \c false otherwise.
1064bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1065 // Lex using header-name tokenization rules if tokens are being lexed from
1066 // a file. Just grab a token normally if we're in a macro expansion.
1067 if (CurPPLexer) {
1068 // Avoid nested header-name lexing when macro expansion recurses
1069 // (e.g. __has_include(__has_include)).
1070 if (CurPPLexer->ParsingFilename)
1071 LexUnexpandedToken(FilenameTok);
1072 else
1073 CurPPLexer->LexIncludeFilename(FilenameTok);
1074 } else {
1075 Lex(FilenameTok);
1076 }
1077
1078 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1079 // case, glue the tokens together into a single header_name token.
1080 SmallString<128> FilenameBuffer;
1081 if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
1082 bool StartOfLine = FilenameTok.isAtStartOfLine();
1083 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1084 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1085
1086 SourceLocation Start = FilenameTok.getLocation();
1087 SourceLocation End;
1088 FilenameBuffer.push_back('<');
1089
1090 // Consume tokens until we find a '>'.
1091 // FIXME: A header-name could be formed starting or ending with an
1092 // alternative token. It's not clear whether that's ill-formed in all
1093 // cases.
1094 while (FilenameTok.isNot(tok::greater)) {
1095 Lex(FilenameTok);
1096 if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1097 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1098 Diag(Start, diag::note_matching) << tok::less;
1099 return true;
1100 }
1101
1102 End = FilenameTok.getLocation();
1103
1104 // FIXME: Provide code completion for #includes.
1105 if (FilenameTok.is(tok::code_completion)) {
1107 Lex(FilenameTok);
1108 continue;
1109 }
1110
1111 // Append the spelling of this token to the buffer. If there was a space
1112 // before it, add it now.
1113 if (FilenameTok.hasLeadingSpace())
1114 FilenameBuffer.push_back(' ');
1115
1116 // Get the spelling of the token, directly into FilenameBuffer if
1117 // possible.
1118 size_t PreAppendSize = FilenameBuffer.size();
1119 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1120
1121 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1122 unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1123
1124 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1125 if (BufPtr != &FilenameBuffer[PreAppendSize])
1126 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1127
1128 // Resize FilenameBuffer to the correct size.
1129 if (FilenameTok.getLength() != ActualLen)
1130 FilenameBuffer.resize(PreAppendSize + ActualLen);
1131 }
1132
1133 FilenameTok.startToken();
1134 FilenameTok.setKind(tok::header_name);
1135 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1136 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1137 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1138 CreateString(FilenameBuffer, FilenameTok, Start, End);
1139 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1140 // Convert a string-literal token of the form " h-char-sequence "
1141 // (produced by macro expansion) into a header-name token.
1142 //
1143 // The rules for header-names don't quite match the rules for
1144 // string-literals, but all the places where they differ result in
1145 // undefined behavior, so we can and do treat them the same.
1146 //
1147 // A string-literal with a prefix or suffix is not translated into a
1148 // header-name. This could theoretically be observable via the C++20
1149 // context-sensitive header-name formation rules.
1150 StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1151 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1152 FilenameTok.setKind(tok::header_name);
1153 }
1154
1155 return false;
1156}
1157
1158std::optional<Token> Preprocessor::peekNextPPToken() const {
1159 // Do some quick tests for rejection cases.
1160 std::optional<Token> Val;
1161 if (CurLexer)
1162 Val = CurLexer->peekNextPPToken();
1163 else
1164 Val = CurTokenLexer->peekNextPPToken();
1165
1166 if (!Val) {
1167 // We have run off the end. If it's a source file we don't
1168 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1169 // macro stack.
1170 if (CurPPLexer)
1171 return std::nullopt;
1172 for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
1173 if (Entry.TheLexer)
1174 Val = Entry.TheLexer->peekNextPPToken();
1175 else
1176 Val = Entry.TheTokenLexer->peekNextPPToken();
1177
1178 if (Val)
1179 break;
1180
1181 // Ran off the end of a source file?
1182 if (Entry.ThePPLexer)
1183 return std::nullopt;
1184 }
1185 }
1186
1187 // Okay, we found the token and return. Otherwise we found the end of the
1188 // translation unit.
1189 return Val;
1190}
1191
1192// We represent the primary and partition names as 'Paths' which are sections
1193// of the hierarchical access path for a clang module. However for C++20
1194// the periods in a name are just another character, and we will need to
1195// flatten them into a string.
1197 std::string Name;
1198 if (Path.empty())
1199 return Name;
1200
1201 for (auto &Piece : Path) {
1202 assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1203 if (!Name.empty())
1204 Name += ".";
1205 Name += Piece.getIdentifierInfo()->getName();
1206 }
1207 return Name;
1208}
1209
1211 assert(!Path.empty() && "expect at least one identifier in a module name");
1212 void *Mem = PP.getPreprocessorAllocator().Allocate(
1213 totalSizeToAlloc<IdentifierLoc>(Path.size()), alignof(ModuleNameLoc));
1214 return new (Mem) ModuleNameLoc(Path);
1215}
1216
1218 SmallVectorImpl<Token> &Suffix,
1220 bool AllowMacroExpansion,
1221 bool IsPartition) {
1222 auto ConsumeToken = [&]() {
1223 if (AllowMacroExpansion)
1224 Lex(Tok);
1225 else
1227 Suffix.push_back(Tok);
1228 };
1229
1230 while (true) {
1231 if (Tok.isNot(tok::identifier)) {
1232 if (Tok.is(tok::code_completion)) {
1233 CurLexer->cutOffLexing();
1234 CodeComplete->CodeCompleteModuleImport(UseLoc, Path);
1235 return true;
1236 }
1237
1238 Diag(Tok, diag::err_pp_module_expected_ident) << Path.empty();
1239 return true;
1240 }
1241
1242 // [cpp.pre]/p2:
1243 // No identifier in the pp-module-name or pp-module-partition shall
1244 // currently be defined as an object-like macro.
1245 if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo());
1246 MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1247 !AllowMacroExpansion) {
1248 Diag(Tok, diag::err_pp_module_name_is_macro)
1249 << IsPartition << Tok.getIdentifierInfo();
1250 Diag(MI->getDefinitionLoc(), diag::note_macro_here)
1251 << Tok.getIdentifierInfo();
1252 }
1253
1254 // Record this part of the module path.
1255 Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo());
1256 ConsumeToken();
1257
1258 if (Tok.isNot(tok::period))
1259 return false;
1260
1261 ConsumeToken();
1262 }
1263}
1264
1265bool Preprocessor::HandleModuleName(StringRef DirType, SourceLocation UseLoc,
1266 Token &Tok,
1268 SmallVectorImpl<Token> &DirToks,
1269 bool AllowMacroExpansion,
1270 bool IsPartition) {
1271 bool LeadingSpace = Tok.hasLeadingSpace();
1272 unsigned NumToksInDirective = DirToks.size();
1273 if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path, AllowMacroExpansion,
1274 IsPartition)) {
1275 if (Tok.isNot(tok::eod))
1276 CheckEndOfDirective(DirType,
1277 /*EnableMacros=*/false, &DirToks);
1279 return true;
1280 }
1281
1282 // Clean the module-name tokens and replace these tokens with
1283 // annot_module_name.
1284 DirToks.resize(NumToksInDirective);
1285 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path);
1286 DirToks.emplace_back();
1287 DirToks.back().setKind(tok::annot_module_name);
1288 DirToks.back().setAnnotationRange(NameLoc->getRange());
1289 DirToks.back().setAnnotationValue(static_cast<void *>(NameLoc));
1290 DirToks.back().setFlagValue(Token::LeadingSpace, LeadingSpace);
1291 DirToks.push_back(Tok);
1292 return false;
1293}
1294
1295/// [cpp.pre]/p2:
1296/// A preprocessing directive consists of a sequence of preprocessing tokens
1297/// that satisfies the following constraints: At the start of translation phase
1298/// 4, the first preprocessing token in the sequence, referred to as a
1299/// directive-introducing token, begins with the first character in the source
1300/// file (optionally after whitespace containing no new-line characters) or
1301/// follows whitespace containing at least one new-line character, and is:
1302/// - a # preprocessing token, or
1303/// - an import preprocessing token immediately followed on the same logical
1304/// source line by a header-name, <, identifier, or : preprocessing token, or
1305/// - a module preprocessing token immediately followed on the same logical
1306/// source line by an identifier, :, or ; preprocessing token, or
1307/// - an export preprocessing token immediately followed on the same logical
1308/// source line by one of the two preceding forms.
1309///
1310///
1311/// At the start of phase 4 an import or module token is treated as starting a
1312/// directive and are converted to their respective keywords iff:
1313/// - After skipping horizontal whitespace are
1314/// - at the start of a logical line, or
1315/// - preceded by an 'export' at the start of the logical line.
1316/// - Are followed by an identifier pp token (before macro expansion), or
1317/// - <, ", or : (but not ::) pp tokens for 'import', or
1318/// - ; for 'module'
1319/// Otherwise the token is treated as an identifier.
1321 if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
1322 return false;
1323
1324 if (Result.is(tok::kw_export)) {
1325 LastExportKeyword = Result;
1326 return false;
1327 }
1328
1329 /// Treat 'module' and 'import' as identifiers when the main file is a
1330 /// preprocessed module file. We only allow '__preprocessed_module' and
1331 /// '__preprocessed_import' in this context.
1332 IdentifierInfo *II = Result.getIdentifierInfo();
1334 (II->isStr(tok::getKeywordSpelling(tok::kw_import)) ||
1335 II->isStr(tok::getKeywordSpelling(tok::kw_module))))
1336 return false;
1337
1338 if (LastExportKeyword.is(tok::kw_export)) {
1339 // The export keyword was not at the start of line, it's not a
1340 // directive-introducing token.
1341 if (!LastExportKeyword.isAtPhysicalStartOfLine())
1342 return false;
1343 // [cpp.pre]/1.4
1344 // export // not a preprocessing directive
1345 // import foo; // preprocessing directive (ill-formed at phase7)
1346 if (Result.isAtPhysicalStartOfLine())
1347 return false;
1348 } else if (!Result.isAtPhysicalStartOfLine())
1349 return false;
1350
1351 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1352 CurPPLexer->ParsingPreprocessorDirective, true);
1353
1354 // The next token may be an angled string literal after import keyword.
1355 llvm::SaveAndRestore<bool> SavedParsingFilemame(
1356 CurPPLexer->ParsingFilename,
1357 Result.getIdentifierInfo()->isImportKeyword());
1358
1359 std::optional<Token> NextTok = peekNextPPToken();
1360 if (!NextTok)
1361 return false;
1362
1363 if (NextTok->is(tok::raw_identifier))
1364 LookUpIdentifierInfo(*NextTok);
1365
1366 if (Result.getIdentifierInfo()->isImportKeyword()) {
1367 if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon,
1368 tok::header_name)) {
1369 Result.setKind(tok::kw_import);
1370 ModuleImportLoc = Result.getLocation();
1371 return true;
1372 }
1373 }
1374
1375 if (Result.getIdentifierInfo()->isModuleKeyword() &&
1376 NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
1377 Result.setKind(tok::kw_module);
1378 ModuleDeclLoc = Result.getLocation();
1379 return true;
1380 }
1381
1382 // Ok, it's an identifier.
1383 return false;
1384}
1385
1387 SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1390 return false;
1391}
1392
1393/// Collect the tokens of a C++20 pp-import-suffix.
1395 bool StopUntilEOD) {
1396 while (true) {
1397 Toks.emplace_back();
1398 Lex(Toks.back());
1399
1400 switch (Toks.back().getKind()) {
1401 case tok::semi:
1402 if (!StopUntilEOD)
1403 return;
1404 [[fallthrough]];
1405 case tok::eod:
1406 case tok::eof:
1407 return;
1408 default:
1409 break;
1410 }
1411 }
1412}
1413
1414// Allocate a holding buffer for a sequence of tokens and introduce it into
1415// the token stream.
1417 if (Toks.empty())
1418 return;
1419 auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1420 std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1421 EnterTokenStream(std::move(ToksCopy), Toks.size(),
1422 /*DisableMacroExpansion*/ false, /*IsReinject*/ false);
1423 assert(CurTokenLexer && "Must have a TokenLexer");
1424 CurTokenLexer->setLexingCXXModuleDirective();
1425}
1426
1428 bool IncludeExports) {
1429 CurSubmoduleState->VisibleModules.setVisible(
1430 M, Loc, IncludeExports, [](Module *) {},
1431 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1432 // FIXME: Include the path in the diagnostic.
1433 // FIXME: Include the import location for the conflicting module.
1434 Diag(ModuleImportLoc, diag::warn_module_conflict)
1435 << Path[0]->getFullModuleName()
1436 << Conflict->getFullModuleName()
1437 << Message;
1438 });
1439
1440 // Add this module to the imports list of the currently-built submodule.
1441 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1442 BuildingSubmoduleStack.back().M->Imports.push_back(M);
1443}
1444
1446 const char *DiagnosticTag,
1447 bool AllowMacroExpansion) {
1448 // We need at least one string literal.
1449 if (Result.isNot(tok::string_literal)) {
1450 Diag(Result, diag::err_expected_string_literal)
1451 << /*Source='in...'*/0 << DiagnosticTag;
1452 return false;
1453 }
1454
1455 // Lex string literal tokens, optionally with macro expansion.
1456 SmallVector<Token, 4> StrToks;
1457 do {
1458 StrToks.push_back(Result);
1459
1460 if (Result.hasUDSuffix())
1461 Diag(Result, diag::err_invalid_string_udl);
1462
1463 if (AllowMacroExpansion)
1464 Lex(Result);
1465 else
1467 } while (Result.is(tok::string_literal));
1468
1469 // Concatenate and parse the strings.
1470 StringLiteralParser Literal(StrToks, *this);
1471 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1472
1473 if (Literal.hadError)
1474 return false;
1475
1476 if (Literal.Pascal) {
1477 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1478 << /*Source='in...'*/0 << DiagnosticTag;
1479 return false;
1480 }
1481
1482 String = std::string(Literal.GetString());
1483 return true;
1484}
1485
1487 assert(Tok.is(tok::numeric_constant));
1488 SmallString<8> IntegerBuffer;
1489 bool NumberInvalid = false;
1490 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1491 if (NumberInvalid)
1492 return false;
1493 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1495 getDiagnostics());
1496 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1497 return false;
1498 llvm::APInt APVal(64, 0);
1499 if (Literal.GetIntegerValue(APVal))
1500 return false;
1501 Lex(Tok);
1502 Value = APVal.getLimitedValue();
1503 return true;
1504}
1505
1507 assert(Handler && "NULL comment handler");
1508 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1509 "Comment handler already registered");
1510 CommentHandlers.push_back(Handler);
1511}
1512
1514 std::vector<CommentHandler *>::iterator Pos =
1515 llvm::find(CommentHandlers, Handler);
1516 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1517 CommentHandlers.erase(Pos);
1518}
1519
1521 bool AnyPendingTokens = false;
1522 for (CommentHandler *H : CommentHandlers) {
1523 if (H->HandleComment(*this, Comment))
1524 AnyPendingTokens = true;
1525 }
1526 if (!AnyPendingTokens || getCommentRetentionState())
1527 return false;
1528 Lex(result);
1529 return true;
1530}
1531
1532void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1533 const MacroAnnotations &A =
1535 assert(A.DeprecationInfo &&
1536 "Macro deprecation warning without recorded annotation!");
1537 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1538 if (Info.Message.empty())
1539 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1540 << Identifier.getIdentifierInfo() << 0;
1541 else
1542 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1543 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1544 Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
1545}
1546
1547void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1548 const MacroAnnotations &A =
1550 assert(A.RestrictExpansionInfo &&
1551 "Macro restricted expansion warning without recorded annotation!");
1552 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1553 if (Info.Message.empty())
1554 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1555 << Identifier.getIdentifierInfo() << 0;
1556 else
1557 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1558 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1559 Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
1560}
1561
1562void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1563 unsigned DiagSelection) const {
1564 Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1565}
1566
1567void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1568 bool IsUndef) const {
1569 const MacroAnnotations &A =
1571 assert(A.FinalAnnotationLoc &&
1572 "Final macro warning without recorded annotation!");
1573
1574 Diag(Identifier, diag::warn_pragma_final_macro)
1575 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1576 Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
1577}
1578
1580 const SourceLocation &Loc) const {
1581 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1582 // region map:
1583 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1584 const SourceLocation &Loc) -> bool {
1585 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1586 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1587 Map, [&SourceMgr,
1588 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1589 return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1590 });
1591
1592 if (FirstRegionEndingAfterLoc != Map.end()) {
1593 // To test if the start location of the found region precedes `Loc`:
1594 return SourceMgr.isBeforeInTranslationUnit(
1595 FirstRegionEndingAfterLoc->first, Loc);
1596 }
1597 // If we do not find a region whose end location passes `Loc`, we want to
1598 // check if the current region is still open:
1599 if (!Map.empty() && Map.back().first == Map.back().second)
1600 return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
1601 return false;
1602 };
1603
1604 // What the following does:
1605 //
1606 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1607 // Otherwise, `Loc` is from a loaded AST. We look up the
1608 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1609 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1610 // region w.r.t. the region map. If the region map is absent, it means there
1611 // is no opt-out pragma in that loaded AST.
1612 //
1613 // Opt-out pragmas in the local TU or a loaded AST are not visible to another
1614 // one of them. That means if you put the pragmas around a `#include
1615 // "module.h"`, where module.h is a module, it is not actually suppressing
1616 // warnings in module.h. This is fine because warnings in module.h will be
1617 // reported when module.h is compiled in isolation and nothing in module.h
1618 // will be analyzed ever again. So you will not see warnings from the file
1619 // that imports module.h anyway. And you can't even do the same thing for PCHs
1620 // because they can only be included from the command line.
1621
1622 if (SourceMgr.isLocalSourceLocation(Loc))
1623 return TestInMap(SafeBufferOptOutMap, Loc);
1624
1625 const SafeBufferOptOutRegionsTy *LoadedRegions =
1626 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
1627
1628 if (LoadedRegions)
1629 return TestInMap(*LoadedRegions, Loc);
1630 return false;
1631}
1632
1634 bool isEnter, const SourceLocation &Loc) {
1635 if (isEnter) {
1637 return true; // invalid enter action
1638 InSafeBufferOptOutRegion = true;
1639 CurrentSafeBufferOptOutStart = Loc;
1640
1641 // To set the start location of a new region:
1642
1643 if (!SafeBufferOptOutMap.empty()) {
1644 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1645 assert(PrevRegion->first != PrevRegion->second &&
1646 "Shall not begin a safe buffer opt-out region before closing the "
1647 "previous one.");
1648 }
1649 // If the start location equals the end location, we call the region an
1650 // open region or an unclosed region (i.e., end location has not been set
1651 // yet).
1652 SafeBufferOptOutMap.emplace_back(Loc, Loc);
1653 } else {
1655 return true; // invalid exit action
1656 InSafeBufferOptOutRegion = false;
1657
1658 // To set the end location of the current open region:
1659
1660 assert(!SafeBufferOptOutMap.empty() &&
1661 "Misordered safe buffer opt-out regions");
1662 auto *CurrRegion = &SafeBufferOptOutMap.back();
1663 assert(CurrRegion->first == CurrRegion->second &&
1664 "Set end location to a closed safe buffer opt-out region");
1665 CurrRegion->second = Loc;
1666 }
1667 return false;
1668}
1669
1671 return InSafeBufferOptOutRegion;
1672}
1674 StartLoc = CurrentSafeBufferOptOutStart;
1675 return InSafeBufferOptOutRegion;
1676}
1677
1680 assert(!InSafeBufferOptOutRegion &&
1681 "Attempt to serialize safe buffer opt-out regions before file being "
1682 "completely preprocessed");
1683
1685
1686 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1687 SrcSeq.push_back(begin);
1688 SrcSeq.push_back(end);
1689 }
1690 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1691 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1692 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1693 // It means that for each loading pch/module m, it just needs to load m's own
1694 // `SafeBufferOptOutMap`.
1695 return SrcSeq;
1696}
1697
1699 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1700 if (SourceLocations.size() == 0)
1701 return false;
1702
1703 assert(SourceLocations.size() % 2 == 0 &&
1704 "ill-formed SourceLocation sequence");
1705
1706 auto It = SourceLocations.begin();
1707 SafeBufferOptOutRegionsTy &Regions =
1708 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
1709
1710 do {
1711 SourceLocation Begin = *It++;
1712 SourceLocation End = *It++;
1713
1714 Regions.emplace_back(Begin, End);
1715 } while (It != SourceLocations.end());
1716 return true;
1717}
1718
// Out-of-line definition of ModuleLoader's destructor, explicitly defaulted.
1719ModuleLoader::~ModuleLoader() = default;
1720
1722
1724
1726
1728 if (Record)
1729 return;
1730
1731 Record = new PreprocessingRecord(getSourceManager());
1732 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1733}
1734
1736 auto IsPreserved = [&](PPCallbacks *C) {
1737 return C == Record || C == DirTracer;
1738 };
1740 PPCallbacks::releaseIfPreserved(Callbacks, IsPreserved, Released);
1741 Callbacks.reset();
1742 for (auto *P : Released)
1743 addPPCallbacks(std::unique_ptr<PPCallbacks>(P));
1744}
1745
1746const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1747 if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) {
1748 const SmallVector<const char *> &FileCheckPoints = It->second;
1749 auto P = llvm::upper_bound(FileCheckPoints, Start);
1750 if (P == FileCheckPoints.begin())
1751 return nullptr;
1752 return *std::prev(P);
1753 }
1754 return nullptr;
1755}
1756
1758 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1759}
1760
1762 return SeenNoTrivialPPDirective;
1763}
1764
1765void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1766 if (InMainFile && !SeenNoTrivialPPDirective)
1767 SeenNoTrivialPPDirective = true;
1768}
1769
1771 FileID FID, LexedFileChangeReason Reason,
1772 SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1773 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1774}
1775
1777 const MacroDefinition &MD,
1778 SourceRange Range,
1779 const MacroArgs *Args) {
1780 // FIXME: Does only enable builtin macro expansion make sense?
1781 if (!MD.getMacroInfo()->isBuiltinMacro())
1782 setSeenNoTrivialPPDirective();
1783}
Defines enum values for all the target-independent builtin functions.
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::FileManager interface and associated types.
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Token Tok
The Token.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Defines the PreprocessorLexer interface.
static bool MacroDefinitionEquals(const MacroInfo *MI, ArrayRef< TokenValue > Tokens)
Compares macro tokens with a specified token value sequence.
static constexpr unsigned CheckPointStepSize
Minimum distance between two check points, in tokens.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Abstract base class that describes a handler that will receive source ranges for each of the comments...
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:233
virtual void updateOutOfDateIdentifier(const IdentifierInfo &II)=0
Update an out-of-date identifier.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition FileEntry.h:273
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isValid() const
bool isInvalid() const
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
Module * lookupModule(StringRef ModuleName, SourceLocation ImportLoc=SourceLocation(), bool AllowSearch=true, bool AllowExtraModuleMapSearch=false)
Look up a module: search for a module with the given name.
Provides lookups to, and iteration over, IdentifierInfo objects.
One of these records is kept for each identifier that is lexed.
bool IsKeywordInCPlusPlus() const
Return true if this identifier would be a keyword in C++ mode.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
void setIsPoisoned(bool Value=true)
setIsPoisoned - Mark this identifier as poisoned.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
void setIsFutureCompatKeyword(bool Val)
StringRef getName() const
Return the actual identifier string.
bool isFutureCompatKeyword() const
is/setIsFutureCompatKeyword - Initialize information about whether or not this language token is a ke...
bool isExtensionToken() const
get/setExtension - Initialize information about whether or not this language token is an extension.
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:596
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition MacroInfo.h:612
SourceLocation getLocation() const
Definition MacroInfo.h:489
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:40
const_tokens_iterator tokens_begin() const
Definition MacroInfo.h:245
unsigned getNumTokens() const
Return the number of tokens that this macro expands to.
Definition MacroInfo.h:236
const Token & getReplacementToken(unsigned Tok) const
Definition MacroInfo.h:238
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition MacroInfo.h:218
bool isObjectLike() const
Definition MacroInfo.h:203
Abstract interface for a module loader.
virtual ~ModuleLoader()
static std::string getFlatNameFromPath(ModuleIdPath Path)
Represents a macro directive exported by a module.
Definition MacroInfo.h:515
static ModuleNameLoc * Create(Preprocessor &PP, ModuleIdPath Path)
SourceRange getRange() const
Describes a module or submodule.
Definition Module.h:340
void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
NumericLiteralParser - This performs strict semantic analysis of the content of a ppnumber,...
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
static void releaseIfPreserved(std::unique_ptr< PPCallbacks > &CB, llvm::function_ref< bool(PPCallbacks *)> Pred, SmallVectorImpl< PPCallbacks * > &Released)
Walk the subtree rooted at CB (recursing into descendants first), then check CB itself.
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
void setConditionalLevels(ArrayRef< PPConditionalInfo > CL)
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
std::string PCHThroughHeader
If non-empty, the filename used in an include directive in the primary source file (or command-line p...
bool GeneratePreamble
True indicates that a preamble is being generated.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool markIncluded(FileEntryRef File)
Mark the file as included.
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void EnterModuleSuffixTokenStream(ArrayRef< Token > Toks)
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
friend class MacroArgs
void DumpMacro(const MacroInfo &MI) const
llvm::iterator_range< macro_iterator > macros(bool IncludeExternalMacros=true) const
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void Lex(Token &Result)
Lex the next token for this preprocessor.
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
bool isPreprocessedModuleFile() const
Whether the main file is preprocessed module file.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros=false, SmallVectorImpl< Token > *ExtraToks=nullptr)
Ensure that the next token is a tok::eod token.
bool getCommentRetentionState() const
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
bool hadModuleLoaderFatalFailure() const
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
bool HandleModuleContextualKeyword(Token &Result)
Callback invoked when the lexer sees one of export, import or module token at the start of a line.
const TargetInfo & getTargetInfo() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a pragma hdrstop.
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
llvm::BumpPtrAllocator & getPreprocessorAllocator()
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
HeaderSearch & getHeaderSearchInfo() const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
ExternalPreprocessorSource * getExternalSource() const
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, SmallVectorImpl< Token > &Suffix, SmallVectorImpl< IdentifierLoc > &Path, bool AllowMacroExpansion, bool IsPartition)
const LangOptions & getLangOpts() const
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
void PoisonSEHIdentifiers(bool Poison=true)
size_t getTotalMemory() const
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool isNextPPTokenOneOf(Ts... Ks) const
isNextPPTokenOneOf - Check whether the next pp-token is one of the specified token kinds.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
void CollectPPImportSuffix(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Collect the tokens of a C++20 pp-import-suffix.
DiagnosticsEngine & getDiagnostics() const
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok, SmallVectorImpl< IdentifierLoc > &Path, SmallVectorImpl< Token > &DirToks, bool AllowMacroExpansion, bool IsPartition)
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the include of the through header or until after a pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
unsigned getLine() const
Return the presumed line number of this location.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
std::optional< StringRef > getBufferDataOrNone(FileID FID) const
Return a StringRef to the source buffer data for the specified FileID, returning std::nullopt if inva...
A trivial tuple used to represent a source range.
StringLiteralParser - This decodes string escape characters and performs wide string analysis and Tra...
Exposes information about the current target.
Definition TargetInfo.h:227
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition Token.h:324
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
unsigned getLength() const
Definition Token.h:145
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition Token.h:298
void setKind(tok::TokenKind K)
Definition Token.h:100
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:104
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition Token.h:286
bool isOneOf(Ts... Ks) const
Definition Token.h:105
@ DisableExpand
Definition Token.h:79
@ HasSeenNoTrivialPPDirective
Definition Token.h:92
@ IsReinjected
Definition Token.h:89
@ LeadingEmptyMacro
Definition Token.h:81
@ LeadingSpace
Definition Token.h:77
@ StartOfLine
Definition Token.h:75
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
Definition Lexer.cpp:76
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition Token.h:294
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition Token.h:317
bool isNot(tok::TokenKind K) const
Definition Token.h:111
void startToken()
Reset all flags to cleared.
Definition Token.h:187
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition Token.h:313
void setIdentifierInfo(IdentifierInfo *II)
Definition Token.h:206
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition Token.h:277
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition Token.h:223
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition Token.h:254
Defines the clang::TargetInfo interface.
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
const char * getKeywordSpelling(TokenKind Kind) LLVM_READNONE
Determines the spelling of simple keyword and contextual keyword tokens like 'int' and 'dynamic_cast'...
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:196
@ CPlusPlus20
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
std::pair< FileID, unsigned > FileIDAndOffset
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
bool isPreprocessedModuleFile(StringRef Source)
Scan an input source buffer, and check whether the input source is a preprocessed output.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
TranslationUnitKind
Describes the kind of translation unit being processed.
@ TU_Prefix
The translation unit is a prefix to a translation unit, and is not complete.
#define true
Definition stdbool.h:25