clang 23.0.0git
Preprocessor.cpp
Go to the documentation of this file.
1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
31#include "clang/Basic/LLVM.h"
33#include "clang/Basic/Module.h"
42#include "clang/Lex/Lexer.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
48#include "clang/Lex/Pragma.h"
53#include "clang/Lex/Token.h"
55#include "llvm/ADT/APInt.h"
56#include "llvm/ADT/ArrayRef.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/ScopeExit.h"
60#include "llvm/ADT/SmallVector.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/FormatVariadic.h"
65#include "llvm/Support/MemoryBuffer.h"
66#include "llvm/Support/MemoryBufferRef.h"
67#include "llvm/Support/SaveAndRestore.h"
68#include "llvm/Support/raw_ostream.h"
69#include <algorithm>
70#include <cassert>
71#include <memory>
72#include <optional>
73#include <string>
74#include <utility>
75#include <vector>
76
77using namespace clang;
78
79/// Minimum distance between two check points, in tokens.
80static constexpr unsigned CheckPointStepSize = 1024;
81
82LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
83
85
87 DiagnosticsEngine &diags, const LangOptions &opts,
88 SourceManager &SM, HeaderSearch &Headers,
89 ModuleLoader &TheModuleLoader,
90 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
92 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
93 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
94 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
95 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
96 // As the language options may have not been loaded yet (when
97 // deserializing an ASTUnit), adding keywords to the identifier table is
98 // deferred to Preprocessor::Initialize().
99 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
100 TUKind(TUKind), SkipMainFilePreamble(0, true),
101 CurSubmoduleState(&NullSubmoduleState) {
102 OwnsHeaderSearch = OwnsHeaders;
103
104 // Default to discarding comments.
105 KeepComments = false;
106 KeepMacroComments = false;
107 SuppressIncludeNotFoundError = false;
108
109 // Macro expansion is enabled.
110 DisableMacroExpansion = false;
111 MacroExpansionInDirectivesOverride = false;
112 InMacroArgs = false;
113 ArgMacro = nullptr;
114 InMacroArgPreExpansion = false;
115 NumCachedTokenLexers = 0;
116 PragmasEnabled = true;
117 ParsingIfOrElifDirective = false;
118 PreprocessedOutput = false;
119
120 // We haven't read anything from the external source.
121 ReadMacrosFromExternalSource = false;
122
123 LastExportKeyword.startToken();
124
125 BuiltinInfo = std::make_unique<Builtin::Context>();
126
127 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
128 // a macro. They get unpoisoned where it is allowed.
129 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
130 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
131 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
132 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
133
134 // Initialize the pragma handlers.
135 RegisterBuiltinPragmas();
136
137 // Initialize builtin macros like __LINE__ and friends.
138 RegisterBuiltinMacros();
139
140 if(LangOpts.Borland) {
141 Ident__exception_info = getIdentifierInfo("_exception_info");
142 Ident___exception_info = getIdentifierInfo("__exception_info");
143 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
144 Ident__exception_code = getIdentifierInfo("_exception_code");
145 Ident___exception_code = getIdentifierInfo("__exception_code");
146 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
147 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
148 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
149 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
150 } else {
151 Ident__exception_info = Ident__exception_code = nullptr;
152 Ident__abnormal_termination = Ident___exception_info = nullptr;
153 Ident___exception_code = Ident___abnormal_termination = nullptr;
154 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
155 Ident_AbnormalTermination = nullptr;
156 }
157
158 // Default incremental processing to -fincremental-extensions, clients can
159 // override with `enableIncrementalProcessing` if desired.
160 IncrementalProcessing = LangOpts.IncrementalExtensions;
161
162 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
164 SkippingUntilPragmaHdrStop = true;
165
166 // If using a PCH with a through header, start skipping tokens.
167 if (!this->PPOpts.PCHThroughHeader.empty() &&
168 !this->PPOpts.ImplicitPCHInclude.empty())
169 SkippingUntilPCHThroughHeader = true;
170
171 if (this->PPOpts.GeneratePreamble)
172 PreambleConditionalStack.startRecording();
173
174 MaxTokens = LangOpts.MaxTokens;
175}
176
178 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
179
180 IncludeMacroStack.clear();
181
182 // Free any cached macro expanders.
183 // This populates MacroArgCache, so all TokenLexers need to be destroyed
184 // before the code below that frees up the MacroArgCache list.
185 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
186 CurTokenLexer.reset();
187
188 // Free any cached MacroArgs.
189 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
190 ArgList = ArgList->deallocate();
191
192 // Delete the header search info, if we own it.
193 if (OwnsHeaderSearch)
194 delete &HeaderInfo;
195}
196
198 const TargetInfo *AuxTarget) {
199 assert((!this->Target || this->Target == &Target) &&
200 "Invalid override of target information");
201 this->Target = &Target;
202
203 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
204 "Invalid override of aux target information.");
205 this->AuxTarget = AuxTarget;
206
207 // Initialize information about built-ins.
208 BuiltinInfo->InitializeTarget(Target, AuxTarget);
209 HeaderInfo.setTarget(Target);
210
211 // Populate the identifier table with info about keywords for the current language.
212 Identifiers.AddKeywords(LangOpts);
213
214 // Initialize the __FLT_EVAL_METHOD__ macro to the TargetInfo.
215 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
216
217 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
218 // Use setting from TargetInfo.
219 setCurrentFPEvalMethod(SourceLocation(), Target.getFPEvalMethod());
220 else
221 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
222 setCurrentFPEvalMethod(SourceLocation(), getLangOpts().getFPEvalMethod());
223}
224
226 NumEnteredSourceFiles = 0;
227
228 // Reset pragmas
229 PragmaHandlersBackup = std::move(PragmaHandlers);
230 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
231 RegisterBuiltinPragmas();
232
233 // Reset PredefinesFileID
234 PredefinesFileID = FileID();
235}
236
238 NumEnteredSourceFiles = 1;
239
240 PragmaHandlers = std::move(PragmaHandlersBackup);
241}
242
243void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
244 std::string TokenStr;
245 llvm::raw_string_ostream OS(TokenStr);
246
247 // The alignment of 16 is chosen to comfortably fit most identifiers.
248 OS << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
249
250 // Annotation tokens are just markers that don't have a spelling -- they
251 // indicate where something expanded.
252 if (!Tok.isAnnotation()) {
253 OS << "'";
254 // Escape string to prevent token spelling from spanning multiple lines.
255 OS.write_escaped(getSpelling(Tok));
256 OS << "'";
257 }
258
259 // The alignment of 48 (32 characters for the spelling + the 16 for
260 // the identifier name) fits most variable names, keywords and annotations.
261 llvm::errs() << llvm::formatv("{0,-48} ", OS.str());
262
263 if (!DumpFlags) return;
264
265 auto Loc = Tok.getLocation();
266 llvm::errs() << "Loc=<";
267 DumpLocation(Loc);
268 llvm::errs() << ">";
269
270 // If the token points directly to a file location (i.e. not a macro
271 // expansion), then add additional padding so that trailing markers
272 // align, provided the line/column numbers are reasonably sized.
273 //
274 // Otherwise, if it's a macro expansion, don't bother with alignment,
275 // as the line will include multiple locations and be very long.
276 //
277 // NOTE: To keep this stateless, it doesn't account for filename
278 // length, so when a header starts markers will be temporarily misaligned.
279 if (Loc.isFileID()) {
280 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
281
282 if (!PLoc.isInvalid()) {
283 int LineWidth = llvm::utostr(PLoc.getLine()).size();
284 int ColumnWidth = llvm::utostr(PLoc.getColumn()).size();
285
286 // Reserve space for lines up to 9999 and columns up to 99,
287 // which is 4 + 2 = 6 characters in total.
288 const int ReservedSpace = 6;
289
290 int LeftSpace = ReservedSpace - LineWidth - ColumnWidth;
291 int Padding = std::max<int>(0, LeftSpace);
292
293 llvm::errs().indent(Padding);
294 }
295 }
296
297 if (Tok.isAtStartOfLine())
298 llvm::errs() << " [StartOfLine]";
299 if (Tok.hasLeadingSpace())
300 llvm::errs() << " [LeadingSpace]";
301 if (Tok.isExpandDisabled())
302 llvm::errs() << " [ExpandDisabled]";
303 if (Tok.needsCleaning()) {
304 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
305 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']";
306 }
307}
308
310 Loc.print(llvm::errs(), SourceMgr);
311}
312
313void Preprocessor::DumpMacro(const MacroInfo &MI) const {
314 llvm::errs() << "MACRO: ";
315 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
317 llvm::errs() << " ";
318 }
319 llvm::errs() << "\n";
320}
321
323 llvm::errs() << "\n*** Preprocessor Stats:\n";
324 llvm::errs() << NumDirectives << " directives found:\n";
325 llvm::errs() << " " << NumDefined << " #define.\n";
326 llvm::errs() << " " << NumUndefined << " #undef.\n";
327 llvm::errs() << " #include/#include_next/#import:\n";
328 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
329 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
330 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
331 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
332 llvm::errs() << " " << NumEndif << " #endif.\n";
333 llvm::errs() << " " << NumPragma << " #pragma.\n";
334 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
335
336 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
337 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
338 << NumFastMacroExpanded << " on the fast path.\n";
339 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
340 << " token paste (##) operations performed, "
341 << NumFastTokenPaste << " on the fast path.\n";
342
343 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
344
345 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
346 llvm::errs() << "\n Macro Expanded Tokens: "
347 << llvm::capacity_in_bytes(MacroExpandedTokens);
348 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
349 // FIXME: List information for all submodules.
350 llvm::errs() << "\n Macros: "
351 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
352 llvm::errs() << "\n #pragma push_macro Info: "
353 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
354 llvm::errs() << "\n Poison Reasons: "
355 << llvm::capacity_in_bytes(PoisonReasons);
356 llvm::errs() << "\n Comment Handlers: "
357 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
358}
359
361Preprocessor::macro_begin(bool IncludeExternalMacros) const {
362 if (IncludeExternalMacros && ExternalSource &&
363 !ReadMacrosFromExternalSource) {
364 ReadMacrosFromExternalSource = true;
365 ExternalSource->ReadDefinedMacros();
366 }
367
368 // Make sure we cover all macros in visible modules.
369 for (const ModuleMacro &Macro : ModuleMacros)
370 CurSubmoduleState->Macros.try_emplace(Macro.II);
371
372 return CurSubmoduleState->Macros.begin();
373}
374
376 return BP.getTotalMemory()
377 + llvm::capacity_in_bytes(MacroExpandedTokens)
378 + Predefines.capacity() /* Predefines buffer. */
379 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
380 // and ModuleMacros.
381 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
382 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
383 + llvm::capacity_in_bytes(PoisonReasons)
384 + llvm::capacity_in_bytes(CommentHandlers);
385}
386
388Preprocessor::macro_end(bool IncludeExternalMacros) const {
389 if (IncludeExternalMacros && ExternalSource &&
390 !ReadMacrosFromExternalSource) {
391 ReadMacrosFromExternalSource = true;
392 ExternalSource->ReadDefinedMacros();
393 }
394
395 return CurSubmoduleState->Macros.end();
396}
397
398/// Compares macro tokens with a specified token value sequence.
399static bool MacroDefinitionEquals(const MacroInfo *MI,
400 ArrayRef<TokenValue> Tokens) {
401 return Tokens.size() == MI->getNumTokens() &&
402 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
403}
404
406 SourceLocation Loc,
407 ArrayRef<TokenValue> Tokens) const {
408 SourceLocation BestLocation;
409 StringRef BestSpelling;
411 I != E; ++I) {
413 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
414 if (!Def || !Def.getMacroInfo())
415 continue;
416 if (!Def.getMacroInfo()->isObjectLike())
417 continue;
418 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
419 continue;
420 SourceLocation Location = Def.getLocation();
421 // Choose the macro defined latest.
422 if (BestLocation.isInvalid() ||
423 (Location.isValid() &&
424 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
425 BestLocation = Location;
426 BestSpelling = I->first->getName();
427 }
428 }
429 return BestSpelling;
430}
431
433 if (CurLexer)
434 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
435 ? CLK_DependencyDirectivesLexer
436 : CLK_Lexer;
437 else if (CurTokenLexer)
438 CurLexerCallback = CLK_TokenLexer;
439 else
440 CurLexerCallback = CLK_CachingLexer;
441}
442
444 unsigned CompleteLine,
445 unsigned CompleteColumn) {
446 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
447 assert(!CodeCompletionFile && "Already set");
448
449 // Load the actual file's contents.
450 std::optional<llvm::MemoryBufferRef> Buffer =
451 SourceMgr.getMemoryBufferForFileOrNone(File);
452 if (!Buffer)
453 return true;
454
455 // Find the byte position of the truncation point.
456 const char *Position = Buffer->getBufferStart();
457 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
458 for (; *Position; ++Position) {
459 if (*Position != '\r' && *Position != '\n')
460 continue;
461
462 // Eat \r\n or \n\r as a single line.
463 if ((Position[1] == '\r' || Position[1] == '\n') &&
464 Position[0] != Position[1])
465 ++Position;
466 ++Position;
467 break;
468 }
469 }
470
471 Position += CompleteColumn - 1;
472
473 // If pointing inside the preamble, adjust the position at the beginning of
474 // the file after the preamble.
475 if (SkipMainFilePreamble.first &&
476 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
477 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
478 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
479 }
480
481 if (Position > Buffer->getBufferEnd())
482 Position = Buffer->getBufferEnd();
483
484 CodeCompletionFile = File;
485 CodeCompletionOffset = Position - Buffer->getBufferStart();
486
487 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
488 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
489 char *NewBuf = NewBuffer->getBufferStart();
490 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
491 *NewPos = '\0';
492 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
493 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
494
495 return false;
496}
497
499 bool IsAngled) {
501 if (CodeComplete)
502 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
503}
504
507 if (CodeComplete)
508 CodeComplete->CodeCompleteNaturalLanguage();
509}
510
511/// getSpelling - This method is used to get the spelling of a token into a
512/// SmallVector. Note that the returned StringRef may not point to the
513/// supplied buffer if a copy can be avoided.
515 SmallVectorImpl<char> &Buffer,
516 bool *Invalid) const {
517 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
518 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
519 // Try the fast path.
520 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
521 return II->getName();
522 }
523
524 // Resize the buffer if we need to copy into it.
525 if (Tok.needsCleaning())
526 Buffer.resize(Tok.getLength());
527
528 const char *Ptr = Buffer.data();
529 unsigned Len = getSpelling(Tok, Ptr, Invalid);
530 return StringRef(Ptr, Len);
531}
532
533/// CreateString - Plop the specified string into a scratch buffer and return a
534/// location for it. If specified, the source location provides a source
535/// location for the token.
537 SourceLocation ExpansionLocStart,
538 SourceLocation ExpansionLocEnd) {
539 Tok.setLength(Str.size());
540
541 const char *DestPtr;
542 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
543
544 if (ExpansionLocStart.isValid())
545 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
546 ExpansionLocEnd, Str.size());
547 Tok.setLocation(Loc);
548
549 // If this is a raw identifier or a literal token, set the pointer data.
550 if (Tok.is(tok::raw_identifier))
551 Tok.setRawIdentifierData(DestPtr);
552 else if (Tok.isLiteral())
553 Tok.setLiteralData(DestPtr);
554}
555
557 auto &SM = getSourceManager();
558 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
559 FileIDAndOffset LocInfo = SM.getDecomposedLoc(SpellingLoc);
560 bool Invalid = false;
561 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
562 if (Invalid)
563 return SourceLocation();
564
565 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
566 const char *DestPtr;
567 SourceLocation Spelling =
568 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
569 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
570}
571
573 if (!getLangOpts().isCompilingModule())
574 return nullptr;
575
576 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
577}
578
580 if (!getLangOpts().isCompilingModuleImplementation())
581 return nullptr;
582
583 return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName);
584}
585
586//===----------------------------------------------------------------------===//
587// Preprocessor Initialization Methods
588//===----------------------------------------------------------------------===//
589
590/// EnterMainSourceFile - Enter the specified FileID as the main source file,
591/// which implicitly adds the builtin defines etc.
593 // We do not allow the preprocessor to reenter the main file. Doing so will
594 // cause FileID's to accumulate information from both runs (e.g. #line
595 // information) and predefined macros aren't guaranteed to be set properly.
596 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
597 FileID MainFileID = SourceMgr.getMainFileID();
598
599 // If MainFileID is loaded it means we loaded an AST file, no need to enter
600 // a main file.
601 if (!SourceMgr.isLoadedFileID(MainFileID)) {
602 // Enter the main file source buffer.
603 EnterSourceFile(MainFileID, nullptr, SourceLocation());
604
605 // If we've been asked to skip bytes in the main file (e.g., as part of a
606 // precompiled preamble), do so now.
607 if (SkipMainFilePreamble.first > 0)
608 CurLexer->SetByteOffset(SkipMainFilePreamble.first,
609 SkipMainFilePreamble.second);
610
611 // Tell the header info that the main file was entered. If the file is later
612 // #imported, it won't be re-entered.
613 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID))
614 markIncluded(*FE);
615
616 // Record the first PP token in the main file. This is used to generate
617 // better diagnostics for C++ modules.
618 //
619 // // This is a comment.
620 // #define FOO int // note: add 'module;' to the start of the file
621 // ^ FirstPPToken // to introduce a global module fragment.
622 //
623 // export module M; // error: module declaration must occur
624 // // at the start of the translation unit.
625 if (getLangOpts().CPlusPlusModules) {
626 std::optional<StringRef> Input =
628 if (!isPreprocessedModuleFile() && Input)
629 MainFileIsPreprocessedModuleFile =
631 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(*this);
632 DirTracer = Tracer.get();
633 addPPCallbacks(std::move(Tracer));
634 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
635 if (FirstPPTok)
636 FirstPPTokenLoc = FirstPPTok->getLocation();
637 }
638 }
639
640 // Preprocess Predefines to populate the initial preprocessor state.
641 std::unique_ptr<llvm::MemoryBuffer> SB =
642 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
643 assert(SB && "Cannot create predefined source buffer");
644 FileID FID = SourceMgr.createFileID(std::move(SB));
645 assert(FID.isValid() && "Could not create FileID for predefines?");
646 setPredefinesFileID(FID);
647
648 // Start parsing the predefines.
649 EnterSourceFile(FID, nullptr, SourceLocation());
650
651 if (!PPOpts.PCHThroughHeader.empty()) {
652 // Lookup and save the FileID for the through header. If it isn't found
653 // in the search path, it's a fatal error.
655 SourceLocation(), PPOpts.PCHThroughHeader,
656 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
657 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
658 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
659 /*IsFrameworkFound=*/nullptr);
660 if (!File) {
661 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
662 << PPOpts.PCHThroughHeader;
663 return;
664 }
665 setPCHThroughHeaderFileID(
666 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
667 }
668
669 // Skip tokens from the Predefines and if needed the main file.
670 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
671 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
673}
674
675void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
676 assert(PCHThroughHeaderFileID.isInvalid() &&
677 "PCHThroughHeaderFileID already set!");
678 PCHThroughHeaderFileID = FID;
679}
680
682 assert(PCHThroughHeaderFileID.isValid() &&
683 "Invalid PCH through header FileID");
684 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
685}
686
688 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
689 PCHThroughHeaderFileID.isValid();
690}
691
693 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
694 PCHThroughHeaderFileID.isValid();
695}
696
698 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
699}
700
702 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
703}
704
705/// Skip tokens until after the #include of the through header or
706/// until after a #pragma hdrstop is seen. Tokens in the predefines file
707/// and the main file may be skipped. If the end of the predefines file
708/// is reached, skipping continues into the main file. If the end of the
709/// main file is reached, it's a fatal error.
711 bool ReachedMainFileEOF = false;
712 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
713 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
714 Token Tok;
715 while (true) {
716 bool InPredefines =
717 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
718 CurLexerCallback(*this, Tok);
719 if (Tok.is(tok::eof) && !InPredefines) {
720 ReachedMainFileEOF = true;
721 break;
722 }
723 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
724 break;
725 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
726 break;
727 }
728 if (ReachedMainFileEOF) {
729 if (UsingPCHThroughHeader)
730 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
731 << PPOpts.PCHThroughHeader << 1;
732 else if (!PPOpts.PCHWithHdrStopCreate)
733 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
734 }
735}
736
737void Preprocessor::replayPreambleConditionalStack() {
738 // Restore the conditional stack from the preamble, if there is one.
739 if (PreambleConditionalStack.isReplaying()) {
740 assert(CurPPLexer &&
741 "CurPPLexer is null when calling replayPreambleConditionalStack.");
742 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
743 PreambleConditionalStack.doneReplaying();
744 if (PreambleConditionalStack.reachedEOFWhileSkipping())
745 SkipExcludedConditionalBlock(
746 PreambleConditionalStack.SkipInfo->HashTokenLoc,
747 PreambleConditionalStack.SkipInfo->IfTokenLoc,
748 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
749 PreambleConditionalStack.SkipInfo->FoundElse,
750 PreambleConditionalStack.SkipInfo->ElseLoc);
751 }
752}
753
755 // Notify the client that we reached the end of the source file.
756 if (Callbacks)
757 Callbacks->EndOfMainFile();
758}
759
760//===----------------------------------------------------------------------===//
761// Lexer Event Handling.
762//===----------------------------------------------------------------------===//
763
764/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
765/// identifier information for the token and install it into the token,
766/// updating the token kind accordingly.
768 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
769
770 // Look up this token, see if it is a macro, or if it is a language keyword.
771 IdentifierInfo *II;
772 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
773 // No cleaning needed, just use the characters from the lexed buffer.
774 II = getIdentifierInfo(Identifier.getRawIdentifier());
775 } else {
776 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
777 SmallString<64> IdentifierBuffer;
778 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
779
780 if (Identifier.hasUCN()) {
781 SmallString<64> UCNIdentifierBuffer;
782 expandUCNs(UCNIdentifierBuffer, CleanedStr);
783 II = getIdentifierInfo(UCNIdentifierBuffer);
784 } else {
785 II = getIdentifierInfo(CleanedStr);
786 }
787 }
788
789 // Update the token info (identifier info and appropriate token kind).
790 // FIXME: the raw_identifier may contain leading whitespace which is removed
791 // from the cleaned identifier token. The SourceLocation should be updated to
792 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
793 // line continuation before 'B') is parsed as a single tok::raw_identifier and
794 // is cleaned to tok::identifier "B". After cleaning the token's length is
795 // still 3 and the SourceLocation refers to the location of the backslash.
796 Identifier.setIdentifierInfo(II);
797 Identifier.setKind(II->getTokenID());
798
799 return II;
800}
801
803 PoisonReasons[II] = DiagID;
804}
805
807 assert(Ident__exception_code && Ident__exception_info);
808 assert(Ident___exception_code && Ident___exception_info);
809 Ident__exception_code->setIsPoisoned(Poison);
810 Ident___exception_code->setIsPoisoned(Poison);
811 Ident_GetExceptionCode->setIsPoisoned(Poison);
812 Ident__exception_info->setIsPoisoned(Poison);
813 Ident___exception_info->setIsPoisoned(Poison);
814 Ident_GetExceptionInfo->setIsPoisoned(Poison);
815 Ident__abnormal_termination->setIsPoisoned(Poison);
816 Ident___abnormal_termination->setIsPoisoned(Poison);
817 Ident_AbnormalTermination->setIsPoisoned(Poison);
818}
819
821 assert(Identifier.getIdentifierInfo() &&
822 "Can't handle identifiers without identifier info!");
823 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
824 PoisonReasons.find(Identifier.getIdentifierInfo());
825 if(it == PoisonReasons.end())
826 Diag(Identifier, diag::err_pp_used_poisoned_id);
827 else
828 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
829}
830
831void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
832 assert(II.isOutOfDate() && "not out of date");
833 assert(getExternalSource() &&
834 "getExternalSource() should not return nullptr");
836}
837
838/// HandleIdentifier - This callback is invoked when the lexer reads an
839/// identifier. This callback looks up the identifier in the map and/or
840/// potentially macro expands it or turns it into a named token (like 'for').
841///
842/// Note that callers of this method are guarded by checking the
843/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
844/// IdentifierInfo methods that compute these properties will need to change to
845/// match.
847 assert(Identifier.getIdentifierInfo() &&
848 "Can't handle identifiers without identifier info!");
849
850 IdentifierInfo &II = *Identifier.getIdentifierInfo();
851
852 // If the information about this identifier is out of date, update it from
853 // the external source.
854 // We have to treat __VA_ARGS__ in a special way, since it gets
855 // serialized with isPoisoned = true, but our preprocessor may have
856 // unpoisoned it if we're defining a C99 macro.
857 if (II.isOutOfDate()) {
858 bool CurrentIsPoisoned = false;
859 const bool IsSpecialVariadicMacro =
860 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
861 if (IsSpecialVariadicMacro)
862 CurrentIsPoisoned = II.isPoisoned();
863
864 updateOutOfDateIdentifier(II);
865 Identifier.setKind(II.getTokenID());
866
867 if (IsSpecialVariadicMacro)
868 II.setIsPoisoned(CurrentIsPoisoned);
869 }
870
871 // If this identifier was poisoned, and if it was not produced from a macro
872 // expansion, emit an error.
873 if (II.isPoisoned() && CurPPLexer) {
874 HandlePoisonedIdentifier(Identifier);
875 }
876
877 // If this is a macro to be expanded, do it.
878 if (const MacroDefinition MD = getMacroDefinition(&II)) {
879 const auto *MI = MD.getMacroInfo();
880 assert(MI && "macro definition with no macro info?");
881 if (!DisableMacroExpansion) {
882 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
883 // C99 6.10.3p10: If the preprocessing token immediately after the
884 // macro name isn't a '(', this macro should not be expanded.
885 if (!MI->isFunctionLike() || isNextPPTokenOneOf(tok::l_paren))
886 return HandleMacroExpandedIdentifier(Identifier, MD);
887 } else {
888 // C99 6.10.3.4p2 says that a disabled macro may never again be
889 // expanded, even if it's in a context where it could be expanded in the
890 // future.
891 Identifier.setFlag(Token::DisableExpand);
892 if (MI->isObjectLike() || isNextPPTokenOneOf(tok::l_paren))
893 Diag(Identifier, diag::pp_disabled_macro_expansion);
894 }
895 }
896 }
897
898 // If this identifier is a keyword in a newer Standard or proposed Standard,
899 // produce a warning. Don't warn if we're not considering macro expansion,
900 // since this identifier might be the name of a macro.
901 // FIXME: This warning is disabled in cases where it shouldn't be, like
902 // "#define constexpr constexpr", "int constexpr;"
903 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
904 Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts()))
905 << II.getName();
906 // Don't diagnose this keyword again in this translation unit.
907 II.setIsFutureCompatKeyword(false);
908 }
909
910 // If this identifier would be a keyword in C++, diagnose as a compatibility
911 // issue.
912 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
913 Diag(Identifier, diag::warn_pp_identifier_is_cpp_keyword) << &II;
914
915 // If this is an extension token, diagnose its use.
916 // We avoid diagnosing tokens that originate from macro definitions.
917 // FIXME: This warning is disabled in cases where it shouldn't be,
918 // like "#define TY typeof", "TY(1) x".
919 if (II.isExtensionToken() && !DisableMacroExpansion)
920 Diag(Identifier, diag::ext_token_used);
921
922 // Handle module contextual keywords.
923 if (getLangOpts().CPlusPlusModules && CurLexer &&
924 !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() &&
925 !CurLexer->ParsingPreprocessorDirective &&
926 Identifier.isModuleContextualKeyword() &&
927 HandleModuleContextualKeyword(Identifier)) {
928 HandleDirective(Identifier);
929 // With a fatal failure in the module loader, we abort parsing.
931 }
932
933 return true;
934}
935
937 ++LexLevel;
938
939 // We loop here until a lex function returns a token; this avoids recursion.
940 while (!CurLexerCallback(*this, Result))
941 ;
942
943 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
944 return;
945
946 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
947 // Remember the identifier before code completion token.
948 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
949 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
950 // Set IdentifierInfo to null to avoid confusing code that handles both
951 // identifiers and completion tokens.
952 Result.setIdentifierInfo(nullptr);
953 }
954
955 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
956 // if this token is being produced as a result of phase 4 of translation.
957 // Update TrackGMFState to decide if we are currently in a Global Module
958 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
959 // depends on the prevailing StdCXXImportSeq state in two cases.
960 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
961 !Result.getFlag(Token::IsReinjected)) {
962 switch (Result.getKind()) {
963 case tok::l_paren: case tok::l_square: case tok::l_brace:
964 StdCXXImportSeqState.handleOpenBracket();
965 break;
966 case tok::r_paren: case tok::r_square:
967 StdCXXImportSeqState.handleCloseBracket();
968 break;
969 case tok::r_brace:
970 StdCXXImportSeqState.handleCloseBrace();
971 break;
972#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
973// For `#pragma ...` mimic ';'.
974#include "clang/Basic/TokenKinds.def"
975#undef PRAGMA_ANNOTATION
976 // This token is injected to represent the translation of '#include "a.h"'
977 // into "import a.h;". Mimic the notional ';'.
978 case tok::annot_module_include:
979 case tok::annot_repl_input_end:
980 case tok::semi:
981 TrackGMFState.handleSemi();
982 StdCXXImportSeqState.handleSemi();
983 ModuleDeclState.handleSemi();
984 break;
985 case tok::header_name:
986 case tok::annot_header_unit:
987 StdCXXImportSeqState.handleHeaderName();
988 break;
989 case tok::kw_export:
992 TrackGMFState.handleExport();
993 StdCXXImportSeqState.handleExport();
994 ModuleDeclState.handleExport();
995 break;
996 case tok::colon:
997 ModuleDeclState.handleColon();
998 break;
999 case tok::kw_import:
1000 if (StdCXXImportSeqState.atTopLevel()) {
1001 TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
1002 StdCXXImportSeqState.handleImport();
1003 }
1004 break;
1005 case tok::kw_module:
1006 if (StdCXXImportSeqState.atTopLevel()) {
1009 TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
1010 ModuleDeclState.handleModule();
1011 }
1012 break;
1013 case tok::annot_module_name:
1014 ModuleDeclState.handleModuleName(
1015 static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
1016 if (ModuleDeclState.isModuleCandidate())
1017 break;
1018 [[fallthrough]];
1019 default:
1020 TrackGMFState.handleMisc();
1021 StdCXXImportSeqState.handleMisc();
1022 ModuleDeclState.handleMisc();
1023 break;
1024 }
1025 }
1026
1027 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
1028 CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr);
1029 CheckPointCounter = 0;
1030 }
1031
1032 if (Result.isNot(tok::kw_export))
1033 LastExportKeyword.startToken();
1034
1035 --LexLevel;
1036
1037 // Destroy any lexers that were deferred while we were in nested Lex calls.
1038 // This must happen after decrementing LexLevel but before any other
1039 // processing that might re-enter Lex.
1040 if (LexLevel == 0 && !PendingDestroyLexers.empty())
1041 PendingDestroyLexers.clear();
1042
1043 if ((LexLevel == 0 || PreprocessToken) &&
1044 !Result.getFlag(Token::IsReinjected)) {
1045 if (LexLevel == 0)
1046 ++TokenCount;
1047 if (OnToken)
1048 OnToken(Result);
1049 }
1050}
1051
1052void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1053 while (1) {
1054 Token Tok;
1055 Lex(Tok);
1056 if (Tok.isOneOf(tok::unknown, tok::eof, tok::eod,
1057 tok::annot_repl_input_end))
1058 break;
1059 if (Tokens != nullptr)
1060 Tokens->push_back(Tok);
1061 }
1062}
1063
1064/// Lex a header-name token (including one formed from header-name-tokens if
1065/// \p AllowMacroExpansion is \c true).
1066///
1067/// \param FilenameTok Filled in with the next token. On success, this will
1068/// be a header_name token. On failure, it will be whatever other
1069/// token was found instead.
1070/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1071/// by macro expansion (concatenating tokens as necessary if the first
1072/// token is a '<').
1073/// \return \c true if we reached EOD or EOF while looking for a > token in
1074/// a concatenated header name and diagnosed it. \c false otherwise.
1075bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1076 // Lex using header-name tokenization rules if tokens are being lexed from
1077 // a file. Just grab a token normally if we're in a macro expansion.
1078 if (CurPPLexer) {
1079 // Avoid nested header-name lexing when macro expansion recurses, e.g.
1080 // __has_include(__has_include).
1081 if (CurPPLexer->ParsingFilename)
1082 LexUnexpandedToken(FilenameTok);
1083 else
1084 CurPPLexer->LexIncludeFilename(FilenameTok);
1085 } else {
1086 Lex(FilenameTok);
1087 }
1088
1089 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1090 // case, glue the tokens together into a single header_name token.
1091 SmallString<128> FilenameBuffer;
1092 if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
// Remember the flags and location of the opening '<'; they are re-applied
// to the synthesized header_name token once the name has been assembled.
1093 bool StartOfLine = FilenameTok.isAtStartOfLine();
1094 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1095 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1096
1097 SourceLocation Start = FilenameTok.getLocation();
1098 SourceLocation End;
1099 FilenameBuffer.push_back('<');
1100
1101 // Consume tokens until we find a '>'.
1102 // FIXME: A header-name could be formed starting or ending with an
1103 // alternative token. It's not clear whether that's ill-formed in all
1104 // cases.
// The '>' itself is appended to the buffer too: the loop condition is only
// re-checked after the token's spelling has been added below.
1105 while (FilenameTok.isNot(tok::greater)) {
1106 Lex(FilenameTok);
1107 if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
// Ran out of tokens before the closing '>': diagnose and point back at
// the unmatched '<'.
1108 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1109 Diag(Start, diag::note_matching) << tok::less;
1110 return true;
1111 }
1112
1113 End = FilenameTok.getLocation();
1114
1115 // FIXME: Provide code completion for #includes.
1116 if (FilenameTok.is(tok::code_completion)) {
// NOTE(review): the embedded numbering of this listing jumps from 1116 to
// 1118, so one line of this branch appears to be missing here; confirm
// against the upstream source.
1118 Lex(FilenameTok);
1119 continue;
1120 }
1121
1122 // Append the spelling of this token to the buffer. If there was a space
1123 // before it, add it now.
1124 if (FilenameTok.hasLeadingSpace())
1125 FilenameBuffer.push_back(' ');
1126
1127 // Get the spelling of the token, directly into FilenameBuffer if
1128 // possible.
1129 size_t PreAppendSize = FilenameBuffer.size();
1130 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1131
// getSpelling may redirect BufPtr to storage outside FilenameBuffer; the
// pointer comparison below detects that case.
1132 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1133 unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1134
1135 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1136 if (BufPtr != &FilenameBuffer[PreAppendSize])
1137 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1138
1139 // Resize FilenameBuffer to the correct size.
1140 if (FilenameTok.getLength() != ActualLen)
1141 FilenameBuffer.resize(PreAppendSize + ActualLen);
1142 }
1143
// Rebuild FilenameTok from scratch as a header_name token spanning
// Start..End, restoring the flags captured from the opening '<'.
1144 FilenameTok.startToken();
1145 FilenameTok.setKind(tok::header_name);
1146 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1147 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1148 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1149 CreateString(FilenameBuffer, FilenameTok, Start, End);
1150 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1151 // Convert a string-literal token of the form " h-char-sequence "
1152 // (produced by macro expansion) into a header-name token.
1153 //
1154 // The rules for header-names don't quite match the rules for
1155 // string-literals, but all the places where they differ result in
1156 // undefined behavior, so we can and do treat them the same.
1157 //
1158 // A string-literal with a prefix or suffix is not translated into a
1159 // header-name. This could theoretically be observable via the C++20
1160 // context-sensitive header-name formation rules.
1161 StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1162 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1163 FilenameTok.setKind(tok::header_name);
1164 }
1165
// Any other token kind is handed back unchanged; only the unterminated
// '<' case above reports failure.
1166 return false;
1167}
1168
1169std::optional<Token> Preprocessor::peekNextPPToken() const {
1170 // Do some quick tests for rejection cases.
1171 std::optional<Token> Val;
1172 if (CurLexer)
1173 Val = CurLexer->peekNextPPToken();
1174 else
1175 Val = CurTokenLexer->peekNextPPToken();
1176
1177 if (!Val) {
1178 // We have run off the end. If it's a source file we don't
1179 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1180 // macro stack.
1181 if (CurPPLexer)
1182 return std::nullopt;
1183 for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
1184 if (Entry.TheLexer)
1185 Val = Entry.TheLexer->peekNextPPToken();
1186 else
1187 Val = Entry.TheTokenLexer->peekNextPPToken();
1188
1189 if (Val)
1190 break;
1191
1192 // Ran off the end of a source file?
1193 if (Entry.ThePPLexer)
1194 return std::nullopt;
1195 }
1196 }
1197
1198 // Okay, we found the token and return. Otherwise we found the end of the
1199 // translation unit.
1200 return Val;
1201}
1202
1203// We represent the primary and partition names as 'Paths' which are sections
1204// of the hierarchical access path for a clang module. However for C++20
1205// the periods in a name are just another character, and we will need to
1206// flatten them into a string.
1208 std::string Name;
1209 if (Path.empty())
1210 return Name;
1211
1212 for (auto &Piece : Path) {
1213 assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1214 if (!Name.empty())
1215 Name += ".";
1216 Name += Piece.getIdentifierInfo()->getName();
1217 }
1218 return Name;
1219}
1220
1222 assert(!Path.empty() && "expect at least one identifier in a module name");
1223 void *Mem = PP.getPreprocessorAllocator().Allocate(
1224 totalSizeToAlloc<IdentifierLoc>(Path.size()), alignof(ModuleNameLoc));
1225 return new (Mem) ModuleNameLoc(Path);
1226}
1227
1229 SmallVectorImpl<Token> &Suffix,
1231 bool AllowMacroExpansion,
1232 bool IsPartition) {
1233 auto ConsumeToken = [&]() {
1234 if (AllowMacroExpansion)
1235 Lex(Tok);
1236 else
1238 Suffix.push_back(Tok);
1239 };
1240
1241 while (true) {
1242 if (Tok.isNot(tok::identifier)) {
1243 if (Tok.is(tok::code_completion)) {
1244 CurLexer->cutOffLexing();
1245 CodeComplete->CodeCompleteModuleImport(UseLoc, Path);
1246 return true;
1247 }
1248
1249 Diag(Tok, diag::err_pp_module_expected_ident) << Path.empty();
1250 return true;
1251 }
1252
1253 // [cpp.pre]/p2:
1254 // No identifier in the pp-module-name or pp-module-partition shall
1255 // currently be defined as an object-like macro.
1256 if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo());
1257 MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1258 !AllowMacroExpansion) {
1259 Diag(Tok, diag::err_pp_module_name_is_macro)
1260 << IsPartition << Tok.getIdentifierInfo();
1261 Diag(MI->getDefinitionLoc(), diag::note_macro_here)
1262 << Tok.getIdentifierInfo();
1263 }
1264
1265 // Record this part of the module path.
1266 Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo());
1267 ConsumeToken();
1268
1269 if (Tok.isNot(tok::period))
1270 return false;
1271
1272 ConsumeToken();
1273 }
1274}
1275
/// Lex a module name via LexModuleNameContinue and, on success, replace the
/// tokens it added to \p DirToks with a single annot_module_name token.
///
/// On failure of LexModuleNameContinue, CheckEndOfDirective is invoked with
/// \p DirType (unless \p Tok is already tok::eod) and \c true is returned.
/// On success, \p DirToks is truncated back to the size it had on entry, an
/// annot_module_name token carrying a ModuleNameLoc created from the lexed
/// path is appended (with the leading-space flag \p Tok had on entry),
/// followed by the current \p Tok, and \c false is returned.
///
/// NOTE(review): the embedded numbering of this listing skips lines 1278 and
/// 1289. The parameter that declares `Path` (used in the body) and one
/// statement of the failure branch are therefore not visible here; confirm
/// against the upstream source.
1276bool Preprocessor::HandleModuleName(StringRef DirType, SourceLocation UseLoc,
1277 Token &Tok,
1279 SmallVectorImpl<Token> &DirToks,
1280 bool AllowMacroExpansion,
1281 bool IsPartition) {
// Capture the state on entry: the first token's leading-space flag and how
// many tokens the directive already holds.
1282 bool LeadingSpace = Tok.hasLeadingSpace();
1283 unsigned NumToksInDirective = DirToks.size();
1284 if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path, AllowMacroExpansion,
1285 IsPartition)) {
1286 if (Tok.isNot(tok::eod))
1287 CheckEndOfDirective(DirType,
1288 /*EnableMacros=*/false, &DirToks);
1290 return true;
1291 }
1292
1293 // Clean the module-name tokens and replace these tokens with
1294 // annot_module_name.
1295 DirToks.resize(NumToksInDirective);
1296 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path);
1297 DirToks.emplace_back();
1298 DirToks.back().setKind(tok::annot_module_name);
1299 DirToks.back().setAnnotationRange(NameLoc->getRange());
1300 DirToks.back().setAnnotationValue(static_cast<void *>(NameLoc));
1301 DirToks.back().setFlagValue(Token::LeadingSpace, LeadingSpace);
// Tok now holds the first token after the module name; keep it in the
// directive's token list.
1302 DirToks.push_back(Tok);
1303 return false;
1304}
1305
1306/// [cpp.pre]/p2:
1307/// A preprocessing directive consists of a sequence of preprocessing tokens
1308/// that satisfies the following constraints: At the start of translation phase
1309/// 4, the first preprocessing token in the sequence, referred to as a
1310/// directive-introducing token, begins with the first character in the source
1311/// file (optionally after whitespace containing no new-line characters) or
1312/// follows whitespace containing at least one new-line character, and is:
1313/// - a # preprocessing token, or
1314/// - an import preprocessing token immediately followed on the same logical
1315/// source line by a header-name, <, identifier, or : preprocessing token, or
1316/// - a module preprocessing token immediately followed on the same logical
1317/// source line by an identifier, :, or ; preprocessing token, or
1318/// - an export preprocessing token immediately followed on the same logical
1319/// source line by one of the two preceding forms.
1320///
1321///
1322/// At the start of phase 4 an import or module token is treated as starting a
1323/// directive and are converted to their respective keywords iff:
1324/// - After skipping horizontal whitespace are
1325/// - at the start of a logical line, or
1326/// - preceded by an 'export' at the start of the logical line.
1327/// - Are followed by an identifier pp token (before macro expansion), or
1328/// - <, ", or : (but not ::) pp tokens for 'import', or
1329/// - ; for 'module'
1330/// Otherwise the token is treated as an identifier.
1332 if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
1333 return false;
1334
1335 if (Result.is(tok::kw_export)) {
1336 LastExportKeyword = Result;
1337 return false;
1338 }
1339
1340 /// Treat 'module' and 'import' as identifiers when the main file is a
1341 /// preprocessed module file. We only allow '__preprocessed_module' and
1342 /// '__preprocessed_import' in this context.
1343 IdentifierInfo *II = Result.getIdentifierInfo();
1345 (II->isStr(tok::getKeywordSpelling(tok::kw_import)) ||
1346 II->isStr(tok::getKeywordSpelling(tok::kw_module))))
1347 return false;
1348
1349 if (LastExportKeyword.is(tok::kw_export)) {
1350 // The export keyword was not at the start of line, it's not a
1351 // directive-introducing token.
1352 if (!LastExportKeyword.isAtPhysicalStartOfLine())
1353 return false;
1354 // [cpp.pre]/1.4
1355 // export // not a preprocessing directive
1356 // import foo; // preprocessing directive (ill-formed at phase7)
1357 if (Result.isAtPhysicalStartOfLine())
1358 return false;
1359 } else if (!Result.isAtPhysicalStartOfLine())
1360 return false;
1361
1362 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1363 CurPPLexer->ParsingPreprocessorDirective, true);
1364
1365 // The next token may be an angled string literal after import keyword.
1366 llvm::SaveAndRestore<bool> SavedParsingFilemame(
1367 CurPPLexer->ParsingFilename,
1368 Result.getIdentifierInfo()->isImportKeyword());
1369
1370 std::optional<Token> NextTok = peekNextPPToken();
1371 if (!NextTok)
1372 return false;
1373
1374 if (NextTok->is(tok::raw_identifier))
1375 LookUpIdentifierInfo(*NextTok);
1376
1377 if (Result.getIdentifierInfo()->isImportKeyword()) {
1378 if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon,
1379 tok::header_name)) {
1380 Result.setKind(tok::kw_import);
1381 ModuleImportLoc = Result.getLocation();
1382 return true;
1383 }
1384 }
1385
1386 if (Result.getIdentifierInfo()->isModuleKeyword() &&
1387 NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
1388 Result.setKind(tok::kw_module);
1389 ModuleDeclLoc = Result.getLocation();
1390 return true;
1391 }
1392
1393 // Ok, it's an identifier.
1394 return false;
1395}
1396
1398 SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1401 return false;
1402}
1403
1404/// Collect the tokens of a C++20 pp-import-suffix.
1406 bool StopUntilEOD) {
1407 while (true) {
1408 Toks.emplace_back();
1409 Lex(Toks.back());
1410
1411 switch (Toks.back().getKind()) {
1412 case tok::semi:
1413 if (!StopUntilEOD)
1414 return;
1415 [[fallthrough]];
1416 case tok::eod:
1417 case tok::eof:
1418 return;
1419 default:
1420 break;
1421 }
1422 }
1423}
1424
1425// Allocate a holding buffer for a sequence of tokens and introduce it into
1426// the token stream.
1428 if (Toks.empty())
1429 return;
1430 auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1431 std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1432 EnterTokenStream(std::move(ToksCopy), Toks.size(),
1433 /*DisableMacroExpansion*/ false, /*IsReinject*/ false);
1434 assert(CurTokenLexer && "Must have a TokenLexer");
1435 CurTokenLexer->setLexingCXXModuleDirective();
1436}
1437
1439 bool IncludeExports) {
1440 CurSubmoduleState->VisibleModules.setVisible(
1441 M, Loc, IncludeExports, [](Module *) {},
1442 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1443 // FIXME: Include the path in the diagnostic.
1444 // FIXME: Include the import location for the conflicting module.
1445 Diag(ModuleImportLoc, diag::warn_module_conflict)
1446 << Path[0]->getFullModuleName()
1447 << Conflict->getFullModuleName()
1448 << Message;
1449 });
1450
1451 // Add this module to the imports list of the currently-built submodule.
1452 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1453 BuildingSubmoduleStack.back().M->Imports.insert(M);
1454}
1455
1457 const char *DiagnosticTag,
1458 bool AllowMacroExpansion) {
1459 // We need at least one string literal.
1460 if (Result.isNot(tok::string_literal)) {
1461 Diag(Result, diag::err_expected_string_literal)
1462 << /*Source='in...'*/0 << DiagnosticTag;
1463 return false;
1464 }
1465
1466 // Lex string literal tokens, optionally with macro expansion.
1467 SmallVector<Token, 4> StrToks;
1468 do {
1469 StrToks.push_back(Result);
1470
1471 if (Result.hasUDSuffix())
1472 Diag(Result, diag::err_invalid_string_udl);
1473
1474 if (AllowMacroExpansion)
1475 Lex(Result);
1476 else
1478 } while (Result.is(tok::string_literal));
1479
1480 // Concatenate and parse the strings.
1481 StringLiteralParser Literal(StrToks, *this);
1482 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1483
1484 if (Literal.hadError)
1485 return false;
1486
1487 if (Literal.Pascal) {
1488 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1489 << /*Source='in...'*/0 << DiagnosticTag;
1490 return false;
1491 }
1492
1493 String = std::string(Literal.GetString());
1494 return true;
1495}
1496
1498 assert(Tok.is(tok::numeric_constant));
1499 SmallString<8> IntegerBuffer;
1500 bool NumberInvalid = false;
1501 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1502 if (NumberInvalid)
1503 return false;
1504 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1506 getDiagnostics());
1507 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1508 return false;
1509 llvm::APInt APVal(64, 0);
1510 if (Literal.GetIntegerValue(APVal))
1511 return false;
1512 Lex(Tok);
1513 Value = APVal.getLimitedValue();
1514 return true;
1515}
1516
1518 assert(Handler && "NULL comment handler");
1519 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1520 "Comment handler already registered");
1521 CommentHandlers.push_back(Handler);
1522}
1523
1525 std::vector<CommentHandler *>::iterator Pos =
1526 llvm::find(CommentHandlers, Handler);
1527 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1528 CommentHandlers.erase(Pos);
1529}
1530
1532 bool AnyPendingTokens = false;
1533 for (CommentHandler *H : CommentHandlers) {
1534 if (H->HandleComment(*this, Comment))
1535 AnyPendingTokens = true;
1536 }
1537 if (!AnyPendingTokens || getCommentRetentionState())
1538 return false;
1539 Lex(result);
1540 return true;
1541}
1542
/// Emit diag::warn_pragma_deprecated_macro_use at \p Identifier, followed by
/// a diag::note_pp_macro_annotation note at the location recorded in the
/// macro's deprecation annotation.
///
/// The warning receives the identifier and a selector: 0 when the annotation
/// carries no message, or 1 followed by the message text otherwise. The
/// function asserts that a deprecation annotation has been recorded.
///
/// NOTE(review): the embedded numbering of this listing skips line 1545, so
/// the initializer of `A` is not visible here; confirm against the upstream
/// source.
1543void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1544 const MacroAnnotations &A =
1546 assert(A.DeprecationInfo &&
1547 "Macro deprecation warning without recorded annotation!");
1548 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1549 if (Info.Message.empty())
1550 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1551 << Identifier.getIdentifierInfo() << 0;
1552 else
1553 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1554 << Identifier.getIdentifierInfo() << 1 << Info.Message;
// The note is emitted in both cases and is passed 0 as its argument.
1555 Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
1556}
1557
/// Emit diag::warn_pragma_restrict_expansion_macro_use at \p Identifier,
/// followed by a diag::note_pp_macro_annotation note at the location recorded
/// in the macro's restrict-expansion annotation.
///
/// The warning receives the identifier and a selector: 0 when the annotation
/// carries no message, or 1 followed by the message text otherwise. The
/// function asserts that a restrict-expansion annotation has been recorded.
///
/// NOTE(review): the embedded numbering of this listing skips line 1560, so
/// the initializer of `A` is not visible here; confirm against the upstream
/// source.
1558void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1559 const MacroAnnotations &A =
1561 assert(A.RestrictExpansionInfo &&
1562 "Macro restricted expansion warning without recorded annotation!");
1563 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1564 if (Info.Message.empty())
1565 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1566 << Identifier.getIdentifierInfo() << 0;
1567 else
1568 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1569 << Identifier.getIdentifierInfo() << 1 << Info.Message;
// The note is emitted in both cases and is passed 1 as its argument.
1570 Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
1571}
1572
1573void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1574 unsigned DiagSelection) const {
1575 Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1576}
1577
/// Emit diag::warn_pragma_final_macro at \p Identifier, followed by a
/// diag::note_pp_macro_annotation note at the recorded final-annotation
/// location.
///
/// \param Identifier Token naming the macro; its identifier info is streamed
/// into the warning.
/// \param IsUndef Selects the warning's second argument: 0 when \c true,
/// 1 when \c false.
///
/// The function asserts that a final-annotation location has been recorded.
///
/// NOTE(review): the embedded numbering of this listing skips line 1581, so
/// the initializer of `A` is not visible here; confirm against the upstream
/// source.
1578void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1579 bool IsUndef) const {
1580 const MacroAnnotations &A =
1582 assert(A.FinalAnnotationLoc &&
1583 "Final macro warning without recorded annotation!");
1584
1585 Diag(Identifier, diag::warn_pragma_final_macro)
1586 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
// The note is passed 2 as its argument.
1587 Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
1588}
1589
1591 const SourceLocation &Loc) const {
1592 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1593 // region map:
1594 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1595 const SourceLocation &Loc) -> bool {
1596 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1597 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1598 Map, [&SourceMgr,
1599 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1600 return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1601 });
1602
1603 if (FirstRegionEndingAfterLoc != Map.end()) {
1604 // To test if the start location of the found region precedes `Loc`:
1605 return SourceMgr.isBeforeInTranslationUnit(
1606 FirstRegionEndingAfterLoc->first, Loc);
1607 }
1608 // If we do not find a region whose end location passes `Loc`, we want to
1609 // check if the current region is still open:
1610 if (!Map.empty() && Map.back().first == Map.back().second)
1611 return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
1612 return false;
1613 };
1614
1615 // What the following does:
1616 //
1617 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1618 // Otherwise, `Loc` is from a loaded AST. We look up the
1619 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1620 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1621 // region w.r.t. the region map. If the region map is absent, it means there
1622 // is no opt-out pragma in that loaded AST.
1623 //
1624 // Opt-out pragmas in the local TU or a loaded AST are not visible to another
1625 // one of them. That means if you put the pragmas around a `#include
1626 // "module.h"`, where module.h is a module, it is not actually suppressing
1627 // warnings in module.h. This is fine because warnings in module.h will be
1628 // reported when module.h is compiled in isolation and nothing in module.h
1629 // will be analyzed ever again. So you will not see warnings from the file
1630 // that imports module.h anyway. And you can't even do the same thing for PCHs
1631 // because they can only be included from the command line.
1632
1633 if (SourceMgr.isLocalSourceLocation(Loc))
1634 return TestInMap(SafeBufferOptOutMap, Loc);
1635
1636 const SafeBufferOptOutRegionsTy *LoadedRegions =
1637 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
1638
1639 if (LoadedRegions)
1640 return TestInMap(*LoadedRegions, Loc);
1641 return false;
1642}
1643
1645 bool isEnter, const SourceLocation &Loc) {
1646 if (isEnter) {
1648 return true; // invalid enter action
1649 InSafeBufferOptOutRegion = true;
1650 CurrentSafeBufferOptOutStart = Loc;
1651
1652 // To set the start location of a new region:
1653
1654 if (!SafeBufferOptOutMap.empty()) {
1655 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1656 assert(PrevRegion->first != PrevRegion->second &&
1657 "Shall not begin a safe buffer opt-out region before closing the "
1658 "previous one.");
1659 }
1660 // If the start location equals the end location, we call the region an
1661 // open region or an unclosed region (i.e., the end location has not been set
1662 // yet).
1663 SafeBufferOptOutMap.emplace_back(Loc, Loc);
1664 } else {
1666 return true; // invalid exit action
1667 InSafeBufferOptOutRegion = false;
1668
1669 // To set the end location of the current open region:
1670
1671 assert(!SafeBufferOptOutMap.empty() &&
1672 "Misordered safe buffer opt-out regions");
1673 auto *CurrRegion = &SafeBufferOptOutMap.back();
1674 assert(CurrRegion->first == CurrRegion->second &&
1675 "Set end location to a closed safe buffer opt-out region");
1676 CurrRegion->second = Loc;
1677 }
1678 return false;
1679}
1680
1682 return InSafeBufferOptOutRegion;
1683}
1685 StartLoc = CurrentSafeBufferOptOutStart;
1686 return InSafeBufferOptOutRegion;
1687}
1688
1691 assert(!InSafeBufferOptOutRegion &&
1692 "Attempt to serialize safe buffer opt-out regions before file being "
1693 "completely preprocessed");
1694
1696
1697 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1698 SrcSeq.push_back(begin);
1699 SrcSeq.push_back(end);
1700 }
1701 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1702 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1703 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1704 // It means that for each loading pch/module m, it just needs to load m's own
1705 // `SafeBufferOptOutMap`.
1706 return SrcSeq;
1707}
1708
1710 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1711 if (SourceLocations.size() == 0)
1712 return false;
1713
1714 assert(SourceLocations.size() % 2 == 0 &&
1715 "ill-formed SourceLocation sequence");
1716
1717 auto It = SourceLocations.begin();
1718 SafeBufferOptOutRegionsTy &Regions =
1719 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
1720
1721 do {
1722 SourceLocation Begin = *It++;
1723 SourceLocation End = *It++;
1724
1725 Regions.emplace_back(Begin, End);
1726 } while (It != SourceLocations.end());
1727 return true;
1728}
1729
// Defaulted destructor of ModuleLoader, defined out of line in this file.
1730ModuleLoader::~ModuleLoader() = default;
1731
1733
1735
1737
1739 if (Record)
1740 return;
1741
1742 Record = new PreprocessingRecord(getSourceManager());
1743 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1744}
1745
1746const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1747 if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) {
1748 const SmallVector<const char *> &FileCheckPoints = It->second;
1749 const char *Last = nullptr;
1750 // FIXME: Do better than a linear search.
1751 for (const char *P : FileCheckPoints) {
1752 if (P > Start)
1753 break;
1754 Last = P;
1755 }
1756 return Last;
1757 }
1758
1759 return nullptr;
1760}
1761
1763 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1764}
1765
1767 return SeenNoTrivialPPDirective;
1768}
1769
1770void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1771 if (InMainFile && !SeenNoTrivialPPDirective)
1772 SeenNoTrivialPPDirective = true;
1773}
1774
1776 FileID FID, LexedFileChangeReason Reason,
1777 SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1778 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1779}
1780
1782 const MacroDefinition &MD,
1783 SourceRange Range,
1784 const MacroArgs *Args) {
1785 // FIXME: Does only enable builtin macro expansion make sense?
1786 if (!MD.getMacroInfo()->isBuiltinMacro())
1787 setSeenNoTrivialPPDirective();
1788}
Defines enum values for all the target-independent builtin functions.
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::FileManager interface and associated types.
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Token Tok
The Token.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Defines the PreprocessorLexer interface.
static bool MacroDefinitionEquals(const MacroInfo *MI, ArrayRef< TokenValue > Tokens)
Compares macro tokens with a specified token value sequence.
static constexpr unsigned CheckPointStepSize
Minimum distance between two check points, in tokens.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Abstract base class that describes a handler that will receive source ranges for each of the comments...
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:233
virtual void updateOutOfDateIdentifier(const IdentifierInfo &II)=0
Update an out-of-date identifier.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition FileEntry.h:302
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isValid() const
bool isInvalid() const
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
Module * lookupModule(StringRef ModuleName, SourceLocation ImportLoc=SourceLocation(), bool AllowSearch=true, bool AllowExtraModuleMapSearch=false)
Look up a module: search for a module with the given name.
Provides lookups to, and iteration over, IdentifierInfo objects.
One of these records is kept for each identifier that is lexed.
bool IsKeywordInCPlusPlus() const
Return true if this identifier would be a keyword in C++ mode.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
void setIsPoisoned(bool Value=true)
setIsPoisoned - Mark this identifier as poisoned.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
void setIsFutureCompatKeyword(bool Val)
StringRef getName() const
Return the actual identifier string.
bool isFutureCompatKeyword() const
is/setIsFutureCompatKeyword - Initialize information about whether or not this language token is a ke...
bool isExtensionToken() const
get/setExtension - Initialize information about whether or not this language token is an extension.
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:590
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition MacroInfo.h:606
SourceLocation getLocation() const
Definition MacroInfo.h:488
Encapsulates the data about a macro definition (e.g.
Definition MacroInfo.h:39
const_tokens_iterator tokens_begin() const
Definition MacroInfo.h:244
unsigned getNumTokens() const
Return the number of tokens that this macro expands to.
Definition MacroInfo.h:235
const Token & getReplacementToken(unsigned Tok) const
Definition MacroInfo.h:237
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition MacroInfo.h:217
bool isObjectLike() const
Definition MacroInfo.h:202
Abstract interface for a module loader.
virtual ~ModuleLoader()
static std::string getFlatNameFromPath(ModuleIdPath Path)
Represents a macro directive exported by a module.
Definition MacroInfo.h:514
static ModuleNameLoc * Create(Preprocessor &PP, ModuleIdPath Path)
SourceRange getRange() const
Describes a module or submodule.
Definition Module.h:237
void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
NumericLiteralParser - This performs strict semantic analysis of the content of a ppnumber,...
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
void setConditionalLevels(ArrayRef< PPConditionalInfo > CL)
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
std::string PCHThroughHeader
If non-empty, the filename used in an include directive in the primary source file (or command-line p...
bool GeneratePreamble
True indicates that a preamble is being generated.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool markIncluded(FileEntryRef File)
Mark the file as included.
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void EnterModuleSuffixTokenStream(ArrayRef< Token > Toks)
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
friend class MacroArgs
void DumpMacro(const MacroInfo &MI) const
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
void Lex(Token &Result)
Lex the next token for this preprocessor.
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
macro_iterator macro_end(bool IncludeExternalMacros=true) const
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
bool isPreprocessedModuleFile() const
Whether the main file is preprocessed module file.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
SourceLocation CheckEndOfDirective(StringRef DirType, bool EnableMacros=false, SmallVectorImpl< Token > *ExtraToks=nullptr)
Ensure that the next token is a tok::eod token.
bool getCommentRetentionState() const
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
bool hadModuleLoaderFatalFailure() const
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
bool HandleModuleContextualKeyword(Token &Result)
Callback invoked when the lexer sees one of export, import or module token at the start of a line.
const TargetInfo & getTargetInfo() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a pragma hdrstop.
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
llvm::BumpPtrAllocator & getPreprocessorAllocator()
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
HeaderSearch & getHeaderSearchInfo() const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
ExternalPreprocessorSource * getExternalSource() const
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, SmallVectorImpl< Token > &Suffix, SmallVectorImpl< IdentifierLoc > &Path, bool AllowMacroExpansion, bool IsPartition)
const LangOptions & getLangOpts() const
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
void PoisonSEHIdentifiers(bool Poison=true)
size_t getTotalMemory() const
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool isNextPPTokenOneOf(Ts... Ks) const
isNextPPTokenOneOf - Check whether the next pp-token is one of the specified token kinds.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
void CollectPPImportSuffix(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Collect the tokens of a C++20 pp-import-suffix.
DiagnosticsEngine & getDiagnostics() const
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
bool HandleModuleName(StringRef DirType, SourceLocation UseLoc, Token &Tok, SmallVectorImpl< IdentifierLoc > &Path, SmallVectorImpl< Token > &DirToks, bool AllowMacroExpansion, bool IsPartition)
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the include of the through header or until after a pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl< Token > &Toks, bool StopUntilEOD=false)
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
unsigned getLine() const
Return the presumed line number of this location.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
std::optional< StringRef > getBufferDataOrNone(FileID FID) const
Return a StringRef to the source buffer data for the specified FileID, returning std::nullopt if inva...
A trivial tuple used to represent a source range.
StringLiteralParser - This decodes string escape characters and performs wide string analysis and Tra...
Exposes information about the current target.
Definition TargetInfo.h:227
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:197
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition Token.h:324
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:142
unsigned getLength() const
Definition Token.h:145
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition Token.h:298
void setKind(tok::TokenKind K)
Definition Token.h:100
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:104
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition Token.h:286
bool isOneOf(Ts... Ks) const
Definition Token.h:105
@ DisableExpand
Definition Token.h:79
@ HasSeenNoTrivialPPDirective
Definition Token.h:92
@ IsReinjected
Definition Token.h:89
@ LeadingEmptyMacro
Definition Token.h:81
@ LeadingSpace
Definition Token.h:77
@ StartOfLine
Definition Token.h:75
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
Definition Lexer.cpp:76
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition Token.h:294
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition Token.h:317
bool isNot(tok::TokenKind K) const
Definition Token.h:111
void startToken()
Reset all flags to cleared.
Definition Token.h:187
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition Token.h:313
void setIdentifierInfo(IdentifierInfo *II)
Definition Token.h:206
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition Token.h:277
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition Token.h:223
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition Token.h:254
Defines the clang::TargetInfo interface.
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
const char * getKeywordSpelling(TokenKind Kind) LLVM_READNONE
Determines the spelling of simple keyword and contextual keyword tokens like 'int' and 'dynamic_cast'...
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
@ CPlusPlus20
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
std::pair< FileID, unsigned > FileIDAndOffset
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
bool isPreprocessedModuleFile(StringRef Source)
Scan an input source buffer, and check whether the input source is a preprocessed output.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
TranslationUnitKind
Describes the kind of translation unit being processed.
@ TU_Prefix
The translation unit is a prefix to a translation unit, and is not complete.
#define true
Definition stdbool.h:25