clang 20.0.0git
MacroExpansionContext.cpp
Go to the documentation of this file.
1//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/Support/Debug.h"
11#include <optional>
12
13#define DEBUG_TYPE "macro-expansion-context"
14
15static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
16 clang::Token Tok);
17
18namespace clang {
19namespace detail {
21 const Preprocessor &PP;
22 SourceManager &SM;
23 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
24
25public:
27 const Preprocessor &PP, SourceManager &SM,
28 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
29 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
30
31 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
32 SourceRange Range, const MacroArgs *Args) override {
33 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
34 if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
35 return;
36
37 SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
38 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
39
40 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
41 // If the range is empty, use the length of the macro.
42 if (Range.getBegin() == Range.getEnd())
43 return SM.getExpansionLoc(
44 MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
45
46 // Include the last character.
47 return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
48 }();
49
50 (void)PP;
51 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
52 dumpTokenInto(PP, llvm::dbgs(), MacroName);
53 llvm::dbgs()
54 << "' with length " << MacroName.getLength() << " at ";
55 MacroNameBegin.print(llvm::dbgs(), SM);
56 llvm::dbgs() << ", expansion end at ";
57 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
58
59 // If the expansion range is empty, use the identifier of the macro as a
60 // range.
61 MacroExpansionContext::ExpansionRangeMap::iterator It;
62 bool Inserted;
63 std::tie(It, Inserted) =
64 ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
65 if (Inserted) {
66 LLVM_DEBUG(llvm::dbgs() << "maps ";
67 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
68 It->getSecond().print(llvm::dbgs(), SM);
69 llvm::dbgs() << '\n';);
70 } else {
71 if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
72 It->getSecond() = ExpansionEnd;
73 LLVM_DEBUG(
74 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
75 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
76 llvm::dbgs() << '\n';);
77 }
78 }
79 }
80};
81} // namespace detail
82} // namespace clang
83
84using namespace clang;
85
87 : LangOpts(LangOpts) {}
88
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing; presumably this is the body of
// MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) - confirm.
//
// Stores the address of NewPP and of its SourceManager in the PP and SM
// members, then hooks two observers into the preprocessor: a
// detail::MacroExpansionRangeRecorder callback that is handed a reference to
// ExpansionRanges, and a token watcher that forwards every token to
// onTokenLexed().
90 PP = &NewPP;
91 SM = &NewPP.getSourceManager();
92
93 // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
94 PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
95 *PP, *SM, ExpansionRanges));
96 // Same applies here.
// The watcher lambda captures `this`, so it calls back into this object for
// as long as the preprocessor keeps the watcher installed.
97 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
98}
99
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing; since the body reads ExpandedTokens, presumably
// this is MacroExpansionContext::getExpandedText(MacroExpansionLoc) - confirm.
//
// Looks up the text recorded for the macro expansion at MacroExpansionLoc.
// Returns std::nullopt when the location is a macro ID or when ExpansionRanges
// has no entry for it, an empty string when a range is recorded but
// ExpandedTokens has no entry, and the recorded text otherwise.
100std::optional<StringRef>
// Locations that are themselves macro IDs are rejected up front.
102 if (MacroExpansionLoc.isMacroID())
103 return std::nullopt;
104
105 // If there was no macro expansion at that location, return std::nullopt.
106 if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
107 return std::nullopt;
108
109 // There was macro expansion, but resulted in no tokens, return empty string.
110 const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
111 if (It == ExpandedTokens.end())
112 return StringRef{""};
113
114 // Otherwise we have the actual token sequence as string.
115 return It->getSecond().str();
116}
117
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing; since the body builds a character range from the
// recorded expansion range, presumably this is
// MacroExpansionContext::getOriginalText(MacroExpansionLoc) - confirm.
//
// Returns std::nullopt when MacroExpansionLoc is a macro ID or when
// ExpansionRanges has no entry for it. Otherwise the recorded begin/end pair
// is turned into a character range and passed on together with *SM and
// LangOpts.
118std::optional<StringRef>
// Locations that are themselves macro IDs are rejected up front.
120 if (MacroExpansionLoc.isMacroID())
121 return std::nullopt;
122
// No recorded expansion range at this location means there is nothing to return.
123 const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
124 if (It == ExpansionRanges.end())
125 return std::nullopt;
126
127 assert(It->getFirst() != It->getSecond() &&
128 "Every macro expansion must cover a non-empty range.");
129
// NOTE(review): the head of this return statement is missing from the
// listing; presumably `return Lexer::getSourceText(` - confirm against the
// original file.
131 CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
132 LangOpts);
133}
134
136 dumpExpansionRangesToStream(llvm::dbgs());
137}
139 dumpExpandedTextsToStream(llvm::dbgs());
140}
141
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing; since the body writes to OS, presumably this is
// MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) - confirm.
//
// Prints every (begin, end) pair stored in ExpansionRanges to OS, one per
// line, below an "ExpansionRanges" banner.
143 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
144 LocalExpansionRanges.reserve(ExpansionRanges.size());
// Copy the records into a vector so they can be sorted before printing
// (presumably to make the output order deterministic).
145 for (const auto &Record : ExpansionRanges)
146 LocalExpansionRanges.emplace_back(
147 std::make_pair(Record.getFirst(), Record.getSecond()));
148 llvm::sort(LocalExpansionRanges);
149
150 OS << "\n=============== ExpansionRanges ===============\n";
// Each line has the form "> <begin>, <end>".
151 for (const auto &Record : LocalExpansionRanges) {
152 OS << "> ";
153 Record.first.print(OS, *SM);
154 OS << ", ";
155 Record.second.print(OS, *SM);
156 OS << '\n';
157 }
158}
159
// NOTE(review): the line carrying this function's name and parameter list is
// missing from this listing; since the body writes to OS, presumably this is
// MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) - confirm.
//
// Prints every (location, expanded text) pair stored in ExpandedTokens to OS,
// one per line, below an "ExpandedTokens" banner.
161 std::vector<std::pair<SourceLocation, MacroExpansionText>>
162 LocalExpandedTokens;
163 LocalExpandedTokens.reserve(ExpandedTokens.size());
// Copy the records into a vector so they can be sorted before printing
// (presumably to make the output order deterministic).
164 for (const auto &Record : ExpandedTokens)
165 LocalExpandedTokens.emplace_back(
166 std::make_pair(Record.getFirst(), Record.getSecond()));
167 llvm::sort(LocalExpandedTokens);
168
169 OS << "\n=============== ExpandedTokens ===============\n";
// Each line has the form "> <location> -> '<text>'".
170 for (const auto &Record : LocalExpandedTokens) {
171 OS << "> ";
172 Record.first.print(OS, *SM);
173 OS << " -> '" << Record.second << "'\n";
174 }
175}
176
177static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
178 assert(Tok.isNot(tok::raw_identifier));
179
180 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
181 if (Tok.isAnnotation())
182 return;
183
184 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
185 // FIXME: For now, we don't respect whitespaces between macro expanded
186 // tokens. We just emit a space after every identifier to produce a valid
187 // code for `int a ;` like expansions.
188 // ^-^-- Space after the 'int' and 'a' identifiers.
189 OS << II->getName() << ' ';
190 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
191 OS << StringRef(Tok.getLiteralData(), Tok.getLength());
192 } else {
193 char Tmp[256];
194 if (Tok.getLength() < sizeof(Tmp)) {
195 const char *TokPtr = Tmp;
196 // FIXME: Might use a different overload for cleaner callsite.
197 unsigned Len = PP.getSpelling(Tok, TokPtr);
198 OS.write(TokPtr, Len);
199 } else {
200 OS << "<too long token>";
201 }
202 }
203}
204
205void MacroExpansionContext::onTokenLexed(const Token &Tok) {
206 SourceLocation SLoc = Tok.getLocation();
207 if (SLoc.isFileID())
208 return;
209
210 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
211 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
212 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
213
214 // Remove spelling location.
215 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
216
217 MacroExpansionText TokenAsString;
218 llvm::raw_svector_ostream OS(TokenAsString);
219
220 // FIXME: Prepend newlines and space to produce the exact same output as the
221 // preprocessor would for this token.
222
223 dumpTokenInto(*PP, OS, Tok);
224
225 ExpansionMap::iterator It;
226 bool Inserted;
227 std::tie(It, Inserted) =
228 ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
229 if (!Inserted)
230 It->getSecond().append(TokenAsString);
231}
232
#define SM(sm)
Definition: Cuda.cpp:84
static void print(llvm::raw_ostream &OS, const T &V, ASTContext &ASTCtx, QualType Ty)
llvm::MachO::Record Record
Definition: MachO.h:31
static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, clang::Token Tok)
SourceRange Range
Definition: SemaObjC.cpp:758
static CharSourceRange getCharRange(SourceRange R)
One of these records is kept for each identifier that is lexed.
StringRef getName() const
Return the actual identifier string.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:499
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
Definition: Lexer.cpp:1023
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
A description of the current definition of a macro.
Definition: MacroInfo.h:590
void registerForPreprocessor(Preprocessor &PP)
Register the necessary callbacks to the Preprocessor to record the expansion events and the generated...
LLVM_DUMP_METHOD void dumpExpandedTextsToStream(raw_ostream &OS) const
LLVM_DUMP_METHOD void dumpExpandedTexts() const
LLVM_DUMP_METHOD void dumpExpansionRanges() const
MacroExpansionContext(const LangOptions &LangOpts)
Creates a MacroExpansionContext.
LLVM_DUMP_METHOD void dumpExpansionRangesToStream(raw_ostream &OS) const
std::optional< StringRef > getExpandedText(SourceLocation MacroExpansionLoc) const
std::optional< StringRef > getOriginalText(SourceLocation MacroExpansionLoc) const
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition: PPCallbacks.h:36
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:138
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
SourceManager & getSourceManager() const
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream.
Encodes a location in the source.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
unsigned getLength() const
Definition: Token.h:135
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:121
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:295
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:225
void MacroExpands(const Token &MacroName, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
MacroExpansionRangeRecorder(const Preprocessor &PP, SourceManager &SM, MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
The JSON file list parser is used to communicate input to InstallAPI.