clang 22.0.0git
MacroExpansionContext.cpp
Go to the documentation of this file.
1//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "clang/Format/Format.h"
11#include "llvm/Support/Debug.h"
12#include <optional>
13
14#define DEBUG_TYPE "macro-expansion-context"
15
16static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
18
19namespace clang {
20namespace detail {
22 const Preprocessor &PP;
23 SourceManager &SM;
24 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
25
26public:
28 const Preprocessor &PP, SourceManager &SM,
29 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
30 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
31
32 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
33 SourceRange Range, const MacroArgs *Args) override {
34 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
35 if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
36 return;
37
38 SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
39 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
40
41 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
42 // If the range is empty, use the length of the macro.
43 if (Range.getBegin() == Range.getEnd())
44 return SM.getExpansionLoc(
45 MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
46
47 // Include the last character.
48 return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
49 }();
50
51 (void)PP;
52 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
53 dumpTokenInto(PP, llvm::dbgs(), MacroName);
54 llvm::dbgs()
55 << "' with length " << MacroName.getLength() << " at ";
56 MacroNameBegin.print(llvm::dbgs(), SM);
57 llvm::dbgs() << ", expansion end at ";
58 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
59
60 // If the expansion range is empty, use the identifier of the macro as a
61 // range.
62 MacroExpansionContext::ExpansionRangeMap::iterator It;
63 bool Inserted;
64 std::tie(It, Inserted) =
65 ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
66 if (Inserted) {
67 LLVM_DEBUG(llvm::dbgs() << "maps ";
68 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
69 It->getSecond().print(llvm::dbgs(), SM);
70 llvm::dbgs() << '\n';);
71 } else {
72 if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
73 It->getSecond() = ExpansionEnd;
74 LLVM_DEBUG(
75 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
76 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
77 llvm::dbgs() << '\n';);
78 }
79 }
80 }
81};
82} // namespace detail
83} // namespace clang
84
85using namespace clang;
86
88 : LangOpts(LangOpts) {}
89
91 PP = &NewPP;
92 SM = &NewPP.getSourceManager();
93
94 // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
95 PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
96 *PP, *SM, ExpansionRanges));
97 // Same applies here.
98 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
99}
100
101std::optional<StringRef>
103 if (MacroExpansionLoc.isMacroID())
104 return std::nullopt;
105
106 // If there was no macro expansion at that location, return std::nullopt.
107 if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
108 return std::nullopt;
109
110 // There was a macro expansion, but it resulted in no tokens; return an empty string.
111 const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
112 if (It == ExpandedTokens.end())
113 return StringRef{""};
114
115 // Otherwise we have the actual token sequence as string.
116 return It->getSecond().str();
117}
118
119std::optional<StringRef>
121 if (MacroExpansionLoc.isMacroID())
122 return std::nullopt;
123
124 const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
125 if (It == ExpansionRanges.end())
126 return std::nullopt;
127
128 assert(It->getFirst() != It->getSecond() &&
129 "Every macro expansion must cover a non-empty range.");
130
132 CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
133 LangOpts);
134}
135
137 SourceLocation MacroExpansionLoc) const {
138 std::optional<StringRef> ExpandedText = getExpandedText(MacroExpansionLoc);
139 if (!ExpandedText)
140 return std::nullopt;
141
142 auto [It, Inserted] =
143 FormattedExpandedTokens.try_emplace(MacroExpansionLoc, "");
144 if (!Inserted)
145 return StringRef(It->getSecond());
146
147 clang::format::FormatStyle Style = clang::format::getLLVMStyle();
148
149 std::string MacroCodeBlock = ExpandedText->str();
150
151 std::vector<clang::tooling::Range> Ranges;
152 Ranges.emplace_back(0, MacroCodeBlock.length());
153
155 Style, MacroCodeBlock, Ranges, "<macro-expansion>");
156
158 clang::tooling::applyAllReplacements(MacroCodeBlock, Replacements);
159
160 It->getSecond() = Result ? std::move(*Result) : std::move(MacroCodeBlock);
161
162 return StringRef(It->getSecond());
163}
164
171
173 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
174 LocalExpansionRanges.reserve(ExpansionRanges.size());
175 for (const auto &Record : ExpansionRanges)
176 LocalExpansionRanges.emplace_back(
177 std::make_pair(Record.getFirst(), Record.getSecond()));
178 llvm::sort(LocalExpansionRanges);
179
180 OS << "\n=============== ExpansionRanges ===============\n";
181 for (const auto &Record : LocalExpansionRanges) {
182 OS << "> ";
183 Record.first.print(OS, *SM);
184 OS << ", ";
185 Record.second.print(OS, *SM);
186 OS << '\n';
187 }
188}
189
191 std::vector<std::pair<SourceLocation, MacroExpansionText>>
192 LocalExpandedTokens;
193 LocalExpandedTokens.reserve(ExpandedTokens.size());
194 for (const auto &Record : ExpandedTokens)
195 LocalExpandedTokens.emplace_back(
196 std::make_pair(Record.getFirst(), Record.getSecond()));
197 llvm::sort(LocalExpandedTokens);
198
199 OS << "\n=============== ExpandedTokens ===============\n";
200 for (const auto &Record : LocalExpandedTokens) {
201 OS << "> ";
202 Record.first.print(OS, *SM);
203 OS << " -> '" << Record.second << "'\n";
204 }
205}
206
207static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
208 assert(Tok.isNot(tok::raw_identifier));
209
210 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
211 if (Tok.isAnnotation())
212 return;
213
214 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
215 // FIXME: For now, we don't respect whitespaces between macro expanded
216 // tokens. We just emit a space after every identifier to produce a valid
217 // code for `int a ;` like expansions.
218 // ^-^-- Space after the 'int' and 'a' identifiers.
219 OS << II->getName() << ' ';
220 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
221 OS << StringRef(Tok.getLiteralData(), Tok.getLength());
222 } else {
223 char Tmp[256];
224 if (Tok.getLength() < sizeof(Tmp)) {
225 const char *TokPtr = Tmp;
226 // FIXME: Might use a different overload for cleaner callsite.
227 unsigned Len = PP.getSpelling(Tok, TokPtr);
228 OS.write(TokPtr, Len);
229 } else {
230 OS << "<too long token>";
231 }
232 }
233}
234
235void MacroExpansionContext::onTokenLexed(const Token &Tok) {
236 SourceLocation SLoc = Tok.getLocation();
237 if (SLoc.isFileID())
238 return;
239
240 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
241 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
242 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
243
244 // Remove spelling location.
245 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
246
247 MacroExpansionText TokenAsString;
248 llvm::raw_svector_ostream OS(TokenAsString);
249
250 // FIXME: Prepend newlines and space to produce the exact same output as the
251 // preprocessor would for this token.
252
253 dumpTokenInto(*PP, OS, Tok);
254
255 ExpansionMap::iterator It;
256 bool Inserted;
257 std::tie(It, Inserted) =
258 ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
259 if (!Inserted)
260 It->getSecond().append(TokenAsString);
261}
262
Token Tok
The Token.
Various functions to configurably format source code.
static void print(llvm::raw_ostream &OS, const T &V, ASTContext &ASTCtx, QualType Ty)
llvm::MachO::Record Record
Definition MachO.h:31
static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, clang::Token Tok)
static CharSourceRange getCharRange(SourceRange R)
One of these records is kept for each identifier that is lexed.
StringRef getName() const
Return the actual identifier string.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
Definition Lexer.cpp:1020
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition MacroArgs.h:30
A description of the current definition of a macro.
Definition MacroInfo.h:590
void registerForPreprocessor(Preprocessor &PP)
Register the necessary callbacks to the Preprocessor to record the expansion events and the generated...
LLVM_DUMP_METHOD void dumpExpandedTextsToStream(raw_ostream &OS) const
LLVM_DUMP_METHOD void dumpExpandedTexts() const
LLVM_DUMP_METHOD void dumpExpansionRanges() const
MacroExpansionContext(const LangOptions &LangOpts)
Creates a MacroExpansionContext.
std::optional< StringRef > getFormattedExpandedText(SourceLocation MacroExpansionLoc) const
LLVM_DUMP_METHOD void dumpExpansionRangesToStream(raw_ostream &OS) const
std::optional< StringRef > getExpandedText(SourceLocation MacroExpansionLoc) const
std::optional< StringRef > getOriginalText(SourceLocation MacroExpansionLoc) const
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
Encodes a location in the source.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
Definition Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition Token.h:195
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition Token.h:140
unsigned getLength() const
Definition Token.h:143
void MacroExpands(const Token &MacroName, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
MacroExpansionRangeRecorder(const Preprocessor &PP, SourceManager &SM, MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
Maintains a set of replacements that are conflict-free.
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef< tooling::Range > Ranges, StringRef FileName, FormattingAttemptStatus *Status)
Definition Format.cpp:4178
FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language)
Definition Format.cpp:1643
bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite)
Apply all replacements in Replaces to the Rewriter Rewrite.
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Definition TypeBase.h:905