clang  13.0.0git
MacroExpansionContext.cpp
Go to the documentation of this file.
1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/Support/Debug.h"
11 
12 #define DEBUG_TYPE "macro-expansion-context"
13 
14 static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS,
15  clang::Token Tok);
16 
17 namespace clang {
18 namespace detail {
20  const Preprocessor &PP;
21  SourceManager &SM;
22  MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
23 
24 public:
26  const Preprocessor &PP, SourceManager &SM,
27  MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
28  : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
29 
30  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
31  SourceRange Range, const MacroArgs *Args) override {
32  // Ignore annotation tokens like: _Pragma("pack(push, 1)")
33  if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
34  return;
35 
36  SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
37  assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
38 
39  const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
40  // If the range is empty, use the length of the macro.
41  if (Range.getBegin() == Range.getEnd())
42  return SM.getExpansionLoc(
43  MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
44 
45  // Include the last character.
46  return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
47  }();
48 
49  (void)PP;
50  LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
51  dumpTokenInto(PP, llvm::dbgs(), MacroName);
52  llvm::dbgs()
53  << "' with length " << MacroName.getLength() << " at ";
54  MacroNameBegin.print(llvm::dbgs(), SM);
55  llvm::dbgs() << ", expansion end at ";
56  ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
57 
58  // If the expansion range is empty, use the identifier of the macro as a
59  // range.
60  MacroExpansionContext::ExpansionRangeMap::iterator It;
61  bool Inserted;
62  std::tie(It, Inserted) =
63  ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
64  if (Inserted) {
65  LLVM_DEBUG(llvm::dbgs() << "maps ";
66  It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
67  It->getSecond().print(llvm::dbgs(), SM);
68  llvm::dbgs() << '\n';);
69  } else {
70  if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
71  It->getSecond() = ExpansionEnd;
72  LLVM_DEBUG(
73  llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
74  llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
75  llvm::dbgs() << '\n';);
76  }
77  }
78  }
79 };
80 } // namespace detail
81 } // namespace clang
82 
83 using namespace clang;
84 
86  : LangOpts(LangOpts) {}
87 
89  PP = &NewPP;
90  SM = &NewPP.getSourceManager();
91 
92  // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
93  PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
94  *PP, *SM, ExpansionRanges));
95  // Same applies here.
96  PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
97 }
98 
101  if (MacroExpansionLoc.isMacroID())
102  return llvm::None;
103 
104  // If there was no macro expansion at that location, return None.
105  if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
106  return llvm::None;
107 
108  // There was macro expansion, but resulted in no tokens, return empty string.
109  const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
110  if (It == ExpandedTokens.end())
111  return StringRef{""};
112 
113  // Otherwise we have the actual token sequence as string.
114  return StringRef{It->getSecond()};
115 }
116 
119  if (MacroExpansionLoc.isMacroID())
120  return llvm::None;
121 
122  const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
123  if (It == ExpansionRanges.end())
124  return llvm::None;
125 
126  assert(It->getFirst() != It->getSecond() &&
127  "Every macro expansion must cover a non-empty range.");
128 
129  return Lexer::getSourceText(
130  CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
131  LangOpts);
132 }
133 
135  dumpExpansionRangesToStream(llvm::dbgs());
136 }
138  dumpExpandedTextsToStream(llvm::dbgs());
139 }
140 
142  std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
143  LocalExpansionRanges.reserve(ExpansionRanges.size());
144  for (const auto &Record : ExpansionRanges)
145  LocalExpansionRanges.emplace_back(
146  std::make_pair(Record.getFirst(), Record.getSecond()));
147  llvm::sort(LocalExpansionRanges);
148 
149  OS << "\n=============== ExpansionRanges ===============\n";
150  for (const auto &Record : LocalExpansionRanges) {
151  OS << "> ";
152  Record.first.print(OS, *SM);
153  OS << ", ";
154  Record.second.print(OS, *SM);
155  OS << '\n';
156  }
157 }
158 
160  std::vector<std::pair<SourceLocation, MacroExpansionText>>
161  LocalExpandedTokens;
162  LocalExpandedTokens.reserve(ExpandedTokens.size());
163  for (const auto &Record : ExpandedTokens)
164  LocalExpandedTokens.emplace_back(
165  std::make_pair(Record.getFirst(), Record.getSecond()));
166  llvm::sort(LocalExpandedTokens);
167 
168  OS << "\n=============== ExpandedTokens ===============\n";
169  for (const auto &Record : LocalExpandedTokens) {
170  OS << "> ";
171  Record.first.print(OS, *SM);
172  OS << " -> '" << Record.second << "'\n";
173  }
174 }
175 
176 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
177  assert(Tok.isNot(tok::raw_identifier));
178 
179  // Ignore annotation tokens like: _Pragma("pack(push, 1)")
180  if (Tok.isAnnotation())
181  return;
182 
183  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
184  // FIXME: For now, we don't respect whitespaces between macro expanded
185  // tokens. We just emit a space after every identifier to produce a valid
186  // code for `int a ;` like expansions.
187  // ^-^-- Space after the 'int' and 'a' identifiers.
188  OS << II->getName() << ' ';
189  } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
190  OS << StringRef(Tok.getLiteralData(), Tok.getLength());
191  } else {
192  char Tmp[256];
193  if (Tok.getLength() < sizeof(Tmp)) {
194  const char *TokPtr = Tmp;
195  // FIXME: Might use a different overload for cleaner callsite.
196  unsigned Len = PP.getSpelling(Tok, TokPtr);
197  OS.write(TokPtr, Len);
198  } else {
199  OS << "<too long token>";
200  }
201  }
202 }
203 
204 void MacroExpansionContext::onTokenLexed(const Token &Tok) {
205  SourceLocation SLoc = Tok.getLocation();
206  if (SLoc.isFileID())
207  return;
208 
209  LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
210  dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
211  SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
212 
213  // Remove spelling location.
214  SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
215 
216  MacroExpansionText TokenAsString;
217  llvm::raw_svector_ostream OS(TokenAsString);
218 
219  // FIXME: Prepend newlines and space to produce the exact same output as the
220  // preprocessor would for this token.
221 
222  dumpTokenInto(*PP, OS, Tok);
223 
224  ExpansionMap::iterator It;
225  bool Inserted;
226  std::tie(It, Inserted) =
227  ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
228  if (!Inserted)
229  It->getSecond().append(TokenAsString);
230 }
231 
clang::MacroExpansionContext::dumpExpandedTextsToStream
LLVM_DUMP_METHOD void dumpExpandedTextsToStream(raw_ostream &OS) const
Definition: MacroExpansionContext.cpp:159
clang::MacroExpansionContext::getOriginalText
Optional< StringRef > getOriginalText(SourceLocation MacroExpansionLoc) const
Definition: MacroExpansionContext.cpp:118
clang::MacroExpansionContext::dumpExpansionRangesToStream
LLVM_DUMP_METHOD void dumpExpansionRangesToStream(raw_ostream &OS) const
Definition: MacroExpansionContext.cpp:141
clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:212
clang::Token::getLiteralData
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the start of it in the text buffer if known, null otherwise.
Definition: Token.h:217
clang::MacroDefinition
A description of the current definition of a macro.
Definition: MacroInfo.h:564
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:89
clang::PPCallbacks
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition: PPCallbacks.h:35
clang::Token::getIdentifierInfo
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
clang::MacroArgs
MacroArgs - An instance of this class captures information about the formal arguments specified to a function-like macro invocation.
Definition: MacroArgs.h:29
llvm::Optional
Definition: LLVM.h:40
clang::Lexer::getSourceText
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
Definition: Lexer.cpp:942
clang::MacroExpansionContext::getExpandedText
Optional< StringRef > getExpandedText(SourceLocation MacroExpansionLoc) const
Definition: MacroExpansionContext.cpp:100
print
void print(llvm::raw_ostream &OS, const Pointer &P, ASTContext &Ctx, QualType Ty)
Definition: InterpFrame.cpp:62
clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:624
clang::CharSourceRange::getCharRange
static CharSourceRange getCharRange(SourceRange R)
Definition: SourceLocation.h:267
clang::SourceLocation::print
void print(raw_ostream &OS, const SourceManager &SM) const
Definition: SourceLocation.cpp:62
clang::detail::MacroExpansionRangeRecorder::MacroExpands
void MacroExpands(const Token &MacroName, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
Definition: MacroExpansionContext.cpp:30
clang::Token::needsCleaning
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:285
clang::SourceLocation::isFileID
bool isFileID() const
Definition: SourceLocation.h:102
clang::detail::MacroExpansionRangeRecorder
Definition: MacroExpansionContext.cpp:19
clang::MacroExpansionContext::dumpExpandedTexts
LLVM_DUMP_METHOD void dumpExpandedTexts() const
Definition: MacroExpansionContext.cpp:137
clang::Token::isNot
bool isNot(tok::TokenKind K) const
Definition: Token.h:98
MacroExpansionContext.h
clang::Token::isLiteral
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:115
clang::Token::getLength
unsigned getLength() const
Definition: Token.h:129
clang::Preprocessor::getSourceManager
SourceManager & getSourceManager() const
Definition: Preprocessor.h:926
clang::Preprocessor::getSpelling
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
Definition: Preprocessor.h:1721
clang::MacroExpansionContext::dumpExpansionRanges
LLVM_DUMP_METHOD void dumpExpansionRanges() const
Definition: MacroExpansionContext.cpp:134
clang::MacroExpansionContext::registerForPreprocessor
void registerForPreprocessor(Preprocessor &PP)
Register the necessary callbacks to the Preprocessor to record the expansion events and the generated...
Definition: MacroExpansionContext.cpp:88
clang::detail::MacroExpansionRangeRecorder::MacroExpansionRangeRecorder
MacroExpansionRangeRecorder(const Preprocessor &PP, SourceManager &SM, MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
Definition: MacroExpansionContext.cpp:25
clang::IdentifierInfo
One of these records is kept for each identifier that is lexed.
Definition: IdentifierTable.h:59
clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:126
clang::SourceLocation::isMacroID
bool isMacroID() const
Definition: SourceLocation.h:103
clang::LangOptions
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that is accepted.
Definition: LangOptions.h:58
clang::Token::isAnnotation
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:120
clang::IdentifierInfo::getName
StringRef getName() const
Return the actual identifier string.
Definition: IdentifierTable.h:160
clang
Dataflow Directional Tag Classes.
Definition: CalledOnceCheck.h:17
clang::Preprocessor::addPPCallbacks
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
Definition: Preprocessor.h:1020
clang::SourceManager::getExpansionLoc
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID.
Definition: SourceManager.h:1155
clang::Preprocessor::setTokenWatcher
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream.
Definition: Preprocessor.h:1044
clang::MacroExpansionContext::MacroExpansionContext
MacroExpansionContext(const LangOptions &LangOpts)
Creates a MacroExpansionContext.
Definition: MacroExpansionContext.cpp:85
clang::Preprocessor
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:129
SM
#define SM(sm)
Definition: Cuda.cpp:68
clang::SourceLocation::getLocWithOffset
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
Definition: SourceLocation.h:136
clang::ento::ObjKind::OS
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
dumpTokenInto
static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, clang::Token Tok)
Definition: MacroExpansionContext.cpp:176