clang  14.0.0git
RewriteMacros.cpp
Go to the documentation of this file.
1 //===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code rewrites macro invocations into their expansions. This gives you
10 // a macro expanded file that retains comments and #includes.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "clang/Rewrite/Frontend/Rewriters.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdio>
#include <memory>
#include <string>
#include <vector>
22 
23 using namespace clang;
24 
25 /// isSameToken - Return true if the two specified tokens start have the same
26 /// content.
27 static bool isSameToken(Token &RawTok, Token &PPTok) {
28  // If two tokens have the same kind and the same identifier info, they are
29  // obviously the same.
30  if (PPTok.getKind() == RawTok.getKind() &&
31  PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
32  return true;
33 
34  // Otherwise, if they are different but have the same identifier info, they
35  // are also considered to be the same. This allows keywords and raw lexed
36  // identifiers with the same name to be treated the same.
37  if (PPTok.getIdentifierInfo() &&
38  PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
39  return true;
40 
41  return false;
42 }
43 
44 
45 /// GetNextRawTok - Return the next raw token in the stream, skipping over
46 /// comments if ReturnComment is false.
47 static const Token &GetNextRawTok(const std::vector<Token> &RawTokens,
48  unsigned &CurTok, bool ReturnComment) {
49  assert(CurTok < RawTokens.size() && "Overran eof!");
50 
51  // If the client doesn't want comments and we have one, skip it.
52  if (!ReturnComment && RawTokens[CurTok].is(tok::comment))
53  ++CurTok;
54 
55  return RawTokens[CurTok++];
56 }
57 
58 
59 /// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into
60 /// the specified vector.
62  std::vector<Token> &RawTokens) {
64 
65  // Create a lexer to lex all the tokens of the main file in raw mode. Even
66  // though it is in raw mode, it will not return comments.
67  llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID());
68  Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
69 
70  // Switch on comment lexing because we really do want them.
71  RawLex.SetCommentRetentionState(true);
72 
73  Token RawTok;
74  do {
75  RawLex.LexFromRawLexer(RawTok);
76 
77  // If we have an identifier with no identifier info for our raw token, look
78  // up the identifier info. This is important for equality comparison of
79  // identifier tokens.
80  if (RawTok.is(tok::raw_identifier))
81  PP.LookUpIdentifierInfo(RawTok);
82 
83  RawTokens.push_back(RawTok);
84  } while (RawTok.isNot(tok::eof));
85 }
86 
87 
88 /// RewriteMacrosInInput - Implement -rewrite-macros mode.
89 void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
91 
93  Rewrite.setSourceMgr(SM, PP.getLangOpts());
94  RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());
95 
96  std::vector<Token> RawTokens;
97  LexRawTokensFromMainFile(PP, RawTokens);
98  unsigned CurRawTok = 0;
99  Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
100 
101 
102  // Get the first preprocessing token.
103  PP.EnterMainSourceFile();
104  Token PPTok;
105  PP.Lex(PPTok);
106 
107  // Preprocess the input file in parallel with raw lexing the main file. Ignore
108  // all tokens that are preprocessed from a file other than the main file (e.g.
109  // a header). If we see tokens that are in the preprocessed file but not the
110  // lexed file, we have a macro expansion. If we see tokens in the lexed file
111  // that aren't in the preprocessed view, we have macros that expand to no
112  // tokens, or macro arguments etc.
113  while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) {
114  SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());
115 
116  // If PPTok is from a different source file, ignore it.
117  if (!SM.isWrittenInMainFile(PPLoc)) {
118  PP.Lex(PPTok);
119  continue;
120  }
121 
122  // If the raw file hits a preprocessor directive, they will be extra tokens
123  // in the raw file that don't exist in the preprocsesed file. However, we
124  // choose to preserve them in the output file and otherwise handle them
125  // specially.
126  if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
127  // If this is a #warning directive or #pragma mark (GNU extensions),
128  // comment the line out.
129  if (RawTokens[CurRawTok].is(tok::identifier)) {
130  const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo();
131  if (II->getName() == "warning") {
132  // Comment out #warning.
133  RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
134  } else if (II->getName() == "pragma" &&
135  RawTokens[CurRawTok+1].is(tok::identifier) &&
136  (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() ==
137  "mark")) {
138  // Comment out #pragma mark.
139  RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
140  }
141  }
142 
143  // Otherwise, if this is a #include or some other directive, just leave it
144  // in the file by skipping over the line.
145  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
146  while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof))
147  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
148  continue;
149  }
150 
151  // Okay, both tokens are from the same file. Get their offsets from the
152  // start of the file.
153  unsigned PPOffs = SM.getFileOffset(PPLoc);
154  unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());
155 
156  // If the offsets are the same and the token kind is the same, ignore them.
157  if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
158  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
159  PP.Lex(PPTok);
160  continue;
161  }
162 
163  // If the PP token is farther along than the raw token, something was
164  // deleted. Comment out the raw token.
165  if (RawOffs <= PPOffs) {
166  // Comment out a whole run of tokens instead of bracketing each one with
167  // comments. Add a leading space if RawTok didn't have one.
168  bool HasSpace = RawTok.hasLeadingSpace();
169  RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]);
170  unsigned EndPos;
171 
172  do {
173  EndPos = RawOffs+RawTok.getLength();
174 
175  RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
176  RawOffs = SM.getFileOffset(RawTok.getLocation());
177 
178  if (RawTok.is(tok::comment)) {
179  // Skip past the comment.
180  RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
181  break;
182  }
183 
184  } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
185  (PPOffs != RawOffs || !isSameToken(RawTok, PPTok)));
186 
187  RB.InsertTextBefore(EndPos, "*/");
188  continue;
189  }
190 
191  // Otherwise, there was a replacement an expansion. Insert the new token
192  // in the output buffer. Insert the whole run of new tokens at once to get
193  // them in the right order.
194  unsigned InsertPos = PPOffs;
195  std::string Expansion;
196  while (PPOffs < RawOffs) {
197  Expansion += ' ' + PP.getSpelling(PPTok);
198  PP.Lex(PPTok);
199  PPLoc = SM.getExpansionLoc(PPTok.getLocation());
200  PPOffs = SM.getFileOffset(PPLoc);
201  }
202  Expansion += ' ';
203  RB.InsertTextBefore(InsertPos, Expansion);
204  }
205 
206  // Get the buffer corresponding to MainFileID. If we haven't changed it, then
207  // we are done.
208  if (const RewriteBuffer *RewriteBuf =
209  Rewrite.getRewriteBufferFor(SM.getMainFileID())) {
210  //printf("Changed:\n");
211  *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
212  } else {
213  fprintf(stderr, "No changes\n");
214  }
215  OS->flush();
216 }
clang::RewriteBuffer::InsertTextAfter
void InsertTextAfter(unsigned OrigOffset, StringRef Str)
InsertTextAfter - Insert some text at the specified point, where the offset in the buffer is specifie...
Definition: RewriteBuffer.h:81
clang::Token::hasLeadingSpace
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:272
clang::Lexer::SetCommentRetentionState
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode.
Definition: Lexer.h:231
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::Preprocessor::Lex
void Lex(Token &Result)
Lex the next token for this preprocessor.
Definition: Preprocessor.cpp:888
clang::SourceLocation
Encodes a location in the source.
Definition: SourceLocation.h:88
clang::Token::getIdentifierInfo
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
clang::TemplateSubstitutionKind::Rewrite
@ Rewrite
We are substituting template parameters for (typically) other template parameters in order to rewrite...
clang::Token::isAtStartOfLine
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:268
SourceManager.h
clang::Lexer::LexFromRawLexer
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Definition: Lexer.h:198
clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
LexRawTokensFromMainFile
static void LexRawTokensFromMainFile(Preprocessor &PP, std::vector< Token > &RawTokens)
LexRawTokensFromMainFile - Lex all the raw tokens from the main file into the specified vector.
Definition: RewriteMacros.cpp:61
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:626
clang::Preprocessor::getLangOpts
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:960
clang::RewriteBuffer::InsertTextBefore
void InsertTextBefore(unsigned OrigOffset, StringRef Str)
InsertTextBefore - Insert some text before the specified point, where the offset in the buffer is spe...
Definition: RewriteBuffer.h:74
Preprocessor.h
clang::Lexer
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:76
isSameToken
static bool isSameToken(Token &RawTok, Token &PPTok)
isSameToken - Return true if the two specified tokens have the same content.
Definition: RewriteMacros.cpp:27
Rewriters.h
GetNextRawTok
static const Token & GetNextRawTok(const std::vector< Token > &RawTokens, unsigned &CurTok, bool ReturnComment)
GetNextRawTok - Return the next raw token in the stream, skipping over comments if ReturnComment is f...
Definition: RewriteMacros.cpp:47
clang::Token::getKind
tok::TokenKind getKind() const
Definition: Token.h:92
clang::Token::is
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:97
clang::Preprocessor::EnterMainSourceFile
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
Definition: Preprocessor.cpp:530
clang::Token::isNot
bool isNot(tok::TokenKind K) const
Definition: Token.h:98
clang::Token::getLength
unsigned getLength() const
Definition: Token.h:129
clang::Preprocessor::getSourceManager
SourceManager & getSourceManager() const
Definition: Preprocessor.h:964
clang::Preprocessor::getSpelling
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
Definition: Preprocessor.h:1759
clang::RewriteMacrosInInput
void RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS)
RewriteMacrosInInput - Implement -rewrite-macros mode.
Definition: RewriteMacros.cpp:89
clang::IdentifierInfo
One of these records is kept for each identifier that is lexed.
Definition: IdentifierTable.h:84
clang::Token::getLocation
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:126
clang::IdentifierInfo::getName
StringRef getName() const
Return the actual identifier string.
Definition: IdentifierTable.h:195
clang
Definition: CalledOnceCheck.h:17
clang::RewriteBuffer
RewriteBuffer - As code is rewritten, SourceBuffer's from the original input with modifications get a...
Definition: RewriteBuffer.h:25
clang::Preprocessor::LookUpIdentifierInfo
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
Definition: Preprocessor.cpp:696
clang::Rewriter
Rewriter - This is the main interface to the rewrite buffers.
Definition: Rewriter.h:32
clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33
Rewriter.h
clang::Preprocessor
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:130
SM
#define SM(sm)
Definition: Cuda.cpp:78