clang  8.0.0svn
Rewriter.cpp
Go to the documentation of this file.
1 //===- Rewriter.cpp - Code rewriting interface ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the Rewriter class, which is used for code
11 // transformations.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "clang/Basic/Diagnostic.h"
21 #include "clang/Lex/Lexer.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <iterator>
31 #include <map>
32 #include <memory>
33 #include <system_error>
34 #include <utility>
35 
36 using namespace clang;
37 
38 raw_ostream &RewriteBuffer::write(raw_ostream &os) const {
39  // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the
40  // character iterator.
41  for (RopePieceBTreeIterator I = begin(), E = end(); I != E;
42  I.MoveToNextPiece())
43  os << I.piece();
44  return os;
45 }
46 
/// Tell whether \p c is a whitespace character other than newline, i.e. one
/// of ' ', '\\t', '\\f', '\\v' or '\\r'.
static inline bool isWhitespaceExceptNL(unsigned char c) {
  return c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r';
}
61 
62 void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size,
63  bool removeLineIfEmpty) {
64  // Nothing to remove, exit early.
65  if (Size == 0) return;
66 
67  unsigned RealOffset = getMappedOffset(OrigOffset, true);
68  assert(RealOffset+Size <= Buffer.size() && "Invalid location");
69 
70  // Remove the dead characters.
71  Buffer.erase(RealOffset, Size);
72 
73  // Add a delta so that future changes are offset correctly.
74  AddReplaceDelta(OrigOffset, -Size);
75 
76  if (removeLineIfEmpty) {
77  // Find the line that the remove occurred and if it is completely empty
78  // remove the line as well.
79 
80  iterator curLineStart = begin();
81  unsigned curLineStartOffs = 0;
82  iterator posI = begin();
83  for (unsigned i = 0; i != RealOffset; ++i) {
84  if (*posI == '\n') {
85  curLineStart = posI;
86  ++curLineStart;
87  curLineStartOffs = i + 1;
88  }
89  ++posI;
90  }
91 
92  unsigned lineSize = 0;
93  posI = curLineStart;
94  while (posI != end() && isWhitespaceExceptNL(*posI)) {
95  ++posI;
96  ++lineSize;
97  }
98  if (posI != end() && *posI == '\n') {
99  Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/);
100  AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/));
101  }
102  }
103 }
104 
105 void RewriteBuffer::InsertText(unsigned OrigOffset, StringRef Str,
106  bool InsertAfter) {
107  // Nothing to insert, exit early.
108  if (Str.empty()) return;
109 
110  unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter);
111  Buffer.insert(RealOffset, Str.begin(), Str.end());
112 
113  // Add a delta so that future changes are offset correctly.
114  AddInsertDelta(OrigOffset, Str.size());
115 }
116 
117 /// ReplaceText - This method replaces a range of characters in the input
118 /// buffer with a new string. This is effectively a combined "remove+insert"
119 /// operation.
120 void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength,
121  StringRef NewStr) {
122  unsigned RealOffset = getMappedOffset(OrigOffset, true);
123  Buffer.erase(RealOffset, OrigLength);
124  Buffer.insert(RealOffset, NewStr.begin(), NewStr.end());
125  if (OrigLength != NewStr.size())
126  AddReplaceDelta(OrigOffset, NewStr.size() - OrigLength);
127 }
128 
129 //===----------------------------------------------------------------------===//
130 // Rewriter class
131 //===----------------------------------------------------------------------===//
132 
133 /// getRangeSize - Return the size in bytes of the specified range if they
134 /// are in the same file. If not, this returns -1.
136  RewriteOptions opts) const {
137  if (!isRewritable(Range.getBegin()) ||
138  !isRewritable(Range.getEnd())) return -1;
139 
140  FileID StartFileID, EndFileID;
141  unsigned StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
142  unsigned EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
143 
144  if (StartFileID != EndFileID)
145  return -1;
146 
147  // If edits have been made to this buffer, the delta between the range may
148  // have changed.
149  std::map<FileID, RewriteBuffer>::const_iterator I =
150  RewriteBuffers.find(StartFileID);
151  if (I != RewriteBuffers.end()) {
152  const RewriteBuffer &RB = I->second;
153  EndOff = RB.getMappedOffset(EndOff, opts.IncludeInsertsAtEndOfRange);
154  StartOff = RB.getMappedOffset(StartOff, !opts.IncludeInsertsAtBeginOfRange);
155  }
156 
157  // Adjust the end offset to the end of the last token, instead of being the
158  // start of the last token if this is a token range.
159  if (Range.isTokenRange())
160  EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
161 
162  return EndOff-StartOff;
163 }
164 
166  return getRangeSize(CharSourceRange::getTokenRange(Range), opts);
167 }
168 
169 /// getRewrittenText - Return the rewritten form of the text in the specified
170 /// range. If the start or end of the range was unrewritable or if they are
171 /// in different buffers, this returns an empty string.
172 ///
173 /// Note that this method is not particularly efficient.
174 std::string Rewriter::getRewrittenText(SourceRange Range) const {
175  if (!isRewritable(Range.getBegin()) ||
176  !isRewritable(Range.getEnd()))
177  return {};
178 
179  FileID StartFileID, EndFileID;
180  unsigned StartOff, EndOff;
181  StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
182  EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
183 
184  if (StartFileID != EndFileID)
185  return {}; // Start and end in different buffers.
186 
187  // If edits have been made to this buffer, the delta between the range may
188  // have changed.
189  std::map<FileID, RewriteBuffer>::const_iterator I =
190  RewriteBuffers.find(StartFileID);
191  if (I == RewriteBuffers.end()) {
192  // If the buffer hasn't been rewritten, just return the text from the input.
193  const char *Ptr = SourceMgr->getCharacterData(Range.getBegin());
194 
195  // Adjust the end offset to the end of the last token, instead of being the
196  // start of the last token.
197  EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
198  return std::string(Ptr, Ptr+EndOff-StartOff);
199  }
200 
201  const RewriteBuffer &RB = I->second;
202  EndOff = RB.getMappedOffset(EndOff, true);
203  StartOff = RB.getMappedOffset(StartOff);
204 
205  // Adjust the end offset to the end of the last token, instead of being the
206  // start of the last token.
207  EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
208 
209  // Advance the iterators to the right spot, yay for linear time algorithms.
210  RewriteBuffer::iterator Start = RB.begin();
211  std::advance(Start, StartOff);
213  std::advance(End, EndOff-StartOff);
214 
215  return std::string(Start, End);
216 }
217 
218 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc,
219  FileID &FID) const {
220  assert(Loc.isValid() && "Invalid location");
221  std::pair<FileID, unsigned> V = SourceMgr->getDecomposedLoc(Loc);
222  FID = V.first;
223  return V.second;
224 }
225 
226 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
228  std::map<FileID, RewriteBuffer>::iterator I =
229  RewriteBuffers.lower_bound(FID);
230  if (I != RewriteBuffers.end() && I->first == FID)
231  return I->second;
232  I = RewriteBuffers.insert(I, std::make_pair(FID, RewriteBuffer()));
233 
234  StringRef MB = SourceMgr->getBufferData(FID);
235  I->second.Initialize(MB.begin(), MB.end());
236 
237  return I->second;
238 }
239 
240 /// InsertText - Insert the specified string at the specified location in the
241 /// original buffer.
242 bool Rewriter::InsertText(SourceLocation Loc, StringRef Str,
243  bool InsertAfter, bool indentNewLines) {
244  if (!isRewritable(Loc)) return true;
245  FileID FID;
246  unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
247 
248  SmallString<128> indentedStr;
249  if (indentNewLines && Str.find('\n') != StringRef::npos) {
250  StringRef MB = SourceMgr->getBufferData(FID);
251 
252  unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1;
253  const SrcMgr::ContentCache *
254  Content = SourceMgr->getSLocEntry(FID).getFile().getContentCache();
255  unsigned lineOffs = Content->SourceLineCache[lineNo];
256 
257  // Find the whitespace at the start of the line.
258  StringRef indentSpace;
259  {
260  unsigned i = lineOffs;
261  while (isWhitespaceExceptNL(MB[i]))
262  ++i;
263  indentSpace = MB.substr(lineOffs, i-lineOffs);
264  }
265 
267  Str.split(lines, "\n");
268 
269  for (unsigned i = 0, e = lines.size(); i != e; ++i) {
270  indentedStr += lines[i];
271  if (i < e-1) {
272  indentedStr += '\n';
273  indentedStr += indentSpace;
274  }
275  }
276  Str = indentedStr.str();
277  }
278 
279  getEditBuffer(FID).InsertText(StartOffs, Str, InsertAfter);
280  return false;
281 }
282 
284  if (!isRewritable(Loc)) return true;
285  FileID FID;
286  unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
287  RewriteOptions rangeOpts;
288  rangeOpts.IncludeInsertsAtBeginOfRange = false;
289  StartOffs += getRangeSize(SourceRange(Loc, Loc), rangeOpts);
290  getEditBuffer(FID).InsertText(StartOffs, Str, /*InsertAfter*/true);
291  return false;
292 }
293 
294 /// RemoveText - Remove the specified text region.
295 bool Rewriter::RemoveText(SourceLocation Start, unsigned Length,
296  RewriteOptions opts) {
297  if (!isRewritable(Start)) return true;
298  FileID FID;
299  unsigned StartOffs = getLocationOffsetAndFileID(Start, FID);
300  getEditBuffer(FID).RemoveText(StartOffs, Length, opts.RemoveLineIfEmpty);
301  return false;
302 }
303 
304 /// ReplaceText - This method replaces a range of characters in the input
305 /// buffer with a new string. This is effectively a combined "remove/insert"
306 /// operation.
307 bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength,
308  StringRef NewStr) {
309  if (!isRewritable(Start)) return true;
310  FileID StartFileID;
311  unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID);
312 
313  getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength, NewStr);
314  return false;
315 }
316 
317 bool Rewriter::ReplaceText(SourceRange range, SourceRange replacementRange) {
318  if (!isRewritable(range.getBegin())) return true;
319  if (!isRewritable(range.getEnd())) return true;
320  if (replacementRange.isInvalid()) return true;
321  SourceLocation start = range.getBegin();
322  unsigned origLength = getRangeSize(range);
323  unsigned newLength = getRangeSize(replacementRange);
324  FileID FID;
325  unsigned newOffs = getLocationOffsetAndFileID(replacementRange.getBegin(),
326  FID);
327  StringRef MB = SourceMgr->getBufferData(FID);
328  return ReplaceText(start, origLength, MB.substr(newOffs, newLength));
329 }
330 
332  SourceLocation parentIndent) {
333  if (range.isInvalid()) return true;
334  if (!isRewritable(range.getBegin())) return true;
335  if (!isRewritable(range.getEnd())) return true;
336  if (!isRewritable(parentIndent)) return true;
337 
338  FileID StartFileID, EndFileID, parentFileID;
339  unsigned StartOff, EndOff, parentOff;
340 
341  StartOff = getLocationOffsetAndFileID(range.getBegin(), StartFileID);
342  EndOff = getLocationOffsetAndFileID(range.getEnd(), EndFileID);
343  parentOff = getLocationOffsetAndFileID(parentIndent, parentFileID);
344 
345  if (StartFileID != EndFileID || StartFileID != parentFileID)
346  return true;
347  if (StartOff > EndOff)
348  return true;
349 
350  FileID FID = StartFileID;
351  StringRef MB = SourceMgr->getBufferData(FID);
352 
353  unsigned parentLineNo = SourceMgr->getLineNumber(FID, parentOff) - 1;
354  unsigned startLineNo = SourceMgr->getLineNumber(FID, StartOff) - 1;
355  unsigned endLineNo = SourceMgr->getLineNumber(FID, EndOff) - 1;
356 
357  const SrcMgr::ContentCache *
358  Content = SourceMgr->getSLocEntry(FID).getFile().getContentCache();
359 
360  // Find where the lines start.
361  unsigned parentLineOffs = Content->SourceLineCache[parentLineNo];
362  unsigned startLineOffs = Content->SourceLineCache[startLineNo];
363 
364  // Find the whitespace at the start of each line.
365  StringRef parentSpace, startSpace;
366  {
367  unsigned i = parentLineOffs;
368  while (isWhitespaceExceptNL(MB[i]))
369  ++i;
370  parentSpace = MB.substr(parentLineOffs, i-parentLineOffs);
371 
372  i = startLineOffs;
373  while (isWhitespaceExceptNL(MB[i]))
374  ++i;
375  startSpace = MB.substr(startLineOffs, i-startLineOffs);
376  }
377  if (parentSpace.size() >= startSpace.size())
378  return true;
379  if (!startSpace.startswith(parentSpace))
380  return true;
381 
382  StringRef indent = startSpace.substr(parentSpace.size());
383 
384  // Indent the lines between start/end offsets.
385  RewriteBuffer &RB = getEditBuffer(FID);
386  for (unsigned lineNo = startLineNo; lineNo <= endLineNo; ++lineNo) {
387  unsigned offs = Content->SourceLineCache[lineNo];
388  unsigned i = offs;
389  while (isWhitespaceExceptNL(MB[i]))
390  ++i;
391  StringRef origIndent = MB.substr(offs, i-offs);
392  if (origIndent.startswith(startSpace))
393  RB.InsertText(offs, indent, /*InsertAfter=*/false);
394  }
395 
396  return false;
397 }
398 
399 namespace {
400 
401 // A wrapper for a file stream that atomically overwrites the target.
402 //
403 // Creates a file output stream for a temporary file in the constructor,
404 // which is later accessible via getStream() if ok() return true.
405 // Flushes the stream and moves the temporary file to the target location
406 // in the destructor.
407 class AtomicallyMovedFile {
408 public:
409  AtomicallyMovedFile(DiagnosticsEngine &Diagnostics, StringRef Filename,
410  bool &AllWritten)
411  : Diagnostics(Diagnostics), Filename(Filename), AllWritten(AllWritten) {
412  TempFilename = Filename;
413  TempFilename += "-%%%%%%%%";
414  int FD;
415  if (llvm::sys::fs::createUniqueFile(TempFilename, FD, TempFilename)) {
416  AllWritten = false;
417  Diagnostics.Report(clang::diag::err_unable_to_make_temp)
418  << TempFilename;
419  } else {
420  FileStream.reset(new llvm::raw_fd_ostream(FD, /*shouldClose=*/true));
421  }
422  }
423 
424  ~AtomicallyMovedFile() {
425  if (!ok()) return;
426 
427  // Close (will also flush) theFileStream.
428  FileStream->close();
429  if (std::error_code ec = llvm::sys::fs::rename(TempFilename, Filename)) {
430  AllWritten = false;
431  Diagnostics.Report(clang::diag::err_unable_to_rename_temp)
432  << TempFilename << Filename << ec.message();
433  // If the remove fails, there's not a lot we can do - this is already an
434  // error.
435  llvm::sys::fs::remove(TempFilename);
436  }
437  }
438 
439  bool ok() { return (bool)FileStream; }
440  raw_ostream &getStream() { return *FileStream; }
441 
442 private:
443  DiagnosticsEngine &Diagnostics;
444  StringRef Filename;
445  SmallString<128> TempFilename;
446  std::unique_ptr<llvm::raw_fd_ostream> FileStream;
447  bool &AllWritten;
448 };
449 
450 } // namespace
451 
453  bool AllWritten = true;
454  for (buffer_iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) {
455  const FileEntry *Entry =
456  getSourceMgr().getFileEntryForID(I->first);
457  AtomicallyMovedFile File(getSourceMgr().getDiagnostics(), Entry->getName(),
458  AllWritten);
459  if (File.ok()) {
460  I->second.write(File.getStream());
461  }
462  }
463  return !AllWritten;
464 }
void insert(unsigned Offset, const char *Start, const char *End)
Definition: RewriteRope.h:197
bool IncreaseIndentation(CharSourceRange range, SourceLocation parentIndent)
Increase indentation for the lines between the given source range.
Definition: Rewriter.cpp:331
Defines the clang::FileManager interface and associated types.
Defines the SourceManager interface.
static CharSourceRange getTokenRange(SourceRange R)
void Initialize(const char *BufStart, const char *BufEnd)
Initialize - Start this rewrite buffer out with a copy of the unmodified input buffer.
Definition: RewriteBuffer.h:44
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1294
bool RemoveText(SourceLocation Start, unsigned Length, RewriteOptions opts=RewriteOptions())
RemoveText - Remove the specified text region.
Definition: Rewriter.cpp:295
void erase(unsigned Offset, unsigned NumBytes)
Definition: RewriteRope.h:203
RewriteBuffer & getEditBuffer(FileID FID)
getEditBuffer - This is like getRewriteBufferFor, but always returns a buffer, and allows you to writ...
Definition: Rewriter.cpp:227
RewriteBuffer - As code is rewritten, SourceBuffer's from the original input with modifications get a...
Definition: RewriteBuffer.h:26
iterator end() const
Definition: RewriteBuffer.h:39
std::string getRewrittenText(SourceRange Range) const
getRewrittenText - Return the rewritten form of the text in the specified range.
Definition: Rewriter.cpp:174
SourceLocation getBegin() const
One instance of this struct is kept for every file loaded or used.
Definition: SourceManager.h:95
bool ReplaceText(SourceLocation Start, unsigned OrigLength, StringRef NewStr)
ReplaceText - This method replaces a range of characters in the input buffer with a new string...
Definition: Rewriter.cpp:307
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
Defines the Diagnostic-related interfaces.
StringRef Filename
Definition: Format.cpp:1629
SourceLocation End
Represents a character-granular source range.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
Definition: Lexer.cpp:436
bool isInvalid() const
SourceLocation getEnd() const
int getRangeSize(SourceRange Range, RewriteOptions opts=RewriteOptions()) const
getRangeSize - Return the size in bytes of the specified range if they are in the same file...
Definition: Rewriter.cpp:165
void InsertText(unsigned OrigOffset, StringRef Str, bool InsertAfter=true)
InsertText - Insert some text at the specified point, where the offset in the buffer is specified rel...
Definition: Rewriter.cpp:105
bool overwriteChangedFiles()
overwriteChangedFiles - Save all changed files to disk.
Definition: Rewriter.cpp:452
iterator begin() const
Definition: RewriteBuffer.h:38
void ReplaceText(unsigned OrigOffset, unsigned OrigLength, StringRef NewStr)
ReplaceText - This method replaces a range of characters in the input buffer with a new string...
Definition: Rewriter.cpp:120
Encodes a location in the source.
static bool isWhitespaceExceptNL(unsigned char c)
Return true if this character is non-new-line whitespace: ' ', '\t', '\f', '\v', '\r'.
Definition: Rewriter.cpp:49
raw_ostream & write(raw_ostream &Stream) const
Write to Stream the result of applying all changes to the original buffer.
Definition: Rewriter.cpp:38
StringRef getName() const
Definition: FileManager.h:85
unsigned size() const
Definition: RewriteRope.h:185
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:60
bool InsertText(SourceLocation Loc, StringRef Str, bool InsertAfter=true, bool indentNewLines=false)
InsertText - Insert the specified string at the specified location in the original buffer...
Definition: Rewriter.cpp:242
bool RemoveLineIfEmpty
If true and removing some text leaves a blank line also remove the empty line (false by default)...
Definition: Rewriter.h:50
void RemoveText(unsigned OrigOffset, unsigned Size, bool removeLineIfEmpty=false)
RemoveText - Remove the specified text.
Definition: Rewriter.cpp:62
unsigned * SourceLineCache
A bump pointer allocated array of offsets for each source line.
std::map< FileID, RewriteBuffer >::iterator buffer_iterator
Definition: Rewriter.h:55
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
RopePieceBTreeIterator - This class provides read-only forward iteration over bytes that are in a Rop...
Definition: RewriteRope.h:87
static int getRangeSize(const SourceManager &Sources, const CharSourceRange &Range, const LangOptions &LangOpts)
bool IncludeInsertsAtBeginOfRange
Given a source range, true to include previous inserts at the beginning of the range as part of the r...
Definition: Rewriter.h:42
SourceLocation getEnd() const
Defines the clang::SourceLocation class and associated facilities.
Defines the Diagnostic IDs-related interfaces.
A trivial tuple used to represent a source range.
bool InsertTextAfterToken(SourceLocation Loc, StringRef Str)
Insert the specified string after the token in the specified location.
Definition: Rewriter.cpp:283
SourceLocation getBegin() const
bool IncludeInsertsAtEndOfRange
Given a source range, true to include previous inserts at the end of the range as part of the range i...
Definition: Rewriter.h:46