clang-tools  10.0.0svn
Format.cpp
Go to the documentation of this file.
1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "Logger.h"
10 #include "clang/Basic/SourceManager.h"
11 #include "clang/Format/Format.h"
12 #include "clang/Lex/Lexer.h"
13 #include "clang/Tooling/Core/Replacement.h"
14 #include "llvm/Support/Unicode.h"
15 
16 namespace clang {
17 namespace clangd {
18 namespace {
19 
20 /// Append closing brackets )]} to \p Code to make it well-formed.
21 /// Clang-format conservatively refuses to format files with unmatched brackets
22 /// as it isn't sure where the errors are and so can't correct.
23 /// When editing, it's reasonable to assume code before the cursor is complete.
24 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
25  SourceManagerForFile FileSM("dummy.cpp", Code);
26  auto &SM = FileSM.get();
27  FileID FID = SM.getMainFileID();
28  Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
29  Token Tok;
30  std::vector<char> Brackets;
31  while (!Lex.LexFromRawLexer(Tok)) {
32  switch(Tok.getKind()) {
33  case tok::l_paren:
34  Brackets.push_back(')');
35  break;
36  case tok::l_brace:
37  Brackets.push_back('}');
38  break;
39  case tok::l_square:
40  Brackets.push_back(']');
41  break;
42  case tok::r_paren:
43  if (!Brackets.empty() && Brackets.back() == ')')
44  Brackets.pop_back();
45  break;
46  case tok::r_brace:
47  if (!Brackets.empty() && Brackets.back() == '}')
48  Brackets.pop_back();
49  break;
50  case tok::r_square:
51  if (!Brackets.empty() && Brackets.back() == ']')
52  Brackets.pop_back();
53  break;
54  default:
55  continue;
56  }
57  }
58  // Attempt to end any open comments first.
59  Code.append("\n// */\n");
60  Code.append(Brackets.rbegin(), Brackets.rend());
61 }
62 
63 static StringRef commentMarker(llvm::StringRef Line) {
64  for (StringRef Marker : {"///", "//"}){
65  auto I = Line.rfind(Marker);
66  if (I != StringRef::npos)
67  return Line.substr(I, Marker.size());
68  }
69  return "";
70 }
71 
72 llvm::StringRef firstLine(llvm::StringRef Code) {
73  return Code.take_until([](char C) { return C == '\n'; });
74 }
75 
76 llvm::StringRef lastLine(llvm::StringRef Code) {
77  llvm::StringRef Rest = Code;
78  while (!Rest.empty() && Rest.back() != '\n')
79  Rest = Rest.drop_back();
80  return Code.substr(Rest.size());
81 }
82 
83 // Filename is needed for tooling::Replacement and some overloads of reformat().
84 // Its value should not affect the outcome. We use the default from reformat().
85 llvm::StringRef Filename = "<stdin>";
86 
87 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
88 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
89  llvm::StringRef To) {
90  assert(From.begin() >= Code.begin() && From.end() <= Code.end());
91  // The filename is required but ignored.
92  return tooling::Replacement(Filename, From.data() - Code.data(),
93  From.size(), To);
94 }
95 
96 // High-level representation of incremental formatting changes.
97 // The changes are made in two steps.
98 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
99 // comment markers when splitting a line comment with a newline).
100 // 2) a selective clang-format run:
101 // - the "source code" passed to clang format is the code up to the cursor,
102 // a placeholder for the cursor, and some closing brackets
103 // - the formatting is restricted to the cursor and (possibly) other ranges
104 // (e.g. the old line when inserting a newline).
105 // - changes before the cursor are applied, those after are discarded.
106 struct IncrementalChanges {
107  // Changes that should be applied before running clang-format.
108  tooling::Replacements Changes;
109  // Ranges of the original source code that should be clang-formatted.
110  // The CursorProxyText will also be formatted.
111  std::vector<tooling::Range> FormatRanges;
112  // The source code that should stand in for the cursor when clang-formatting.
113  // e.g. after inserting a newline, a line-comment at the cursor is used to
114  // ensure that the newline is preserved.
115  std::string CursorPlaceholder;
116 };
117 
118 // After a newline:
119 // - we continue any line-comment that was split
120 // - we format the old line in addition to the cursor
121 // - we represent the cursor with a line comment to preserve the newline
122 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
123  unsigned Cursor) {
124  IncrementalChanges Result;
125  // Before newline, code looked like:
126  // leading^trailing
127  // After newline, code looks like:
128  // leading
129  // indentation^trailing
130  // Where indentation was added by the editor.
131  StringRef Trailing = firstLine(Code.substr(Cursor));
132  StringRef Indentation = lastLine(Code.take_front(Cursor));
133  if (Indentation.data() == Code.data()) {
134  vlog("Typed a newline, but we're still on the first line!");
135  return Result;
136  }
137  StringRef Leading =
138  lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
139  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
140 
141  // Strip leading whitespace on trailing line.
142  StringRef TrailingTrim = Trailing.ltrim();
143  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
144  cantFail(Result.Changes.add(
145  replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
146 
147  // If we split a comment, replace indentation with a comment marker.
148  // If the editor made the new line a comment, also respect that.
149  StringRef CommentMarker = commentMarker(Leading);
150  bool NewLineIsComment = !commentMarker(Indentation).empty();
151  if (!CommentMarker.empty() &&
152  (NewLineIsComment || !commentMarker(NextLine).empty() ||
153  (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
154  using llvm::sys::unicode::columnWidthUTF8;
155  // We indent the new comment to match the previous one.
156  StringRef PreComment =
157  Leading.take_front(CommentMarker.data() - Leading.data());
158  std::string IndentAndComment =
159  (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
160  .str();
161  cantFail(
162  Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
163  } else {
164  // Remove any indentation and let clang-format re-add it.
165  // This prevents the cursor marker dragging e.g. an aligned comment with it.
166  cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
167  }
168 
169  // If we put a the newline inside a {} pair, put } on its own line...
170  if (CommentMarker.empty() && Leading.endswith("{") &&
171  Trailing.startswith("}")) {
172  cantFail(
173  Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
174  // ...and format it.
175  Result.FormatRanges.push_back(
176  tooling::Range(Trailing.data() - Code.data() + 1, 1));
177  }
178 
179  // Format the whole leading line.
180  Result.FormatRanges.push_back(
181  tooling::Range(Leading.data() - Code.data(), Leading.size()));
182 
183  // We use a comment to represent the cursor, to preserve the newline.
184  // A trailing identifier improves parsing of e.g. for without braces.
185  // Exception: if the previous line has a trailing comment, we can't use one
186  // as the cursor (they will be aligned). But in this case we don't need to.
187  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
188 
189  return Result;
190 }
191 
192 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
193  llvm::StringRef InsertedText) {
194  IncrementalChanges Result;
195  if (InsertedText == "\n")
196  return getIncrementalChangesAfterNewline(Code, Cursor);
197 
198  Result.CursorPlaceholder = " /**/";
199  return Result;
200 }
201 
202 // Returns equivalent replacements that preserve the correspondence between
203 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
204 // replacement will be split.
205 std::vector<tooling::Replacement>
206 split(const tooling::Replacements &Replacements, unsigned OldCursor,
207  unsigned NewCursor) {
208  std::vector<tooling::Replacement> Result;
209  int LengthChange = 0;
210  for (const tooling::Replacement &R : Replacements) {
211  if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
212  Result.push_back(R);
213  LengthChange += R.getReplacementText().size() - R.getLength();
214  } else if (R.getOffset() < OldCursor) { // overlaps cursor
215  int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
216  assert(ReplacementSplit >= 0 &&
217  ReplacementSplit <= int(R.getReplacementText().size()) &&
218  "NewCursor incompatible with OldCursor!");
219  Result.push_back(tooling::Replacement(
220  R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
221  R.getReplacementText().take_front(ReplacementSplit)));
222  Result.push_back(tooling::Replacement(
223  R.getFilePath(), OldCursor,
224  R.getLength() - (OldCursor - R.getOffset()),
225  R.getReplacementText().drop_front(ReplacementSplit)));
226  } else if (R.getOffset() >= OldCursor) { // after cursor
227  Result.push_back(R);
228  }
229  }
230  return Result;
231 }
232 
233 } // namespace
234 
235 // We're simulating the following sequence of changes:
236 // - apply the pre-formatting edits (see getIncrementalChanges)
237 // - insert a placeholder for the cursor
238 // - format some of the resulting code
239 // - remove the cursor placeholder again
240 // The replacements we return are produced by composing these.
241 //
242 // The text we actually pass to clang-format is slightly different from this,
243 // e.g. we have to close brackets. We ensure these differences are *after*
244 // all the regions we want to format, and discard changes in them.
245 std::vector<tooling::Replacement>
246 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
247  llvm::StringRef InsertedText, format::FormatStyle Style) {
248  IncrementalChanges Incremental =
249  getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
250  // Never *remove* lines in response to pressing enter! This annoys users.
251  if (InsertedText == "\n") {
252  Style.MaxEmptyLinesToKeep = 1000;
253  Style.KeepEmptyLinesAtTheStartOfBlocks = true;
254  }
255 
256  // Compute the code we want to format:
257  // 1) Start with code after the pre-formatting edits.
258  std::string CodeToFormat = cantFail(
259  tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
260  unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
261  // 2) Truncate code after the last interesting range.
262  unsigned FormatLimit = Cursor;
263  for (tooling::Range &R : Incremental.FormatRanges)
264  FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
265  CodeToFormat.resize(FormatLimit);
266  // 3) Insert a placeholder for the cursor.
267  CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
268  // 4) Append brackets after FormatLimit so the code is well-formed.
269  closeBrackets(CodeToFormat, Style);
270 
271  // Determine the ranges to format:
272  std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
273  // Ranges after the cursor need to be adjusted for the placeholder.
274  for (auto &R : RangesToFormat) {
275  if (R.getOffset() > Cursor)
276  R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
277  R.getLength());
278  }
279  // We also format the cursor.
280  RangesToFormat.push_back(
281  tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
282  // Also update FormatLimit for the placeholder, we'll use this later.
283  FormatLimit += Incremental.CursorPlaceholder.size();
284 
285  // Run clang-format, and truncate changes at FormatLimit.
286  tooling::Replacements FormattingChanges;
287  format::FormattingAttemptStatus Status;
288  for (const tooling::Replacement &R : format::reformat(
289  Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
290  if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
291  cantFail(FormattingChanges.add(R));
292  else if(R.getOffset() < FormatLimit) { // Overlaps limit.
293  if (R.getReplacementText().empty()) // Deletions are easy to handle.
294  cantFail(FormattingChanges.add(tooling::Replacement(Filename,
295  R.getOffset(), FormatLimit - R.getOffset(), "")));
296  else
297  // Hopefully won't happen in practice?
298  elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
299  Cursor, CodeToFormat);
300  }
301  }
302  if (!Status.FormatComplete)
303  vlog("Incremental format incomplete at line {0}", Status.Line);
304 
305  // Now we are ready to compose the changes relative to OriginalCode.
306  // edits -> insert placeholder -> format -> remove placeholder.
307  // We must express insert/remove as Replacements.
308  tooling::Replacements InsertCursorPlaceholder(
309  tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
310  unsigned FormattedCursorStart =
311  FormattingChanges.getShiftedCodePosition(Cursor),
312  FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
313  Cursor + Incremental.CursorPlaceholder.size());
314  tooling::Replacements RemoveCursorPlaceholder(
315  tooling::Replacement(Filename, FormattedCursorStart,
316  FormattedCursorEnd - FormattedCursorStart, ""));
317 
318  // We can't simply merge() and return: tooling::Replacements will combine
319  // adjacent edits left and right of the cursor. This gives the right source
320  // code, but loses information about where the cursor is!
321  // Fortunately, none of the individual passes lose information, so:
322  // - we use merge() to compute the final Replacements
323  // - we chain getShiftedCodePosition() to compute final cursor position
324  // - we split the final Replacements at the cursor position, so that
325  // each Replacement lies either before or after the cursor.
326  tooling::Replacements Final;
327  unsigned FinalCursor = OriginalCursor;
328 #ifndef NDEBUG
329  std::string FinalCode = OriginalCode;
330  dlog("Initial code: {0}", FinalCode);
331 #endif
332  for (auto Pass :
333  std::vector<std::pair<const char *, const tooling::Replacements *>>{
334  {"Pre-formatting changes", &Incremental.Changes},
335  {"Insert placeholder", &InsertCursorPlaceholder},
336  {"clang-format", &FormattingChanges},
337  {"Remove placeholder", &RemoveCursorPlaceholder}}) {
338  Final = Final.merge(*Pass.second);
339  FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
340 #ifndef NDEBUG
341  FinalCode =
342  cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
343  dlog("After {0}:\n{1}^{2}", Pass.first,
344  StringRef(FinalCode).take_front(FinalCursor),
345  StringRef(FinalCode).drop_front(FinalCursor));
346 #endif
347  }
348  return split(Final, OriginalCursor, FinalCursor);
349 }
350 
351 unsigned
353  const std::vector<tooling::Replacement> &Replacements) {
354  unsigned OriginalOffset = Offset;
355  for (const auto &R : Replacements) {
356  if (R.getOffset() + R.getLength() <= OriginalOffset) {
357  // Replacement is before cursor.
358  Offset += R.getReplacementText().size();
359  Offset -= R.getLength();
360  } else if (R.getOffset() < OriginalOffset) {
361  // Replacement overlaps cursor.
362  // Preserve position within replacement text, as far as possible.
363  unsigned PositionWithinReplacement = Offset - R.getOffset();
364  if (PositionWithinReplacement > R.getReplacementText().size()) {
365  Offset += R.getReplacementText().size();
366  Offset -= PositionWithinReplacement;
367  }
368  } else {
369  // Replacement after cursor.
370  break; // Replacements are sorted, the rest are also after the cursor.
371  }
372  }
373  return Offset;
374 }
375 
376 } // namespace clangd
377 } // namespace clang
std::string Code
std::vector< tooling::Replacement > formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, llvm::StringRef InsertedText, format::FormatStyle Style)
Applies limited formatting around new InsertedText.
Definition: Format.cpp:246
tooling::Replacements Changes
Definition: Format.cpp:108
std::vector< tooling::Range > FormatRanges
Definition: Format.cpp:111
Documents are synced by sending the full content on open.
unsigned transformCursorPosition(unsigned Offset, const std::vector< tooling::Replacement > &Replacements)
Determine the new cursor position after applying Replacements.
Definition: Format.cpp:352
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:56
std::string Filename
Filename as a string.
#define dlog(...)
Definition: Logger.h:72
size_t Offset
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
CharSourceRange Range
SourceRange for the file name.
std::string CursorPlaceholder
Definition: Format.cpp:115
static cl::opt< std::string > FormatStyle("format-style", cl::desc(R"( Style for formatting code around applied fixes: - 'none' (default) turns off formatting - 'file' (literally 'file', not a placeholder) uses .clang-format file in the closest parent directory - '{ <json> }' specifies options inline, e.g. -format-style='{BasedOnStyle: llvm, IndentWidth: 8}' - 'llvm', 'google', 'webkit', 'mozilla' See clang-format documentation for the up-to-date information about formatting styles and options. This option overrides the 'FormatStyle` option in .clang-tidy file, if any. )"), cl::init("none"), cl::cat(ClangTidyCategory))