clang-tools  16.0.0git
Format.cpp
Go to the documentation of this file.
1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "support/Logger.h"
10 #include "clang/Basic/SourceManager.h"
11 #include "clang/Format/Format.h"
12 #include "clang/Lex/Lexer.h"
13 #include "clang/Tooling/Core/Replacement.h"
14 #include "llvm/Support/Unicode.h"
15 
16 namespace clang {
17 namespace clangd {
18 namespace {
19 
20 /// Append closing brackets )]} to \p Code to make it well-formed.
21 /// Clang-format conservatively refuses to format files with unmatched brackets
22 /// as it isn't sure where the errors are and so can't correct.
23 /// When editing, it's reasonable to assume code before the cursor is complete.
24 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
25  SourceManagerForFile FileSM("mock_file.cpp", Code);
26  auto &SM = FileSM.get();
27  FileID FID = SM.getMainFileID();
28  LangOptions LangOpts = format::getFormattingLangOpts(Style);
29  Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts);
30  Token Tok;
31  std::vector<char> Brackets;
32  while (!Lex.LexFromRawLexer(Tok)) {
33  switch(Tok.getKind()) {
34  case tok::l_paren:
35  Brackets.push_back(')');
36  break;
37  case tok::l_brace:
38  Brackets.push_back('}');
39  break;
40  case tok::l_square:
41  Brackets.push_back(']');
42  break;
43  case tok::r_paren:
44  if (!Brackets.empty() && Brackets.back() == ')')
45  Brackets.pop_back();
46  break;
47  case tok::r_brace:
48  if (!Brackets.empty() && Brackets.back() == '}')
49  Brackets.pop_back();
50  break;
51  case tok::r_square:
52  if (!Brackets.empty() && Brackets.back() == ']')
53  Brackets.pop_back();
54  break;
55  default:
56  continue;
57  }
58  }
59  // Attempt to end any open comments first.
60  Code.append("\n// */\n");
61  Code.append(Brackets.rbegin(), Brackets.rend());
62 }
63 
64 static StringRef commentMarker(llvm::StringRef Line) {
65  for (StringRef Marker : {"///", "//"}){
66  auto I = Line.rfind(Marker);
67  if (I != StringRef::npos)
68  return Line.substr(I, Marker.size());
69  }
70  return "";
71 }
72 
73 llvm::StringRef firstLine(llvm::StringRef Code) {
74  return Code.take_until([](char C) { return C == '\n'; });
75 }
76 
77 llvm::StringRef lastLine(llvm::StringRef Code) {
78  llvm::StringRef Rest = Code;
79  while (!Rest.empty() && Rest.back() != '\n')
80  Rest = Rest.drop_back();
81  return Code.substr(Rest.size());
82 }
83 
84 // Filename is needed for tooling::Replacement and some overloads of reformat().
85 // Its value should not affect the outcome. We use the default from reformat().
86 llvm::StringRef Filename = "<stdin>";
87 
88 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
89 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
90  llvm::StringRef To) {
91  assert(From.begin() >= Code.begin() && From.end() <= Code.end());
92  // The filename is required but ignored.
93  return tooling::Replacement(Filename, From.data() - Code.data(),
94  From.size(), To);
95 }
96 
97 // High-level representation of incremental formatting changes.
98 // The changes are made in two steps.
99 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100 // comment markers when splitting a line comment with a newline).
101 // 2) a selective clang-format run:
102 // - the "source code" passed to clang format is the code up to the cursor,
103 // a placeholder for the cursor, and some closing brackets
104 // - the formatting is restricted to the cursor and (possibly) other ranges
105 // (e.g. the old line when inserting a newline).
106 // - changes before the cursor are applied, those after are discarded.
107 struct IncrementalChanges {
108  // Changes that should be applied before running clang-format.
109  tooling::Replacements Changes;
110  // Ranges of the original source code that should be clang-formatted.
111  // The CursorProxyText will also be formatted.
112  std::vector<tooling::Range> FormatRanges;
113  // The source code that should stand in for the cursor when clang-formatting.
114  // e.g. after inserting a newline, a line-comment at the cursor is used to
115  // ensure that the newline is preserved.
116  std::string CursorPlaceholder;
117 };
118 
119 // The two functions below, columnWidth() and columnWidthWithTabs(), were
120 // adapted from similar functions in clang/lib/Format/Encoding.h.
121 // FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
122 
123 // Helper function for columnWidthWithTabs().
124 inline unsigned columnWidth(StringRef Text) {
125  int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
126  if (ContentWidth < 0)
127  return Text.size(); // fallback for unprintable characters
128  return ContentWidth;
129 }
130 
131 // Returns the number of columns required to display the \p Text on a terminal
132 // with the \p TabWidth.
133 inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) {
134  unsigned TotalWidth = 0;
135  StringRef Tail = Text;
136  for (;;) {
137  StringRef::size_type TabPos = Tail.find('\t');
138  if (TabPos == StringRef::npos)
139  return TotalWidth + columnWidth(Tail);
140  TotalWidth += columnWidth(Tail.substr(0, TabPos));
141  if (TabWidth)
142  TotalWidth += TabWidth - TotalWidth % TabWidth;
143  Tail = Tail.substr(TabPos + 1);
144  }
145 }
146 
147 // After a newline:
148 // - we continue any line-comment that was split
149 // - we format the old line in addition to the cursor
150 // - we represent the cursor with a line comment to preserve the newline
151 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
152  unsigned Cursor,
153  unsigned TabWidth) {
154  IncrementalChanges Result;
155  // Before newline, code looked like:
156  // leading^trailing
157  // After newline, code looks like:
158  // leading
159  // indentation^trailing
160  // Where indentation was added by the editor.
161  StringRef Trailing = firstLine(Code.substr(Cursor));
162  StringRef Indentation = lastLine(Code.take_front(Cursor));
163  if (Indentation.data() == Code.data()) {
164  vlog("Typed a newline, but we're still on the first line!");
165  return Result;
166  }
167  StringRef Leading =
168  lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
169  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
170 
171  // Strip leading whitespace on trailing line.
172  StringRef TrailingTrim = Trailing.ltrim();
173  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
174  cantFail(Result.Changes.add(
175  replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
176 
177  // If we split a comment, replace indentation with a comment marker.
178  // If the editor made the new line a comment, also respect that.
179  StringRef CommentMarker = commentMarker(Leading);
180  bool NewLineIsComment = !commentMarker(Indentation).empty();
181  if (!CommentMarker.empty() &&
182  (NewLineIsComment || !commentMarker(NextLine).empty() ||
183  (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
184  // We indent the new comment to match the previous one.
185  StringRef PreComment =
186  Leading.take_front(CommentMarker.data() - Leading.data());
187  std::string IndentAndComment =
188  (std::string(columnWidthWithTabs(PreComment, TabWidth), ' ') +
189  CommentMarker + " ")
190  .str();
191  cantFail(
192  Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
193  } else {
194  // Remove any indentation and let clang-format re-add it.
195  // This prevents the cursor marker dragging e.g. an aligned comment with it.
196  cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
197  }
198 
199  // If we put a the newline inside a {} pair, put } on its own line...
200  if (CommentMarker.empty() && Leading.endswith("{") &&
201  Trailing.startswith("}")) {
202  cantFail(
203  Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
204  // ...and format it.
205  Result.FormatRanges.push_back(
206  tooling::Range(Trailing.data() - Code.data() + 1, 1));
207  }
208 
209  // Format the whole leading line.
210  Result.FormatRanges.push_back(
211  tooling::Range(Leading.data() - Code.data(), Leading.size()));
212 
213  // We use a comment to represent the cursor, to preserve the newline.
214  // A trailing identifier improves parsing of e.g. for without braces.
215  // Exception: if the previous line has a trailing comment, we can't use one
216  // as the cursor (they will be aligned). But in this case we don't need to.
217  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
218 
219  return Result;
220 }
221 
222 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
223  llvm::StringRef InsertedText,
224  unsigned TabWidth) {
225  IncrementalChanges Result;
226  if (InsertedText == "\n")
227  return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth);
228 
229  Result.CursorPlaceholder = " /**/";
230  return Result;
231 }
232 
233 // Returns equivalent replacements that preserve the correspondence between
234 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235 // replacement will be split.
236 std::vector<tooling::Replacement>
237 split(const tooling::Replacements &Replacements, unsigned OldCursor,
238  unsigned NewCursor) {
239  std::vector<tooling::Replacement> Result;
240  int LengthChange = 0;
241  for (const tooling::Replacement &R : Replacements) {
242  if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
243  Result.push_back(R);
244  LengthChange += R.getReplacementText().size() - R.getLength();
245  } else if (R.getOffset() < OldCursor) { // overlaps cursor
246  int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
247  assert(ReplacementSplit >= 0 &&
248  ReplacementSplit <= int(R.getReplacementText().size()) &&
249  "NewCursor incompatible with OldCursor!");
250  Result.push_back(tooling::Replacement(
251  R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
252  R.getReplacementText().take_front(ReplacementSplit)));
253  Result.push_back(tooling::Replacement(
254  R.getFilePath(), OldCursor,
255  R.getLength() - (OldCursor - R.getOffset()),
256  R.getReplacementText().drop_front(ReplacementSplit)));
257  } else if (R.getOffset() >= OldCursor) { // after cursor
258  Result.push_back(R);
259  }
260  }
261  return Result;
262 }
263 
264 } // namespace
265 
266 // We're simulating the following sequence of changes:
267 // - apply the pre-formatting edits (see getIncrementalChanges)
268 // - insert a placeholder for the cursor
269 // - format some of the resulting code
270 // - remove the cursor placeholder again
271 // The replacements we return are produced by composing these.
272 //
273 // The text we actually pass to clang-format is slightly different from this,
274 // e.g. we have to close brackets. We ensure these differences are *after*
275 // all the regions we want to format, and discard changes in them.
276 std::vector<tooling::Replacement>
277 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
278  llvm::StringRef InsertedText, format::FormatStyle Style) {
279  IncrementalChanges Incremental = getIncrementalChanges(
280  OriginalCode, OriginalCursor, InsertedText, Style.TabWidth);
281  // Never *remove* lines in response to pressing enter! This annoys users.
282  if (InsertedText == "\n") {
283  Style.MaxEmptyLinesToKeep = 1000;
284  Style.KeepEmptyLinesAtTheStartOfBlocks = true;
285  }
286 
287  // Compute the code we want to format:
288  // 1) Start with code after the pre-formatting edits.
289  std::string CodeToFormat = cantFail(
290  tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
291  unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
292  // 2) Truncate code after the last interesting range.
293  unsigned FormatLimit = Cursor;
294  for (tooling::Range &R : Incremental.FormatRanges)
295  FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
296  CodeToFormat.resize(FormatLimit);
297  // 3) Insert a placeholder for the cursor.
298  CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
299  // 4) Append brackets after FormatLimit so the code is well-formed.
300  closeBrackets(CodeToFormat, Style);
301 
302  // Determine the ranges to format:
303  std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
304  // Ranges after the cursor need to be adjusted for the placeholder.
305  for (auto &R : RangesToFormat) {
306  if (R.getOffset() > Cursor)
307  R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
308  R.getLength());
309  }
310  // We also format the cursor.
311  RangesToFormat.push_back(
312  tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
313  // Also update FormatLimit for the placeholder, we'll use this later.
314  FormatLimit += Incremental.CursorPlaceholder.size();
315 
316  // Run clang-format, and truncate changes at FormatLimit.
317  tooling::Replacements FormattingChanges;
318  format::FormattingAttemptStatus Status;
319  for (const tooling::Replacement &R : format::reformat(
320  Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
321  if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
322  cantFail(FormattingChanges.add(R));
323  else if(R.getOffset() < FormatLimit) { // Overlaps limit.
324  if (R.getReplacementText().empty()) // Deletions are easy to handle.
325  cantFail(FormattingChanges.add(tooling::Replacement(Filename,
326  R.getOffset(), FormatLimit - R.getOffset(), "")));
327  else
328  // Hopefully won't happen in practice?
329  elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
330  Cursor, CodeToFormat);
331  }
332  }
333  if (!Status.FormatComplete)
334  vlog("Incremental format incomplete at line {0}", Status.Line);
335 
336  // Now we are ready to compose the changes relative to OriginalCode.
337  // edits -> insert placeholder -> format -> remove placeholder.
338  // We must express insert/remove as Replacements.
339  tooling::Replacements InsertCursorPlaceholder(
340  tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
341  unsigned FormattedCursorStart =
342  FormattingChanges.getShiftedCodePosition(Cursor),
343  FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
344  Cursor + Incremental.CursorPlaceholder.size());
345  tooling::Replacements RemoveCursorPlaceholder(
346  tooling::Replacement(Filename, FormattedCursorStart,
347  FormattedCursorEnd - FormattedCursorStart, ""));
348 
349  // We can't simply merge() and return: tooling::Replacements will combine
350  // adjacent edits left and right of the cursor. This gives the right source
351  // code, but loses information about where the cursor is!
352  // Fortunately, none of the individual passes lose information, so:
353  // - we use merge() to compute the final Replacements
354  // - we chain getShiftedCodePosition() to compute final cursor position
355  // - we split the final Replacements at the cursor position, so that
356  // each Replacement lies either before or after the cursor.
357  tooling::Replacements Final;
358  unsigned FinalCursor = OriginalCursor;
359 #ifndef NDEBUG
360  std::string FinalCode = std::string(OriginalCode);
361  dlog("Initial code: {0}", FinalCode);
362 #endif
363  for (auto Pass :
364  std::vector<std::pair<const char *, const tooling::Replacements *>>{
365  {"Pre-formatting changes", &Incremental.Changes},
366  {"Insert placeholder", &InsertCursorPlaceholder},
367  {"clang-format", &FormattingChanges},
368  {"Remove placeholder", &RemoveCursorPlaceholder}}) {
369  Final = Final.merge(*Pass.second);
370  FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
371 #ifndef NDEBUG
372  FinalCode =
373  cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
374  dlog("After {0}:\n{1}^{2}", Pass.first,
375  StringRef(FinalCode).take_front(FinalCursor),
376  StringRef(FinalCode).drop_front(FinalCursor));
377 #endif
378  }
379  return split(Final, OriginalCursor, FinalCursor);
380 }
381 
382 unsigned
384  const std::vector<tooling::Replacement> &Replacements) {
385  unsigned OriginalOffset = Offset;
386  for (const auto &R : Replacements) {
387  if (R.getOffset() + R.getLength() <= OriginalOffset) {
388  // Replacement is before cursor.
389  Offset += R.getReplacementText().size();
390  Offset -= R.getLength();
391  } else if (R.getOffset() < OriginalOffset) {
392  // Replacement overlaps cursor.
393  // Preserve position within replacement text, as far as possible.
394  unsigned PositionWithinReplacement = Offset - R.getOffset();
395  if (PositionWithinReplacement > R.getReplacementText().size()) {
396  Offset += R.getReplacementText().size();
397  Offset -= PositionWithinReplacement;
398  }
399  } else {
400  // Replacement after cursor.
401  break; // Replacements are sorted, the rest are also after the cursor.
402  }
403  }
404  return Offset;
405 }
406 
407 } // namespace clangd
408 } // namespace clang
dlog
#define dlog(...)
Definition: Logger.h:101
FormatRanges
std::vector< tooling::Range > FormatRanges
Definition: Format.cpp:112
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:39
CursorPlaceholder
std::string CursorPlaceholder
Definition: Format.cpp:116
FormatStyle
static cl::opt< std::string > FormatStyle("format-style", cl::desc(R"( Style for formatting code around applied fixes: - 'none' (default) turns off formatting - 'file' (literally 'file', not a placeholder) uses .clang-format file in the closest parent directory - '{ <json> }' specifies options inline, e.g. -format-style='{BasedOnStyle: llvm, IndentWidth: 8}' - 'llvm', 'google', 'webkit', 'mozilla' See clang-format documentation for the up-to-date information about formatting styles and options. This option overrides the 'FormatStyle` option in .clang-tidy file, if any. )"), cl::init("none"), cl::cat(ClangTidyCategory))
Filename
std::string Filename
Filename as a string.
Definition: IncludeOrderCheck.cpp:40
Text
std::string Text
Definition: HTMLGenerator.cpp:81
Format.h
Changes
tooling::Replacements Changes
Definition: Format.cpp:109
clang::clangd::Tail
@ Tail
Definition: FuzzyMatch.h:57
Offset
size_t Offset
Definition: CodeComplete.cpp:1213
Code
std::string Code
Definition: FindTargetTests.cpp:67
Line
int Line
Definition: PreprocessorTracker.cpp:514
Logger.h
clang::clangd::vlog
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:72
clang::clangd::TextDocumentSyncKind::Incremental
@ Incremental
Documents are synced by sending the full content on open.
clang::clangd::transformCursorPosition
unsigned transformCursorPosition(unsigned Offset, const std::vector< tooling::Replacement > &Replacements)
Determine the new cursor position after applying Replacements.
Definition: Format.cpp:383
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::formatIncremental
std::vector< tooling::Replacement > formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, llvm::StringRef InsertedText, format::FormatStyle Style)
Applies limited formatting around new InsertedText.
Definition: Format.cpp:277
clang::tidy::modernize::empty
static bool empty(SourceRange Range)
Definition: MacroToEnumCheck.cpp:538
LangOpts
const LangOptions * LangOpts
Definition: ExtractFunction.cpp:375
clang::clangd::elog
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:61