clang-tools  14.0.0git
Selection.cpp
Go to the documentation of this file.
1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Selection.h"
10 #include "AST.h"
11 #include "SourceCode.h"
12 #include "support/Logger.h"
13 #include "support/Trace.h"
14 #include "clang/AST/ASTTypeTraits.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/PrettyPrinter.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/OperatorKinds.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Tooling/Syntax/Tokens.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <string>
34 
35 namespace clang {
36 namespace clangd {
37 namespace {
38 using Node = SelectionTree::Node;
39 
40 // Measure the fraction of selections that were enabled by recovery AST.
41 void recordMetrics(const SelectionTree &S, const LangOptions &Lang) {
42  if (!trace::enabled())
43  return;
44  const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C";
45  static constexpr trace::Metric SelectionUsedRecovery(
46  "selection_recovery", trace::Metric::Distribution, "language");
47  static constexpr trace::Metric RecoveryType(
48  "selection_recovery_type", trace::Metric::Distribution, "language");
49  const auto *Common = S.commonAncestor();
50  for (const auto *N = Common; N; N = N->Parent) {
51  if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
52  SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast.
53  RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel);
54  return;
55  }
56  }
57  if (Common)
58  SelectionUsedRecovery.record(0, LanguageLabel); // unused.
59 }
60 
61 SourceRange getSourceRange(const DynTypedNode &N) {
62  // MemberExprs to implicitly access anonymous fields should not claim any
63  // tokens for themselves. Given:
64  // struct A { struct { int b; }; };
65  // The clang AST reports the following nodes for an access to b:
66  // A().b;
67  // [----] MemberExpr, base = A().<anonymous>, member = b
68  // [----] MemberExpr: base = A(), member = <anonymous>
69  // [-] CXXConstructExpr
70  // For our purposes, we don't want the second MemberExpr to own any tokens,
71  // so we reduce its range to match the CXXConstructExpr.
72  // (It's not clear that changing the clang AST would be correct in general).
73  if (const auto *ME = N.get<MemberExpr>()) {
74  if (!ME->getMemberDecl()->getDeclName())
75  return ME->getBase()
76  ? getSourceRange(DynTypedNode::create(*ME->getBase()))
77  : SourceRange();
78  }
79  return N.getSourceRange();
80 }
81 
82 // An IntervalSet maintains a set of disjoint subranges of an array.
83 //
84 // Initially, it contains the entire array.
85 // [-----------------------------------------------------------]
86 //
87 // When a range is erased(), it will typically split the array in two.
88 // Claim: [--------------------]
89 // after: [----------------] [-------------------]
90 //
91 // erase() returns the segments actually erased. Given the state above:
92 // Claim: [---------------------------------------]
93 // Out: [---------] [------]
94 // After: [-----] [-----------]
95 //
96 // It is used to track (expanded) tokens not yet associated with an AST node.
97 // On traversing an AST node, its token range is erased from the unclaimed set.
98 // The tokens actually removed are associated with that node, and hit-tested
99 // against the selection to determine whether the node is selected.
100 template <typename T> class IntervalSet {
101 public:
102  IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
103 
104  // Removes the elements of Claim from the set, modifying or removing ranges
105  // that overlap it.
106  // Returns the continuous subranges of Claim that were actually removed.
107  llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) {
108  llvm::SmallVector<llvm::ArrayRef<T>> Out;
109  if (Claim.empty())
110  return Out;
111 
112  // General case:
113  // Claim: [-----------------]
114  // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
115  // Overlap: ^first ^second
116  // Ranges C and D are fully included. Ranges B and E must be trimmed.
117  auto Overlap = std::make_pair(
118  UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
119  UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
120  // Rewind to cover B.
121  if (Overlap.first != UnclaimedRanges.begin()) {
122  --Overlap.first;
123  // ...unless B isn't selected at all.
124  if (Overlap.first->end() <= Claim.begin())
125  ++Overlap.first;
126  }
127  if (Overlap.first == Overlap.second)
128  return Out;
129 
130  // First, copy all overlapping ranges into the output.
131  auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
132  // If any of the overlapping ranges were sliced by the claim, split them:
133  // - restrict the returned range to the claimed part
134  // - save the unclaimed part so it can be reinserted
135  llvm::ArrayRef<T> RemainingHead, RemainingTail;
136  if (Claim.begin() > OutFirst->begin()) {
137  RemainingHead = {OutFirst->begin(), Claim.begin()};
138  *OutFirst = {Claim.begin(), OutFirst->end()};
139  }
140  if (Claim.end() < Out.back().end()) {
141  RemainingTail = {Claim.end(), Out.back().end()};
142  Out.back() = {Out.back().begin(), Claim.end()};
143  }
144 
145  // Erase all the overlapping ranges (invalidating all iterators).
146  UnclaimedRanges.erase(Overlap.first, Overlap.second);
147  // Reinsert ranges that were merely trimmed.
148  if (!RemainingHead.empty())
149  UnclaimedRanges.insert(RemainingHead);
150  if (!RemainingTail.empty())
151  UnclaimedRanges.insert(RemainingTail);
152 
153  return Out;
154  }
155 
156 private:
157  using TokenRange = llvm::ArrayRef<T>;
158  struct RangeLess {
159  bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
160  return L.begin() < R.begin();
161  }
162  };
163 
164  // Disjoint sorted unclaimed ranges of expanded tokens.
165  std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
166 };
167 
168 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
169 // This resolves to Unselected if no tokens are ever seen.
170 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
171 // This value is never exposed publicly.
172 constexpr SelectionTree::Selection NoTokens =
173  static_cast<SelectionTree::Selection>(
174  static_cast<unsigned char>(SelectionTree::Complete + 1));
175 
176 // Nodes start with NoTokens, and then use this function to aggregate the
177 // selectedness as more tokens are found.
178 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
179  if (New == NoTokens)
180  return;
181  if (Result == NoTokens)
182  Result = New;
183  else if (Result != New)
184  // Can only be completely selected (or unselected) if all tokens are.
185  Result = SelectionTree::Partial;
186 }
187 
188 // As well as comments, don't count semicolons as real tokens.
189 // They're not properly claimed as expr-statement is missing from the AST.
190 bool shouldIgnore(const syntax::Token &Tok) {
191  return Tok.kind() == tok::comment || Tok.kind() == tok::semi;
192 }
193 
194 // Determine whether 'Target' is the first expansion of the macro
195 // argument whose top-level spelling location is 'SpellingLoc'.
196 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
197  const SourceManager &SM) {
198  SourceLocation Prev = SpellingLoc;
199  while (true) {
200  // If the arg is expanded multiple times, getMacroArgExpandedLocation()
201  // returns the first expansion.
202  SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
203  // So if we reach the target, target is the first-expansion of the
204  // first-expansion ...
205  if (SM.getFileID(Next) == Target)
206  return true;
207 
208  // Otherwise, if the FileID stops changing, we've reached the innermost
209  // macro expansion, and Target was on a different branch.
210  if (SM.getFileID(Next) == SM.getFileID(Prev))
211  return false;
212 
213  Prev = Next;
214  }
215  return false;
216 }
217 
218 // SelectionTester can determine whether a range of tokens from the PP-expanded
219 // stream (corresponding to an AST node) is considered selected.
220 //
221 // When the tokens result from macro expansions, the appropriate tokens in the
222 // main file are examined (macro invocation or args). Similarly for #includes.
223 // However, only the first expansion of a given spelled token is considered
224 // selected.
225 //
226 // It tests each token in the range (not just the endpoints) as contiguous
227 // expanded tokens may not have contiguous spellings (with macros).
228 //
229 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
230 // are ignored when determining selectedness.
231 class SelectionTester {
232 public:
233  // The selection is offsets [SelBegin, SelEnd) in SelFile.
234  SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
235  unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
236  : SelFile(SelFile), SM(SM) {
237  // Find all tokens (partially) selected in the file.
238  auto AllSpelledTokens = Buf.spelledTokens(SelFile);
239  const syntax::Token *SelFirst =
240  llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
241  return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
242  });
243  const syntax::Token *SelLimit = std::partition_point(
244  SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
245  return SM.getFileOffset(Tok.location()) < SelEnd;
246  });
247  auto Sel = llvm::makeArrayRef(SelFirst, SelLimit);
248  // Find which of these are preprocessed to nothing and should be ignored.
249  std::vector<bool> PPIgnored(Sel.size(), false);
250  for (const syntax::TokenBuffer::Expansion &X :
251  Buf.expansionsOverlapping(Sel)) {
252  if (X.Expanded.empty()) {
253  for (const syntax::Token &Tok : X.Spelled) {
254  if (&Tok >= SelFirst && &Tok < SelLimit)
255  PPIgnored[&Tok - SelFirst] = true;
256  }
257  }
258  }
259  // Precompute selectedness and offset for selected spelled tokens.
260  for (unsigned I = 0; I < Sel.size(); ++I) {
261  if (shouldIgnore(Sel[I]) || PPIgnored[I])
262  continue;
263  SpelledTokens.emplace_back();
264  Tok &S = SpelledTokens.back();
265  S.Offset = SM.getFileOffset(Sel[I].location());
266  if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
267  S.Selected = SelectionTree::Complete;
268  else
269  S.Selected = SelectionTree::Partial;
270  }
271  }
272 
273  // Test whether a consecutive range of tokens is selected.
274  // The tokens are taken from the expanded token stream.
276  test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
277  if (SpelledTokens.empty())
278  return NoTokens;
279  SelectionTree::Selection Result = NoTokens;
280  while (!ExpandedTokens.empty()) {
281  // Take consecutive tokens from the same context together for efficiency.
282  FileID FID = SM.getFileID(ExpandedTokens.front().location());
283  auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
284  return SM.getFileID(T.location()) == FID;
285  });
286  assert(!Batch.empty());
287  ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
288 
289  update(Result, testChunk(FID, Batch));
290  }
291  return Result;
292  }
293 
294  // Cheap check whether any of the tokens in R might be selected.
295  // If it returns false, test() will return NoTokens or Unselected.
296  // If it returns true, test() may return any value.
297  bool mayHit(SourceRange R) const {
298  if (SpelledTokens.empty())
299  return false;
300  auto B = SM.getDecomposedLoc(R.getBegin());
301  auto E = SM.getDecomposedLoc(R.getEnd());
302  if (B.first == SelFile && E.first == SelFile)
303  if (E.second < SpelledTokens.front().Offset ||
304  B.second > SpelledTokens.back().Offset)
305  return false;
306  return true;
307  }
308 
309 private:
310  // Hit-test a consecutive range of tokens from a single file ID.
312  testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
313  assert(!Batch.empty());
314  SourceLocation StartLoc = Batch.front().location();
315  // There are several possible categories of FileID depending on how the
316  // preprocessor was used to generate these tokens:
317  // main file, #included file, macro args, macro bodies.
318  // We need to identify the main-file tokens that represent Batch, and
319  // determine whether we want to exclusively claim them. Regular tokens
320  // represent one AST construct, but a macro invocation can represent many.
321 
322  // Handle tokens written directly in the main file.
323  if (FID == SelFile) {
324  return testTokenRange(SM.getFileOffset(Batch.front().location()),
325  SM.getFileOffset(Batch.back().location()));
326  }
327 
328  // Handle tokens in another file #included into the main file.
329  // Check if the #include is selected, but don't claim it exclusively.
330  if (StartLoc.isFileID()) {
331  for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
332  Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
333  if (SM.getFileID(Loc) == SelFile)
334  // FIXME: use whole #include directive, not just the filename string.
335  return testToken(SM.getFileOffset(Loc));
336  }
337  return NoTokens;
338  }
339 
340  assert(StartLoc.isMacroID());
341  // Handle tokens that were passed as a macro argument.
342  SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
343  if (SM.getFileID(ArgStart) == SelFile) {
344  if (isFirstExpansion(FID, ArgStart, SM)) {
345  SourceLocation ArgEnd =
346  SM.getTopMacroCallerLoc(Batch.back().location());
347  return testTokenRange(SM.getFileOffset(ArgStart),
348  SM.getFileOffset(ArgEnd));
349  } else {
350  /* fall through and treat as part of the macro body */
351  }
352  }
353 
354  // Handle tokens produced by non-argument macro expansion.
355  // Check if the macro name is selected, don't claim it exclusively.
356  auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
357  if (Expansion.first == SelFile)
358  // FIXME: also check ( and ) for function-like macros?
359  return testToken(Expansion.second);
360  else
361  return NoTokens;
362  }
363 
364  // Is the closed token range [Begin, End] selected?
365  SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
366  assert(Begin <= End);
367  // Outside the selection entirely?
368  if (End < SpelledTokens.front().Offset ||
369  Begin > SpelledTokens.back().Offset)
371 
372  // Compute range of tokens.
373  auto B = llvm::partition_point(
374  SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
375  auto E = std::partition_point(
376  B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
377 
378  // Aggregate selectedness of tokens in range.
379  bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
380  End > SpelledTokens.back().Offset;
381  SelectionTree::Selection Result =
382  ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
383  for (auto It = B; It != E; ++It)
384  update(Result, It->Selected);
385  return Result;
386  }
387 
388  // Is the token at `Offset` selected?
389  SelectionTree::Selection testToken(unsigned Offset) const {
390  // Outside the selection entirely?
391  if (Offset < SpelledTokens.front().Offset ||
392  Offset > SpelledTokens.back().Offset)
394  // Find the token, if it exists.
395  auto It = llvm::partition_point(
396  SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
397  if (It != SpelledTokens.end() && It->Offset == Offset)
398  return It->Selected;
399  return NoTokens;
400  }
401 
402  struct Tok {
403  unsigned Offset;
405  };
406  std::vector<Tok> SpelledTokens;
407  FileID SelFile;
408  const SourceManager &SM;
409 };
410 
411 // Show the type of a node for debugging.
412 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
413  if (const TypeLoc *TL = N.get<TypeLoc>()) {
414  // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
415  // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
416  if (TL->getTypeLocClass() == TypeLoc::Qualified)
417  OS << "QualifiedTypeLoc";
418  else
419  OS << TL->getType()->getTypeClassName() << "TypeLoc";
420  } else {
421  OS << N.getNodeKind().asStringRef();
422  }
423 }
424 
425 #ifndef NDEBUG
426 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
427  std::string S;
428  llvm::raw_string_ostream OS(S);
429  printNodeKind(OS, N);
430  OS << " ";
431  return std::move(OS.str());
432 }
433 #endif
434 
435 bool isImplicit(const Stmt *S) {
436  // Some Stmts are implicit and shouldn't be traversed, but there's no
437  // "implicit" attribute on Stmt/Expr.
438  // Unwrap implicit casts first if present (other nodes too?).
439  if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
440  S = ICE->getSubExprAsWritten();
441  // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
442  // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
443  if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
444  if (CTI->isImplicit())
445  return true;
446  // Refs to operator() and [] are (almost?) always implicit as part of calls.
447  if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
448  if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
449  switch (FD->getOverloadedOperator()) {
450  case OO_Call:
451  case OO_Subscript:
452  return true;
453  default:
454  break;
455  }
456  }
457  }
458  return false;
459 }
460 
461 // We find the selection by visiting written nodes in the AST, looking for nodes
462 // that intersect with the selected character range.
463 //
464 // While traversing, we maintain a parent stack. As nodes pop off the stack,
465 // we decide whether to keep them or not. To be kept, they must either be
466 // selected or contain some nodes that are.
467 //
468 // For simple cases (not inside macros) we prune subtrees that don't intersect.
469 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
470 public:
471  // Runs the visitor to gather selected nodes and their ancestors.
472  // If there is any selection, the root (TUDecl) is the first node.
473  static std::deque<Node> collect(ASTContext &AST,
474  const syntax::TokenBuffer &Tokens,
475  const PrintingPolicy &PP, unsigned Begin,
476  unsigned End, FileID File) {
477  SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
478  V.TraverseAST(AST);
479  assert(V.Stack.size() == 1 && "Unpaired push/pop?");
480  assert(V.Stack.top() == &V.Nodes.front());
481  return std::move(V.Nodes);
482  }
483 
484  // We traverse all "well-behaved" nodes the same way:
485  // - push the node onto the stack
486  // - traverse its children recursively
487  // - pop it from the stack
488  // - hit testing: is intersection(node, selection) - union(children) empty?
489  // - attach it to the tree if it or any children hit the selection
490  //
491  // Two categories of nodes are not "well-behaved":
492  // - those without source range information, we don't record those
493  // - those that can't be stored in DynTypedNode.
494  bool TraverseDecl(Decl *X) {
495  if (X && isa<TranslationUnitDecl>(X))
496  return Base::TraverseDecl(X); // Already pushed by constructor.
497  // Base::TraverseDecl will suppress children, but not this node itself.
498  if (X && X->isImplicit())
499  return true;
500  return traverseNode(X, [&] { return Base::TraverseDecl(X); });
501  }
502  bool TraverseTypeLoc(TypeLoc X) {
503  return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
504  }
505  bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) {
506  return traverseNode(&X,
507  [&] { return Base::TraverseTemplateArgumentLoc(X); });
508  }
509  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
510  return traverseNode(
511  &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
512  }
513  bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
514  return traverseNode(
515  X, [&] { return Base::TraverseConstructorInitializer(X); });
516  }
517  bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) {
518  return traverseNode(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); });
519  }
520  bool TraverseAttr(Attr *X) {
521  return traverseNode(X, [&] { return Base::TraverseAttr(X); });
522  }
523  // Stmt is the same, but this form allows the data recursion optimization.
524  bool dataTraverseStmtPre(Stmt *X) {
525  if (!X || isImplicit(X))
526  return false;
527  auto N = DynTypedNode::create(*X);
528  if (canSafelySkipNode(N))
529  return false;
530  push(std::move(N));
531  if (shouldSkipChildren(X)) {
532  pop();
533  return false;
534  }
535  return true;
536  }
537  bool dataTraverseStmtPost(Stmt *X) {
538  pop();
539  return true;
540  }
541  // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
542  // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
543  // This means we'd never see 'int' in 'const int'! Work around that here.
544  // (The reason for the behavior is to avoid traversing the nested Type twice,
545  // but we ignore TraverseType anyway).
546  bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
547  return traverseNode<TypeLoc>(
548  &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
549  }
550  // Uninteresting parts of the AST that don't have locations within them.
551  bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
552  bool TraverseType(QualType) { return true; }
553 
554  // The DeclStmt for the loop variable claims to cover the whole range
555  // inside the parens, this causes the range-init expression to not be hit.
556  // Traverse the loop VarDecl instead, which has the right source range.
557  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
558  return traverseNode(S, [&] {
559  return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
560  TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
561  });
562  }
563  // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
564  bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
565  return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
566  }
567  // We only want to traverse the *syntactic form* to understand the selection.
568  bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
569  return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
570  }
571 
572 private:
574 
575  SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
576  const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
577  FileID SelFile)
578  : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
579 #ifndef NDEBUG
580  PrintPolicy(PP),
581 #endif
582  TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
583  UnclaimedExpandedTokens(Tokens.expandedTokens()) {
584  // Ensure we have a node for the TU decl, regardless of traversal scope.
585  Nodes.emplace_back();
586  Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
587  Nodes.back().Parent = nullptr;
588  Nodes.back().Selected = SelectionTree::Unselected;
589  Stack.push(&Nodes.back());
590  }
591 
592  // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
593  // Node is always a pointer so the generic code can handle any null checks.
594  template <typename T, typename Func>
595  bool traverseNode(T *Node, const Func &Body) {
596  if (Node == nullptr)
597  return true;
598  auto N = DynTypedNode::create(*Node);
599  if (canSafelySkipNode(N))
600  return true;
601  push(DynTypedNode::create(*Node));
602  bool Ret = Body();
603  pop();
604  return Ret;
605  }
606 
607  // HIT TESTING
608  //
609  // We do rough hit testing on the way down the tree to avoid traversing
610  // subtrees that don't touch the selection (canSafelySkipNode), but
611  // fine-grained hit-testing is mostly done on the way back up (in pop()).
612  // This means children get to claim parts of the selection first, and parents
613  // are only selected if they own tokens that no child owned.
614  //
615  // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
616  // parent's, and a node (transitively) owns all tokens in its range.
617  //
618  // Exception 1: child range claims tokens that should be owned by the parent.
619  // e.g. in `void foo(int);`, the FunctionTypeLoc should own
620  // `void (int)` but the parent FunctionDecl should own `foo`.
621  // To handle this case, certain nodes claim small token ranges *before*
622  // their children are traversed. (see earlySourceRange).
623  //
624  // Exception 2: siblings both claim the same node.
625  // e.g. `int x, y;` produces two sibling VarDecls.
626  // ~~~~~ x
627  // ~~~~~~~~ y
628  // Here the first ("leftmost") sibling claims the tokens it wants, and the
629  // other sibling gets what's left. So selecting "int" only includes the left
630  // VarDecl in the selection tree.
631 
632  // An optimization for a common case: nodes outside macro expansions that
633  // don't intersect the selection may be recursively skipped.
634  bool canSafelySkipNode(const DynTypedNode &N) {
635  SourceRange S = getSourceRange(N);
636  if (auto *TL = N.get<TypeLoc>()) {
637  // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
638  // heuristics. We should consider only pruning critical TypeLoc nodes, to
639  // be more robust.
640 
641  // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
642  // failing
643  // to descend into the child expression.
644  // decltype(2+2);
645  // ~~~~~~~~~~~~~ <-- correct range
646  // ~~~~~~~~ <-- range reported by getSourceRange()
647  // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
648  // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
649  // rid of this patch.
650  if (auto DT = TL->getAs<DecltypeTypeLoc>())
651  S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
652  // AttributedTypeLoc may point to the attribute's range, NOT the modified
653  // type's range.
654  if (auto AT = TL->getAs<AttributedTypeLoc>())
655  S = AT.getModifiedLoc().getSourceRange();
656  }
657  // SourceRange often doesn't manage to accurately cover attributes.
658  // Fortunately, attributes are rare.
659  if (llvm::any_of(getAttributes(N),
660  [](const Attr *A) { return !A->isImplicit(); }))
661  return false;
662  if (!SelChecker.mayHit(S)) {
663  dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
664  dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
665  return true;
666  }
667  return false;
668  }
669 
670  // There are certain nodes we want to treat as leaves in the SelectionTree,
671  // although they do have children.
672  bool shouldSkipChildren(const Stmt *X) const {
673  // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
674  // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
675  // So we treat UserDefinedLiteral as a leaf node, owning the token.
676  return llvm::isa<UserDefinedLiteral>(X);
677  }
678 
679  // Pushes a node onto the ancestor stack. Pairs with pop().
680  // Performs early hit detection for some nodes (on the earlySourceRange).
681  void push(DynTypedNode Node) {
682  SourceRange Early = earlySourceRange(Node);
683  dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
684  Nodes.emplace_back();
685  Nodes.back().ASTNode = std::move(Node);
686  Nodes.back().Parent = Stack.top();
687  Nodes.back().Selected = NoTokens;
688  Stack.push(&Nodes.back());
689  claimRange(Early, Nodes.back().Selected);
690  }
691 
692  // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
693  // Performs primary hit detection.
694  void pop() {
695  Node &N = *Stack.top();
696  dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
697  claimRange(getSourceRange(N.ASTNode), N.Selected);
698  if (N.Selected == NoTokens)
699  N.Selected = SelectionTree::Unselected;
700  if (N.Selected || !N.Children.empty()) {
701  // Attach to the tree.
702  N.Parent->Children.push_back(&N);
703  } else {
704  // Neither N any children are selected, it doesn't belong in the tree.
705  assert(&N == &Nodes.back());
706  Nodes.pop_back();
707  }
708  Stack.pop();
709  }
710 
711  // Returns the range of tokens that this node will claim directly, and
712  // is not available to the node's children.
713  // Usually empty, but sometimes children cover tokens but shouldn't own them.
714  SourceRange earlySourceRange(const DynTypedNode &N) {
715  if (const Decl *D = N.get<Decl>()) {
716  // We want constructor name to be claimed by TypeLoc not the constructor
717  // itself. Similar for deduction guides, we rather want to select the
718  // underlying TypeLoc.
719  // FIXME: Unfortunately this doesn't work, even though RecursiveASTVisitor
720  // traverses the underlying TypeLoc inside DeclarationName, it is null for
721  // constructors.
722  if (isa<CXXConstructorDecl>(D) || isa<CXXDeductionGuideDecl>(D))
723  return SourceRange();
724  // This will capture Field, Function, MSProperty, NonTypeTemplateParm and
725  // VarDecls. We want the name in the declarator to be claimed by the decl
726  // and not by any children. For example:
727  // void [[foo]]();
728  // int (*[[s]])();
729  // struct X { int [[hash]] [32]; [[operator]] int();}
730  if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D))
731  return DD->getLocation();
732  } else if (const auto *CCI = N.get<CXXCtorInitializer>()) {
733  // : [[b_]](42)
734  return CCI->getMemberLocation();
735  }
736  return SourceRange();
737  }
738 
739  // Perform hit-testing of a complete Node against the selection.
740  // This runs for every node in the AST, and must be fast in common cases.
741  // This is usually called from pop(), so we can take children into account.
742  // The existing state of Result is relevant (early/late claims can interact).
743  void claimRange(SourceRange S, SelectionTree::Selection &Result) {
744  for (const auto &ClaimedRange :
745  UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
746  update(Result, SelChecker.test(ClaimedRange));
747 
748  if (Result && Result != NoTokens)
749  dlog("{1}hit selection: {0}", S.printToString(SM), indent());
750  }
751 
752  std::string indent(int Offset = 0) {
753  // Cast for signed arithmetic.
754  int Amount = int(Stack.size()) + Offset;
755  assert(Amount >= 0);
756  return std::string(Amount, ' ');
757  }
758 
759  SourceManager &SM;
760  const LangOptions &LangOpts;
761 #ifndef NDEBUG
762  const PrintingPolicy &PrintPolicy;
763 #endif
764  const syntax::TokenBuffer &TokenBuf;
765  std::stack<Node *> Stack;
766  SelectionTester SelChecker;
767  IntervalSet<syntax::Token> UnclaimedExpandedTokens;
768  std::deque<Node> Nodes; // Stable pointers as we add more nodes.
769 };
770 
771 } // namespace
772 
773 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
774  const PrintingPolicy &PP) {
775  llvm::SmallString<256> Result;
776  {
777  llvm::raw_svector_ostream OS(Result);
778  N.print(OS, PP);
779  }
780  auto Pos = Result.find('\n');
781  if (Pos != llvm::StringRef::npos) {
782  bool MoreText = !llvm::all_of(Result.str().drop_front(Pos), llvm::isSpace);
783  Result.resize(Pos);
784  if (MoreText)
785  Result.append(" …");
786  }
787  return Result;
788 }
789 
790 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
791  int Indent) const {
792  if (N.Selected)
793  OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
794  : '.');
795  else
796  OS.indent(Indent);
797  printNodeKind(OS, N.ASTNode);
798  OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
799  for (const Node *Child : N.Children)
800  print(OS, *Child, Indent + 2);
801 }
802 
803 std::string SelectionTree::Node::kind() const {
804  std::string S;
805  llvm::raw_string_ostream OS(S);
806  printNodeKind(OS, ASTNode);
807  return std::move(OS.str());
808 }
809 
810 // Decide which selections emulate a "point" query in between characters.
811 // If it's ambiguous (the neighboring characters are selectable tokens), returns
812 // both possibilities in preference order.
813 // Always returns at least one range - if no tokens touched, and empty range.
814 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
815 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
816  const auto &SM = Tokens.sourceManager();
817  SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
818  llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
819  // Prefer right token over left.
820  for (const syntax::Token &Tok :
821  llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
822  if (shouldIgnore(Tok))
823  continue;
824  unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
825  Result.emplace_back(Offset, Offset + Tok.length());
826  }
827  if (Result.empty())
828  Result.emplace_back(Offset, Offset);
829  return Result;
830 }
831 
832 bool SelectionTree::createEach(ASTContext &AST,
833  const syntax::TokenBuffer &Tokens,
834  unsigned Begin, unsigned End,
835  llvm::function_ref<bool(SelectionTree)> Func) {
836  if (Begin != End)
837  return Func(SelectionTree(AST, Tokens, Begin, End));
838  for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
839  if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
840  return true;
841  return false;
842 }
843 
845  const syntax::TokenBuffer &Tokens,
846  unsigned int Begin, unsigned int End) {
847  llvm::Optional<SelectionTree> Result;
848  createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
849  Result = std::move(T);
850  return true;
851  });
852  return std::move(*Result);
853 }
854 
855 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
856  unsigned Begin, unsigned End)
857  : PrintPolicy(AST.getLangOpts()) {
858  // No fundamental reason the selection needs to be in the main file,
859  // but that's all clangd has needed so far.
860  const SourceManager &SM = AST.getSourceManager();
861  FileID FID = SM.getMainFileID();
862  PrintPolicy.TerseOutput = true;
863  PrintPolicy.IncludeNewlines = false;
864 
865  dlog("Computing selection for {0}",
866  SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
867  .printToString(SM));
868  Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
869  Root = Nodes.empty() ? nullptr : &Nodes.front();
870  recordMetrics(*this, AST.getLangOpts());
871  dlog("Built selection tree\n{0}", *this);
872 }
873 
875  const Node *Ancestor = Root;
876  while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
877  Ancestor = Ancestor->Children.front();
878  // Returning nullptr here is a bit unprincipled, but it makes the API safer:
879  // the TranslationUnitDecl contains all of the preamble, so traversing it is a
880  // performance cliff. Callers can check for null and use root() if they want.
881  return Ancestor != Root ? Ancestor : nullptr;
882 }
883 
884 const DeclContext &SelectionTree::Node::getDeclContext() const {
885  for (const Node *CurrentNode = this; CurrentNode != nullptr;
886  CurrentNode = CurrentNode->Parent) {
887  if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
888  if (CurrentNode != this)
889  if (auto *DC = dyn_cast<DeclContext>(Current))
890  return *DC;
891  return *Current->getDeclContext();
892  }
893  }
894  llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
895 }
896 
898  if (Children.size() == 1 &&
899  getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode))
900  return Children.front()->ignoreImplicit();
901  return *this;
902 }
903 
905  if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode))
906  return Parent->outerImplicit();
907  return *this;
908 }
909 
910 } // namespace clangd
911 } // namespace clang
dlog
#define dlog(...)
Definition: Logger.h:102
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:45
Base
std::unique_ptr< GlobalCompilationDatabase > Base
Definition: GlobalCompilationDatabaseTests.cpp:90
clang::clangd::SelectionTree::Node::kind
std::string kind() const
Definition: Selection.cpp:803
RecursiveASTVisitor
Selection.h
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
clang::clangd::SelectionTree::Node::ignoreImplicit
const Node & ignoreImplicit() const
Definition: Selection.cpp:897
clang::clangd::SelectionTree::createEach
static bool createEach(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Begin, unsigned End, llvm::function_ref< bool(SelectionTree)> Func)
Definition: Selection.cpp:832
clang::clangd::SelectionTree::Node::Selected
Selection Selected
Definition: Selection.h:130
clang::clangd::pointBounds
static llvm::SmallVector< std::pair< unsigned, unsigned >, 2 > pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens)
Definition: Selection.cpp:815
clang::clangd::X
static URISchemeRegistry::Add< TestScheme > X(TestScheme::Scheme, "Test schema")
Trace.h
Selected
SelectionTree::Selection Selected
Definition: Selection.cpp:404
clang::clangd::SelectionTree::Node::outerImplicit
const Node & outerImplicit() const
Definition: Selection.cpp:904
Target
std::string Target
Definition: QueryDriverDatabase.cpp:64
clang::clangd::SelectionTree::Node::ASTNode
DynTypedNode ASTNode
Definition: Selection.h:128
clang::clangd::SelectionTree::Complete
@ Complete
Definition: Selection.h:118
test
Definition: test.py:1
Children
std::vector< std::unique_ptr< HTMLNode > > Children
Definition: HTMLGenerator.cpp:91
ns1::ns2::A
@ A
Definition: CategoricalFeature.h:3
ns1::ns2::D
@ D
Definition: CategoricalFeature.h:3
clang::clangd::SelectionTree::Node::Children
llvm::SmallVector< const Node * > Children
Definition: Selection.h:126
Offset
unsigned Offset
Definition: Selection.cpp:403
clang::clangd::abbreviatedString
llvm::SmallString< 256 > abbreviatedString(DynTypedNode N, const PrintingPolicy &PP)
Definition: Selection.cpp:773
Decl
const FunctionDecl * Decl
Definition: AvoidBindCheck.cpp:100
clang::clangd::SelectionTree::createRight
static SelectionTree createRight(ASTContext &AST, const syntax::TokenBuffer &Tokens, unsigned Begin, unsigned End)
Definition: Selection.cpp:844
clang::clangd::trace::enabled
bool enabled()
Returns true if there is an active tracer.
Definition: Trace.cpp:284
Logger.h
Bounds
PreambleBounds Bounds
Definition: Preamble.cpp:235
clang::clangd::trace::Metric::Distribution
@ Distribution
A distribution of values with a meaningful mean and count.
Definition: Trace.h:52
clang::clangd::SelectionTree
Definition: Selection.h:76
clang::clangd::SelectionTree::Node
Definition: Selection.h:122
clang::tidy::bugprone::PP
static Preprocessor * PP
Definition: BadSignalToKillThreadCheck.cpp:29
clang::clangd::SelectionTree::Node::Parent
Node * Parent
Definition: Selection.h:124
Parent
const Node * Parent
Definition: ExtractFunction.cpp:152
clang::clangd::SelectionTree::commonAncestor
const Node * commonAncestor() const
Definition: Selection.cpp:874
SourceCode.h
clang::clangd::SelectionTree::SelectionTree
SelectionTree(const SelectionTree &)=delete
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:163
clang::clangd::SelectionTree::Unselected
@ Unselected
Definition: Selection.h:114
Pos
Position Pos
Definition: SourceCode.cpp:650
clang::clangd::SelectionTree::Partial
@ Partial
Definition: Selection.h:116
ns1::ns2::B
@ B
Definition: CategoricalFeature.h:3
Out
CompiledFragmentImpl & Out
Definition: ConfigCompile.cpp:100
clang::clangd::getAttributes
std::vector< const Attr * > getAttributes(const DynTypedNode &N)
Return attributes attached directly to a node.
Definition: AST.cpp:486
clang::clangd::SelectionTree::Node::getDeclContext
const DeclContext & getDeclContext() const
Definition: Selection.cpp:884
clang::clangd::ASTNode
Simplified description of a clang AST node.
Definition: Protocol.h:1757
AST.h
clang::clangd::SelectionTree::Selection
Selection
Definition: Selection.h:106