clang-tools  14.0.0git
ExtractVariable.cpp
Go to the documentation of this file.
1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ParsedAST.h"
9 #include "Protocol.h"
10 #include "Selection.h"
11 #include "SourceCode.h"
12 #include "refactor/Tweak.h"
13 #include "support/Logger.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/OperationKinds.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtCXX.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Tooling/Core/Replacement.h"
25 #include "llvm/ADT/None.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/Error.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace {
35 // information regarding the Expr that is being extracted
36 class ExtractionContext {
37 public:
38  ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM,
39  const ASTContext &Ctx);
40  const clang::Expr *getExpr() const { return Expr; }
41  const SelectionTree::Node *getExprNode() const { return ExprNode; }
42  bool isExtractable() const { return Extractable; }
43  // The half-open range for the expression to be extracted.
44  SourceRange getExtractionChars() const;
45  // Generate Replacement for replacing selected expression with given VarName
46  tooling::Replacement replaceWithVar(SourceRange Chars,
47  llvm::StringRef VarName) const;
48  // Generate Replacement for declaring the selected Expr as a new variable
49  tooling::Replacement insertDeclaration(llvm::StringRef VarName,
50  SourceRange InitChars) const;
51 
52 private:
53  bool Extractable = false;
54  const clang::Expr *Expr;
55  const SelectionTree::Node *ExprNode;
56  // Stmt before which we will extract
57  const clang::Stmt *InsertionPoint = nullptr;
58  const SourceManager &SM;
59  const ASTContext &Ctx;
60  // Decls referenced in the Expr
61  std::vector<clang::Decl *> ReferencedDecls;
62  // returns true if the Expr doesn't reference any variable declared in scope
63  bool exprIsValidOutside(const clang::Stmt *Scope) const;
64  // computes the Stmt before which we will extract out Expr
65  const clang::Stmt *computeInsertionPoint() const;
66 };
67 
68 // Returns all the Decls referenced inside the given Expr
69 static std::vector<clang::Decl *>
70 computeReferencedDecls(const clang::Expr *Expr) {
71  // RAV subclass to find all DeclRefs in a given Stmt
72  class FindDeclRefsVisitor
73  : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> {
74  public:
75  std::vector<Decl *> ReferencedDecls;
76  bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT
77  ReferencedDecls.push_back(DeclRef->getDecl());
78  return true;
79  }
80  };
81  FindDeclRefsVisitor Visitor;
82  Visitor.TraverseStmt(const_cast<Stmt *>(cast<Stmt>(Expr)));
83  return Visitor.ReferencedDecls;
84 }
85 
86 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node,
87  const SourceManager &SM,
88  const ASTContext &Ctx)
89  : ExprNode(Node), SM(SM), Ctx(Ctx) {
90  Expr = Node->ASTNode.get<clang::Expr>();
91  ReferencedDecls = computeReferencedDecls(Expr);
92  InsertionPoint = computeInsertionPoint();
93  if (InsertionPoint)
94  Extractable = true;
95 }
96 
97 // checks whether extracting before InsertionPoint will take a
98 // variable reference out of scope
99 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const {
100  SourceLocation ScopeBegin = Scope->getBeginLoc();
101  SourceLocation ScopeEnd = Scope->getEndLoc();
102  for (const Decl *ReferencedDecl : ReferencedDecls) {
103  if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) &&
104  SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd))
105  return false;
106  }
107  return true;
108 }
109 
110 // Return the Stmt before which we need to insert the extraction.
111 // To find the Stmt, we go up the AST Tree and if the Parent of the current
112 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before
113 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a
114 // CompoundStmt
115 //
116 // FIXME: Extraction from label, switch and case statements
117 // FIXME: Doens't work for FoldExpr
118 // FIXME: Ensure extraction from loops doesn't change semantics.
119 const clang::Stmt *ExtractionContext::computeInsertionPoint() const {
120  // returns true if we can extract before InsertionPoint
121  auto CanExtractOutside =
122  [](const SelectionTree::Node *InsertionPoint) -> bool {
123  if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) {
124  // Allow all expressions except LambdaExpr since we don't want to extract
125  // from the captures/default arguments of a lambda
126  if (isa<clang::Expr>(Stmt))
127  return !isa<LambdaExpr>(Stmt);
128  // We don't yet allow extraction from switch/case stmt as we would need to
129  // jump over the switch stmt even if there is a CompoundStmt inside the
130  // switch. And there are other Stmts which we don't care about (e.g.
131  // continue and break) as there can never be anything to extract from
132  // them.
133  return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) ||
134  isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) ||
135  isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) ||
136  isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt);
137  }
138  if (InsertionPoint->ASTNode.get<VarDecl>())
139  return true;
140  return false;
141  };
142  for (const SelectionTree::Node *CurNode = getExprNode();
143  CurNode->Parent && CanExtractOutside(CurNode);
144  CurNode = CurNode->Parent) {
145  const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>();
146  // give up if extraction will take a variable out of scope
147  if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint))
148  break;
149  if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) {
150  if (isa<CompoundStmt>(CurParent)) {
151  // Ensure we don't write inside a macro.
152  if (CurParent->getBeginLoc().isMacroID())
153  continue;
154  return CurInsertionPoint;
155  }
156  }
157  }
158  return nullptr;
159 }
160 
161 // returns the replacement for substituting the extraction with VarName
162 tooling::Replacement
163 ExtractionContext::replaceWithVar(SourceRange Chars,
164  llvm::StringRef VarName) const {
165  unsigned ExtractionLength =
166  SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
167  return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
168 }
169 // returns the Replacement for declaring a new variable storing the extraction
170 tooling::Replacement
171 ExtractionContext::insertDeclaration(llvm::StringRef VarName,
172  SourceRange InitializerChars) const {
173  llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
174  const SourceLocation InsertionLoc =
175  toHalfOpenFileRange(SM, Ctx.getLangOpts(),
176  InsertionPoint->getSourceRange())
177  ->getBegin();
178  // FIXME: Replace auto with explicit type and add &/&& as necessary
179  std::string ExtractedVarDecl = std::string("auto ") + VarName.str() + " = " +
180  ExtractionCode.str() + "; ";
181  return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
182 }
183 
184 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
185 //
186 // These are special, because the formal AST doesn't match what users expect:
187 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
188 // - but extracting `b + c` is reasonable, as + is (mathematically) associative.
189 //
190 // So we try to support these cases with some restrictions:
191 // - the operator must be associative
192 // - no mixing of operators is allowed
193 // - we don't look inside macro expansions in the subexpressions
194 // - we only adjust the extracted range, so references in the unselected parts
195 // of the AST expression (e.g. `a`) are still considered referenced for
196 // the purposes of calculating the insertion point.
197 // FIXME: it would be nice to exclude these references, by micromanaging
198 // the computeReferencedDecls() calls around the binary operator tree.
199 
200 // Information extracted about a binary operator encounted in a SelectionTree.
201 // It can represent either an overloaded or built-in operator.
202 struct ParsedBinaryOperator {
203  BinaryOperatorKind Kind;
204  SourceLocation ExprLoc;
205  llvm::SmallVector<const SelectionTree::Node *> SelectedOperands;
206 
207  // If N is a binary operator, populate this and return true.
208  bool parse(const SelectionTree::Node &N) {
209  SelectedOperands.clear();
210 
211  if (const BinaryOperator *Op =
212  llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
213  Kind = Op->getOpcode();
214  ExprLoc = Op->getExprLoc();
215  SelectedOperands = N.Children;
216  return true;
217  }
218  if (const CXXOperatorCallExpr *Op =
219  llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
220  N.ASTNode.get<Expr>())) {
221  if (!Op->isInfixBinaryOp())
222  return false;
223 
224  Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
225  ExprLoc = Op->getExprLoc();
226  // Not all children are args, there's also the callee (operator).
227  for (const auto* Child : N.Children) {
228  const Expr *E = Child->ASTNode.get<Expr>();
229  assert(E && "callee and args should be Exprs!");
230  if (E == Op->getArg(0) || E == Op->getArg(1))
231  SelectedOperands.push_back(Child);
232  }
233  return true;
234  }
235  return false;
236  }
237 
238  bool associative() const {
239  // Must also be left-associative, or update getBinaryOperatorRange()!
240  switch (Kind) {
241  case BO_Add:
242  case BO_Mul:
243  case BO_And:
244  case BO_Or:
245  case BO_Xor:
246  case BO_LAnd:
247  case BO_LOr:
248  return true;
249  default:
250  return false;
251  }
252  }
253 
254  bool crossesMacroBoundary(const SourceManager &SM) {
255  FileID F = SM.getFileID(ExprLoc);
256  for (const SelectionTree::Node *Child : SelectedOperands)
257  if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F)
258  return true;
259  return false;
260  }
261 };
262 
263 // If have an associative operator at the top level, then we must find
264 // the start point (rightmost in LHS) and end point (leftmost in RHS).
265 // We can only descend into subtrees where the operator matches.
266 //
267 // e.g. for a + [[b + c]] + d
268 // +
269 // / \
270 // N-> + d
271 // / \
272 // + c <- End
273 // / \
274 // a b <- Start
275 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N,
276  const SourceManager &SM,
277  const LangOptions &LangOpts) {
278  // If N is not a suitable binary operator, bail out.
279  ParsedBinaryOperator Op;
280  if (!Op.parse(N.ignoreImplicit()) || !Op.associative() ||
281  Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2)
282  return SourceRange();
283  BinaryOperatorKind OuterOp = Op.Kind;
284 
285  // Because the tree we're interested in contains only one operator type, and
286  // all eligible operators are left-associative, the shape of the tree is
287  // very restricted: it's a linked list along the left edges.
288  // This simplifies our implementation.
289  const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS
290  const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS
291  // End is already correct: it can't be an OuterOp (as it's left-associative).
292  // Start needs to be pushed down int the subtree to the right spot.
293  while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp &&
294  !Op.crossesMacroBoundary(SM)) {
295  assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
296  if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected
297  Start = Op.SelectedOperands.back();
298  break;
299  }
300  // Op.LHS is (at least partially) selected, so descend into it.
301  Start = Op.SelectedOperands.front();
302  }
303 
304  return SourceRange(
305  toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange())
306  ->getBegin(),
307  toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange())
308  ->getEnd());
309 }
310 
311 SourceRange ExtractionContext::getExtractionChars() const {
312  // Special case: we're extracting an associative binary subexpression.
313  SourceRange BinaryOperatorRange =
314  getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts());
315  if (BinaryOperatorRange.isValid())
316  return BinaryOperatorRange;
317 
318  // Usual case: we're extracting the whole expression.
319  return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
320 }
321 
322 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef
323 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) {
324  const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit();
325  const SelectionTree::Node *MaybeCall = MaybeCallee.Parent;
326  if (!MaybeCall)
327  return nullptr;
328  const CallExpr *CE =
329  llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>());
330  if (!CE)
331  return nullptr;
332  if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>())
333  return nullptr;
334  return MaybeCall;
335 }
336 
337 // Returns true if Inner (which is a direct child of Outer) is appearing as
338 // a statement rather than an expression whose value can be used.
339 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) {
340  if (!Outer || !Inner)
341  return false;
342  // Exclude the most common places where an expr can appear but be unused.
343  if (llvm::isa<CompoundStmt>(Outer))
344  return true;
345  if (llvm::isa<SwitchCase>(Outer))
346  return true;
347  // Control flow statements use condition etc, but not the body.
348  if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer))
349  return Inner == WS->getBody();
350  if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer))
351  return Inner == DS->getBody();
352  if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer))
353  return Inner == FS->getBody();
354  if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer))
355  return Inner == FS->getBody();
356  if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer))
357  return Inner == IS->getThen() || Inner == IS->getElse();
358  // Assume all other cases may be actual expressions.
359  // This includes the important case of subexpressions (where Outer is Expr).
360  return false;
361 }
362 
363 // check if N can and should be extracted (e.g. is not void-typed).
364 bool eligibleForExtraction(const SelectionTree::Node *N) {
365  const Expr *E = N->ASTNode.get<Expr>();
366  if (!E)
367  return false;
368 
369  // Void expressions can't be assigned to variables.
370  if (const Type *ExprType = E->getType().getTypePtrOrNull())
371  if (ExprType->isVoidType())
372  return false;
373 
374  // A plain reference to a name (e.g. variable) isn't worth extracting.
375  // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`?
376  if (llvm::isa<DeclRefExpr>(E) || llvm::isa<MemberExpr>(E))
377  return false;
378 
379  // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful.
380  // FIXME: we could still hoist the assignment, and leave the variable there?
381  ParsedBinaryOperator BinOp;
382  if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind))
383  return false;
384 
385  const SelectionTree::Node &OuterImplicit = N->outerImplicit();
386  const auto *Parent = OuterImplicit.Parent;
387  if (!Parent)
388  return false;
389  // We don't want to extract expressions used as statements, that would leave
390  // a `placeholder;` around that has no effect.
391  // Unfortunately because the AST doesn't have ExprStmt, we have to check in
392  // this roundabout way.
393  if (childExprIsStmt(Parent->ASTNode.get<Stmt>(),
394  OuterImplicit.ASTNode.get<Expr>()))
395  return false;
396 
397  // Disable extraction of full RHS on assignment operations, e.g:
398  // auto x = [[RHS_EXPR]];
399  // This would just result in duplicating the code.
400  if (const auto *BO = Parent->ASTNode.get<BinaryOperator>()) {
401  if (BO->isAssignmentOp() &&
402  BO->getRHS() == OuterImplicit.ASTNode.get<Expr>())
403  return false;
404  }
405 
406  return true;
407 }
408 
409 // Find the Expr node that we're going to extract.
410 // We don't want to trigger for assignment expressions and variable/field
411 // DeclRefs. For function/member function, we want to extract the entire
412 // function call.
413 const SelectionTree::Node *computeExtractedExpr(const SelectionTree::Node *N) {
414  if (!N)
415  return nullptr;
416  const SelectionTree::Node *TargetNode = N;
417  const clang::Expr *SelectedExpr = N->ASTNode.get<clang::Expr>();
418  if (!SelectedExpr)
419  return nullptr;
420  // For function and member function DeclRefs, extract the whole call.
421  if (llvm::isa<DeclRefExpr>(SelectedExpr) ||
422  llvm::isa<MemberExpr>(SelectedExpr))
423  if (const SelectionTree::Node *Call = getCallExpr(N))
424  TargetNode = Call;
425  // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful.
426  if (const BinaryOperator *BinOpExpr =
427  dyn_cast_or_null<BinaryOperator>(SelectedExpr)) {
428  if (BinOpExpr->getOpcode() == BinaryOperatorKind::BO_Assign)
429  return nullptr;
430  }
431  if (!TargetNode || !eligibleForExtraction(TargetNode))
432  return nullptr;
433  return TargetNode;
434 }
435 
436 /// Extracts an expression to the variable placeholder
437 /// Before:
438 /// int x = 5 + 4 * 3;
439 /// ^^^^^
440 /// After:
441 /// auto placeholder = 5 + 4;
442 /// int x = placeholder * 3;
443 class ExtractVariable : public Tweak {
444 public:
445  const char *id() const override final;
446  bool prepare(const Selection &Inputs) override;
447  Expected<Effect> apply(const Selection &Inputs) override;
448  std::string title() const override {
449  return "Extract subexpression to variable";
450  }
451  llvm::StringLiteral kind() const override {
452  return CodeAction::REFACTOR_KIND;
453  }
454 
455 private:
456  // the expression to extract
457  std::unique_ptr<ExtractionContext> Target;
458 };
459 REGISTER_TWEAK(ExtractVariable)
460 bool ExtractVariable::prepare(const Selection &Inputs) {
461  // we don't trigger on empty selections for now
462  if (Inputs.SelectionBegin == Inputs.SelectionEnd)
463  return false;
464  const ASTContext &Ctx = Inputs.AST->getASTContext();
465  // FIXME: Enable non-C++ cases once we start spelling types explicitly instead
466  // of making use of auto.
467  if (!Ctx.getLangOpts().CPlusPlus)
468  return false;
469  const SourceManager &SM = Inputs.AST->getSourceManager();
470  if (const SelectionTree::Node *N =
471  computeExtractedExpr(Inputs.ASTSelection.commonAncestor()))
472  Target = std::make_unique<ExtractionContext>(N, SM, Ctx);
473  return Target && Target->isExtractable();
474 }
475 
476 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) {
477  tooling::Replacements Result;
478  // FIXME: get variable name from user or suggest based on type
479  std::string VarName = "placeholder";
480  SourceRange Range = Target->getExtractionChars();
481  // insert new variable declaration
482  if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
483  return std::move(Err);
484  // replace expression with variable name
485  if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
486  return std::move(Err);
487  return Effect::mainFileEdit(Inputs.AST->getSourceManager(),
488  std::move(Result));
489 }
490 
491 } // namespace
492 } // namespace clangd
493 } // namespace clang
Range
CharSourceRange Range
SourceRange for the file name.
Definition: IncludeOrderCheck.cpp:38
Kind
BinaryOperatorKind Kind
Definition: ExtractVariable.cpp:203
Selection.h
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
clang::clangd::toHalfOpenFileRange
llvm::Optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Definition: SourceCode.cpp:424
Type
NodeType Type
Definition: HTMLGenerator.cpp:73
Expected
std::vector< const char * > Expected
Definition: PrintASTTests.cpp:27
InsertionPoint
SourceLocation InsertionPoint
Definition: ExtractFunction.cpp:350
Ctx
Context Ctx
Definition: TUScheduler.cpp:458
Outer
std::pair< Context, Canceler > Outer
Definition: CancellationTests.cpp:49
Target
std::string Target
Definition: QueryDriverDatabase.cpp:64
Protocol.h
Inputs
ParseInputs Inputs
Definition: TUScheduler.cpp:455
ExprLoc
SourceLocation ExprLoc
Definition: ExtractVariable.cpp:204
Decl
const FunctionDecl * Decl
Definition: AvoidBindCheck.cpp:100
Tweak.h
Inner
std::pair< Context, Canceler > Inner
Definition: CancellationTests.cpp:49
Logger.h
Parent
const Node * Parent
Definition: ExtractFunction.cpp:152
SourceCode.h
CE
CaptureExpr CE
Definition: AvoidBindCheck.cpp:67
DeclRef
const DeclRefExpr * DeclRef
Definition: UseAfterMoveCheck.cpp:50
SelectedOperands
llvm::SmallVector< const SelectionTree::Node * > SelectedOperands
Definition: ExtractVariable.cpp:205
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
SM
const SourceManager & SM
Definition: IncludeCleaner.cpp:108
clang::clangd::toSourceCode
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R)
Returns the source code covered by the source range.
Definition: SourceCode.cpp:446
REGISTER_TWEAK
#define REGISTER_TWEAK(Subclass)
Definition: Tweak.h:132
ParsedAST.h