clang-tools  14.0.0git
ExtractFunction.cpp
Go to the documentation of this file.
1 //===--- ExtractFunction.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Extracts statements to a new function and replaces the statements with a
10 // call to the new function.
11 // Before:
12 // void f(int a) {
13 // [[if(a < 5)
14 // a = 5;]]
15 // }
16 // After:
17 // void extracted(int &a) {
18 // if(a < 5)
19 // a = 5;
20 // }
21 // void f(int a) {
22 // extracted(a);
23 // }
24 //
25 // - Only extract statements
26 // - Extracts from non-templated free functions only.
27 // - Parameters are const only if the declaration was const
28 // - Always passed by l-value reference
29 // - Void return type
30 // - Cannot extract declarations that will be needed in the original function
31 // after extraction.
32 // - Checks for broken control flow (break/continue without loop/switch)
33 //
34 // 1. ExtractFunction is the tweak subclass
35 // - Prepare does basic analysis of the selection and is therefore fast.
36 // Successful prepare doesn't always mean we can apply the tweak.
37 // - Apply does a more detailed analysis and can be slower. In case of
38 // failure, we let the user know that we are unable to perform extraction.
39 // 2. ExtractionZone stores information about the range being extracted and
40 // the enclosing function.
41 // 3. NewFunction stores properties of the extracted function and provides
42 // methods for rendering it.
43 // 4. CapturedZoneInfo uses a RecursiveASTVisitor to capture information about
44 // the extraction like declarations, existing return statements, etc.
45 // 5. getExtractedFunction is responsible for analyzing the CapturedZoneInfo and
46 // creating a NewFunction.
47 //===----------------------------------------------------------------------===//
48 
49 #include "AST.h"
50 #include "FindTarget.h"
51 #include "ParsedAST.h"
52 #include "Selection.h"
53 #include "SourceCode.h"
54 #include "refactor/Tweak.h"
55 #include "support/Logger.h"
56 #include "clang/AST/ASTContext.h"
57 #include "clang/AST/Decl.h"
58 #include "clang/AST/DeclBase.h"
59 #include "clang/AST/DeclTemplate.h"
60 #include "clang/AST/RecursiveASTVisitor.h"
61 #include "clang/AST/Stmt.h"
62 #include "clang/Basic/LangOptions.h"
63 #include "clang/Basic/SourceLocation.h"
64 #include "clang/Basic/SourceManager.h"
65 #include "clang/Lex/Lexer.h"
66 #include "clang/Tooling/Core/Replacement.h"
67 #include "clang/Tooling/Refactoring/Extract/SourceExtraction.h"
68 #include "llvm/ADT/None.h"
69 #include "llvm/ADT/Optional.h"
70 #include "llvm/ADT/STLExtras.h"
71 #include "llvm/ADT/SmallSet.h"
72 #include "llvm/ADT/SmallVector.h"
73 #include "llvm/ADT/StringRef.h"
74 #include "llvm/ADT/iterator_range.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/Error.h"
77 
78 namespace clang {
79 namespace clangd {
80 namespace {
81 
82 using Node = SelectionTree::Node;
83 
// The code being extracted is the ExtractionZone; the function/method that
// contains it is the EnclosingFunction.
// Relative to the zone, every location in the file belongs to one of four
// regions.
enum class ZoneRelative {
  // Within EnclosingFunction, preceding the zone.
  Before,
  // Within the zone itself.
  Inside,
  // Within EnclosingFunction, following the zone.
  After,
  // Anywhere that is not within EnclosingFunction.
  OutsideFunc
};
93 
94 // A RootStmt is a statement that's fully selected including all it's children
95 // and it's parent is unselected.
96 // Check if a node is a root statement.
97 bool isRootStmt(const Node *N) {
98  if (!N->ASTNode.get<Stmt>())
99  return false;
100  // Root statement cannot be partially selected.
101  if (N->Selected == SelectionTree::Partial)
102  return false;
103  // Only DeclStmt can be an unselected RootStmt since VarDecls claim the entire
104  // selection range in selectionTree.
105  if (N->Selected == SelectionTree::Unselected && !N->ASTNode.get<DeclStmt>())
106  return false;
107  return true;
108 }
109 
110 // Returns the (unselected) parent of all RootStmts given the commonAncestor.
111 // Returns null if:
112 // 1. any node is partially selected
113 // 2. If all completely selected nodes don't have the same common parent
114 // 3. Any child of Parent isn't a RootStmt.
115 // Returns null if any child is not a RootStmt.
116 // We only support extraction of RootStmts since it allows us to extract without
117 // having to change the selection range. Also, this means that any scope that
118 // begins in selection range, ends in selection range and any scope that begins
119 // outside the selection range, ends outside as well.
120 const Node *getParentOfRootStmts(const Node *CommonAnc) {
121  if (!CommonAnc)
122  return nullptr;
123  const Node *Parent = nullptr;
124  switch (CommonAnc->Selected) {
125  case SelectionTree::Selection::Unselected:
126  // Typically a block, with the { and } unselected, could also be ForStmt etc
127  // Ensure all Children are RootStmts.
128  Parent = CommonAnc;
129  break;
130  case SelectionTree::Selection::Partial:
131  // Only a fully-selected single statement can be selected.
132  return nullptr;
133  case SelectionTree::Selection::Complete:
134  // If the Common Ancestor is completely selected, then it's a root statement
135  // and its parent will be unselected.
136  Parent = CommonAnc->Parent;
137  // If parent is a DeclStmt, even though it's unselected, we consider it a
138  // root statement and return its parent. This is done because the VarDecls
139  // claim the entire selection range of the Declaration and DeclStmt is
140  // always unselected.
141  if (Parent->ASTNode.get<DeclStmt>())
142  Parent = Parent->Parent;
143  break;
144  }
145  // Ensure all Children are RootStmts.
146  return llvm::all_of(Parent->Children, isRootStmt) ? Parent : nullptr;
147 }
148 
149 // The ExtractionZone class forms a view of the code wrt Zone.
150 struct ExtractionZone {
151  // Parent of RootStatements being extracted.
152  const Node *Parent = nullptr;
153  // The half-open file range of the code being extracted.
154  SourceRange ZoneRange;
155  // The function inside which our zone resides.
156  const FunctionDecl *EnclosingFunction = nullptr;
157  // The half-open file range of the enclosing function.
158  SourceRange EnclosingFuncRange;
159  // Set of statements that form the ExtractionZone.
160  llvm::DenseSet<const Stmt *> RootStmts;
161 
162  SourceLocation getInsertionPoint() const {
163  return EnclosingFuncRange.getBegin();
164  }
165  bool isRootStmt(const Stmt *S) const;
166  // The last root statement is important to decide where we need to insert a
167  // semicolon after the extraction.
168  const Node *getLastRootStmt() const { return Parent->Children.back(); }
169 
170  // Checks if declarations inside extraction zone are accessed afterwards.
171  //
172  // This performs a partial AST traversal proportional to the size of the
173  // enclosing function, so it is possibly expensive.
174  bool requiresHoisting(const SourceManager &SM,
175  const HeuristicResolver *Resolver) const {
176  // First find all the declarations that happened inside extraction zone.
177  llvm::SmallSet<const Decl *, 1> DeclsInExtZone;
178  for (auto *RootStmt : RootStmts) {
180  RootStmt,
181  [&DeclsInExtZone](const ReferenceLoc &Loc) {
182  if (!Loc.IsDecl)
183  return;
184  DeclsInExtZone.insert(Loc.Targets.front());
185  },
186  Resolver);
187  }
188  // Early exit without performing expensive traversal below.
189  if (DeclsInExtZone.empty())
190  return false;
191  // Then make sure they are not used outside the zone.
192  for (const auto *S : EnclosingFunction->getBody()->children()) {
193  if (SM.isBeforeInTranslationUnit(S->getSourceRange().getEnd(),
194  ZoneRange.getEnd()))
195  continue;
196  bool HasPostUse = false;
198  S,
199  [&](const ReferenceLoc &Loc) {
200  if (HasPostUse ||
201  SM.isBeforeInTranslationUnit(Loc.NameLoc, ZoneRange.getEnd()))
202  return;
203  HasPostUse = llvm::any_of(Loc.Targets,
204  [&DeclsInExtZone](const Decl *Target) {
205  return DeclsInExtZone.contains(Target);
206  });
207  },
208  Resolver);
209  if (HasPostUse)
210  return true;
211  }
212  return false;
213  }
214 };
215 
216 // Whether the code in the extraction zone is guaranteed to return, assuming
217 // no broken control flow (unbound break/continue).
218 // This is a very naive check (does it end with a return stmt).
219 // Doing some rudimentary control flow analysis would cover more cases.
220 bool alwaysReturns(const ExtractionZone &EZ) {
221  const Stmt *Last = EZ.getLastRootStmt()->ASTNode.get<Stmt>();
222  // Unwrap enclosing (unconditional) compound statement.
223  while (const auto *CS = llvm::dyn_cast<CompoundStmt>(Last)) {
224  if (CS->body_empty())
225  return false;
226  else
227  Last = CS->body_back();
228  }
229  return llvm::isa<ReturnStmt>(Last);
230 }
231 
232 bool ExtractionZone::isRootStmt(const Stmt *S) const {
233  return RootStmts.find(S) != RootStmts.end();
234 }
235 
236 // Finds the function in which the zone lies.
237 const FunctionDecl *findEnclosingFunction(const Node *CommonAnc) {
238  // Walk up the SelectionTree until we find a function Decl
239  for (const Node *CurNode = CommonAnc; CurNode; CurNode = CurNode->Parent) {
240  // Don't extract from lambdas
241  if (CurNode->ASTNode.get<LambdaExpr>())
242  return nullptr;
243  if (const FunctionDecl *Func = CurNode->ASTNode.get<FunctionDecl>()) {
244  // FIXME: Support extraction from methods.
245  if (isa<CXXMethodDecl>(Func))
246  return nullptr;
247  // FIXME: Support extraction from templated functions.
248  if (Func->isTemplated())
249  return nullptr;
250  return Func;
251  }
252  }
253  return nullptr;
254 }
255 
256 // Zone Range is the union of SourceRanges of all child Nodes in Parent since
257 // all child Nodes are RootStmts
258 llvm::Optional<SourceRange> findZoneRange(const Node *Parent,
259  const SourceManager &SM,
260  const LangOptions &LangOpts) {
261  SourceRange SR;
262  if (auto BeginFileRange = toHalfOpenFileRange(
263  SM, LangOpts, Parent->Children.front()->ASTNode.getSourceRange()))
264  SR.setBegin(BeginFileRange->getBegin());
265  else
266  return llvm::None;
267  if (auto EndFileRange = toHalfOpenFileRange(
268  SM, LangOpts, Parent->Children.back()->ASTNode.getSourceRange()))
269  SR.setEnd(EndFileRange->getEnd());
270  else
271  return llvm::None;
272  return SR;
273 }
274 
275 // Compute the range spanned by the enclosing function.
276 // FIXME: check if EnclosingFunction has any attributes as the AST doesn't
277 // always store the source range of the attributes and thus we end up extracting
278 // between the attributes and the EnclosingFunction.
279 llvm::Optional<SourceRange>
280 computeEnclosingFuncRange(const FunctionDecl *EnclosingFunction,
281  const SourceManager &SM,
282  const LangOptions &LangOpts) {
283  return toHalfOpenFileRange(SM, LangOpts, EnclosingFunction->getSourceRange());
284 }
285 
286 // returns true if Child can be a single RootStmt being extracted from
287 // EnclosingFunc.
288 bool validSingleChild(const Node *Child, const FunctionDecl *EnclosingFunc) {
289  // Don't extract expressions.
290  // FIXME: We should extract expressions that are "statements" i.e. not
291  // subexpressions
292  if (Child->ASTNode.get<Expr>())
293  return false;
294  // Extracting the body of EnclosingFunc would remove it's definition.
295  assert(EnclosingFunc->hasBody() &&
296  "We should always be extracting from a function body.");
297  if (Child->ASTNode.get<Stmt>() == EnclosingFunc->getBody())
298  return false;
299  return true;
300 }
301 
302 // FIXME: Check we're not extracting from the initializer/condition of a control
303 // flow structure.
304 llvm::Optional<ExtractionZone> findExtractionZone(const Node *CommonAnc,
305  const SourceManager &SM,
306  const LangOptions &LangOpts) {
307  ExtractionZone ExtZone;
308  ExtZone.Parent = getParentOfRootStmts(CommonAnc);
309  if (!ExtZone.Parent || ExtZone.Parent->Children.empty())
310  return llvm::None;
311  ExtZone.EnclosingFunction = findEnclosingFunction(ExtZone.Parent);
312  if (!ExtZone.EnclosingFunction)
313  return llvm::None;
314  // When there is a single RootStmt, we must check if it's valid for
315  // extraction.
316  if (ExtZone.Parent->Children.size() == 1 &&
317  !validSingleChild(ExtZone.getLastRootStmt(), ExtZone.EnclosingFunction))
318  return llvm::None;
319  if (auto FuncRange =
320  computeEnclosingFuncRange(ExtZone.EnclosingFunction, SM, LangOpts))
321  ExtZone.EnclosingFuncRange = *FuncRange;
322  if (auto ZoneRange = findZoneRange(ExtZone.Parent, SM, LangOpts))
323  ExtZone.ZoneRange = *ZoneRange;
324  if (ExtZone.EnclosingFuncRange.isInvalid() || ExtZone.ZoneRange.isInvalid())
325  return llvm::None;
326 
327  for (const Node *Child : ExtZone.Parent->Children)
328  ExtZone.RootStmts.insert(Child->ASTNode.get<Stmt>());
329 
330  return ExtZone;
331 }
332 
333 // Stores information about the extracted function and provides methods for
334 // rendering it.
335 struct NewFunction {
336  struct Parameter {
337  std::string Name;
338  QualType TypeInfo;
340  unsigned OrderPriority; // Lower value parameters are preferred first.
341  std::string render(const DeclContext *Context) const;
342  bool operator<(const Parameter &Other) const {
343  return OrderPriority < Other.OrderPriority;
344  }
345  };
346  std::string Name = "extracted";
347  QualType ReturnType;
348  std::vector<Parameter> Parameters;
349  SourceRange BodyRange;
350  SourceLocation InsertionPoint;
351  const DeclContext *EnclosingFuncContext;
352  bool CallerReturnsValue = false;
353  // Decides whether the extracted function body and the function call need a
354  // semicolon after extraction.
355  tooling::ExtractionSemicolonPolicy SemicolonPolicy;
356  NewFunction(tooling::ExtractionSemicolonPolicy SemicolonPolicy)
358  // Render the call for this function.
359  std::string renderCall() const;
360  // Render the definition for this function.
361  std::string renderDefinition(const SourceManager &SM) const;
362 
363 private:
364  std::string renderParametersForDefinition() const;
365  std::string renderParametersForCall() const;
366  // Generate the function body.
367  std::string getFuncBody(const SourceManager &SM) const;
368 };
369 
370 std::string NewFunction::renderParametersForDefinition() const {
371  std::string Result;
372  bool NeedCommaBefore = false;
373  for (const Parameter &P : Parameters) {
374  if (NeedCommaBefore)
375  Result += ", ";
376  NeedCommaBefore = true;
377  Result += P.render(EnclosingFuncContext);
378  }
379  return Result;
380 }
381 
382 std::string NewFunction::renderParametersForCall() const {
383  std::string Result;
384  bool NeedCommaBefore = false;
385  for (const Parameter &P : Parameters) {
386  if (NeedCommaBefore)
387  Result += ", ";
388  NeedCommaBefore = true;
389  Result += P.Name;
390  }
391  return Result;
392 }
393 
394 std::string NewFunction::renderCall() const {
395  return std::string(
396  llvm::formatv("{0}{1}({2}){3}", CallerReturnsValue ? "return " : "", Name,
397  renderParametersForCall(),
398  (SemicolonPolicy.isNeededInOriginalFunction() ? ";" : "")));
399 }
400 
401 std::string NewFunction::renderDefinition(const SourceManager &SM) const {
402  return std::string(llvm::formatv(
403  "{0} {1}({2}) {\n{3}\n}\n", printType(ReturnType, *EnclosingFuncContext),
404  Name, renderParametersForDefinition(), getFuncBody(SM)));
405 }
406 
407 std::string NewFunction::getFuncBody(const SourceManager &SM) const {
408  // FIXME: Generate tooling::Replacements instead of std::string to
409  // - hoist decls
410  // - add return statement
411  // - Add semicolon
412  return toSourceCode(SM, BodyRange).str() +
413  (SemicolonPolicy.isNeededInExtractedFunction() ? ";" : "");
414 }
415 
416 std::string NewFunction::Parameter::render(const DeclContext *Context) const {
417  return printType(TypeInfo, *Context) + (PassByReference ? " &" : " ") + Name;
418 }
419 
420 // Stores captured information about Extraction Zone.
421 struct CapturedZoneInfo {
422  struct DeclInformation {
423  const Decl *TheDecl;
424  ZoneRelative DeclaredIn;
425  // index of the declaration or first reference.
426  unsigned DeclIndex;
427  bool IsReferencedInZone = false;
429  // FIXME: Capture mutation information
430  DeclInformation(const Decl *TheDecl, ZoneRelative DeclaredIn,
431  unsigned DeclIndex)
433  // Marks the occurence of a reference for this declaration
434  void markOccurence(ZoneRelative ReferenceLoc);
435  };
436  // Maps Decls to their DeclInfo
437  llvm::DenseMap<const Decl *, DeclInformation> DeclInfoMap;
438  bool HasReturnStmt = false; // Are there any return statements in the zone?
439  bool AlwaysReturns = false; // Does the zone always return?
440  // Control flow is broken if we are extracting a break/continue without a
441  // corresponding parent loop/switch
442  bool BrokenControlFlow = false;
443  // FIXME: capture TypeAliasDecl and UsingDirectiveDecl
444  // FIXME: Capture type information as well.
445  DeclInformation *createDeclInfo(const Decl *D, ZoneRelative RelativeLoc);
446  DeclInformation *getDeclInfoFor(const Decl *D);
447 };
448 
449 CapturedZoneInfo::DeclInformation *
450 CapturedZoneInfo::createDeclInfo(const Decl *D, ZoneRelative RelativeLoc) {
451  // The new Decl's index is the size of the map so far.
452  auto InsertionResult = DeclInfoMap.insert(
453  {D, DeclInformation(D, RelativeLoc, DeclInfoMap.size())});
454  // Return the newly created DeclInfo
455  return &InsertionResult.first->second;
456 }
457 
458 CapturedZoneInfo::DeclInformation *
459 CapturedZoneInfo::getDeclInfoFor(const Decl *D) {
460  // If the Decl doesn't exist, we
461  auto Iter = DeclInfoMap.find(D);
462  if (Iter == DeclInfoMap.end())
463  return nullptr;
464  return &Iter->second;
465 }
466 
467 void CapturedZoneInfo::DeclInformation::markOccurence(
468  ZoneRelative ReferenceLoc) {
469  switch (ReferenceLoc) {
470  case ZoneRelative::Inside:
471  IsReferencedInZone = true;
472  break;
473  case ZoneRelative::After:
474  IsReferencedInPostZone = true;
475  break;
476  default:
477  break;
478  }
479 }
480 
481 bool isLoop(const Stmt *S) {
482  return isa<ForStmt>(S) || isa<DoStmt>(S) || isa<WhileStmt>(S) ||
483  isa<CXXForRangeStmt>(S);
484 }
485 
486 // Captures information from Extraction Zone
487 CapturedZoneInfo captureZoneInfo(const ExtractionZone &ExtZone) {
488  // We use the ASTVisitor instead of using the selection tree since we need to
489  // find references in the PostZone as well.
490  // FIXME: Check which statements we don't allow to extract.
491  class ExtractionZoneVisitor
492  : public clang::RecursiveASTVisitor<ExtractionZoneVisitor> {
493  public:
494  ExtractionZoneVisitor(const ExtractionZone &ExtZone) : ExtZone(ExtZone) {
495  TraverseDecl(const_cast<FunctionDecl *>(ExtZone.EnclosingFunction));
496  }
497 
498  bool TraverseStmt(Stmt *S) {
499  if (!S)
500  return true;
501  bool IsRootStmt = ExtZone.isRootStmt(const_cast<const Stmt *>(S));
502  // If we are starting traversal of a RootStmt, we are somewhere inside
503  // ExtractionZone
504  if (IsRootStmt)
505  CurrentLocation = ZoneRelative::Inside;
506  addToLoopSwitchCounters(S, 1);
507  // Traverse using base class's TraverseStmt
508  RecursiveASTVisitor::TraverseStmt(S);
509  addToLoopSwitchCounters(S, -1);
510  // We set the current location as after since next stmt will either be a
511  // RootStmt (handled at the beginning) or after extractionZone
512  if (IsRootStmt)
513  CurrentLocation = ZoneRelative::After;
514  return true;
515  }
516 
517  // Add Increment to CurNumberOf{Loops,Switch} if statement is
518  // {Loop,Switch} and inside Extraction Zone.
519  void addToLoopSwitchCounters(Stmt *S, int Increment) {
520  if (CurrentLocation != ZoneRelative::Inside)
521  return;
522  if (isLoop(S))
523  CurNumberOfNestedLoops += Increment;
524  else if (isa<SwitchStmt>(S))
525  CurNumberOfSwitch += Increment;
526  }
527 
528  bool VisitDecl(Decl *D) {
529  Info.createDeclInfo(D, CurrentLocation);
530  return true;
531  }
532 
533  bool VisitDeclRefExpr(DeclRefExpr *DRE) {
534  // Find the corresponding Decl and mark it's occurrence.
535  const Decl *D = DRE->getDecl();
536  auto *DeclInfo = Info.getDeclInfoFor(D);
537  // If no Decl was found, the Decl must be outside the enclosingFunc.
538  if (!DeclInfo)
539  DeclInfo = Info.createDeclInfo(D, ZoneRelative::OutsideFunc);
540  DeclInfo->markOccurence(CurrentLocation);
541  // FIXME: check if reference mutates the Decl being referred.
542  return true;
543  }
544 
545  bool VisitReturnStmt(ReturnStmt *Return) {
546  if (CurrentLocation == ZoneRelative::Inside)
547  Info.HasReturnStmt = true;
548  return true;
549  }
550 
551  bool VisitBreakStmt(BreakStmt *Break) {
552  // Control flow is broken if break statement is selected without any
553  // parent loop or switch statement.
554  if (CurrentLocation == ZoneRelative::Inside &&
555  !(CurNumberOfNestedLoops || CurNumberOfSwitch))
556  Info.BrokenControlFlow = true;
557  return true;
558  }
559 
560  bool VisitContinueStmt(ContinueStmt *Continue) {
561  // Control flow is broken if Continue statement is selected without any
562  // parent loop
563  if (CurrentLocation == ZoneRelative::Inside && !CurNumberOfNestedLoops)
564  Info.BrokenControlFlow = true;
565  return true;
566  }
567  CapturedZoneInfo Info;
568  const ExtractionZone &ExtZone;
569  ZoneRelative CurrentLocation = ZoneRelative::Before;
570  // Number of {loop,switch} statements that are currently in the traversal
571  // stack inside Extraction Zone. Used to check for broken control flow.
572  unsigned CurNumberOfNestedLoops = 0;
573  unsigned CurNumberOfSwitch = 0;
574  };
575  ExtractionZoneVisitor Visitor(ExtZone);
576  CapturedZoneInfo Result = std::move(Visitor.Info);
577  Result.AlwaysReturns = alwaysReturns(ExtZone);
578  return Result;
579 }
580 
581 // Adds parameters to ExtractedFunc.
582 // Returns true if able to find the parameters successfully and no hoisting
583 // needed.
584 // FIXME: Check if the declaration has a local/anonymous type
585 bool createParameters(NewFunction &ExtractedFunc,
586  const CapturedZoneInfo &CapturedInfo) {
587  for (const auto &KeyVal : CapturedInfo.DeclInfoMap) {
588  const auto &DeclInfo = KeyVal.second;
589  // If a Decl was Declared in zone and referenced in post zone, it
590  // needs to be hoisted (we bail out in that case).
591  // FIXME: Support Decl Hoisting.
592  if (DeclInfo.DeclaredIn == ZoneRelative::Inside &&
593  DeclInfo.IsReferencedInPostZone)
594  return false;
595  if (!DeclInfo.IsReferencedInZone)
596  continue; // no need to pass as parameter, not referenced
597  if (DeclInfo.DeclaredIn == ZoneRelative::Inside ||
598  DeclInfo.DeclaredIn == ZoneRelative::OutsideFunc)
599  continue; // no need to pass as parameter, still accessible.
600  // Parameter specific checks.
601  const ValueDecl *VD = dyn_cast_or_null<ValueDecl>(DeclInfo.TheDecl);
602  // Can't parameterise if the Decl isn't a ValueDecl or is a FunctionDecl
603  // (this includes the case of recursive call to EnclosingFunc in Zone).
604  if (!VD || isa<FunctionDecl>(DeclInfo.TheDecl))
605  return false;
606  // Parameter qualifiers are same as the Decl's qualifiers.
607  QualType TypeInfo = VD->getType().getNonReferenceType();
608  // FIXME: Need better qualifier checks: check mutated status for
609  // Decl(e.g. was it assigned, passed as nonconst argument, etc)
610  // FIXME: check if parameter will be a non l-value reference.
611  // FIXME: We don't want to always pass variables of types like int,
612  // pointers, etc by reference.
613  bool IsPassedByReference = true;
614  // We use the index of declaration as the ordering priority for parameters.
615  ExtractedFunc.Parameters.push_back({std::string(VD->getName()), TypeInfo,
616  IsPassedByReference,
617  DeclInfo.DeclIndex});
618  }
619  llvm::sort(ExtractedFunc.Parameters);
620  return true;
621 }
622 
623 // Clangd uses open ranges while ExtractionSemicolonPolicy (in Clang Tooling)
624 // uses closed ranges. Generates the semicolon policy for the extraction and
625 // extends the ZoneRange if necessary.
626 tooling::ExtractionSemicolonPolicy
627 getSemicolonPolicy(ExtractionZone &ExtZone, const SourceManager &SM,
628  const LangOptions &LangOpts) {
629  // Get closed ZoneRange.
630  SourceRange FuncBodyRange = {ExtZone.ZoneRange.getBegin(),
631  ExtZone.ZoneRange.getEnd().getLocWithOffset(-1)};
632  auto SemicolonPolicy = tooling::ExtractionSemicolonPolicy::compute(
633  ExtZone.getLastRootStmt()->ASTNode.get<Stmt>(), FuncBodyRange, SM,
634  LangOpts);
635  // Update ZoneRange.
636  ExtZone.ZoneRange.setEnd(FuncBodyRange.getEnd().getLocWithOffset(1));
637  return SemicolonPolicy;
638 }
639 
640 // Generate return type for ExtractedFunc. Return false if unable to do so.
641 bool generateReturnProperties(NewFunction &ExtractedFunc,
642  const FunctionDecl &EnclosingFunc,
643  const CapturedZoneInfo &CapturedInfo) {
644  // If the selected code always returns, we preserve those return statements.
645  // The return type should be the same as the enclosing function.
646  // (Others are possible if there are conversions, but this seems clearest).
647  if (CapturedInfo.HasReturnStmt) {
648  // If the return is conditional, neither replacing the code with
649  // `extracted()` nor `return extracted()` is correct.
650  if (!CapturedInfo.AlwaysReturns)
651  return false;
652  QualType Ret = EnclosingFunc.getReturnType();
653  // Once we support members, it'd be nice to support e.g. extracting a method
654  // of Foo<T> that returns T. But it's not clear when that's safe.
655  if (Ret->isDependentType())
656  return false;
657  ExtractedFunc.ReturnType = Ret;
658  return true;
659  }
660  // FIXME: Generate new return statement if needed.
661  ExtractedFunc.ReturnType = EnclosingFunc.getParentASTContext().VoidTy;
662  return true;
663 }
664 
665 // FIXME: add support for adding other function return types besides void.
666 // FIXME: assign the value returned by non void extracted function.
667 llvm::Expected<NewFunction> getExtractedFunction(ExtractionZone &ExtZone,
668  const SourceManager &SM,
669  const LangOptions &LangOpts) {
670  CapturedZoneInfo CapturedInfo = captureZoneInfo(ExtZone);
671  // Bail out if any break of continue exists
672  if (CapturedInfo.BrokenControlFlow)
673  return error("Cannot extract break/continue without corresponding "
674  "loop/switch statement.");
675  NewFunction ExtractedFunc(getSemicolonPolicy(ExtZone, SM, LangOpts));
676  ExtractedFunc.BodyRange = ExtZone.ZoneRange;
677  ExtractedFunc.InsertionPoint = ExtZone.getInsertionPoint();
678  ExtractedFunc.EnclosingFuncContext =
679  ExtZone.EnclosingFunction->getDeclContext();
680  ExtractedFunc.CallerReturnsValue = CapturedInfo.AlwaysReturns;
681  if (!createParameters(ExtractedFunc, CapturedInfo) ||
682  !generateReturnProperties(ExtractedFunc, *ExtZone.EnclosingFunction,
683  CapturedInfo))
684  return error("Too complex to extract.");
685  return ExtractedFunc;
686 }
687 
688 class ExtractFunction : public Tweak {
689 public:
690  const char *id() const override final;
691  bool prepare(const Selection &Inputs) override;
692  Expected<Effect> apply(const Selection &Inputs) override;
693  std::string title() const override { return "Extract to function"; }
694  llvm::StringLiteral kind() const override {
696  }
697 
698 private:
699  ExtractionZone ExtZone;
700 };
701 
702 REGISTER_TWEAK(ExtractFunction)
703 tooling::Replacement replaceWithFuncCall(const NewFunction &ExtractedFunc,
704  const SourceManager &SM,
705  const LangOptions &LangOpts) {
706  std::string FuncCall = ExtractedFunc.renderCall();
707  return tooling::Replacement(
708  SM, CharSourceRange(ExtractedFunc.BodyRange, false), FuncCall, LangOpts);
709 }
710 
711 tooling::Replacement createFunctionDefinition(const NewFunction &ExtractedFunc,
712  const SourceManager &SM) {
713  std::string FunctionDef = ExtractedFunc.renderDefinition(SM);
714  return tooling::Replacement(SM, ExtractedFunc.InsertionPoint, 0, FunctionDef);
715 }
716 
717 // Returns true if ExtZone contains any ReturnStmts.
718 bool hasReturnStmt(const ExtractionZone &ExtZone) {
719  class ReturnStmtVisitor
720  : public clang::RecursiveASTVisitor<ReturnStmtVisitor> {
721  public:
722  bool VisitReturnStmt(ReturnStmt *Return) {
723  Found = true;
724  return false; // We found the answer, abort the scan.
725  }
726  bool Found = false;
727  };
728 
729  ReturnStmtVisitor V;
730  for (const Stmt *RootStmt : ExtZone.RootStmts) {
731  V.TraverseStmt(const_cast<Stmt *>(RootStmt));
732  if (V.Found)
733  break;
734  }
735  return V.Found;
736 }
737 
738 bool ExtractFunction::prepare(const Selection &Inputs) {
739  const LangOptions &LangOpts = Inputs.AST->getLangOpts();
740  if (!LangOpts.CPlusPlus)
741  return false;
742  const Node *CommonAnc = Inputs.ASTSelection.commonAncestor();
743  const SourceManager &SM = Inputs.AST->getSourceManager();
744  auto MaybeExtZone = findExtractionZone(CommonAnc, SM, LangOpts);
745  if (!MaybeExtZone ||
746  (hasReturnStmt(*MaybeExtZone) && !alwaysReturns(*MaybeExtZone)))
747  return false;
748 
749  // FIXME: Get rid of this check once we support hoisting.
750  if (MaybeExtZone->requiresHoisting(SM, Inputs.AST->getHeuristicResolver()))
751  return false;
752 
753  ExtZone = std::move(*MaybeExtZone);
754  return true;
755 }
756 
757 Expected<Tweak::Effect> ExtractFunction::apply(const Selection &Inputs) {
758  const SourceManager &SM = Inputs.AST->getSourceManager();
759  const LangOptions &LangOpts = Inputs.AST->getLangOpts();
760  auto ExtractedFunc = getExtractedFunction(ExtZone, SM, LangOpts);
761  // FIXME: Add more types of errors.
762  if (!ExtractedFunc)
763  return ExtractedFunc.takeError();
764  tooling::Replacements Result;
765  if (auto Err = Result.add(createFunctionDefinition(*ExtractedFunc, SM)))
766  return std::move(Err);
767  if (auto Err = Result.add(replaceWithFuncCall(*ExtractedFunc, SM, LangOpts)))
768  return std::move(Err);
769  return Effect::mainFileEdit(SM, std::move(Result));
770 }
771 
772 } // namespace
773 } // namespace clangd
774 } // namespace clang
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:45
TheDecl
const Decl * TheDecl
Definition: ExtractFunction.cpp:423
DeclInfoMap
llvm::DenseMap< const Decl *, DeclInformation > DeclInfoMap
Definition: ExtractFunction.cpp:437
Selection.h
clang::clangd::toHalfOpenFileRange
llvm::Optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
Definition: SourceCode.cpp:424
BodyRange
SourceRange BodyRange
Definition: ExtractFunction.cpp:349
Expected
std::vector< const char * > Expected
Definition: PrintASTTests.cpp:27
clang::clangd::error
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:80
InsertionPoint
SourceLocation InsertionPoint
Definition: ExtractFunction.cpp:350
EnclosingFuncContext
const DeclContext * EnclosingFuncContext
Definition: ExtractFunction.cpp:351
HasReturnStmt
bool HasReturnStmt
Definition: ExtractFunction.cpp:438
PassByReference
bool PassByReference
Definition: ExtractFunction.cpp:339
FindTarget.h
Target
std::string Target
Definition: QueryDriverDatabase.cpp:64
RootStmts
llvm::DenseSet< const Stmt * > RootStmts
Definition: ExtractFunction.cpp:160
Inputs
ParseInputs Inputs
Definition: TUScheduler.cpp:455
TypeInfo
QualType TypeInfo
Definition: ExtractFunction.cpp:338
OrderPriority
unsigned OrderPriority
Definition: ExtractFunction.cpp:340
DeclIndex
unsigned DeclIndex
Definition: ExtractFunction.cpp:426
ns1::ns2::D
@ D
Definition: CategoricalFeature.h:3
Name
std::string Name
Definition: ExtractFunction.cpp:337
Decl
const FunctionDecl * Decl
Definition: AvoidBindCheck.cpp:100
ReturnType
QualType ReturnType
Definition: ExtractFunction.cpp:347
Tweak.h
EnclosingFuncRange
SourceRange EnclosingFuncRange
Definition: ExtractFunction.cpp:158
Parameters
std::vector< Parameter > Parameters
Definition: ExtractFunction.cpp:348
Logger.h
DeclaredIn
ZoneRelative DeclaredIn
Definition: ExtractFunction.cpp:424
Parent
const Node * Parent
Definition: ExtractFunction.cpp:152
SemicolonPolicy
tooling::ExtractionSemicolonPolicy SemicolonPolicy
Definition: ExtractFunction.cpp:355
SourceCode.h
IsReferencedInZone
bool IsReferencedInZone
Definition: ExtractFunction.cpp:427
clang::clangd::HighlightingKind::Parameter
@ Parameter
Info
FunctionInfo Info
Definition: FunctionSizeCheck.cpp:120
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::CodeAction::REFACTOR_KIND
const static llvm::StringLiteral REFACTOR_KIND
Definition: Protocol.h:980
IsReferencedInPostZone
bool IsReferencedInPostZone
Definition: ExtractFunction.cpp:428
ZoneRange
SourceRange ZoneRange
Definition: ExtractFunction.cpp:154
clang::clangd::operator<
bool operator<(const Ref &L, const Ref &R)
Definition: Ref.h:97
EnclosingFunction
const FunctionDecl * EnclosingFunction
Definition: ExtractFunction.cpp:156
SM
const SourceManager & SM
Definition: IncludeCleaner.cpp:108
clang::clangd::SelectionTree::Unselected
@ Unselected
Definition: Selection.h:114
clang::clangd::SelectionTree::Partial
@ Partial
Definition: Selection.h:116
AlwaysReturns
bool AlwaysReturns
Definition: ExtractFunction.cpp:439
clang::clangd::findExplicitReferences
void findExplicitReferences(const Stmt *S, llvm::function_ref< void(ReferenceLoc)> Out, const HeuristicResolver *Resolver)
Recursively traverse S and report all references explicitly written in the code.
Definition: FindTarget.cpp:1115
clang::clangd::toSourceCode
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R)
Returns the source code covered by the source range.
Definition: SourceCode.cpp:446
BrokenControlFlow
bool BrokenControlFlow
Definition: ExtractFunction.cpp:442
clang::clangd::printType
std::string printType(const QualType QT, const DeclContext &CurContext)
Returns a QualType as string.
Definition: AST.cpp:354
CallerReturnsValue
bool CallerReturnsValue
Definition: ExtractFunction.cpp:352
REGISTER_TWEAK
#define REGISTER_TWEAK(Subclass)
Definition: Tweak.h:132
AST.h
ParsedAST.h