clang-tools 22.0.0git
ExtractFunction.cpp
Go to the documentation of this file.
//===--- ExtractFunction.cpp -------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Extracts statements to a new function and replaces the statements with a
// call to the new function.
// Before:
//   void f(int a) {
//     [[if(a < 5)
//       a = 5;]]
//   }
// After:
//   void extracted(int &a) {
//     if(a < 5)
//       a = 5;
//   }
//   void f(int a) {
//     extracted(a);
//   }
//
// - Only extract statements
// - Extracts from non-templated free functions only.
// - Parameters are const only if the declaration was const
//   - Always passed by l-value reference
// - Void return type
// - Cannot extract declarations that will be needed in the original function
//   after extraction.
// - Checks for broken control flow (break/continue without loop/switch)
//
// 1. ExtractFunction is the tweak subclass
//    - Prepare does basic analysis of the selection and is therefore fast.
//      Successful prepare doesn't always mean we can apply the tweak.
//    - Apply does a more detailed analysis and can be slower. In case of
//      failure, we let the user know that we are unable to perform extraction.
// 2. ExtractionZone stores information about the range being extracted and the
//    enclosing function.
// 3. NewFunction stores properties of the extracted function and provides
//    methods for rendering it.
// 4. CapturedZoneInfo uses a RecursiveASTVisitor to capture information about
//    the extraction like declarations, existing return statements, etc.
// 5. getExtractedFunction is responsible for analyzing the CapturedZoneInfo and
//    creating a NewFunction.
//===----------------------------------------------------------------------===//
48
49#include "AST.h"
50#include "FindTarget.h"
51#include "ParsedAST.h"
52#include "Selection.h"
53#include "SourceCode.h"
54#include "refactor/Tweak.h"
55#include "support/Logger.h"
56#include "clang/AST/ASTContext.h"
57#include "clang/AST/Decl.h"
58#include "clang/AST/DeclBase.h"
59#include "clang/AST/ExprCXX.h"
60#include "clang/AST/NestedNameSpecifier.h"
61#include "clang/AST/RecursiveASTVisitor.h"
62#include "clang/AST/Stmt.h"
63#include "clang/Basic/LangOptions.h"
64#include "clang/Basic/SourceLocation.h"
65#include "clang/Basic/SourceManager.h"
66#include "clang/Tooling/Core/Replacement.h"
67#include "clang/Tooling/Refactoring/Extract/SourceExtraction.h"
68#include "llvm/ADT/STLExtras.h"
69#include "llvm/ADT/SmallSet.h"
70#include "llvm/ADT/SmallVector.h"
71#include "llvm/ADT/StringRef.h"
72#include "llvm/Support/Casting.h"
73#include "llvm/Support/Error.h"
74#include <optional>
75
76namespace clang {
77namespace clangd {
78namespace {
79
80using Node = SelectionTree::Node;
81
// The code being extracted is the ExtractionZone; the function/method that
// contains it is the EnclosingFunction.
// Any location in the file belongs to exactly one of these four regions,
// defined relative to the zone.
enum class ZoneRelative {
  Before,     // Inside EnclosingFunction, preceding the zone.
  Inside,     // Within the zone itself.
  After,      // Inside EnclosingFunction, following the zone.
  OutsideFunc // Not inside EnclosingFunction.
};
91
// The shape in which NewFunction::renderDeclaration emits the new function.
// Kept as an unscoped enum: the enumerators are used unqualified in this file.
enum FunctionDeclKind {
  InlineDefinition,   // Definition with a body, unqualified name.
  ForwardDeclaration, // Declaration only, terminated by ';'.
  OutOfLineDefinition // Definition with a body; 'static' is not rendered and
                      // the name is qualified when a qualifier is available.
};
97
98// A RootStmt is a statement that's fully selected including all its children
99// and its parent is unselected.
100// Check if a node is a root statement.
101bool isRootStmt(const Node *N) {
102 if (!N->ASTNode.get<Stmt>())
103 return false;
104 // Root statement cannot be partially selected.
105 if (N->Selected == SelectionTree::Partial)
106 return false;
107 // A DeclStmt can be an unselected RootStmt since VarDecls claim the entire
108 // selection range in selectionTree. Additionally, a CXXOperatorCallExpr of a
109 // binary operation can be unselected because its children claim the entire
110 // selection range in the selection tree (e.g. <<).
111 if (N->Selected == SelectionTree::Unselected && !N->ASTNode.get<DeclStmt>() &&
112 !N->ASTNode.get<CXXOperatorCallExpr>())
113 return false;
114 return true;
115}
116
117// Returns the (unselected) parent of all RootStmts given the commonAncestor.
118// Returns null if:
119// 1. any node is partially selected
120// 2. If all completely selected nodes don't have the same common parent
121// 3. Any child of Parent isn't a RootStmt.
122// Returns null if any child is not a RootStmt.
123// We only support extraction of RootStmts since it allows us to extract without
124// having to change the selection range. Also, this means that any scope that
125// begins in selection range, ends in selection range and any scope that begins
126// outside the selection range, ends outside as well.
127const Node *getParentOfRootStmts(const Node *CommonAnc) {
128 if (!CommonAnc)
129 return nullptr;
130 const Node *Parent = nullptr;
131 switch (CommonAnc->Selected) {
133 // Typically a block, with the { and } unselected, could also be ForStmt etc
134 // Ensure all Children are RootStmts.
135 Parent = CommonAnc;
136 break;
138 // Only a fully-selected single statement can be selected.
139 return nullptr;
141 // If the Common Ancestor is completely selected, then it's a root statement
142 // and its parent will be unselected.
143 Parent = CommonAnc->Parent;
144 // If parent is a DeclStmt, even though it's unselected, we consider it a
145 // root statement and return its parent. This is done because the VarDecls
146 // claim the entire selection range of the Declaration and DeclStmt is
147 // always unselected.
148 if (Parent->ASTNode.get<DeclStmt>())
149 Parent = Parent->Parent;
150 break;
151 }
152 // Ensure all Children are RootStmts.
153 return llvm::all_of(Parent->Children, isRootStmt) ? Parent : nullptr;
154}
155
156// The ExtractionZone class forms a view of the code wrt Zone.
157struct ExtractionZone {
158 // Parent of RootStatements being extracted.
159 const Node *Parent = nullptr;
160 // The half-open file range of the code being extracted.
161 SourceRange ZoneRange;
162 // The function inside which our zone resides.
163 const FunctionDecl *EnclosingFunction = nullptr;
164 // The half-open file range of the enclosing function.
165 SourceRange EnclosingFuncRange;
166 // Set of statements that form the ExtractionZone.
167 llvm::DenseSet<const Stmt *> RootStmts;
168
169 SourceLocation getInsertionPoint() const {
170 return EnclosingFuncRange.getBegin();
171 }
172 bool isRootStmt(const Stmt *S) const;
173 // The last root statement is important to decide where we need to insert a
174 // semicolon after the extraction.
175 const Node *getLastRootStmt() const { return Parent->Children.back(); }
176
177 // Checks if declarations inside extraction zone are accessed afterwards.
178 //
179 // This performs a partial AST traversal proportional to the size of the
180 // enclosing function, so it is possibly expensive.
181 bool requiresHoisting(const SourceManager &SM,
182 const HeuristicResolver *Resolver) const {
183 // First find all the declarations that happened inside extraction zone.
184 llvm::SmallPtrSet<const Decl *, 1> DeclsInExtZone;
185 for (auto *RootStmt : RootStmts) {
187 RootStmt,
188 [&DeclsInExtZone](const ReferenceLoc &Loc) {
189 if (!Loc.IsDecl)
190 return;
191 DeclsInExtZone.insert(Loc.Targets.front());
192 },
193 Resolver);
194 }
195 // Early exit without performing expensive traversal below.
196 if (DeclsInExtZone.empty())
197 return false;
198 // Then make sure they are not used outside the zone.
199 for (const auto *S : EnclosingFunction->getBody()->children()) {
200 if (SM.isBeforeInTranslationUnit(S->getSourceRange().getEnd(),
201 ZoneRange.getEnd()))
202 continue;
203 bool HasPostUse = false;
205 S,
206 [&](const ReferenceLoc &Loc) {
207 if (HasPostUse ||
208 SM.isBeforeInTranslationUnit(Loc.NameLoc, ZoneRange.getEnd()))
209 return;
210 HasPostUse = llvm::any_of(Loc.Targets,
211 [&DeclsInExtZone](const Decl *Target) {
212 return DeclsInExtZone.contains(Target);
213 });
214 },
215 Resolver);
216 if (HasPostUse)
217 return true;
218 }
219 return false;
220 }
221};
222
223// Whether the code in the extraction zone is guaranteed to return, assuming
224// no broken control flow (unbound break/continue).
225// This is a very naive check (does it end with a return stmt).
226// Doing some rudimentary control flow analysis would cover more cases.
227bool alwaysReturns(const ExtractionZone &EZ) {
228 const Stmt *Last = EZ.getLastRootStmt()->ASTNode.get<Stmt>();
229 // Unwrap enclosing (unconditional) compound statement.
230 while (const auto *CS = llvm::dyn_cast<CompoundStmt>(Last)) {
231 if (CS->body_empty())
232 return false;
233 Last = CS->body_back();
234 }
235 return llvm::isa<ReturnStmt>(Last);
236}
237
238bool ExtractionZone::isRootStmt(const Stmt *S) const {
239 return RootStmts.contains(S);
240}
241
242// Finds the function in which the zone lies.
243const FunctionDecl *findEnclosingFunction(const Node *CommonAnc) {
244 // Walk up the SelectionTree until we find a function Decl
245 for (const Node *CurNode = CommonAnc; CurNode; CurNode = CurNode->Parent) {
246 // Don't extract from lambdas
247 if (CurNode->ASTNode.get<LambdaExpr>())
248 return nullptr;
249 if (const FunctionDecl *Func = CurNode->ASTNode.get<FunctionDecl>()) {
250 // FIXME: Support extraction from templated functions.
251 if (Func->isTemplated())
252 return nullptr;
253 if (!Func->getBody())
254 return nullptr;
255 for (const auto *S : Func->getBody()->children()) {
256 // During apply phase, we perform semantic analysis (e.g. figure out
257 // what variables requires hoisting). We cannot perform those when the
258 // body has invalid statements, so fail up front.
259 if (!S)
260 return nullptr;
261 }
262 return Func;
263 }
264 }
265 return nullptr;
266}
267
268// Zone Range is the union of SourceRanges of all child Nodes in Parent since
269// all child Nodes are RootStmts
270std::optional<SourceRange> findZoneRange(const Node *Parent,
271 const SourceManager &SM,
272 const LangOptions &LangOpts) {
273 SourceRange SR;
274 if (auto BeginFileRange = toHalfOpenFileRange(
275 SM, LangOpts, Parent->Children.front()->ASTNode.getSourceRange()))
276 SR.setBegin(BeginFileRange->getBegin());
277 else
278 return std::nullopt;
279 if (auto EndFileRange = toHalfOpenFileRange(
280 SM, LangOpts, Parent->Children.back()->ASTNode.getSourceRange()))
281 SR.setEnd(EndFileRange->getEnd());
282 else
283 return std::nullopt;
284 return SR;
285}
286
287// Compute the range spanned by the enclosing function.
288// FIXME: check if EnclosingFunction has any attributes as the AST doesn't
289// always store the source range of the attributes and thus we end up extracting
290// between the attributes and the EnclosingFunction.
291std::optional<SourceRange>
292computeEnclosingFuncRange(const FunctionDecl *EnclosingFunction,
293 const SourceManager &SM,
294 const LangOptions &LangOpts) {
295 return toHalfOpenFileRange(SM, LangOpts, EnclosingFunction->getSourceRange());
296}
297
298// returns true if Child can be a single RootStmt being extracted from
299// EnclosingFunc.
300bool validSingleChild(const Node *Child, const FunctionDecl *EnclosingFunc) {
301 // Don't extract expressions.
302 // FIXME: We should extract expressions that are "statements" i.e. not
303 // subexpressions
304 if (Child->ASTNode.get<Expr>())
305 return false;
306 // Extracting the body of EnclosingFunc would remove it's definition.
307 assert(EnclosingFunc->hasBody() &&
308 "We should always be extracting from a function body.");
309 if (Child->ASTNode.get<Stmt>() == EnclosingFunc->getBody())
310 return false;
311 return true;
312}
313
314// FIXME: Check we're not extracting from the initializer/condition of a control
315// flow structure.
316std::optional<ExtractionZone> findExtractionZone(const Node *CommonAnc,
317 const SourceManager &SM,
318 const LangOptions &LangOpts) {
319 ExtractionZone ExtZone;
320 ExtZone.Parent = getParentOfRootStmts(CommonAnc);
321 if (!ExtZone.Parent || ExtZone.Parent->Children.empty())
322 return std::nullopt;
323 ExtZone.EnclosingFunction = findEnclosingFunction(ExtZone.Parent);
324 if (!ExtZone.EnclosingFunction)
325 return std::nullopt;
326 // When there is a single RootStmt, we must check if it's valid for
327 // extraction.
328 if (ExtZone.Parent->Children.size() == 1 &&
329 !validSingleChild(ExtZone.getLastRootStmt(), ExtZone.EnclosingFunction))
330 return std::nullopt;
331 if (auto FuncRange =
332 computeEnclosingFuncRange(ExtZone.EnclosingFunction, SM, LangOpts))
333 ExtZone.EnclosingFuncRange = *FuncRange;
334 if (auto ZoneRange = findZoneRange(ExtZone.Parent, SM, LangOpts))
335 ExtZone.ZoneRange = *ZoneRange;
336 if (ExtZone.EnclosingFuncRange.isInvalid() || ExtZone.ZoneRange.isInvalid())
337 return std::nullopt;
338
339 for (const Node *Child : ExtZone.Parent->Children)
340 ExtZone.RootStmts.insert(Child->ASTNode.get<Stmt>());
341
342 return ExtZone;
343}
344
345// Stores information about the extracted function and provides methods for
346// rendering it.
347struct NewFunction {
348 struct Parameter {
349 std::string Name;
350 QualType TypeInfo;
351 bool PassByReference;
352 unsigned OrderPriority; // Lower value parameters are preferred first.
353 std::string render(const DeclContext *Context) const;
354 bool operator<(const Parameter &Other) const {
355 return OrderPriority < Other.OrderPriority;
356 }
357 };
358 std::string Name = "extracted";
359 QualType ReturnType;
360 std::vector<Parameter> Parameters;
361 SourceRange BodyRange;
362 SourceLocation DefinitionPoint;
363 std::optional<SourceLocation> ForwardDeclarationPoint;
364 const CXXRecordDecl *EnclosingClass = nullptr;
365 NestedNameSpecifier DefinitionQualifier = std::nullopt;
366 const DeclContext *SemanticDC = nullptr;
367 const DeclContext *SyntacticDC = nullptr;
368 const DeclContext *ForwardDeclarationSyntacticDC = nullptr;
369 bool CallerReturnsValue = false;
370 bool Static = false;
371 ConstexprSpecKind Constexpr = ConstexprSpecKind::Unspecified;
372 bool Const = false;
373
374 // Decides whether the extracted function body and the function call need a
375 // semicolon after extraction.
376 tooling::ExtractionSemicolonPolicy SemicolonPolicy;
377 const LangOptions *LangOpts;
378 NewFunction(tooling::ExtractionSemicolonPolicy SemicolonPolicy,
379 const LangOptions *LangOpts)
380 : SemicolonPolicy(SemicolonPolicy), LangOpts(LangOpts) {}
381 // Render the call for this function.
382 std::string renderCall() const;
383 // Render the definition for this function.
384 std::string renderDeclaration(FunctionDeclKind K,
385 const DeclContext &SemanticDC,
386 const DeclContext &SyntacticDC,
387 const SourceManager &SM) const;
388
389private:
390 std::string
391 renderParametersForDeclaration(const DeclContext &Enclosing) const;
392 std::string renderParametersForCall() const;
393 std::string renderSpecifiers(FunctionDeclKind K) const;
394 std::string renderQualifiers() const;
395 std::string renderDeclarationName(FunctionDeclKind K) const;
396 // Generate the function body.
397 std::string getFuncBody(const SourceManager &SM) const;
398};
399
400std::string NewFunction::renderParametersForDeclaration(
401 const DeclContext &Enclosing) const {
402 std::string Result;
403 bool NeedCommaBefore = false;
404 for (const Parameter &P : Parameters) {
405 if (NeedCommaBefore)
406 Result += ", ";
407 NeedCommaBefore = true;
408 Result += P.render(&Enclosing);
409 }
410 return Result;
411}
412
413std::string NewFunction::renderParametersForCall() const {
414 std::string Result;
415 bool NeedCommaBefore = false;
416 for (const Parameter &P : Parameters) {
417 if (NeedCommaBefore)
418 Result += ", ";
419 NeedCommaBefore = true;
420 Result += P.Name;
421 }
422 return Result;
423}
424
425std::string NewFunction::renderSpecifiers(FunctionDeclKind K) const {
426 std::string Attributes;
427
428 if (Static && K != FunctionDeclKind::OutOfLineDefinition) {
429 Attributes += "static ";
430 }
431
432 switch (Constexpr) {
433 case ConstexprSpecKind::Unspecified:
434 case ConstexprSpecKind::Constinit:
435 break;
436 case ConstexprSpecKind::Constexpr:
437 Attributes += "constexpr ";
438 break;
439 case ConstexprSpecKind::Consteval:
440 Attributes += "consteval ";
441 break;
442 }
443
444 return Attributes;
445}
446
447std::string NewFunction::renderQualifiers() const {
448 std::string Attributes;
449
450 if (Const) {
451 Attributes += " const";
452 }
453
454 return Attributes;
455}
456
457std::string NewFunction::renderDeclarationName(FunctionDeclKind K) const {
458 if (!DefinitionQualifier || K != OutOfLineDefinition)
459 return Name;
460
461 std::string QualifierName;
462 llvm::raw_string_ostream Oss(QualifierName);
463 DefinitionQualifier.print(Oss, *LangOpts);
464 return llvm::formatv("{0}{1}", QualifierName, Name);
465}
466
467std::string NewFunction::renderCall() const {
468 return std::string(
469 llvm::formatv("{0}{1}({2}){3}", CallerReturnsValue ? "return " : "", Name,
470 renderParametersForCall(),
471 (SemicolonPolicy.isNeededInOriginalFunction() ? ";" : "")));
472}
473
474std::string NewFunction::renderDeclaration(FunctionDeclKind K,
475 const DeclContext &SemanticDC,
476 const DeclContext &SyntacticDC,
477 const SourceManager &SM) const {
478 std::string Declaration = std::string(llvm::formatv(
479 "{0}{1} {2}({3}){4}", renderSpecifiers(K),
480 printType(ReturnType, SyntacticDC), renderDeclarationName(K),
481 renderParametersForDeclaration(SemanticDC), renderQualifiers()));
482
483 switch (K) {
484 case ForwardDeclaration:
485 return std::string(llvm::formatv("{0};\n", Declaration));
486 case OutOfLineDefinition:
487 case InlineDefinition:
488 return std::string(
489 llvm::formatv("{0} {\n{1}\n}\n", Declaration, getFuncBody(SM)));
490 break;
491 }
492 llvm_unreachable("Unsupported FunctionDeclKind enum");
493}
494
495std::string NewFunction::getFuncBody(const SourceManager &SM) const {
496 // FIXME: Generate tooling::Replacements instead of std::string to
497 // - hoist decls
498 // - add return statement
499 // - Add semicolon
500 return toSourceCode(SM, BodyRange).str() +
501 (SemicolonPolicy.isNeededInExtractedFunction() ? ";" : "");
502}
503
504std::string NewFunction::Parameter::render(const DeclContext *Context) const {
505 return printType(TypeInfo, *Context) + (PassByReference ? " &" : " ") + Name;
506}
507
508// Stores captured information about Extraction Zone.
509struct CapturedZoneInfo {
510 struct DeclInformation {
511 const Decl *TheDecl;
512 ZoneRelative DeclaredIn;
513 // index of the declaration or first reference.
514 unsigned DeclIndex;
515 bool IsReferencedInZone = false;
516 bool IsReferencedInPostZone = false;
517 // FIXME: Capture mutation information
518 DeclInformation(const Decl *TheDecl, ZoneRelative DeclaredIn,
519 unsigned DeclIndex)
520 : TheDecl(TheDecl), DeclaredIn(DeclaredIn), DeclIndex(DeclIndex){};
521 // Marks the occurence of a reference for this declaration
522 void markOccurence(ZoneRelative ReferenceLoc);
523 };
524 // Maps Decls to their DeclInfo
525 llvm::DenseMap<const Decl *, DeclInformation> DeclInfoMap;
526 bool HasReturnStmt = false; // Are there any return statements in the zone?
527 bool AlwaysReturns = false; // Does the zone always return?
528 // Control flow is broken if we are extracting a break/continue without a
529 // corresponding parent loop/switch
530 bool BrokenControlFlow = false;
531 // FIXME: capture TypeAliasDecl and UsingDirectiveDecl
532 // FIXME: Capture type information as well.
533 DeclInformation *createDeclInfo(const Decl *D, ZoneRelative RelativeLoc);
534 DeclInformation *getDeclInfoFor(const Decl *D);
535};
536
537CapturedZoneInfo::DeclInformation *
538CapturedZoneInfo::createDeclInfo(const Decl *D, ZoneRelative RelativeLoc) {
539 // The new Decl's index is the size of the map so far.
540 auto InsertionResult = DeclInfoMap.insert(
541 {D, DeclInformation(D, RelativeLoc, DeclInfoMap.size())});
542 // Return the newly created DeclInfo
543 return &InsertionResult.first->second;
544}
545
546CapturedZoneInfo::DeclInformation *
547CapturedZoneInfo::getDeclInfoFor(const Decl *D) {
548 // If the Decl doesn't exist, we
549 auto Iter = DeclInfoMap.find(D);
550 if (Iter == DeclInfoMap.end())
551 return nullptr;
552 return &Iter->second;
553}
554
555void CapturedZoneInfo::DeclInformation::markOccurence(
556 ZoneRelative ReferenceLoc) {
557 switch (ReferenceLoc) {
558 case ZoneRelative::Inside:
559 IsReferencedInZone = true;
560 break;
561 case ZoneRelative::After:
562 IsReferencedInPostZone = true;
563 break;
564 default:
565 break;
566 }
567}
568
569bool isLoop(const Stmt *S) {
570 return isa<ForStmt>(S) || isa<DoStmt>(S) || isa<WhileStmt>(S) ||
571 isa<CXXForRangeStmt>(S);
572}
573
574// Captures information from Extraction Zone
575CapturedZoneInfo captureZoneInfo(const ExtractionZone &ExtZone) {
576 // We use the ASTVisitor instead of using the selection tree since we need to
577 // find references in the PostZone as well.
578 // FIXME: Check which statements we don't allow to extract.
579 class ExtractionZoneVisitor
580 : public clang::RecursiveASTVisitor<ExtractionZoneVisitor> {
581 public:
582 ExtractionZoneVisitor(const ExtractionZone &ExtZone) : ExtZone(ExtZone) {
583 TraverseDecl(const_cast<FunctionDecl *>(ExtZone.EnclosingFunction));
584 }
585
586 bool TraverseStmt(Stmt *S) {
587 if (!S)
588 return true;
589 bool IsRootStmt = ExtZone.isRootStmt(const_cast<const Stmt *>(S));
590 // If we are starting traversal of a RootStmt, we are somewhere inside
591 // ExtractionZone
592 if (IsRootStmt)
593 CurrentLocation = ZoneRelative::Inside;
594 addToLoopSwitchCounters(S, 1);
595 // Traverse using base class's TraverseStmt
596 RecursiveASTVisitor::TraverseStmt(S);
597 addToLoopSwitchCounters(S, -1);
598 // We set the current location as after since next stmt will either be a
599 // RootStmt (handled at the beginning) or after extractionZone
600 if (IsRootStmt)
601 CurrentLocation = ZoneRelative::After;
602 return true;
603 }
604
605 // Add Increment to CurNumberOf{Loops,Switch} if statement is
606 // {Loop,Switch} and inside Extraction Zone.
607 void addToLoopSwitchCounters(Stmt *S, int Increment) {
608 if (CurrentLocation != ZoneRelative::Inside)
609 return;
610 if (isLoop(S))
611 CurNumberOfNestedLoops += Increment;
612 else if (isa<SwitchStmt>(S))
613 CurNumberOfSwitch += Increment;
614 }
615
616 bool VisitDecl(Decl *D) {
617 Info.createDeclInfo(D, CurrentLocation);
618 return true;
619 }
620
621 bool VisitDeclRefExpr(DeclRefExpr *DRE) {
622 // Find the corresponding Decl and mark it's occurrence.
623 const Decl *D = DRE->getDecl();
624 auto *DeclInfo = Info.getDeclInfoFor(D);
625 // If no Decl was found, the Decl must be outside the enclosingFunc.
626 if (!DeclInfo)
627 DeclInfo = Info.createDeclInfo(D, ZoneRelative::OutsideFunc);
628 DeclInfo->markOccurence(CurrentLocation);
629 // FIXME: check if reference mutates the Decl being referred.
630 return true;
631 }
632
633 bool VisitReturnStmt(ReturnStmt *Return) {
634 if (CurrentLocation == ZoneRelative::Inside)
635 Info.HasReturnStmt = true;
636 return true;
637 }
638
639 bool VisitBreakStmt(BreakStmt *Break) {
640 // Control flow is broken if break statement is selected without any
641 // parent loop or switch statement.
642 if (CurrentLocation == ZoneRelative::Inside &&
643 !(CurNumberOfNestedLoops || CurNumberOfSwitch))
644 Info.BrokenControlFlow = true;
645 return true;
646 }
647
648 bool VisitContinueStmt(ContinueStmt *Continue) {
649 // Control flow is broken if Continue statement is selected without any
650 // parent loop
651 if (CurrentLocation == ZoneRelative::Inside && !CurNumberOfNestedLoops)
652 Info.BrokenControlFlow = true;
653 return true;
654 }
655 CapturedZoneInfo Info;
656 const ExtractionZone &ExtZone;
657 ZoneRelative CurrentLocation = ZoneRelative::Before;
658 // Number of {loop,switch} statements that are currently in the traversal
659 // stack inside Extraction Zone. Used to check for broken control flow.
660 unsigned CurNumberOfNestedLoops = 0;
661 unsigned CurNumberOfSwitch = 0;
662 };
663 ExtractionZoneVisitor Visitor(ExtZone);
664 CapturedZoneInfo Result = std::move(Visitor.Info);
665 Result.AlwaysReturns = alwaysReturns(ExtZone);
666 return Result;
667}
668
669// Adds parameters to ExtractedFunc.
670// Returns true if able to find the parameters successfully and no hoisting
671// needed.
672// FIXME: Check if the declaration has a local/anonymous type
673bool createParameters(NewFunction &ExtractedFunc,
674 const CapturedZoneInfo &CapturedInfo) {
675 for (const auto &KeyVal : CapturedInfo.DeclInfoMap) {
676 const auto &DeclInfo = KeyVal.second;
677 // If a Decl was Declared in zone and referenced in post zone, it
678 // needs to be hoisted (we bail out in that case).
679 // FIXME: Support Decl Hoisting.
680 if (DeclInfo.DeclaredIn == ZoneRelative::Inside &&
681 DeclInfo.IsReferencedInPostZone)
682 return false;
683 if (!DeclInfo.IsReferencedInZone)
684 continue; // no need to pass as parameter, not referenced
685 if (DeclInfo.DeclaredIn == ZoneRelative::Inside ||
686 DeclInfo.DeclaredIn == ZoneRelative::OutsideFunc)
687 continue; // no need to pass as parameter, still accessible.
688 // Parameter specific checks.
689 const ValueDecl *VD = dyn_cast_or_null<ValueDecl>(DeclInfo.TheDecl);
690 // Can't parameterise if the Decl isn't a ValueDecl or is a FunctionDecl
691 // (this includes the case of recursive call to EnclosingFunc in Zone).
692 if (!VD || isa<FunctionDecl>(DeclInfo.TheDecl))
693 return false;
694 // Parameter qualifiers are same as the Decl's qualifiers.
695 QualType TypeInfo = VD->getType().getNonReferenceType();
696 // FIXME: Need better qualifier checks: check mutated status for
697 // Decl(e.g. was it assigned, passed as nonconst argument, etc)
698 // FIXME: check if parameter will be a non l-value reference.
699 // FIXME: We don't want to always pass variables of types like int,
700 // pointers, etc by reference.
701 bool IsPassedByReference = true;
702 // We use the index of declaration as the ordering priority for parameters.
703 ExtractedFunc.Parameters.push_back({std::string(VD->getName()), TypeInfo,
704 IsPassedByReference,
705 DeclInfo.DeclIndex});
706 }
707 llvm::sort(ExtractedFunc.Parameters);
708 return true;
709}
710
711// Clangd uses open ranges while ExtractionSemicolonPolicy (in Clang Tooling)
712// uses closed ranges. Generates the semicolon policy for the extraction and
713// extends the ZoneRange if necessary.
714tooling::ExtractionSemicolonPolicy
715getSemicolonPolicy(ExtractionZone &ExtZone, const SourceManager &SM,
716 const LangOptions &LangOpts) {
717 // Get closed ZoneRange.
718 SourceRange FuncBodyRange = {ExtZone.ZoneRange.getBegin(),
719 ExtZone.ZoneRange.getEnd().getLocWithOffset(-1)};
720 auto SemicolonPolicy = tooling::ExtractionSemicolonPolicy::compute(
721 ExtZone.getLastRootStmt()->ASTNode.get<Stmt>(), FuncBodyRange, SM,
722 LangOpts);
723 // Update ZoneRange.
724 ExtZone.ZoneRange.setEnd(FuncBodyRange.getEnd().getLocWithOffset(1));
725 return SemicolonPolicy;
726}
727
728// Generate return type for ExtractedFunc. Return false if unable to do so.
729bool generateReturnProperties(NewFunction &ExtractedFunc,
730 const FunctionDecl &EnclosingFunc,
731 const CapturedZoneInfo &CapturedInfo) {
732 // If the selected code always returns, we preserve those return statements.
733 // The return type should be the same as the enclosing function.
734 // (Others are possible if there are conversions, but this seems clearest).
735 if (CapturedInfo.HasReturnStmt) {
736 // If the return is conditional, neither replacing the code with
737 // `extracted()` nor `return extracted()` is correct.
738 if (!CapturedInfo.AlwaysReturns)
739 return false;
740 QualType Ret = EnclosingFunc.getReturnType();
741 // Once we support members, it'd be nice to support e.g. extracting a method
742 // of Foo<T> that returns T. But it's not clear when that's safe.
743 if (Ret->isDependentType())
744 return false;
745 ExtractedFunc.ReturnType = Ret;
746 return true;
747 }
748 // FIXME: Generate new return statement if needed.
749 ExtractedFunc.ReturnType = EnclosingFunc.getParentASTContext().VoidTy;
750 return true;
751}
752
753void captureMethodInfo(NewFunction &ExtractedFunc,
754 const CXXMethodDecl *Method) {
755 ExtractedFunc.Static = Method->isStatic();
756 ExtractedFunc.Const = Method->isConst();
757 ExtractedFunc.EnclosingClass = Method->getParent();
758}
759
760// FIXME: add support for adding other function return types besides void.
761// FIXME: assign the value returned by non void extracted function.
762llvm::Expected<NewFunction> getExtractedFunction(ExtractionZone &ExtZone,
763 const SourceManager &SM,
764 const LangOptions &LangOpts) {
765 CapturedZoneInfo CapturedInfo = captureZoneInfo(ExtZone);
766 // Bail out if any break of continue exists
767 if (CapturedInfo.BrokenControlFlow)
768 return error("Cannot extract break/continue without corresponding "
769 "loop/switch statement.");
770 NewFunction ExtractedFunc(getSemicolonPolicy(ExtZone, SM, LangOpts),
771 &LangOpts);
772
773 ExtractedFunc.SyntacticDC =
774 ExtZone.EnclosingFunction->getLexicalDeclContext();
775 ExtractedFunc.SemanticDC = ExtZone.EnclosingFunction->getDeclContext();
776 ExtractedFunc.DefinitionQualifier = ExtZone.EnclosingFunction->getQualifier();
777 ExtractedFunc.Constexpr = ExtZone.EnclosingFunction->getConstexprKind();
778
779 if (const auto *Method =
780 llvm::dyn_cast<CXXMethodDecl>(ExtZone.EnclosingFunction))
781 captureMethodInfo(ExtractedFunc, Method);
782
783 if (ExtZone.EnclosingFunction->isOutOfLine()) {
784 // FIXME: Put the extracted method in a private section if it's a class or
785 // maybe in an anonymous namespace
786 const auto *FirstOriginalDecl =
787 ExtZone.EnclosingFunction->getCanonicalDecl();
788 auto DeclPos =
789 toHalfOpenFileRange(SM, LangOpts, FirstOriginalDecl->getSourceRange());
790 if (!DeclPos)
791 return error("Declaration is inside a macro");
792 ExtractedFunc.ForwardDeclarationPoint = DeclPos->getBegin();
793 ExtractedFunc.ForwardDeclarationSyntacticDC = ExtractedFunc.SemanticDC;
794 }
795
796 ExtractedFunc.BodyRange = ExtZone.ZoneRange;
797 ExtractedFunc.DefinitionPoint = ExtZone.getInsertionPoint();
798
799 ExtractedFunc.CallerReturnsValue = CapturedInfo.AlwaysReturns;
800 if (!createParameters(ExtractedFunc, CapturedInfo) ||
801 !generateReturnProperties(ExtractedFunc, *ExtZone.EnclosingFunction,
802 CapturedInfo))
803 return error("Too complex to extract.");
804 return ExtractedFunc;
805}
806
807class ExtractFunction : public Tweak {
808public:
809 const char *id() const final;
810 bool prepare(const Selection &Inputs) override;
811 Expected<Effect> apply(const Selection &Inputs) override;
812 std::string title() const override { return "Extract to function"; }
813 llvm::StringLiteral kind() const override {
814 return CodeAction::REFACTOR_KIND;
815 }
816
817private:
818 ExtractionZone ExtZone;
819};
820
821REGISTER_TWEAK(ExtractFunction)
822tooling::Replacement replaceWithFuncCall(const NewFunction &ExtractedFunc,
823 const SourceManager &SM,
824 const LangOptions &LangOpts) {
825 std::string FuncCall = ExtractedFunc.renderCall();
826 return tooling::Replacement(
827 SM, CharSourceRange(ExtractedFunc.BodyRange, false), FuncCall, LangOpts);
828}
829
830tooling::Replacement createFunctionDefinition(const NewFunction &ExtractedFunc,
831 const SourceManager &SM) {
832 FunctionDeclKind DeclKind = InlineDefinition;
833 if (ExtractedFunc.ForwardDeclarationPoint)
834 DeclKind = OutOfLineDefinition;
835 std::string FunctionDef = ExtractedFunc.renderDeclaration(
836 DeclKind, *ExtractedFunc.SemanticDC, *ExtractedFunc.SyntacticDC, SM);
837
838 return tooling::Replacement(SM, ExtractedFunc.DefinitionPoint, 0,
839 FunctionDef);
840}
841
842tooling::Replacement createForwardDeclaration(const NewFunction &ExtractedFunc,
843 const SourceManager &SM) {
844 std::string FunctionDecl = ExtractedFunc.renderDeclaration(
845 ForwardDeclaration, *ExtractedFunc.SemanticDC,
846 *ExtractedFunc.ForwardDeclarationSyntacticDC, SM);
847 SourceLocation DeclPoint = *ExtractedFunc.ForwardDeclarationPoint;
848
849 return tooling::Replacement(SM, DeclPoint, 0, FunctionDecl);
850}
851
852// Returns true if ExtZone contains any ReturnStmts.
853bool hasReturnStmt(const ExtractionZone &ExtZone) {
854 class ReturnStmtVisitor
855 : public clang::RecursiveASTVisitor<ReturnStmtVisitor> {
856 public:
857 bool VisitReturnStmt(ReturnStmt *Return) {
858 Found = true;
859 return false; // We found the answer, abort the scan.
860 }
861 bool Found = false;
862 };
863
864 ReturnStmtVisitor V;
865 for (const Stmt *RootStmt : ExtZone.RootStmts) {
866 V.TraverseStmt(const_cast<Stmt *>(RootStmt));
867 if (V.Found)
868 break;
869 }
870 return V.Found;
871}
872
873bool ExtractFunction::prepare(const Selection &Inputs) {
874 const LangOptions &LangOpts = Inputs.AST->getLangOpts();
875 if (!LangOpts.CPlusPlus)
876 return false;
877 const Node *CommonAnc = Inputs.ASTSelection.commonAncestor();
878 const SourceManager &SM = Inputs.AST->getSourceManager();
879 auto MaybeExtZone = findExtractionZone(CommonAnc, SM, LangOpts);
880 if (!MaybeExtZone ||
881 (hasReturnStmt(*MaybeExtZone) && !alwaysReturns(*MaybeExtZone)))
882 return false;
883
884 // FIXME: Get rid of this check once we support hoisting.
885 if (MaybeExtZone->requiresHoisting(SM, Inputs.AST->getHeuristicResolver()))
886 return false;
887
888 ExtZone = std::move(*MaybeExtZone);
889 return true;
890}
891
892Expected<Tweak::Effect> ExtractFunction::apply(const Selection &Inputs) {
893 const SourceManager &SM = Inputs.AST->getSourceManager();
894 const LangOptions &LangOpts = Inputs.AST->getLangOpts();
895 auto ExtractedFunc = getExtractedFunction(ExtZone, SM, LangOpts);
896 // FIXME: Add more types of errors.
897 if (!ExtractedFunc)
898 return ExtractedFunc.takeError();
899 tooling::Replacements Edit;
900 if (auto Err = Edit.add(createFunctionDefinition(*ExtractedFunc, SM)))
901 return std::move(Err);
902 if (auto Err = Edit.add(replaceWithFuncCall(*ExtractedFunc, SM, LangOpts)))
903 return std::move(Err);
904
905 if (auto FwdLoc = ExtractedFunc->ForwardDeclarationPoint) {
906 // If the fwd-declaration goes in the same file, merge into Replacements.
907 // Otherwise it needs to be a separate file edit.
908 if (SM.isWrittenInSameFile(ExtractedFunc->DefinitionPoint, *FwdLoc)) {
909 if (auto Err = Edit.add(createForwardDeclaration(*ExtractedFunc, SM)))
910 return std::move(Err);
911 } else {
912 auto MultiFileEffect = Effect::mainFileEdit(SM, std::move(Edit));
913 if (!MultiFileEffect)
914 return MultiFileEffect.takeError();
915
916 tooling::Replacements OtherEdit(
917 createForwardDeclaration(*ExtractedFunc, SM));
918 if (auto PathAndEdit =
919 Tweak::Effect::fileEdit(SM, SM.getFileID(*FwdLoc), OtherEdit))
920 MultiFileEffect->ApplyEdits.try_emplace(PathAndEdit->first,
921 PathAndEdit->second);
922 else
923 return PathAndEdit.takeError();
924 return MultiFileEffect;
925 }
926 }
927 return Effect::mainFileEdit(SM, std::move(Edit));
928}
929
930} // namespace
931} // namespace clangd
932} // namespace clang
#define REGISTER_TWEAK(Subclass)
Definition Tweak.h:129
llvm::Error error(std::error_code, std::string &&)
Definition Logger.cpp:80
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
Definition AST.cpp:45
@ Info
An information message.
Definition Protocol.h:738
std::optional< SourceRange > toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R)
Turns a token range into a half-open range and checks its correctness.
std::string printType(const QualType QT, const DeclContext &CurContext, const llvm::StringRef Placeholder, bool FullyQualify)
Returns a QualType as string.
Definition AST.cpp:417
void findExplicitReferences(const Stmt *S, llvm::function_ref< void(ReferenceLoc)> Out, const HeuristicResolver *Resolver)
Recursively traverse S and report all references explicitly written in the code.
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R)
Returns the source code covered by the source range.
bool operator<(const Ref &L, const Ref &R)
Definition Ref.h:98
@ Parameter
An inlay hint that is for a parameter.
Definition Protocol.h:1679
cppcoreguidelines::ProBoundsAvoidUncheckedContainerAccess P
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//