clang 17.0.0git
UnsafeBufferUsage.cpp
Go to the documentation of this file.
1//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "clang/AST/Decl.h"
13#include "clang/Lex/Lexer.h"
15#include "llvm/ADT/SmallVector.h"
16#include <memory>
17#include <optional>
18#include <sstream>
19#include <queue>
20
21using namespace llvm;
22using namespace clang;
23using namespace ast_matchers;
24
25namespace clang::ast_matchers {
26// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
27// except for those belonging to a different callable of "n".
29 : public RecursiveASTVisitor<MatchDescendantVisitor> {
30public:
32
33 // Creates an AST visitor that matches `Matcher` on all
34 // descendants of a given node "n" except for the ones
35 // belonging to a different callable of "n".
36 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
37 internal::ASTMatchFinder *Finder,
38 internal::BoundNodesTreeBuilder *Builder,
39 internal::ASTMatchFinder::BindKind Bind,
40 const bool ignoreUnevaluatedContext)
41 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
42 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {}
43
44 // Returns true if a match is found in a subtree of `DynNode`, which belongs
45 // to the same callable of `DynNode`.
46 bool findMatch(const DynTypedNode &DynNode) {
47 Matches = false;
48 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
49 TraverseStmt(const_cast<Stmt *>(StmtNode));
50 *Builder = ResultBindings;
51 return Matches;
52 }
53 return false;
54 }
55
56 // The following are overriding methods from the base visitor class.
57 // They are public only to allow CRTP to work. They are *not *part
58 // of the public API of this class.
59
60 // For the matchers so far used in safe buffers, we only need to match
61 // `Stmt`s. To override more as needed.
62
64 if (!Node)
65 return true;
66 if (!match(*Node))
67 return false;
68 // To skip callables:
69 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
70 return true;
71 // Traverse descendants
73 }
74
76 // These are unevaluated, except the result expression.
77 if(ignoreUnevaluatedContext)
78 return TraverseStmt(Node->getResultExpr());
79 return VisitorBase::TraverseGenericSelectionExpr(Node);
80 }
81
83 // Unevaluated context.
84 if(ignoreUnevaluatedContext)
85 return true;
86 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
87 }
88
90 // Unevaluated context.
91 if(ignoreUnevaluatedContext)
92 return true;
93 return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
94 }
95
97 // Unevaluated context.
98 if(ignoreUnevaluatedContext)
99 return true;
100 return VisitorBase::TraverseDecltypeTypeLoc(Node);
101 }
102
104 // Unevaluated context.
105 if(ignoreUnevaluatedContext)
106 return true;
107 return VisitorBase::TraverseCXXNoexceptExpr(Node);
108 }
109
111 // Unevaluated context.
112 if(ignoreUnevaluatedContext)
113 return true;
114 return VisitorBase::TraverseCXXTypeidExpr(Node);
115 }
116
117 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
118 if (!Node)
119 return true;
120 if (!match(*Node))
121 return false;
122 // To skip callables:
123 if (isa<LambdaExpr>(Node))
124 return true;
126 }
127
128 bool shouldVisitTemplateInstantiations() const { return true; }
130 // TODO: let's ignore implicit code for now
131 return false;
132 }
133
134private:
135 // Sets 'Matched' to true if 'Matcher' matches 'Node'
136 //
137 // Returns 'true' if traversal should continue after this function
138 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
139 template <typename T> bool match(const T &Node) {
140 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
141
142 if (Matcher->matches(DynTypedNode::create(Node), Finder,
143 &RecursiveBuilder)) {
144 ResultBindings.addMatch(RecursiveBuilder);
145 Matches = true;
146 if (Bind != internal::ASTMatchFinder::BK_All)
147 return false; // Abort as soon as a match is found.
148 }
149 return true;
150 }
151
152 const internal::DynTypedMatcher *const Matcher;
153 internal::ASTMatchFinder *const Finder;
154 internal::BoundNodesTreeBuilder *const Builder;
155 internal::BoundNodesTreeBuilder ResultBindings;
156 const internal::ASTMatchFinder::BindKind Bind;
157 bool Matches;
158 bool ignoreUnevaluatedContext;
159};
160
161// Because we're dealing with raw pointers, let's define what we mean by that.
162static auto hasPointerType() {
163 return hasType(hasCanonicalType(pointerType()));
164}
165
166static auto hasArrayType() {
167 return hasType(hasCanonicalType(arrayType()));
168}
169
170AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) {
171 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
172
173 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
174 return Visitor.findMatch(DynTypedNode::create(Node));
175}
176
177AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) {
178 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
179
180 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
181 return Visitor.findMatch(DynTypedNode::create(Node));
182}
183
184// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
185AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
186 Handler) {
187 return !Handler->isSafeBufferOptOut(Node.getBeginLoc());
188}
189
190AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
191 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
192}
193
194// Matches a `UnaryOperator` whose operator is pre-increment:
196 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc;
197}
198
199// Returns a matcher that matches any expression 'e' such that `innerMatcher`
200// matches 'e' and 'e' is in an Unspecified Lvalue Context.
201static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
202 // clang-format off
203 return
204 expr(anyOf(
206 hasCastKind(CastKind::CK_LValueToRValue),
207 castSubExpr(innerMatcher)),
210 hasLHS(innerMatcher)
211 )
212 ));
213// clang-format off
214}
215
216
217// Returns a matcher that matches any expression `e` such that `InnerMatcher`
218// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
219static internal::Matcher<Stmt>
220isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
221 // A UPC can be
222 // 1. an argument of a function call (except the callee has [[unsafe_...]]
223 // attribute), or
224 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
225 // 3. the operand of a comparator operation; or
226 // 4. the operand of a pointer subtraction operation
227 // (i.e., computing the distance between two pointers); or ...
228
229 auto CallArgMatcher =
230 callExpr(forEachArgumentWithParam(InnerMatcher,
231 hasPointerType() /* array also decays to pointer type*/),
232 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
233
234 auto CastOperandMatcher =
235 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
236 hasCastKind(CastKind::CK_PointerToBoolean)),
237 castSubExpr(allOf(hasPointerType(), InnerMatcher)));
238
239 auto CompOperandMatcher =
240 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
241 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)),
242 hasRHS(allOf(hasPointerType(), InnerMatcher))));
243
244 // A matcher that matches pointer subtractions:
245 auto PtrSubtractionMatcher =
246 binaryOperator(hasOperatorName("-"),
247 // Note that here we need both LHS and RHS to be
248 // pointer. Then the inner matcher can match any of
249 // them:
250 allOf(hasLHS(hasPointerType()),
251 hasRHS(hasPointerType())),
252 eachOf(hasLHS(InnerMatcher),
253 hasRHS(InnerMatcher)));
254
255 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
256 PtrSubtractionMatcher));
257 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
258 // don't have to check that.)
259}
260
261// Returns a matcher that matches any expression 'e' such that `innerMatcher`
262// matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
263// 'e' isn't evaluated to an RValue). For example, consider the following code:
264// int *p = new int[4];
265// int *q = new int[4];
266// if ((p = q)) {}
267// p = q;
268// The expression `p = q` in the conditional of the `if` statement
269// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
270// in the assignment statement is in an untyped context.
271static internal::Matcher<Stmt>
272isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
273 // An unspecified context can be
274 // 1. A compound statement,
275 // 2. The body of an if statement
276 // 3. Body of a loop
277 auto CompStmt = compoundStmt(forEach(InnerMatcher));
278 auto IfStmtThen = ifStmt(hasThen(InnerMatcher));
279 auto IfStmtElse = ifStmt(hasElse(InnerMatcher));
280 // FIXME: Handle loop bodies.
281 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
282}
283} // namespace clang::ast_matchers
284
285namespace {
286// Because the analysis revolves around variables and their types, we'll need to
287// track uses of variables (aka DeclRefExprs).
288using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
289
290// Convenience typedef.
291using FixItList = SmallVector<FixItHint, 4>;
292
293// Defined below.
294class Strategy;
295} // namespace
296
297namespace {
298/// Gadget is an individual operation in the code that may be of interest to
299/// this analysis. Each (non-abstract) subclass corresponds to a specific
300/// rigid AST structure that constitutes an operation on a pointer-type object.
301/// Discovery of a gadget in the code corresponds to claiming that we understand
302/// what this part of code is doing well enough to potentially improve it.
303/// Gadgets can be warning (immediately deserving a warning) or fixable (not
304/// always deserving a warning per se, but requires our attention to identify
305/// it warrants a fixit).
306class Gadget {
307public:
308 enum class Kind {
309#define GADGET(x) x,
310#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
311 };
312
313 /// Common type of ASTMatchers used for discovering gadgets.
314 /// Useful for implementing the static matcher() methods
315 /// that are expected from all non-abstract subclasses.
316 using Matcher = decltype(stmt());
317
318 Gadget(Kind K) : K(K) {}
319
320 Kind getKind() const { return K; }
321
322 virtual bool isWarningGadget() const = 0;
323 virtual const Stmt *getBaseStmt() const = 0;
324
325 /// Returns the list of pointer-type variables on which this gadget performs
326 /// its operation. Typically, there's only one variable. This isn't a list
327 /// of all DeclRefExprs in the gadget's AST!
328 virtual DeclUseList getClaimedVarUseSites() const = 0;
329
330 virtual ~Gadget() = default;
331
332private:
333 Kind K;
334};
335
336
337/// Warning gadgets correspond to unsafe code patterns that warrants
338/// an immediate warning.
339class WarningGadget : public Gadget {
340public:
341 WarningGadget(Kind K) : Gadget(K) {}
342
343 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
344 bool isWarningGadget() const final { return true; }
345};
346
347/// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
348/// properly recognized in order to emit fixes. For example, if a raw pointer-type
349/// variable is replaced by a safe C++ container, every use of such variable must be
350/// carefully considered and possibly updated.
351class FixableGadget : public Gadget {
352public:
353 FixableGadget(Kind K) : Gadget(K) {}
354
355 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
356 bool isWarningGadget() const final { return false; }
357
358 /// Returns a fixit that would fix the current gadget according to
359 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
360 /// returns an empty list if no fixes are necessary.
361 virtual std::optional<FixItList> getFixits(const Strategy &) const {
362 return std::nullopt;
363 }
364
365 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
366 /// statement and the second element is the RHS. This two-element list represents the fact that
367 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
368 /// later to group all those variables whose types must be modified together to prevent type
369 /// mismatches.
370 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
371 getStrategyImplications() const {
372 return std::nullopt;
373 }
374};
375
376using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
377using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
378
379/// An increment of a pointer-type value is unsafe as it may run the pointer
380/// out of bounds.
381class IncrementGadget : public WarningGadget {
382 static constexpr const char *const OpTag = "op";
383 const UnaryOperator *Op;
384
385public:
386 IncrementGadget(const MatchFinder::MatchResult &Result)
387 : WarningGadget(Kind::Increment),
388 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
389
390 static bool classof(const Gadget *G) {
391 return G->getKind() == Kind::Increment;
392 }
393
394 static Matcher matcher() {
395 return stmt(unaryOperator(
396 hasOperatorName("++"),
397 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
398 ).bind(OpTag));
399 }
400
401 const UnaryOperator *getBaseStmt() const override { return Op; }
402
403 DeclUseList getClaimedVarUseSites() const override {
405 if (const auto *DRE =
406 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
407 Uses.push_back(DRE);
408 }
409
410 return std::move(Uses);
411 }
412};
413
414/// A decrement of a pointer-type value is unsafe as it may run the pointer
415/// out of bounds.
416class DecrementGadget : public WarningGadget {
417 static constexpr const char *const OpTag = "op";
418 const UnaryOperator *Op;
419
420public:
421 DecrementGadget(const MatchFinder::MatchResult &Result)
422 : WarningGadget(Kind::Decrement),
423 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
424
425 static bool classof(const Gadget *G) {
426 return G->getKind() == Kind::Decrement;
427 }
428
429 static Matcher matcher() {
430 return stmt(unaryOperator(
431 hasOperatorName("--"),
432 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
433 ).bind(OpTag));
434 }
435
436 const UnaryOperator *getBaseStmt() const override { return Op; }
437
438 DeclUseList getClaimedVarUseSites() const override {
439 if (const auto *DRE =
440 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
441 return {DRE};
442 }
443
444 return {};
445 }
446};
447
448/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
449/// it doesn't have any bounds checks for the array.
450class ArraySubscriptGadget : public WarningGadget {
451 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
452 const ArraySubscriptExpr *ASE;
453
454public:
455 ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
456 : WarningGadget(Kind::ArraySubscript),
457 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {}
458
459 static bool classof(const Gadget *G) {
460 return G->getKind() == Kind::ArraySubscript;
461 }
462
463 static Matcher matcher() {
464 // FIXME: What if the index is integer literal 0? Should this be
465 // a safe gadget in this case?
466 // clang-format off
468 hasBase(ignoringParenImpCasts(
470 unless(hasIndex(integerLiteral(equals(0)))))
471 .bind(ArraySubscrTag));
472 // clang-format on
473 }
474
475 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
476
477 DeclUseList getClaimedVarUseSites() const override {
478 if (const auto *DRE =
479 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
480 return {DRE};
481 }
482
483 return {};
484 }
485};
486
487/// A pointer arithmetic expression of one of the forms:
488/// \code
489/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
490/// \endcode
491class PointerArithmeticGadget : public WarningGadget {
492 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
493 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
494 const BinaryOperator *PA; // pointer arithmetic expression
495 const Expr *Ptr; // the pointer expression in `PA`
496
497public:
498 PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
499 : WarningGadget(Kind::PointerArithmetic),
500 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)),
501 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {}
502
503 static bool classof(const Gadget *G) {
504 return G->getKind() == Kind::PointerArithmetic;
505 }
506
507 static Matcher matcher() {
508 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType()));
509 auto PtrAtRight =
510 allOf(hasOperatorName("+"),
511 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
512 hasLHS(HasIntegerType));
513 auto PtrAtLeft =
514 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"),
515 hasOperatorName("+="), hasOperatorName("-=")),
516 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)),
517 hasRHS(HasIntegerType));
518
519 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight))
520 .bind(PointerArithmeticTag));
521 }
522
523 const Stmt *getBaseStmt() const override { return PA; }
524
525 DeclUseList getClaimedVarUseSites() const override {
526 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
527 return {DRE};
528 }
529
530 return {};
531 }
532 // FIXME: pointer adding zero should be fine
533 // FIXME: this gadge will need a fix-it
534};
535
536/// A pointer assignment expression of the form:
537/// \code
538/// p = q;
539/// \endcode
540class PointerAssignmentGadget : public FixableGadget {
541private:
542 static constexpr const char *const PointerAssignmentTag = "ptrAssign";
543 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
544 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
545 const BinaryOperator *PA; // pointer arithmetic expression
546 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA`
547 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA`
548
549public:
550 PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
551 : FixableGadget(Kind::PointerAssignment),
552 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerAssignmentTag)),
553 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
554 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
555
556 static bool classof(const Gadget *G) {
557 return G->getKind() == Kind::PointerAssignment;
558 }
559
560 static Matcher matcher() {
561 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
562 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(),
563 to(varDecl())).
564 bind(PointerAssignRHSTag))),
566 to(varDecl())).
567 bind(PointerAssignLHSTag))));
568
569 //FIXME: Handle declarations at assignments
570 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
571 }
572
573 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
574
575 virtual const Stmt *getBaseStmt() const override { return PA; }
576
577 virtual DeclUseList getClaimedVarUseSites() const override {
578 return DeclUseList{PtrLHS, PtrRHS};
579 }
580
581 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
582 getStrategyImplications() const override {
583 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()),
584 cast<VarDecl>(PtrRHS->getDecl()));
585 }
586};
587
588/// A call of a function or method that performs unchecked buffer operations
589/// over one of its pointer parameters.
590class UnsafeBufferUsageAttrGadget : public WarningGadget {
591 constexpr static const char *const OpTag = "call_expr";
592 const CallExpr *Op;
593
594public:
595 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
596 : WarningGadget(Kind::UnsafeBufferUsageAttr),
597 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {}
598
599 static bool classof(const Gadget *G) {
600 return G->getKind() == Kind::UnsafeBufferUsageAttr;
601 }
602
603 static Matcher matcher() {
604 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
605 .bind(OpTag));
606 }
607 const Stmt *getBaseStmt() const override { return Op; }
608
609 DeclUseList getClaimedVarUseSites() const override { return {}; }
610};
611
612// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
613// Context (see `isInUnspecifiedLvalueContext`).
614// Note here `[]` is the built-in subscript operator.
615class ULCArraySubscriptGadget : public FixableGadget {
616private:
617 static constexpr const char *const ULCArraySubscriptTag =
618 "ArraySubscriptUnderULC";
620
621public:
622 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
623 : FixableGadget(Kind::ULCArraySubscript),
624 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
625 assert(Node != nullptr && "Expecting a non-null matching result");
626 }
627
628 static bool classof(const Gadget *G) {
629 return G->getKind() == Kind::ULCArraySubscript;
630 }
631
632 static Matcher matcher() {
633 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
634 auto BaseIsArrayOrPtrDRE =
635 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr)));
636 auto Target =
637 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag);
638
639 return expr(isInUnspecifiedLvalueContext(Target));
640 }
641
642 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
643
644 virtual const Stmt *getBaseStmt() const override { return Node; }
645
646 virtual DeclUseList getClaimedVarUseSites() const override {
647 if (const auto *DRE =
648 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
649 return {DRE};
650 }
651 return {};
652 }
653};
654
655// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
656// unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
657// fixit of the form `UPC(DRE.data())`.
658class UPCStandalonePointerGadget : public FixableGadget {
659private:
660 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
661 const DeclRefExpr *Node;
662
663public:
664 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result)
665 : FixableGadget(Kind::UPCStandalonePointer),
666 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) {
667 assert(Node != nullptr && "Expecting a non-null matching result");
668 }
669
670 static bool classof(const Gadget *G) {
671 return G->getKind() == Kind::UPCStandalonePointer;
672 }
673
674 static Matcher matcher() {
675 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
676 auto target = expr(
677 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr, to(varDecl()))).bind(DeclRefExprTag)));
678 return stmt(isInUnspecifiedPointerContext(target));
679 }
680
681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
682
683 virtual const Stmt *getBaseStmt() const override { return Node; }
684
685 virtual DeclUseList getClaimedVarUseSites() const override {
686 return {Node};
687 }
688};
689
690class PointerDereferenceGadget : public FixableGadget {
691 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
692 static constexpr const char *const OperatorTag = "op";
693
694 const DeclRefExpr *BaseDeclRefExpr = nullptr;
695 const UnaryOperator *Op = nullptr;
696
697public:
698 PointerDereferenceGadget(const MatchFinder::MatchResult &Result)
699 : FixableGadget(Kind::PointerDereference),
700 BaseDeclRefExpr(
701 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
702 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {}
703
704 static bool classof(const Gadget *G) {
705 return G->getKind() == Kind::PointerDereference;
706 }
707
708 static Matcher matcher() {
709 auto Target =
711 hasOperatorName("*"),
712 has(expr(ignoringParenImpCasts(
713 declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag)))))
714 .bind(OperatorTag);
715
716 return expr(isInUnspecifiedLvalueContext(Target));
717 }
718
719 DeclUseList getClaimedVarUseSites() const override {
720 return {BaseDeclRefExpr};
721 }
722
723 virtual const Stmt *getBaseStmt() const final { return Op; }
724
725 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
726};
727
728// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
729// Context (see `isInUnspecifiedPointerContext`).
730// Note here `[]` is the built-in subscript operator.
731class UPCAddressofArraySubscriptGadget : public FixableGadget {
732private:
733 static constexpr const char *const UPCAddressofArraySubscriptTag =
734 "AddressofArraySubscriptUnderUPC";
735 const UnaryOperator *Node; // the `&DRE[any]` node
736
737public:
738 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
739 : FixableGadget(Kind::ULCArraySubscript),
740 Node(Result.Nodes.getNodeAs<UnaryOperator>(
741 UPCAddressofArraySubscriptTag)) {
742 assert(Node != nullptr && "Expecting a non-null matching result");
743 }
744
745 static bool classof(const Gadget *G) {
746 return G->getKind() == Kind::UPCAddressofArraySubscript;
747 }
748
749 static Matcher matcher() {
750 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
751 unaryOperator(hasOperatorName("&"),
752 hasUnaryOperand(arraySubscriptExpr(
753 hasBase(ignoringParenImpCasts(declRefExpr())))))
754 .bind(UPCAddressofArraySubscriptTag)))));
755 }
756
757 virtual std::optional<FixItList> getFixits(const Strategy &) const override;
758
759 virtual const Stmt *getBaseStmt() const override { return Node; }
760
761 virtual DeclUseList getClaimedVarUseSites() const override {
762 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
763 const auto *DRE =
764 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
765 return {DRE};
766 }
767};
768} // namespace
769
770namespace {
771// An auxiliary tracking facility for the fixit analysis. It helps connect
772// declarations to its and make sure we've covered all uses with our analysis
773// before we try to fix the declaration.
774class DeclUseTracker {
775 using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
776 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
777
778 // Allocate on the heap for easier move.
779 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
780 DefMapTy Defs{};
781
782public:
783 DeclUseTracker() = default;
784 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
785 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
786 DeclUseTracker(DeclUseTracker &&) = default;
787 DeclUseTracker &operator=(DeclUseTracker &&) = default;
788
789 // Start tracking a freshly discovered DRE.
790 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
791
792 // Stop tracking the DRE as it's been fully figured out.
793 void claimUse(const DeclRefExpr *DRE) {
794 assert(Uses->count(DRE) &&
795 "DRE not found or claimed by multiple matchers!");
796 Uses->erase(DRE);
797 }
798
799 // A variable is unclaimed if at least one use is unclaimed.
800 bool hasUnclaimedUses(const VarDecl *VD) const {
801 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
802 return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
803 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
804 });
805 }
806
807 void discoverDecl(const DeclStmt *DS) {
808 for (const Decl *D : DS->decls()) {
809 if (const auto *VD = dyn_cast<VarDecl>(D)) {
810 // FIXME: Assertion temporarily disabled due to a bug in
811 // ASTMatcher internal behavior in presence of GNU
812 // statement-expressions. We need to properly investigate this
813 // because it can screw up our algorithm in other ways.
814 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
815 Defs[VD] = DS;
816 }
817 }
818 }
819
820 const DeclStmt *lookupDecl(const VarDecl *VD) const {
821 auto It = Defs.find(VD);
822 assert(It != Defs.end() && "Definition never discovered!");
823 return It->second;
824 }
825};
826} // namespace
827
828namespace {
829// Strategy is a map from variables to the way we plan to emit fixes for
830// these variables. It is figured out gradually by trying different fixes
831// for different variables depending on gadgets in which these variables
832// participate.
833class Strategy {
834public:
835 enum class Kind {
836 Wontfix, // We don't plan to emit a fixit for this variable.
837 Span, // We recommend replacing the variable with std::span.
838 Iterator, // We recommend replacing the variable with std::span::iterator.
839 Array, // We recommend replacing the variable with std::array.
840 Vector // We recommend replacing the variable with std::vector.
841 };
842
843private:
844 using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
845
846 MapTy Map;
847
848public:
849 Strategy() = default;
850 Strategy(const Strategy &) = delete; // Let's avoid copies.
851 Strategy &operator=(const Strategy &) = delete;
852 Strategy(Strategy &&) = default;
853 Strategy &operator=(Strategy &&) = default;
854
855 void set(const VarDecl *VD, Kind K) { Map[VD] = K; }
856
857 Kind lookup(const VarDecl *VD) const {
858 auto I = Map.find(VD);
859 if (I == Map.end())
860 return Kind::Wontfix;
861
862 return I->second;
863 }
864};
865} // namespace
866
867
868// Representing a pointer type expression of the form `++Ptr` in an Unspecified
869// Pointer Context (UPC):
870class UPCPreIncrementGadget : public FixableGadget {
871private:
872 static constexpr const char *const UPCPreIncrementTag =
873 "PointerPreIncrementUnderUPC";
874 const UnaryOperator *Node; // the `++Ptr` node
875
876public:
878 : FixableGadget(Kind::UPCPreIncrement),
879 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) {
880 assert(Node != nullptr && "Expecting a non-null matching result");
881 }
882
883 static bool classof(const Gadget *G) {
884 return G->getKind() == Kind::UPCPreIncrement;
885 }
886
887 static Matcher matcher() {
888 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
889 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
890 // can have the matcher be general, so long as `getClaimedVarUseSites` does
891 // things right.
892 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
893 unaryOperator(isPreInc(),
894 hasUnaryOperand(declRefExpr())
895 ).bind(UPCPreIncrementTag)))));
896 }
897
898 virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
899
900 virtual const Stmt *getBaseStmt() const override { return Node; }
901
902 virtual DeclUseList getClaimedVarUseSites() const override {
903 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())};
904 }
905};
906
907// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
908// ptr)`:
909class DerefSimplePtrArithFixableGadget : public FixableGadget {
910 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
911 static constexpr const char *const DerefOpTag = "DerefOp";
912 static constexpr const char *const AddOpTag = "AddOp";
913 static constexpr const char *const OffsetTag = "Offset";
914
915 const DeclRefExpr *BaseDeclRefExpr = nullptr;
916 const UnaryOperator *DerefOp = nullptr;
917 const BinaryOperator *AddOp = nullptr;
918 const IntegerLiteral *Offset = nullptr;
919
920public:
922 : FixableGadget(Kind::DerefSimplePtrArithFixable),
923 BaseDeclRefExpr(
924 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
925 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)),
926 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)),
927 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {}
928
929 static Matcher matcher() {
930 // clang-format off
931 auto ThePtr = expr(hasPointerType(),
932 ignoringImpCasts(declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag)));
933 auto PlusOverPtrAndInteger = expr(anyOf(
934 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr),
935 hasRHS(integerLiteral().bind(OffsetTag)))
936 .bind(AddOpTag),
937 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr),
938 hasLHS(integerLiteral().bind(OffsetTag)))
939 .bind(AddOpTag)));
941 hasOperatorName("*"),
942 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger)))
943 .bind(DerefOpTag));
944 // clang-format on
945 }
946
947 virtual std::optional<FixItList> getFixits(const Strategy &s) const final;
948
949 // TODO remove this method from FixableGadget interface
950 virtual const Stmt *getBaseStmt() const final { return nullptr; }
951
952 virtual DeclUseList getClaimedVarUseSites() const final {
953 return {BaseDeclRefExpr};
954 }
955};
956
957/// Scan the function and return a list of gadgets found with provided kits.
958static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
960 bool EmitSuggestions) {
961
962 struct GadgetFinderCallback : MatchFinder::MatchCallback {
963 FixableGadgetList FixableGadgets;
964 WarningGadgetList WarningGadgets;
965 DeclUseTracker Tracker;
966
967 void run(const MatchFinder::MatchResult &Result) override {
968 // In debug mode, assert that we've found exactly one gadget.
969 // This helps us avoid conflicts in .bind() tags.
970#if NDEBUG
971#define NEXT return
972#else
973 [[maybe_unused]] int numFound = 0;
974#define NEXT ++numFound
975#endif
976
977 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
978 Tracker.discoverUse(DRE);
979 NEXT;
980 }
981
982 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
983 Tracker.discoverDecl(DS);
984 NEXT;
985 }
986
987 // Figure out which matcher we've found, and call the appropriate
988 // subclass constructor.
989 // FIXME: Can we do this more logarithmically?
990#define FIXABLE_GADGET(name) \
991 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
992 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
993 NEXT; \
994 }
995#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
996#define WARNING_GADGET(name) \
997 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
998 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
999 NEXT; \
1000 }
1001#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1002
1003 assert(numFound >= 1 && "Gadgets not found in match result!");
1004 assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
1005 }
1006 };
1007
1008 MatchFinder M;
1009 GadgetFinderCallback CB;
1010
1011 // clang-format off
1012 M.addMatcher(
1013 stmt(
1014 forEachDescendantEvaluatedStmt(stmt(anyOf(
1015 // Add Gadget::matcher() for every gadget in the registry.
1016#define WARNING_GADGET(x) \
1017 allOf(x ## Gadget::matcher().bind(#x), \
1018 notInSafeBufferOptOut(&Handler)),
1019#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1020 // Avoid a hanging comma.
1021 unless(stmt())
1022 )))
1023 ),
1024 &CB
1025 );
1026 // clang-format on
1027
1028 if (EmitSuggestions) {
1029 // clang-format off
1030 M.addMatcher(
1031 stmt(
1032 forEachDescendantStmt(stmt(eachOf(
1033#define FIXABLE_GADGET(x) \
1034 x ## Gadget::matcher().bind(#x),
1035#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1036 // In parallel, match all DeclRefExprs so that to find out
1037 // whether there are any uncovered by gadgets.
1039 to(varDecl())).bind("any_dre"),
1040 // Also match DeclStmts because we'll need them when fixing
1041 // their underlying VarDecls that otherwise don't have
1042 // any backreferences to DeclStmts.
1043 declStmt().bind("any_ds")
1044 )))
1045 ),
1046 &CB
1047 );
1048 // clang-format on
1049 }
1050
1051 M.match(*D->getBody(), D->getASTContext());
1052
1053 // Gadgets "claim" variables they're responsible for. Once this loop finishes,
1054 // the tracker will only track DREs that weren't claimed by any gadgets,
1055 // i.e. not understood by the analysis.
1056 for (const auto &G : CB.FixableGadgets) {
1057 for (const auto *DRE : G->getClaimedVarUseSites()) {
1058 CB.Tracker.claimUse(DRE);
1059 }
1060 }
1061
1062 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
1063 std::move(CB.Tracker)};
1064}
1065
1066// Compares AST nodes by source locations.
1067template <typename NodeTy> struct CompareNode {
1068 bool operator()(const NodeTy *N1, const NodeTy *N2) const {
1069 return N1->getBeginLoc().getRawEncoding() <
1070 N2->getBeginLoc().getRawEncoding();
1071 }
1072};
1073
1075 std::map<const VarDecl *, std::set<const WarningGadget *>,
1076 // To keep keys sorted by their locations in the map so that the
1077 // order is deterministic:
1080 // These Gadgets are not related to pointer variables (e. g. temporaries).
1082};
1083
1084static WarningGadgetSets
1085groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
1086 WarningGadgetSets result;
1087 // If some gadgets cover more than one
1088 // variable, they'll appear more than once in the map.
1089 for (auto &G : AllUnsafeOperations) {
1090 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
1091
1092 bool AssociatedWithVarDecl = false;
1093 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
1094 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1095 result.byVar[VD].insert(G.get());
1096 AssociatedWithVarDecl = true;
1097 }
1098 }
1099
1100 if (!AssociatedWithVarDecl) {
1101 result.noVar.push_back(G.get());
1102 continue;
1103 }
1104 }
1105 return result;
1106}
1107
1109 std::map<const VarDecl *, std::set<const FixableGadget *>> byVar;
1110};
1111
1112static FixableGadgetSets
1113groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
1114 FixableGadgetSets FixablesForUnsafeVars;
1115 for (auto &F : AllFixableOperations) {
1116 DeclUseList DREs = F->getClaimedVarUseSites();
1117
1118 for (const DeclRefExpr *DRE : DREs) {
1119 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1120 FixablesForUnsafeVars.byVar[VD].insert(F.get());
1121 }
1122 }
1123 }
1124 return FixablesForUnsafeVars;
1125}
1126
1128 const SourceManager &SM) {
1129 // A simple interval overlap detection algorithm. Sorts all ranges by their
1130 // begin location then finds the first overlap in one pass.
1131 std::vector<const FixItHint *> All; // a copy of `FixIts`
1132
1133 for (const FixItHint &H : FixIts)
1134 All.push_back(&H);
1135 std::sort(All.begin(), All.end(),
1136 [&SM](const FixItHint *H1, const FixItHint *H2) {
1137 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(),
1138 H2->RemoveRange.getBegin());
1139 });
1140
1141 const FixItHint *CurrHint = nullptr;
1142
1143 for (const FixItHint *Hint : All) {
1144 if (!CurrHint ||
1145 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(),
1146 Hint->RemoveRange.getBegin())) {
1147 // Either to initialize `CurrHint` or `CurrHint` does not
1148 // overlap with `Hint`:
1149 CurrHint = Hint;
1150 } else
1151 // In case `Hint` overlaps the `CurrHint`, we found at least one
1152 // conflict:
1153 return true;
1154 }
1155 return false;
1156}
1157
1158std::optional<FixItList>
1159PointerAssignmentGadget::getFixits(const Strategy &S) const {
1160 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
1161 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
1162 switch (S.lookup(LeftVD)) {
1163 case Strategy::Kind::Span:
1164 if (S.lookup(RightVD) == Strategy::Kind::Span)
1165 return FixItList{};
1166 return std::nullopt;
1167 case Strategy::Kind::Wontfix:
1168 return std::nullopt;
1169 case Strategy::Kind::Iterator:
1170 case Strategy::Kind::Array:
1171 case Strategy::Kind::Vector:
1172 llvm_unreachable("unsupported strategies for FixableGadgets");
1173 }
1174 return std::nullopt;
1175}
1176
1177
1178std::optional<FixItList>
1179ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
1180 if (const auto *DRE =
1181 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
1182 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1183 switch (S.lookup(VD)) {
1184 case Strategy::Kind::Span: {
1185 // If the index has a negative constant value, we give up as no valid
1186 // fix-it can be generated:
1187 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
1188 VD->getASTContext();
1189 if (auto ConstVal = Node->getIdx()->getIntegerConstantExpr(Ctx)) {
1190 if (ConstVal->isNegative())
1191 return std::nullopt;
1192 } else if (!Node->getIdx()->getType()->isUnsignedIntegerType())
1193 return std::nullopt;
1194 // no-op is a good fix-it, otherwise
1195 return FixItList{};
1196 }
1197 case Strategy::Kind::Wontfix:
1198 case Strategy::Kind::Iterator:
1199 case Strategy::Kind::Array:
1200 case Strategy::Kind::Vector:
1201 llvm_unreachable("unsupported strategies for FixableGadgets");
1202 }
1203 }
1204 return std::nullopt;
1205}
1206
1207static std::optional<FixItList> // forward declaration
1209
1210std::optional<FixItList>
1211UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
1212 auto DREs = getClaimedVarUseSites();
1213 const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
1214
1215 switch (S.lookup(VD)) {
1216 case Strategy::Kind::Span:
1218 case Strategy::Kind::Wontfix:
1219 case Strategy::Kind::Iterator:
1220 case Strategy::Kind::Array:
1221 case Strategy::Kind::Vector:
1222 llvm_unreachable("unsupported strategies for FixableGadgets");
1223 }
1224 return std::nullopt; // something went wrong, no fix-it
1225}
1226
1227// Return the text representation of the given `APInt Val`:
1228static std::string getAPIntText(APInt Val) {
1230 Val.toString(Txt, 10, true);
1231 // APInt::toString does not add '\0' to the end of the string for us:
1232 Txt.push_back('\0');
1233 return Txt.data();
1234}
1235
1236// Return the source location of the last character of the AST `Node`.
1237template <typename NodeTy>
1238static std::optional<SourceLocation>
1239getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
1240 const LangOptions &LangOpts) {
1241 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts);
1242 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1243
1244 if (Loc.isValid())
1245 return Loc;
1246
1247 return std::nullopt;
1248}
1249
1250// Return the source location just past the last character of the AST `Node`.
1251template <typename NodeTy>
1252static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
1253 const SourceManager &SM,
1254 const LangOptions &LangOpts) {
1255 SourceLocation Loc =
1256 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts);
1257
1258 if (Loc.isValid())
1259 return Loc;
1260
1261 return std::nullopt;
1262}
1263
1264// Return text representation of an `Expr`.
1265static std::optional<StringRef> getExprText(const Expr *E,
1266 const SourceManager &SM,
1267 const LangOptions &LangOpts) {
1268 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts);
1269
1270 if (LastCharLoc)
1271 return Lexer::getSourceText(
1272 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM,
1273 LangOpts);
1274
1275 return std::nullopt;
1276}
1277
1278std::optional<FixItList>
1280 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl());
1281
1282 if (VD && s.lookup(VD) == Strategy::Kind::Span) {
1283 ASTContext &Ctx = VD->getASTContext();
1284 // std::span can't represent elements before its begin()
1285 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
1286 if (ConstVal->isNegative())
1287 return std::nullopt;
1288
1289 // note that the expr may (oddly) has multiple layers of parens
1290 // example:
1291 // *((..(pointer + 123)..))
1292 // goal:
1293 // pointer[123]
1294 // Fix-It:
1295 // remove '*('
1296 // replace ' + ' with '['
1297 // replace ')' with ']'
1298
1299 // example:
1300 // *((..(123 + pointer)..))
1301 // goal:
1302 // 123[pointer]
1303 // Fix-It:
1304 // remove '*('
1305 // replace ' + ' with '['
1306 // replace ')' with ']'
1307
1308 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
1309 const SourceManager &SM = Ctx.getSourceManager();
1310 const LangOptions &LangOpts = Ctx.getLangOpts();
1311 CharSourceRange StarWithTrailWhitespace =
1313 LHS->getBeginLoc());
1314
1315 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts);
1316 if (!LHSLocation)
1317 return std::nullopt;
1318
1319 CharSourceRange PlusWithSurroundingWhitespace =
1320 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
1321
1322 std::optional<SourceLocation> AddOpLocation =
1323 getPastLoc(AddOp, SM, LangOpts);
1324 std::optional<SourceLocation> DerefOpLocation =
1325 getPastLoc(DerefOp, SM, LangOpts);
1326
1327 if (!AddOpLocation || !DerefOpLocation)
1328 return std::nullopt;
1329
1330 CharSourceRange ClosingParenWithPrecWhitespace =
1331 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation);
1332
1333 return FixItList{
1334 {FixItHint::CreateRemoval(StarWithTrailWhitespace),
1335 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["),
1336 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}};
1337 }
1338 return std::nullopt; // something wrong or unsupported, give up
1339}
1340
1341std::optional<FixItList>
1342PointerDereferenceGadget::getFixits(const Strategy &S) const {
1343 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl());
1344 switch (S.lookup(VD)) {
1345 case Strategy::Kind::Span: {
1346 ASTContext &Ctx = VD->getASTContext();
1348 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1349 // Deletes the *operand
1351 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1));
1352 // Inserts the [0]
1353 std::optional<SourceLocation> EndOfOperand =
1354 getEndCharLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts());
1355 if (EndOfOperand) {
1356 return FixItList{{FixItHint::CreateRemoval(derefRange),
1358 (*EndOfOperand).getLocWithOffset(1), "[0]")}};
1359 }
1360 }
1361 [[fallthrough]];
1362 case Strategy::Kind::Iterator:
1363 case Strategy::Kind::Array:
1364 case Strategy::Kind::Vector:
1365 llvm_unreachable("Strategy not implemented yet!");
1366 case Strategy::Kind::Wontfix:
1367 llvm_unreachable("Invalid strategy!");
1368 }
1369
1370 return std::nullopt;
1371}
1372
1373// Generates fix-its replacing an expression of the form UPC(DRE) with
1374// `DRE.data()`
1375std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S)
1376 const {
1377 const auto VD = cast<VarDecl>(Node->getDecl());
1378 switch (S.lookup(VD)) {
1379 case Strategy::Kind::Span: {
1380 ASTContext &Ctx = VD->getASTContext();
1382 // Inserts the .data() after the DRE
1383 std::optional<SourceLocation> EndOfOperand =
1384 getPastLoc(Node, SM, Ctx.getLangOpts());
1385
1386 if (EndOfOperand)
1387 return FixItList{{FixItHint::CreateInsertion(
1388 *EndOfOperand, ".data()")}};
1389 }
1390 [[fallthrough]];
1391 case Strategy::Kind::Wontfix:
1392 case Strategy::Kind::Iterator:
1393 case Strategy::Kind::Array:
1394 case Strategy::Kind::Vector:
1395 llvm_unreachable("unsupported strategies for FixableGadgets");
1396 }
1397
1398 return std::nullopt;
1399}
1400
1401// Generates fix-its replacing an expression of the form `&DRE[e]` with
1402// `&DRE.data()[e]`:
1403static std::optional<FixItList>
1405 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
1406 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
1407 // FIXME: this `getASTContext` call is costly, we should pass the
1408 // ASTContext in:
1409 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
1410 const Expr *Idx = ArraySub->getIdx();
1411 const SourceManager &SM = Ctx.getSourceManager();
1412 const LangOptions &LangOpts = Ctx.getLangOpts();
1413 std::stringstream SS;
1414 bool IdxIsLitZero = false;
1415
1416 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
1417 if ((*ICE).isZero())
1418 IdxIsLitZero = true;
1419 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
1420 if (!DreString)
1421 return std::nullopt;
1422
1423 if (IdxIsLitZero) {
1424 // If the index is literal zero, we produce the most concise fix-it:
1425 SS << (*DreString).str() << ".data()";
1426 } else {
1427 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts);
1428 if (!IndexString)
1429 return std::nullopt;
1430
1431 SS << "&" << (*DreString).str() << ".data()"
1432 << "[" << (*IndexString).str() << "]";
1433 }
1434 return FixItList{
1436}
1437
1438
1439std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const {
1440 DeclUseList DREs = getClaimedVarUseSites();
1441
1442 if (DREs.size() != 1)
1443 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1444 // give up
1445 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
1446 if (S.lookup(VD) == Strategy::Kind::Span) {
1447 FixItList Fixes;
1448 std::stringstream SS;
1449 const Stmt *PreIncNode = getBaseStmt();
1450 StringRef varName = VD->getName();
1451 const ASTContext &Ctx = VD->getASTContext();
1452
1453 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1454 SS << "(" << varName.data() << " = " << varName.data()
1455 << ".subspan(1)).data()";
1456 std::optional<SourceLocation> PreIncLocation =
1457 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts());
1458 if (!PreIncLocation)
1459 return std::nullopt;
1460
1461 Fixes.push_back(FixItHint::CreateReplacement(
1462 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str()));
1463 return Fixes;
1464 }
1465 }
1466 return std::nullopt; // Not in the cases that we can handle for now, give up.
1467}
1468
1469
1470// For a non-null initializer `Init` of `T *` type, this function returns
1471// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
1472// to output stream.
1473// In many cases, this function cannot figure out the actual extent `S`. It
1474// then will use a place holder to replace `S` to ask users to fill `S` in. The
1475// initializer shall be used to initialize a variable of type `std::span<T>`.
1476//
1477// FIXME: Support multi-level pointers
1478//
1479// Parameters:
1480// `Init` a pointer to the initializer expression
1481// `Ctx` a reference to the ASTContext
1482static FixItList
1484 const StringRef UserFillPlaceHolder) {
1485 const SourceManager &SM = Ctx.getSourceManager();
1486 const LangOptions &LangOpts = Ctx.getLangOpts();
1487
1488 // If `Init` has a constant value that is (or equivalent to) a
1489 // NULL pointer, we use the default constructor to initialize the span
1490 // object, i.e., a `std:span` variable declaration with no initializer.
1491 // So the fix-it is just to remove the initializer.
1492 if (Init->isNullPointerConstant(
1493 std::remove_const_t<ASTContext &>(Ctx),
1494 // FIXME: Why does this function not ask for `const ASTContext
1495 // &`? It should. Maybe worth an NFC patch later.
1497 NPC_ValueDependentIsNotNull)) {
1498 std::optional<SourceLocation> InitLocation =
1499 getEndCharLoc(Init, SM, LangOpts);
1500 if (!InitLocation)
1501 return {};
1502
1503 SourceRange SR(Init->getBeginLoc(), *InitLocation);
1504
1505 return {FixItHint::CreateRemoval(SR)};
1506 }
1507
1508 FixItList FixIts{};
1509 std::string ExtentText = UserFillPlaceHolder.data();
1510 StringRef One = "1";
1511
1512 // Insert `{` before `Init`:
1513 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
1514 // Try to get the data extent. Break into different cases:
1515 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
1516 // In cases `Init` is `new T[n]` and there is no explicit cast over
1517 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
1518 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
1519 // simpler for the case where `Init` is `new T`.
1520 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
1521 if (!Ext->HasSideEffects(Ctx)) {
1522 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts);
1523 if (!ExtentString)
1524 return {};
1525 ExtentText = *ExtentString;
1526 }
1527 } else if (!CxxNew->isArray())
1528 // Although the initializer is not allocating a buffer, the pointer
1529 // variable could still be used in buffer access operations.
1530 ExtentText = One;
1531 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType(
1532 Init->IgnoreImpCasts()->getType())) {
1533 // In cases `Init` is of an array type after stripping off implicit casts,
1534 // the extent is the array size. Note that if the array size is not a
1535 // constant, we cannot use it as the extent.
1536 ExtentText = getAPIntText(CArrTy->getSize());
1537 } else {
1538 // In cases `Init` is of the form `&Var` after stripping of implicit
1539 // casts, where `&` is the built-in operator, the extent is 1.
1540 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
1541 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
1542 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
1543 ExtentText = One;
1544 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
1545 // and explicit casting, etc. etc.
1546 }
1547
1548 SmallString<32> StrBuffer{};
1549 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts);
1550
1551 if (!LocPassInit)
1552 return {};
1553
1554 StrBuffer.append(", ");
1555 StrBuffer.append(ExtentText);
1556 StrBuffer.append("}");
1557 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
1558 return FixIts;
1559}
1560
1561// For a `VarDecl` of the form `T * var (= Init)?`, this
1562// function generates a fix-it for the declaration, which re-declares `var` to
1563// be of `span<T>` type and transforms the initializer, if present, to a span
1564// constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user
1565// to fill in.
1566//
1567// FIXME: support Multi-level pointers
1568//
1569// Parameters:
1570// `D` a pointer the variable declaration node
1571// `Ctx` a reference to the ASTContext
1572// Returns:
1573// the generated fix-it
1574static FixItList fixVarDeclWithSpan(const VarDecl *D, const ASTContext &Ctx,
1575 const StringRef UserFillPlaceHolder) {
1576 const QualType &SpanEltT = D->getType()->getPointeeType();
1577 assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!");
1578
1579 FixItList FixIts{};
1580 std::optional<SourceLocation>
1581 ReplacementLastLoc; // the inclusive end location of the replacement
1582 const SourceManager &SM = Ctx.getSourceManager();
1583
1584 if (const Expr *Init = D->getInit()) {
1585 FixItList InitFixIts =
1586 populateInitializerFixItWithSpan(Init, Ctx, UserFillPlaceHolder);
1587
1588 if (InitFixIts.empty())
1589 return {};
1590
1591 // The loc right before the initializer:
1592 ReplacementLastLoc = Init->getBeginLoc().getLocWithOffset(-1);
1593 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
1594 std::make_move_iterator(InitFixIts.end()));
1595 } else
1596 ReplacementLastLoc = getEndCharLoc(D, SM, Ctx.getLangOpts());
1597
1598 // Producing fix-it for variable declaration---make `D` to be of span type:
1599 SmallString<32> Replacement;
1600 raw_svector_ostream OS(Replacement);
1601
1602 OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName();
1603
1604 if (!ReplacementLastLoc)
1605 return {};
1606
1607 FixIts.push_back(FixItHint::CreateReplacement(
1608 SourceRange{D->getBeginLoc(), *ReplacementLastLoc}, OS.str()));
1609 return FixIts;
1610}
1611
1612static FixItList fixVariableWithSpan(const VarDecl *VD,
1613 const DeclUseTracker &Tracker,
1614 const ASTContext &Ctx,
1615 UnsafeBufferUsageHandler &Handler) {
1616 const DeclStmt *DS = Tracker.lookupDecl(VD);
1617 assert(DS && "Fixing non-local variables not implemented yet!");
1618 if (!DS->isSingleDecl()) {
1619 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
1620 return {};
1621 }
1622 // Currently DS is an unused variable but we'll need it when
1623 // non-single decls are implemented, where the pointee type name
1624 // and the '*' are spread around the place.
1625 (void)DS;
1626
1627 // FIXME: handle cases where DS has multiple declarations
1628 return fixVarDeclWithSpan(VD, Ctx, Handler.getUserFillPlaceHolder());
1629}
1630
1631static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K,
1632 const DeclUseTracker &Tracker,
1633 const ASTContext &Ctx,
1634 UnsafeBufferUsageHandler &Handler) {
1635 switch (K) {
1636 case Strategy::Kind::Span: {
1637 if (VD->getType()->isPointerType() && VD->isLocalVarDecl())
1638 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
1639 return {};
1640 }
1641 case Strategy::Kind::Iterator:
1642 case Strategy::Kind::Array:
1643 case Strategy::Kind::Vector:
1644 llvm_unreachable("Strategy not implemented yet!");
1645 case Strategy::Kind::Wontfix:
1646 llvm_unreachable("Invalid strategy!");
1647 }
1648 llvm_unreachable("Unknown strategy!");
1649}
1650
1651// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
1652// `RemoveRange` of 'h' overlaps with a macro use.
1653static bool overlapWithMacro(const FixItList &FixIts) {
1654 // FIXME: For now we only check if the range (or the first token) is (part of)
1655 // a macro expansion. Ideally, we want to check for all tokens in the range.
1656 return llvm::any_of(FixIts, [](const FixItHint &Hint) {
1657 auto Range = Hint.RemoveRange;
1658 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
1659 // If the range (or the first token) is (part of) a macro expansion:
1660 return true;
1661 return false;
1662 });
1663}
1664
1665static bool impossibleToFixForVar(const FixableGadgetSets &FixablesForUnsafeVars,
1666 const Strategy &S,
1667 const VarDecl * Var) {
1668 for (const auto &F : FixablesForUnsafeVars.byVar.find(Var)->second) {
1669 std::optional<FixItList> Fixits = F->getFixits(S);
1670 if (!Fixits) {
1671 return true;
1672 }
1673 }
1674 return false;
1675}
1676
1677static std::map<const VarDecl *, FixItList>
1678getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S,
1679 const DeclUseTracker &Tracker, const ASTContext &Ctx,
1680 UnsafeBufferUsageHandler &Handler,
1681 const DefMapTy &VarGrpMap) {
1682 std::map<const VarDecl *, FixItList> FixItsForVariable;
1683 for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) {
1684 const Strategy::Kind ReplacementTypeForVD = S.lookup(VD);
1685 FixItsForVariable[VD] =
1686 fixVariable(VD, ReplacementTypeForVD, Tracker, Ctx, Handler);
1687 // If we fail to produce Fix-It for the declaration we have to skip the
1688 // variable entirely.
1689 if (FixItsForVariable[VD].empty()) {
1690 FixItsForVariable.erase(VD);
1691 continue;
1692 }
1693 bool ImpossibleToFix = false;
1695 for (const auto &F : Fixables) {
1696 std::optional<FixItList> Fixits = F->getFixits(S);
1697 if (!Fixits) {
1698 ImpossibleToFix = true;
1699 break;
1700 } else {
1701 const FixItList CorrectFixes = Fixits.value();
1702 FixItsForVD.insert(FixItsForVD.end(), CorrectFixes.begin(),
1703 CorrectFixes.end());
1704 }
1705 }
1706
1707 if (ImpossibleToFix) {
1708 FixItsForVariable.erase(VD);
1709 continue;
1710 }
1711
1712 const auto VarGroupForVD = VarGrpMap.find(VD);
1713 if (VarGroupForVD != VarGrpMap.end()) {
1714 for (const VarDecl * V : VarGroupForVD->second) {
1715 if (V == VD) {
1716 continue;
1717 }
1718 if (impossibleToFixForVar(FixablesForUnsafeVars, S, V)) {
1719 ImpossibleToFix = true;
1720 break;
1721 }
1722 }
1723
1724 if (ImpossibleToFix) {
1725 FixItsForVariable.erase(VD);
1726 for (const VarDecl * V : VarGroupForVD->second) {
1727 FixItsForVariable.erase(V);
1728 }
1729 continue;
1730 }
1731 }
1732 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
1733 FixItsForVD.begin(), FixItsForVD.end());
1734
1735 // Fix-it shall not overlap with macros or/and templates:
1736 if (overlapWithMacro(FixItsForVariable[VD]) ||
1737 clang::internal::anyConflict(FixItsForVariable[VD],
1738 Ctx.getSourceManager())) {
1739 FixItsForVariable.erase(VD);
1740 continue;
1741 }
1742 }
1743
1744 for (auto VD : FixItsForVariable) {
1745 const auto VarGroupForVD = VarGrpMap.find(VD.first);
1746 const Strategy::Kind ReplacementTypeForVD = S.lookup(VD.first);
1747 if (VarGroupForVD != VarGrpMap.end()) {
1748 for (const VarDecl * Var : VarGroupForVD->second) {
1749 if (Var == VD.first) {
1750 continue;
1751 }
1752
1753 FixItList GroupFix;
1754 if (FixItsForVariable.find(Var) == FixItsForVariable.end()) {
1755 GroupFix = fixVariable(Var, ReplacementTypeForVD, Tracker,
1756 Var->getASTContext(), Handler);
1757 } else {
1758 GroupFix = FixItsForVariable[Var];
1759 }
1760
1761 for (auto Fix : GroupFix) {
1762 FixItsForVariable[VD.first].push_back(Fix);
1763 }
1764 }
1765 }
1766 }
1767 return FixItsForVariable;
1768}
1769
1770
1771static Strategy
1773 Strategy S;
1774 for (const VarDecl *VD : UnsafeVars) {
1775 S.set(VD, Strategy::Kind::Span);
1776 }
1777 return S;
1778}
1779
1781 UnsafeBufferUsageHandler &Handler,
1782 bool EmitSuggestions) {
1783 assert(D && D->getBody());
1784 WarningGadgetSets UnsafeOps;
1785 FixableGadgetSets FixablesForAllVars;
1786
1787 auto [FixableGadgets, WarningGadgets, Tracker] =
1788 findGadgets(D, Handler, EmitSuggestions);
1789
1790 if (!EmitSuggestions) {
1791 // Our job is very easy without suggestions. Just warn about
1792 // every problematic operation and consider it done. No need to deal
1793 // with fixable gadgets, no need to group operations by variable.
1794 for (const auto &G : WarningGadgets) {
1795 Handler.handleUnsafeOperation(G->getBaseStmt(),
1796 /*IsRelatedToDecl=*/false);
1797 }
1798
1799 // This return guarantees that most of the machine doesn't run when
1800 // suggestions aren't requested.
1801 assert(FixableGadgets.size() == 0 &&
1802 "Fixable gadgets found but suggestions not requested!");
1803 return;
1804 }
1805
1806 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
1807 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets));
1808
1809 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
1810 DefMapTy VariableGroupsMap{};
1811
1812 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
1813 for (auto it = FixablesForAllVars.byVar.cbegin();
1814 it != FixablesForAllVars.byVar.cend();) {
1815 // FIXME: Support ParmVarDecl as well.
1816 if (!it->first->isLocalVarDecl() || Tracker.hasUnclaimedUses(it->first)) {
1817 it = FixablesForAllVars.byVar.erase(it);
1818 } else {
1819 ++it;
1820 }
1821 }
1822
1824 for (const auto &[VD, ignore] : FixablesForAllVars.byVar)
1825 UnsafeVars.push_back(VD);
1826
1827 // Fixpoint iteration for pointer assignments
1828 using DepMapTy = DenseMap<const VarDecl *, std::set<const VarDecl *>>;
1829 DepMapTy DependenciesMap{};
1830 DepMapTy PtrAssignmentGraph{};
1831
1832 for (auto it : FixablesForAllVars.byVar) {
1833 for (const FixableGadget *fixable : it.second) {
1834 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
1835 fixable->getStrategyImplications();
1836 if (ImplPair) {
1837 std::pair<const VarDecl *, const VarDecl *> Impl = ImplPair.value();
1838 PtrAssignmentGraph[Impl.first].insert(Impl.second);
1839 }
1840 }
1841 }
1842
1843 /*
1844 The following code does a BFS traversal of the `PtrAssignmentGraph`
1845 considering all unsafe vars as starting nodes and constructs an undirected
1846 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
1847 elimiates all variables that are unreachable from any unsafe var. In other
1848 words, this removes all dependencies that don't include any unsafe variable
1849 and consequently don't need any fixit generation.
1850 Note: A careful reader would observe that the code traverses
1851 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
1852 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
1853 achieve the same result but the one used here dramatically cuts the
1854 amount of hoops the second part of the algorithm needs to jump, given that
1855 a lot of these connections become "direct". The reader is advised not to
1856 imagine how the graph is transformed because of using `Var` instead of
1857 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
1858 and think about why it's equivalent later.
1859 */
1860 std::set<const VarDecl *> VisitedVarsDirected{};
1861 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
1862 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
1863
1864 std::queue<const VarDecl*> QueueDirected{};
1865 QueueDirected.push(Var);
1866 while(!QueueDirected.empty()) {
1867 const VarDecl* CurrentVar = QueueDirected.front();
1868 QueueDirected.pop();
1869 VisitedVarsDirected.insert(CurrentVar);
1870 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
1871 for (const VarDecl *Adj : AdjacentNodes) {
1872 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) {
1873 QueueDirected.push(Adj);
1874 }
1875 DependenciesMap[Var].insert(Adj);
1876 DependenciesMap[Adj].insert(Var);
1877 }
1878 }
1879 }
1880 }
1881
1882 // Group Connected Components for Unsafe Vars
1883 // (Dependencies based on pointer assignments)
1884 std::set<const VarDecl *> VisitedVars{};
1885 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
1886 if (VisitedVars.find(Var) == VisitedVars.end()) {
1887 std::vector<const VarDecl *> VarGroup{};
1888
1889 std::queue<const VarDecl*> Queue{};
1890 Queue.push(Var);
1891 while(!Queue.empty()) {
1892 const VarDecl* CurrentVar = Queue.front();
1893 Queue.pop();
1894 VisitedVars.insert(CurrentVar);
1895 VarGroup.push_back(CurrentVar);
1896 auto AdjacentNodes = DependenciesMap[CurrentVar];
1897 for (const VarDecl *Adj : AdjacentNodes) {
1898 if (VisitedVars.find(Adj) == VisitedVars.end()) {
1899 Queue.push(Adj);
1900 }
1901 }
1902 }
1903 for (const VarDecl * V : VarGroup) {
1904 if (UnsafeOps.byVar.find(V) != UnsafeOps.byVar.end()) {
1905 VariableGroupsMap[V] = VarGroup;
1906 }
1907 }
1908 }
1909 }
1910
1911 Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars);
1912 FixItsForVariableGroup = getFixIts(FixablesForAllVars, NaiveStrategy, Tracker,
1913 D->getASTContext(), Handler, VariableGroupsMap);
1914
1915 // FIXME Detect overlapping FixIts.
1916
1917 for (const auto &G : UnsafeOps.noVar) {
1918 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
1919 }
1920
1921 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
1922 auto FixItsIt = FixItsForVariableGroup.find(VD);
1923 Handler.handleUnsafeVariableGroup(VD, VariableGroupsMap,
1924 FixItsIt != FixItsForVariableGroup.end()
1925 ? std::move(FixItsIt->second)
1926 : FixItList{});
1927 for (const auto &G : WarningGadgets) {
1928 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);
1929 }
1930 }
1931}
#define V(N, I)
Definition: ASTContext.h:3230
BoundNodesTreeBuilder Nodes
DynTypedNode Node
#define AST_MATCHER(Type, DefineMatcher)
AST_MATCHER(Type, DefineMatcher) { ... } defines a zero parameter function named DefineMatcher() that...
#define AST_MATCHER_P(Type, DefineMatcher, ParamType, Param)
AST_MATCHER_P(Type, DefineMatcher, ParamType, Param) { ... } defines a single-parameter function name...
#define SM(sm)
Definition: Cuda.cpp:80
static Decl::Kind getKind(const Decl *D)
Definition: DeclBase.cpp:1048
unsigned Offset
Definition: Format.cpp:2794
static const Decl * getCanonicalDecl(const Decl *D)
llvm::DenseMap< Stmt *, Stmt * > MapTy
Definition: ParentMap.cpp:22
Defines the clang::Preprocessor interface.
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr)
Definition: SemaCUDA.cpp:108
static std::optional< FixItList > fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node)
static std::optional< StringRef > getExprText(const Expr *E, const SourceManager &SM, const LangOptions &LangOpts)
static FixItList fixVariableWithSpan(const VarDecl *VD, const DeclUseTracker &Tracker, const ASTContext &Ctx, UnsafeBufferUsageHandler &Handler)
static WarningGadgetSets groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations)
static FixItList fixVarDeclWithSpan(const VarDecl *D, const ASTContext &Ctx, const StringRef UserFillPlaceHolder)
#define NEXT
static std::map< const VarDecl *, FixItList > getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S, const DeclUseTracker &Tracker, const ASTContext &Ctx, UnsafeBufferUsageHandler &Handler, const DefMapTy &VarGrpMap)
static std::optional< SourceLocation > getEndCharLoc(const NodeTy *Node, const SourceManager &SM, const LangOptions &LangOpts)
#define FIXABLE_GADGET(name)
static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K, const DeclUseTracker &Tracker, const ASTContext &Ctx, UnsafeBufferUsageHandler &Handler)
static bool impossibleToFixForVar(const FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S, const VarDecl *Var)
#define WARNING_GADGET(name)
static std::optional< SourceLocation > getPastLoc(const NodeTy *Node, const SourceManager &SM, const LangOptions &LangOpts)
static std::tuple< FixableGadgetList, WarningGadgetList, DeclUseTracker > findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, bool EmitSuggestions)
Scan the function and return a list of gadgets found with provided kits.
static Strategy getNaiveStrategy(const llvm::SmallVectorImpl< const VarDecl * > &UnsafeVars)
static bool overlapWithMacro(const FixItList &FixIts)
static FixItList populateInitializerFixItWithSpan(const Expr *Init, const ASTContext &Ctx, const StringRef UserFillPlaceHolder)
static FixableGadgetSets groupFixablesByVar(FixableGadgetList &&AllFixableOperations)
static std::string getAPIntText(APInt Val)
__device__ __2f16 float bool s
DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
virtual const Stmt * getBaseStmt() const final
virtual std::optional< FixItList > getFixits(const Strategy &s) const final
virtual DeclUseList getClaimedVarUseSites() const final
virtual const Stmt * getBaseStmt() const override
virtual DeclUseList getClaimedVarUseSites() const override
static bool classof(const Gadget *G)
UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
virtual std::optional< FixItList > getFixits(const Strategy &S) const override
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
SourceManager & getSourceManager()
Definition: ASTContext.h:691
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2717
const LangOptions & getLangOpts() const
Definition: ASTContext.h:761
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2661
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:3819
Expr * getLHS() const
Definition: Expr.h:3868
Expr * getRHS() const
Definition: Expr.h:3870
Represents a C++11 noexcept expression (C++ [expr.unary.noexcept]).
Definition: ExprCXX.h:4072
A C++ typeid expression (C++ [expr.typeid]), which gets the type_info that corresponds to the supplie...
Definition: ExprCXX.h:845
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2817
CastExpr - Base class for type casts, including both implicit casts (ImplicitCastExpr) and explicit c...
Definition: Expr.h:3487
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1237
ValueDecl * getDecl()
Definition: Expr.h:1305
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Definition: Stmt.h:1315
bool isSingleDecl() const
isSingleDecl - This method returns true if this DeclStmt refers to a single Decl.
Definition: Stmt.h:1328
decl_range decls()
Definition: Stmt.h:1363
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:83
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:429
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition: DeclBase.h:1055
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:817
A dynamically typed AST node container.
SourceRange getSourceRange() const
For nodes which represent textual entities in the source code, return their SourceRange.
const T * get() const
Retrieve the stored node as type T.
static DynTypedNode create(const T &Node)
Creates a DynTypedNode from Node.
This represents one expression.
Definition: Expr.h:110
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr, bool isEvaluated=true) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3055
NullPointerConstantValueDependence
Enumeration used to describe how isNullPointerConstant() should cope with value-dependent expressions...
Definition: Expr.h:790
Annotates a diagnostic with some code that should be inserted, removed, or replaced to fix the proble...
Definition: Diagnostic.h:71
CharSourceRange RemoveRange
Code that should be replaced to correct the error.
Definition: Diagnostic.h:75
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
Definition: Diagnostic.h:134
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
Definition: Diagnostic.h:123
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
Definition: Diagnostic.h:97
Represents a C11 generic selection.
Definition: Expr.h:5686
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:82
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
Definition: Lexer.cpp:960
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
Definition: Lexer.cpp:450
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Definition: Lexer.cpp:786
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:274
A (possibly-)qualified type.
Definition: Type.h:736
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:803
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Definition: Type.h:1091
A class that does preorder or postorder depth-first traversal on the entire Clang AST and visits each...
bool TraverseStmt(Stmt *S, DataRecursionQueue *Queue=nullptr)
Recursively visit a statement or expression, by dispatching to Traverse*() based on the argument's dy...
bool TraverseDecl(Decl *D)
Recursively visit a declaration, by dispatching to Traverse*Decl() based on the argument's dynamic ty...
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:72
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:337
bool isPointerType() const
Definition: Type.h:6916
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:630
UnaryExprOrTypeTraitExpr - expression with either a type or (unevaluated) expression operand.
Definition: Expr.h:2565
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Definition: Expr.h:2181
SourceLocation getOperatorLoc() const
getOperatorLoc - Return the location of the operator.
Definition: Expr.h:2230
Expr * getSubExpr() const
Definition: Expr.h:2226
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Expr.h:2303
The interface that lets the caller handle unsafe buffer usage analysis results by overriding this cla...
virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl)=0
Invoked when an unsafe operation over raw pointers is found.
virtual void handleUnsafeVariableGroup(const VarDecl *Variable, const DefMapTy &VarGrpMap, FixItList &&Fixes)=0
Invoked when a fix is suggested against a variable.
virtual std::string getUserFillPlaceHolder(StringRef HintTextToUser="placeholder") const
Returns the text indicating that the user needs to provide input there:
QualType getType() const
Definition: Decl.h:712
Represents a variable declaration or definition.
Definition: Decl.h:913
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2203
const Expr * getInit() const
Definition: Decl.h:1325
bool isLocalVarDecl() const
Returns true for local variable declarations other than parameters.
Definition: Decl.h:1210
bool findMatch(const DynTypedNode &DynNode)
bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node)
RecursiveASTVisitor< MatchDescendantVisitor > VisitorBase
bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node)
bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node)
MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, internal::ASTMatchFinder *Finder, internal::BoundNodesTreeBuilder *Builder, internal::ASTMatchFinder::BindKind Bind, const bool ignoreUnevaluatedContext)
bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node)
bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue=nullptr)
bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node)
Called when the Match registered for it was successfully found in the AST.
virtual void run(const MatchResult &Result)=0
Called on every match by the MatchFinder.
A class to allow finding matches over the Clang AST.
void addMatcher(const DeclarationMatcher &NodeMatch, MatchCallback *Action)
Adds a matcher to execute when running over the AST.
void match(const T &Node, ASTContext &Context)
Calls the registered callbacks on all matches on the given Node.
const internal::VariadicDynCastAllOfMatcher< Decl, VarDecl > varDecl
Matches variable declarations.
const internal::VariadicDynCastAllOfMatcher< Stmt, DeclRefExpr > declRefExpr
Matches expressions that refer to declarations.
const AstTypeMatcher< EnumType > enumType
Matches enum types.
static auto isInUnspecifiedLvalueContext(internal::Matcher< Expr > innerMatcher)
const internal::VariadicOperatorMatcherFunc< 1, 1 > unless
Matches if the provided matcher does not match.
const internal::VariadicDynCastAllOfMatcher< Stmt, ImplicitCastExpr > implicitCastExpr
Matches the implicit cast nodes of Clang's AST.
const AstTypeMatcher< PointerType > pointerType
Matches pointer types, but does not match Objective-C object pointer types.
const internal::VariadicDynCastAllOfMatcher< Stmt, CallExpr > callExpr
Matches call expressions.
const internal::VariadicDynCastAllOfMatcher< Stmt, CompoundStmt > compoundStmt
Matches compound statements.
const internal::ArgumentAdaptingMatcherFunc< internal::ForEachMatcher > forEach
Matches AST nodes that have child AST nodes that match the provided matcher.
const AstTypeMatcher< ArrayType > arrayType
Matches all kinds of arrays.
const internal::VariadicDynCastAllOfMatcher< Stmt, UnaryOperator > unaryOperator
Matches unary operator expressions.
const internal::VariadicDynCastAllOfMatcher< Stmt, ArraySubscriptExpr > arraySubscriptExpr
Matches array subscript expressions.
static internal::Matcher< Stmt > isInUnspecifiedUntypedContext(internal::Matcher< Stmt > InnerMatcher)
const internal::VariadicDynCastAllOfMatcher< Stmt, BinaryOperator > binaryOperator
Matches binary operator expressions.
const internal::ArgumentAdaptingMatcherFunc< internal::HasMatcher > has
Matches AST nodes that have child AST nodes that match the provided matcher.
internal::PolymorphicMatcher< internal::ValueEqualsMatcher, void(internal::AllNodeBaseTypes), ValueT > equals(const ValueT &Value)
Matches literals that are equal to the given value of type ValueT.
Definition: ASTMatchers.h:5597
const internal::VariadicOperatorMatcherFunc< 2, std::numeric_limits< unsigned >::max()> eachOf
Matches if any of the given matchers matches.
static internal::Matcher< Stmt > isInUnspecifiedPointerContext(internal::Matcher< Stmt > InnerMatcher)
const internal::VariadicOperatorMatcherFunc< 2, std::numeric_limits< unsigned >::max()> allOf
Matches if all given matchers match.
const internal::VariadicDynCastAllOfMatcher< Decl, FunctionDecl > functionDecl
Matches function declarations.
static auto hasPointerType()
const internal::VariadicDynCastAllOfMatcher< Stmt, IntegerLiteral > integerLiteral
Matches integer literals of all sizes / encodings, e.g.
const internal::VariadicDynCastAllOfMatcher< Stmt, DeclStmt > declStmt
Matches declaration statements.
const internal::VariadicAllOfMatcher< Stmt > stmt
Matches statements.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
const internal::VariadicOperatorMatcherFunc< 2, std::numeric_limits< unsigned >::max()> anyOf
Matches if any of the given matchers matches.
const internal::VariadicFunction< internal::PolymorphicMatcher< internal::HasAnyOperatorNameMatcher, AST_POLYMORPHIC_SUPPORTED_TYPES(BinaryOperator, CXXOperatorCallExpr, CXXRewrittenBinaryOperator, UnaryOperator), std::vector< std::string > >, StringRef, internal::hasAnyOperatorNameFunc > hasAnyOperatorName
Matches operator expressions (binary or unary) that have any of the specified names.
const internal::VariadicDynCastAllOfMatcher< Stmt, CastExpr > castExpr
Matches any cast nodes of Clang's AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, IfStmt > ifStmt
Matches if statements.
bool anyConflict(const llvm::SmallVectorImpl< FixItHint > &FixIts, const SourceManager &SM)
void checkUnsafeBufferUsage(const Decl *D, UnsafeBufferUsageHandler &Handler, bool EmitSuggestions)
llvm::DenseMap< const VarDecl *, std::vector< const VarDecl * > > DefMapTy
YAML serialization mapping.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:22
bool operator()(const NodeTy *N1, const NodeTy *N2) const
std::map< const VarDecl *, std::set< const FixableGadget * > > byVar
std::map< const VarDecl *, std::set< const WarningGadget * >, CompareNode< VarDecl > > byVar
llvm::SmallVector< const WarningGadget *, 16 > noVar
Contains all information for a given match.