clang  11.0.0git
MIGChecker.cpp
Go to the documentation of this file.
1 //== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines MIGChecker, a Mach Interface Generator calling convention
10 // checker. Namely, in MIG callback implementation the following rules apply:
11 // - When a server routine returns an error code that represents success, it
12 // must take ownership of resources passed to it (and eventually release
13 // them).
14 // - Additionally, when returning success, all out-parameters must be
15 // initialized.
16 // - When it returns any other error code, it must not take ownership,
17 // because the message and its out-of-line parameters will be destroyed
18 // by the client that called the function.
19 // For now we only check the last rule, as its violations lead to dangerous
20 // use-after-free exploits.
21 //
22 //===----------------------------------------------------------------------===//
23 
#include "clang/AST/Attr.h"
#include "clang/Analysis/AnyCall.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
32 
33 using namespace clang;
34 using namespace ento;
35 
36 namespace {
37 class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>,
38  check::EndFunction> {
39  BugType BT{this, "Use-after-free (MIG calling convention violation)",
41 
42  // The checker knows that an out-of-line object is deallocated if it is
43  // passed as an argument to one of these functions. If this object is
44  // additionally an argument of a MIG routine, the checker keeps track of that
45  // information and issues a warning when an error is returned from the
46  // respective routine.
47  std::vector<std::pair<CallDescription, unsigned>> Deallocators = {
48 #define CALL(required_args, deallocated_arg, ...) \
49  {{{__VA_ARGS__}, required_args}, deallocated_arg}
50  // E.g., if the checker sees a C function 'vm_deallocate' that is
51  // defined on class 'IOUserClient' that has exactly 3 parameters, it knows
52  // that argument #1 (starting from 0, i.e. the second argument) is going
53  // to be consumed in the sense of the MIG consume-on-success convention.
54  CALL(3, 1, "vm_deallocate"),
55  CALL(3, 1, "mach_vm_deallocate"),
56  CALL(2, 0, "mig_deallocate"),
57  CALL(2, 1, "mach_port_deallocate"),
58  CALL(1, 0, "device_deallocate"),
59  CALL(1, 0, "iokit_remove_connect_reference"),
60  CALL(1, 0, "iokit_remove_reference"),
61  CALL(1, 0, "iokit_release_port"),
62  CALL(1, 0, "ipc_port_release"),
63  CALL(1, 0, "ipc_port_release_sonce"),
64  CALL(1, 0, "ipc_voucher_attr_control_release"),
65  CALL(1, 0, "ipc_voucher_release"),
66  CALL(1, 0, "lock_set_dereference"),
67  CALL(1, 0, "memory_object_control_deallocate"),
68  CALL(1, 0, "pset_deallocate"),
69  CALL(1, 0, "semaphore_dereference"),
70  CALL(1, 0, "space_deallocate"),
71  CALL(1, 0, "space_inspect_deallocate"),
72  CALL(1, 0, "task_deallocate"),
73  CALL(1, 0, "task_inspect_deallocate"),
74  CALL(1, 0, "task_name_deallocate"),
75  CALL(1, 0, "thread_deallocate"),
76  CALL(1, 0, "thread_inspect_deallocate"),
77  CALL(1, 0, "upl_deallocate"),
78  CALL(1, 0, "vm_map_deallocate"),
79  // E.g., if the checker sees a method 'releaseAsyncReference64()' that is
80  // defined on class 'IOUserClient' that takes exactly 1 argument, it knows
81  // that the argument is going to be consumed in the sense of the MIG
82  // consume-on-success convention.
83  CALL(1, 0, "IOUserClient", "releaseAsyncReference64"),
84  CALL(1, 0, "IOUserClient", "releaseNotificationPort"),
85 #undef CALL
86  };
87 
88  CallDescription OsRefRetain{"os_ref_retain", 1};
89 
90  void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const;
91 
92 public:
93  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
94 
95  // HACK: We're making two attempts to find the bug: checkEndFunction
96  // should normally be enough but it fails when the return value is a literal
97  // that never gets put into the Environment and ends of function with multiple
98  // returns get agglutinated across returns, preventing us from obtaining
99  // the return value. The problem is similar to https://reviews.llvm.org/D25326
100  // but now we step into it in the top-level function.
101  void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const {
102  checkReturnAux(RS, C);
103  }
104  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const {
105  checkReturnAux(RS, C);
106  }
107 
108 };
109 } // end anonymous namespace
110 
111 // A flag that says that the programmer has called a MIG destructor
112 // for at least one parameter.
113 REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool)
114 // A set of parameters for which the check is suppressed because
115 // reference counting is being performed.
116 REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *)
117 
118 static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
119  bool IncludeBaseRegions = false) {
120  // TODO: We should most likely always include base regions here.
121  SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions);
122  if (!Sym)
123  return nullptr;
124 
125  // If we optimistically assume that the MIG routine never re-uses the storage
126  // that was passed to it as arguments when it invalidates it (but at most when
127  // it assigns to parameter variables directly), this procedure correctly
128  // determines if the value was loaded from the transitive closure of MIG
129  // routine arguments in the heap.
130  while (const MemRegion *MR = Sym->getOriginRegion()) {
131  const auto *VR = dyn_cast<VarRegion>(MR);
132  if (VR && VR->hasStackParametersStorage() &&
133  VR->getStackFrame()->inTopFrame())
134  return cast<ParmVarDecl>(VR->getDecl());
135 
136  const SymbolicRegion *SR = MR->getSymbolicBase();
137  if (!SR)
138  return nullptr;
139 
140  Sym = SR->getSymbol();
141  }
142 
143  return nullptr;
144 }
145 
146 static bool isInMIGCall(CheckerContext &C) {
147  const LocationContext *LC = C.getLocationContext();
148  assert(LC && "Unknown location context");
149 
150  const StackFrameContext *SFC;
151  // Find the top frame.
152  while (LC) {
153  SFC = LC->getStackFrame();
154  LC = SFC->getParent();
155  }
156 
157  const Decl *D = SFC->getDecl();
158 
159  if (Optional<AnyCall> AC = AnyCall::forDecl(D)) {
160  // Even though there's a Sema warning when the return type of an annotated
161  // function is not a kern_return_t, this warning isn't an error, so we need
162  // an extra sanity check here.
163  // FIXME: AnyCall doesn't support blocks yet, so they remain unchecked
164  // for now.
165  if (!AC->getReturnType(C.getASTContext())
166  .getCanonicalType()->isSignedIntegerType())
167  return false;
168  }
169 
170  if (D->hasAttr<MIGServerRoutineAttr>())
171  return true;
172 
173  // See if there's an annotated method in the superclass.
174  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
175  for (const auto *OMD: MD->overridden_methods())
176  if (OMD->hasAttr<MIGServerRoutineAttr>())
177  return true;
178 
179  return false;
180 }
181 
182 void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
183  if (Call.isCalled(OsRefRetain)) {
184  // If the code is doing reference counting over the parameter,
185  // it opens up an opportunity for safely calling a destructor function.
186  // TODO: We should still check for over-releases.
187  if (const ParmVarDecl *PVD =
188  getOriginParam(Call.getArgSVal(0), C, /*IncludeBaseRegions=*/true)) {
189  // We never need to clean up the program state because these are
190  // top-level parameters anyway, so they're always live.
191  C.addTransition(C.getState()->add<RefCountedParameters>(PVD));
192  }
193  return;
194  }
195 
196  if (!isInMIGCall(C))
197  return;
198 
199  auto I = llvm::find_if(Deallocators,
200  [&](const std::pair<CallDescription, unsigned> &Item) {
201  return Call.isCalled(Item.first);
202  });
203  if (I == Deallocators.end())
204  return;
205 
206  ProgramStateRef State = C.getState();
207  unsigned ArgIdx = I->second;
208  SVal Arg = Call.getArgSVal(ArgIdx);
209  const ParmVarDecl *PVD = getOriginParam(Arg, C);
210  if (!PVD || State->contains<RefCountedParameters>(PVD))
211  return;
212 
213  const NoteTag *T =
214  C.getNoteTag([this, PVD](PathSensitiveBugReport &BR) -> std::string {
215  if (&BR.getBugType() != &BT)
216  return "";
217  SmallString<64> Str;
218  llvm::raw_svector_ostream OS(Str);
219  OS << "Value passed through parameter '" << PVD->getName()
220  << "\' is deallocated";
221  return std::string(OS.str());
222  });
223  C.addTransition(State->set<ReleasedParameter>(true), T);
224 }
225 
226 // Returns true if V can potentially represent a "successful" kern_return_t.
227 static bool mayBeSuccess(SVal V, CheckerContext &C) {
228  ProgramStateRef State = C.getState();
229 
230  // Can V represent KERN_SUCCESS?
231  if (!State->isNull(V).isConstrainedFalse())
232  return true;
233 
234  SValBuilder &SVB = C.getSValBuilder();
235  ASTContext &ACtx = C.getASTContext();
236 
237  // Can V represent MIG_NO_REPLY?
238  static const int MigNoReply = -305;
239  V = SVB.evalEQ(C.getState(), V, SVB.makeIntVal(MigNoReply, ACtx.IntTy));
240  if (!State->isNull(V).isConstrainedTrue())
241  return true;
242 
243  // If none of the above, it's definitely an error.
244  return false;
245 }
246 
247 void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const {
248  // It is very unlikely that a MIG callback will be called from anywhere
249  // within the project under analysis and the caller isn't itself a routine
250  // that follows the MIG calling convention. Therefore we're safe to believe
251  // that it's always the top frame that is of interest. There's a slight chance
252  // that the user would want to enforce the MIG calling convention upon
253  // a random routine in the middle of nowhere, but given that the convention is
254  // fairly weird and hard to follow in the first place, there's relatively
255  // little motivation to spread it this way.
256  if (!C.inTopFrame())
257  return;
258 
259  if (!isInMIGCall(C))
260  return;
261 
262  // We know that the function is non-void, but what if the return statement
263  // is not there in the code? It's not a compile error, we should not crash.
264  if (!RS)
265  return;
266 
267  ProgramStateRef State = C.getState();
268  if (!State->get<ReleasedParameter>())
269  return;
270 
271  SVal V = C.getSVal(RS);
272  if (mayBeSuccess(V, C))
273  return;
274 
275  ExplodedNode *N = C.generateErrorNode();
276  if (!N)
277  return;
278 
279  auto R = std::make_unique<PathSensitiveBugReport>(
280  BT,
281  "MIG callback fails with error after deallocating argument value. "
282  "This is a use-after-free vulnerability because the caller will try to "
283  "deallocate it again",
284  N);
285 
286  R->addRange(RS->getSourceRange());
287  bugreporter::trackExpressionValue(N, RS->getRetValue(), *R,
288  bugreporter::TrackingKind::Thorough, false);
289  C.emitReport(std::move(R));
290 }
291 
292 void ento::registerMIGChecker(CheckerManager &Mgr) {
293  Mgr.registerChecker<MIGChecker>();
294 }
295 
296 bool ento::shouldRegisterMIGChecker(const CheckerManager &mgr) {
297  return true;
298 }
static const ParmVarDecl * getOriginParam(SVal V, CheckerContext &C, bool IncludeBaseRegions=false)
Definition: MIGChecker.cpp:118
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:89
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Represents a parameter to a function.
Definition: Decl.h:1595
const SymExpr * SymbolRef
Definition: SymExpr.h:110
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:174
LineState State
This class represents a description of a function call using the number of arguments and the name of ...
Definition: CallEvent.h:1252
const LocationContext * getParent() const
bool hasAttr() const
Definition: DeclBase.h:547
#define V(N, I)
Definition: ASTContext.h:2899
ReturnStmt - This represents a return, optionally of an expression: return; return 4;...
Definition: Stmt.h:2677
#define false
Definition: stdbool.h:17
#define REGISTER_TRAIT_WITH_PROGRAMSTATE(Name, Type)
Declares a program state trait for type Type called Name, and introduce a type named NameTy...
Expr * getRetValue()
Definition: Stmt.h:2710
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
static bool mayBeSuccess(SVal V, CheckerContext &C)
Definition: MIGChecker.cpp:227
const Decl * getDecl() const
Indicates that the tracking object is a descendant of a referenced-counted OSObject, used in the Darwin kernel.
const StackFrameContext * getStackFrame() const
It represents a stack frame of the call stack (based on CallEvent).
#define CALL(required_args, deallocated_arg,...)
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:263
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:250
CanQualType IntTy
Definition: ASTContext.h:951
static bool isInMIGCall(CheckerContext &C)
Definition: MIGChecker.cpp:146