clang  10.0.0svn
MIGChecker.cpp
Go to the documentation of this file.
1 //== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines MIGChecker, a Mach Interface Generator calling convention
10 // checker. Namely, in MIG callback implementation the following rules apply:
11 // - When a server routine returns an error code that represents success, it
12 // must take ownership of resources passed to it (and eventually release
13 // them).
14 // - Additionally, when returning success, all out-parameters must be
15 // initialized.
16 // - When it returns any other error code, it must not take ownership,
17 // because the message and its out-of-line parameters will be destroyed
18 // by the client that called the function.
19 // For now we only check the last rule, as its violations lead to dangerous
20 // use-after-free exploits.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "clang/Analysis/AnyCall.h"
31 
32 using namespace clang;
33 using namespace ento;
34 
35 namespace {
36 class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>,
37  check::EndFunction> {
38  BugType BT{this, "Use-after-free (MIG calling convention violation)",
40 
41  // The checker knows that an out-of-line object is deallocated if it is
42  // passed as an argument to one of these functions. If this object is
43  // additionally an argument of a MIG routine, the checker keeps track of that
44  // information and issues a warning when an error is returned from the
45  // respective routine.
46  std::vector<std::pair<CallDescription, unsigned>> Deallocators = {
47 #define CALL(required_args, deallocated_arg, ...) \
48  {{{__VA_ARGS__}, required_args}, deallocated_arg}
49  // E.g., if the checker sees a C function 'vm_deallocate' that is
50  // defined on class 'IOUserClient' that has exactly 3 parameters, it knows
51  // that argument #1 (starting from 0, i.e. the second argument) is going
52  // to be consumed in the sense of the MIG consume-on-success convention.
53  CALL(3, 1, "vm_deallocate"),
54  CALL(3, 1, "mach_vm_deallocate"),
55  CALL(2, 0, "mig_deallocate"),
56  CALL(2, 1, "mach_port_deallocate"),
57  CALL(1, 0, "device_deallocate"),
58  CALL(1, 0, "iokit_remove_connect_reference"),
59  CALL(1, 0, "iokit_remove_reference"),
60  CALL(1, 0, "iokit_release_port"),
61  CALL(1, 0, "ipc_port_release"),
62  CALL(1, 0, "ipc_port_release_sonce"),
63  CALL(1, 0, "ipc_voucher_attr_control_release"),
64  CALL(1, 0, "ipc_voucher_release"),
65  CALL(1, 0, "lock_set_dereference"),
66  CALL(1, 0, "memory_object_control_deallocate"),
67  CALL(1, 0, "pset_deallocate"),
68  CALL(1, 0, "semaphore_dereference"),
69  CALL(1, 0, "space_deallocate"),
70  CALL(1, 0, "space_inspect_deallocate"),
71  CALL(1, 0, "task_deallocate"),
72  CALL(1, 0, "task_inspect_deallocate"),
73  CALL(1, 0, "task_name_deallocate"),
74  CALL(1, 0, "thread_deallocate"),
75  CALL(1, 0, "thread_inspect_deallocate"),
76  CALL(1, 0, "upl_deallocate"),
77  CALL(1, 0, "vm_map_deallocate"),
78  // E.g., if the checker sees a method 'releaseAsyncReference64()' that is
79  // defined on class 'IOUserClient' that takes exactly 1 argument, it knows
80  // that the argument is going to be consumed in the sense of the MIG
81  // consume-on-success convention.
82  CALL(1, 0, "IOUserClient", "releaseAsyncReference64"),
83  CALL(1, 0, "IOUserClient", "releaseNotificationPort"),
84 #undef CALL
85  };
86 
87  CallDescription OsRefRetain{"os_ref_retain", 1};
88 
89  void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const;
90 
91 public:
92  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
93 
94  // HACK: We're making two attempts to find the bug: checkEndFunction
95  // should normally be enough but it fails when the return value is a literal
96  // that never gets put into the Environment and ends of function with multiple
97  // returns get agglutinated across returns, preventing us from obtaining
98  // the return value. The problem is similar to https://reviews.llvm.org/D25326
99  // but now we step into it in the top-level function.
100  void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const {
101  checkReturnAux(RS, C);
102  }
103  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const {
104  checkReturnAux(RS, C);
105  }
106 
107 };
108 } // end anonymous namespace
109 
110 // A flag that says that the programmer has called a MIG destructor
111 // for at least one parameter.
112 REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool)
113 // A set of parameters for which the check is suppressed because
114 // reference counting is being performed.
115 REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *)
116 
117 static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
118  bool IncludeBaseRegions = false) {
119  // TODO: We should most likely always include base regions here.
120  SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions);
121  if (!Sym)
122  return nullptr;
123 
124  // If we optimistically assume that the MIG routine never re-uses the storage
125  // that was passed to it as arguments when it invalidates it (but at most when
126  // it assigns to parameter variables directly), this procedure correctly
127  // determines if the value was loaded from the transitive closure of MIG
128  // routine arguments in the heap.
129  while (const MemRegion *MR = Sym->getOriginRegion()) {
130  const auto *VR = dyn_cast<VarRegion>(MR);
131  if (VR && VR->hasStackParametersStorage() &&
132  VR->getStackFrame()->inTopFrame())
133  return cast<ParmVarDecl>(VR->getDecl());
134 
135  const SymbolicRegion *SR = MR->getSymbolicBase();
136  if (!SR)
137  return nullptr;
138 
139  Sym = SR->getSymbol();
140  }
141 
142  return nullptr;
143 }
144 
145 static bool isInMIGCall(CheckerContext &C) {
146  const LocationContext *LC = C.getLocationContext();
147  assert(LC && "Unknown location context");
148 
149  const StackFrameContext *SFC;
150  // Find the top frame.
151  while (LC) {
152  SFC = LC->getStackFrame();
153  LC = SFC->getParent();
154  }
155 
156  const Decl *D = SFC->getDecl();
157 
158  if (Optional<AnyCall> AC = AnyCall::forDecl(D)) {
159  // Even though there's a Sema warning when the return type of an annotated
160  // function is not a kern_return_t, this warning isn't an error, so we need
161  // an extra sanity check here.
162  // FIXME: AnyCall doesn't support blocks yet, so they remain unchecked
163  // for now.
164  if (!AC->getReturnType(C.getASTContext())
165  .getCanonicalType()->isSignedIntegerType())
166  return false;
167  }
168 
169  if (D->hasAttr<MIGServerRoutineAttr>())
170  return true;
171 
172  // See if there's an annotated method in the superclass.
173  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
174  for (const auto *OMD: MD->overridden_methods())
175  if (OMD->hasAttr<MIGServerRoutineAttr>())
176  return true;
177 
178  return false;
179 }
180 
181 void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
182  if (Call.isCalled(OsRefRetain)) {
183  // If the code is doing reference counting over the parameter,
184  // it opens up an opportunity for safely calling a destructor function.
185  // TODO: We should still check for over-releases.
186  if (const ParmVarDecl *PVD =
187  getOriginParam(Call.getArgSVal(0), C, /*IncludeBaseRegions=*/true)) {
188  // We never need to clean up the program state because these are
189  // top-level parameters anyway, so they're always live.
190  C.addTransition(C.getState()->add<RefCountedParameters>(PVD));
191  }
192  return;
193  }
194 
195  if (!isInMIGCall(C))
196  return;
197 
198  auto I = llvm::find_if(Deallocators,
199  [&](const std::pair<CallDescription, unsigned> &Item) {
200  return Call.isCalled(Item.first);
201  });
202  if (I == Deallocators.end())
203  return;
204 
205  ProgramStateRef State = C.getState();
206  unsigned ArgIdx = I->second;
207  SVal Arg = Call.getArgSVal(ArgIdx);
208  const ParmVarDecl *PVD = getOriginParam(Arg, C);
209  if (!PVD || State->contains<RefCountedParameters>(PVD))
210  return;
211 
212  const NoteTag *T = C.getNoteTag([this, PVD](BugReport &BR) -> std::string {
213  if (&BR.getBugType() != &BT)
214  return "";
215  SmallString<64> Str;
216  llvm::raw_svector_ostream OS(Str);
217  OS << "Value passed through parameter '" << PVD->getName()
218  << "\' is deallocated";
219  return OS.str();
220  });
221  C.addTransition(State->set<ReleasedParameter>(true), T);
222 }
223 
224 // Returns true if V can potentially represent a "successful" kern_return_t.
225 static bool mayBeSuccess(SVal V, CheckerContext &C) {
226  ProgramStateRef State = C.getState();
227 
228  // Can V represent KERN_SUCCESS?
229  if (!State->isNull(V).isConstrainedFalse())
230  return true;
231 
232  SValBuilder &SVB = C.getSValBuilder();
233  ASTContext &ACtx = C.getASTContext();
234 
235  // Can V represent MIG_NO_REPLY?
236  static const int MigNoReply = -305;
237  V = SVB.evalEQ(C.getState(), V, SVB.makeIntVal(MigNoReply, ACtx.IntTy));
238  if (!State->isNull(V).isConstrainedTrue())
239  return true;
240 
241  // If none of the above, it's definitely an error.
242  return false;
243 }
244 
245 void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const {
246  // It is very unlikely that a MIG callback will be called from anywhere
247  // within the project under analysis and the caller isn't itself a routine
248  // that follows the MIG calling convention. Therefore we're safe to believe
249  // that it's always the top frame that is of interest. There's a slight chance
250  // that the user would want to enforce the MIG calling convention upon
251  // a random routine in the middle of nowhere, but given that the convention is
252  // fairly weird and hard to follow in the first place, there's relatively
253  // little motivation to spread it this way.
254  if (!C.inTopFrame())
255  return;
256 
257  if (!isInMIGCall(C))
258  return;
259 
260  // We know that the function is non-void, but what if the return statement
261  // is not there in the code? It's not a compile error, we should not crash.
262  if (!RS)
263  return;
264 
265  ProgramStateRef State = C.getState();
266  if (!State->get<ReleasedParameter>())
267  return;
268 
269  SVal V = C.getSVal(RS);
270  if (mayBeSuccess(V, C))
271  return;
272 
273  ExplodedNode *N = C.generateErrorNode();
274  if (!N)
275  return;
276 
277  auto R = std::make_unique<PathSensitiveBugReport>(
278  BT,
279  "MIG callback fails with error after deallocating argument value. "
280  "This is a use-after-free vulnerability because the caller will try to "
281  "deallocate it again",
282  N);
283 
284  R->addRange(RS->getSourceRange());
285  bugreporter::trackExpressionValue(N, RS->getRetValue(), *R,
286  bugreporter::TrackingKind::Thorough, false);
287  C.emitReport(std::move(R));
288 }
289 
290 void ento::registerMIGChecker(CheckerManager &Mgr) {
291  Mgr.registerChecker<MIGChecker>();
292 }
293 
294 bool ento::shouldRegisterMIGChecker(const LangOptions &LO) {
295  return true;
296 }
static const ParmVarDecl * getOriginParam(SVal V, CheckerContext &C, bool IncludeBaseRegions=false)
Definition: MIGChecker.cpp:117
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:88
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
Represents a parameter to a function.
Definition: Decl.h:1600
const SymExpr * SymbolRef
Definition: SymExpr.h:110
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:160
LineState State
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
This class represents a description of a function call using the number of arguments and the name of ...
Definition: CallEvent.h:1058
const LocationContext * getParent() const
bool hasAttr() const
Definition: DeclBase.h:542
#define V(N, I)
Definition: ASTContext.h:2921
ReturnStmt - This represents a return, optionally of an expression: return; return 4;...
Definition: Stmt.h:2620
#define false
Definition: stdbool.h:17
#define REGISTER_TRAIT_WITH_PROGRAMSTATE(Name, Type)
Declares a program state trait for type Type called Name, and introduce a type named NameTy...
Expr * getRetValue()
Definition: Stmt.h:2653
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
static bool mayBeSuccess(SVal V, CheckerContext &C)
Definition: MIGChecker.cpp:225
const Decl * getDecl() const
Indicates that the tracking object is a descendant of a referenced-counted OSObject, used in the Darwin kernel.
const StackFrameContext * getStackFrame() const
#define CALL(required_args, deallocated_arg,...)
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:262
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:275
CanQualType IntTy
Definition: ASTContext.h:1024
static bool isInMIGCall(CheckerContext &C)
Definition: MIGChecker.cpp:145