clang  15.0.0git
MIGChecker.cpp
Go to the documentation of this file.
1 //== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines MIGChecker, a Mach Interface Generator calling convention
10 // checker. Namely, in MIG callback implementation the following rules apply:
11 // - When a server routine returns an error code that represents success, it
12 // must take ownership of resources passed to it (and eventually release
13 // them).
14 // - Additionally, when returning success, all out-parameters must be
15 // initialized.
16 // - When it returns any other error code, it must not take ownership,
17 // because the message and its out-of-line parameters will be destroyed
18 // by the client that called the function.
19 // For now we only check the last rule, as its violations lead to dangerous
20 // use-after-free exploits.
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "clang/AST/Attr.h"
25 #include "clang/Analysis/AnyCall.h"
33 
34 using namespace clang;
35 using namespace ento;
36 
37 namespace {
/// Path-sensitive checker for the MIG (Mach Interface Generator) calling
/// convention. It subscribes to post-call, pre-return-statement and
/// end-of-function events; the latter two are both forwarded to
/// checkReturnAux().
38 class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>,
39  check::EndFunction> {
40  BugType BT{this, "Use-after-free (MIG calling convention violation)",
 // NOTE(review): the remainder of this initializer (original line 41) is
 // missing from this listing; presumably it passes a bug category and closes
 // the braces -- confirm against the upstream source.
42 
43  // The checker knows that an out-of-line object is deallocated if it is
44  // passed as an argument to one of these functions. If this object is
45  // additionally an argument of a MIG routine, the checker keeps track of that
46  // information and issues a warning when an error is returned from the
47  // respective routine.
 // Each entry pairs a call description with the zero-based index of the
 // argument that is treated as deallocated.
48  std::vector<std::pair<CallDescription, unsigned>> Deallocators = {
49 #define CALL(required_args, deallocated_arg, ...) \
50  {{{__VA_ARGS__}, required_args}, deallocated_arg}
51  // E.g., if the checker sees a C function 'vm_deallocate' that has
52  // exactly 3 parameters, it knows
53  // that argument #1 (starting from 0, i.e. the second argument) is going
54  // to be consumed in the sense of the MIG consume-on-success convention.
55  CALL(3, 1, "vm_deallocate"),
56  CALL(3, 1, "mach_vm_deallocate"),
57  CALL(2, 0, "mig_deallocate"),
58  CALL(2, 1, "mach_port_deallocate"),
59  CALL(1, 0, "device_deallocate"),
60  CALL(1, 0, "iokit_remove_connect_reference"),
61  CALL(1, 0, "iokit_remove_reference"),
62  CALL(1, 0, "iokit_release_port"),
63  CALL(1, 0, "ipc_port_release"),
64  CALL(1, 0, "ipc_port_release_sonce"),
65  CALL(1, 0, "ipc_voucher_attr_control_release"),
66  CALL(1, 0, "ipc_voucher_release"),
67  CALL(1, 0, "lock_set_dereference"),
68  CALL(1, 0, "memory_object_control_deallocate"),
69  CALL(1, 0, "pset_deallocate"),
70  CALL(1, 0, "semaphore_dereference"),
71  CALL(1, 0, "space_deallocate"),
72  CALL(1, 0, "space_inspect_deallocate"),
73  CALL(1, 0, "task_deallocate"),
74  CALL(1, 0, "task_inspect_deallocate"),
75  CALL(1, 0, "task_name_deallocate"),
76  CALL(1, 0, "thread_deallocate"),
77  CALL(1, 0, "thread_inspect_deallocate"),
78  CALL(1, 0, "upl_deallocate"),
79  CALL(1, 0, "vm_map_deallocate"),
80  // E.g., if the checker sees a method 'releaseAsyncReference64()' that is
81  // defined on class 'IOUserClient' that takes exactly 1 argument, it knows
82  // that the argument is going to be consumed in the sense of the MIG
83  // consume-on-success convention.
84  CALL(1, 0, "IOUserClient", "releaseAsyncReference64"),
85  CALL(1, 0, "IOUserClient", "releaseNotificationPort"),
86 #undef CALL
87  };
88 
 // Describes a call to 'os_ref_retain' with one argument. checkPostCall()
 // uses a match against it to record the originating parameter as
 // reference-counted, which suppresses the deallocation tracking for it.
89  CallDescription OsRefRetain{"os_ref_retain", 1};
90 
 // Shared implementation behind checkPreStmt() and checkEndFunction().
 // RS may be null; the definition checks for that before using it.
91  void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const;
92 
93 public:
 // Runs after every evaluated call; see the out-of-line definition.
94  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
95 
96  // HACK: We're making two attempts to find the bug: checkEndFunction
97  // should normally be enough but it fails when the return value is a literal
98  // that never gets put into the Environment and ends of function with multiple
99  // returns get agglutinated across returns, preventing us from obtaining
100  // the return value. The problem is similar to https://reviews.llvm.org/D25326
101  // but now we step into it in the top-level function.
102  void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const {
103  checkReturnAux(RS, C);
104  }
105  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const {
106  checkReturnAux(RS, C);
107  }
108 
109 };
110 } // end anonymous namespace
111 
112 // A flag that says that the programmer has called a MIG destructor
113 // for at least one parameter.
// It is set to true in MIGChecker::checkPostCall() and read back in
// MIGChecker::checkReturnAux().
114 REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool)
115 // A set of parameters for which the check is suppressed because
116 // reference counting is being performed.
// Parameters are added in MIGChecker::checkPostCall() when they reach a call
// matching OsRefRetain; nothing in this file ever removes them.
117 REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *)
118 
119 static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
120  bool IncludeBaseRegions = false) {
121  // TODO: We should most likely always include base regions here.
122  SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions);
123  if (!Sym)
124  return nullptr;
125 
126  // If we optimistically assume that the MIG routine never re-uses the storage
127  // that was passed to it as arguments when it invalidates it (but at most when
128  // it assigns to parameter variables directly), this procedure correctly
129  // determines if the value was loaded from the transitive closure of MIG
130  // routine arguments in the heap.
131  while (const MemRegion *MR = Sym->getOriginRegion()) {
132  const auto *VR = dyn_cast<VarRegion>(MR);
133  if (VR && VR->hasStackParametersStorage() &&
134  VR->getStackFrame()->inTopFrame())
135  return cast<ParmVarDecl>(VR->getDecl());
136 
137  const SymbolicRegion *SR = MR->getSymbolicBase();
138  if (!SR)
139  return nullptr;
140 
141  Sym = SR->getSymbol();
142  }
143 
144  return nullptr;
145 }
146 
147 static bool isInMIGCall(CheckerContext &C) {
148  const LocationContext *LC = C.getLocationContext();
149  assert(LC && "Unknown location context");
150 
151  const StackFrameContext *SFC;
152  // Find the top frame.
153  while (LC) {
154  SFC = LC->getStackFrame();
155  LC = SFC->getParent();
156  }
157 
158  const Decl *D = SFC->getDecl();
159 
160  if (Optional<AnyCall> AC = AnyCall::forDecl(D)) {
161  // Even though there's a Sema warning when the return type of an annotated
162  // function is not a kern_return_t, this warning isn't an error, so we need
163  // an extra check here.
164  // FIXME: AnyCall doesn't support blocks yet, so they remain unchecked
165  // for now.
166  if (!AC->getReturnType(C.getASTContext())
167  .getCanonicalType()->isSignedIntegerType())
168  return false;
169  }
170 
171  if (D->hasAttr<MIGServerRoutineAttr>())
172  return true;
173 
174  // See if there's an annotated method in the superclass.
175  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
176  for (const auto *OMD: MD->overridden_methods())
177  if (OMD->hasAttr<MIGServerRoutineAttr>())
178  return true;
179 
180  return false;
181 }
182 
183 void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
184  if (OsRefRetain.matches(Call)) {
185  // If the code is doing reference counting over the parameter,
186  // it opens up an opportunity for safely calling a destructor function.
187  // TODO: We should still check for over-releases.
188  if (const ParmVarDecl *PVD =
189  getOriginParam(Call.getArgSVal(0), C, /*IncludeBaseRegions=*/true)) {
190  // We never need to clean up the program state because these are
191  // top-level parameters anyway, so they're always live.
192  C.addTransition(C.getState()->add<RefCountedParameters>(PVD));
193  }
194  return;
195  }
196 
197  if (!isInMIGCall(C))
198  return;
199 
200  auto I = llvm::find_if(Deallocators,
201  [&](const std::pair<CallDescription, unsigned> &Item) {
202  return Item.first.matches(Call);
203  });
204  if (I == Deallocators.end())
205  return;
206 
207  ProgramStateRef State = C.getState();
208  unsigned ArgIdx = I->second;
209  SVal Arg = Call.getArgSVal(ArgIdx);
210  const ParmVarDecl *PVD = getOriginParam(Arg, C);
211  if (!PVD || State->contains<RefCountedParameters>(PVD))
212  return;
213 
214  const NoteTag *T =
215  C.getNoteTag([this, PVD](PathSensitiveBugReport &BR) -> std::string {
216  if (&BR.getBugType() != &BT)
217  return "";
218  SmallString<64> Str;
219  llvm::raw_svector_ostream OS(Str);
220  OS << "Value passed through parameter '" << PVD->getName()
221  << "\' is deallocated";
222  return std::string(OS.str());
223  });
224  C.addTransition(State->set<ReleasedParameter>(true), T);
225 }
226 
227 // Returns true if V can potentially represent a "successful" kern_return_t.
228 static bool mayBeSuccess(SVal V, CheckerContext &C) {
229  ProgramStateRef State = C.getState();
230 
231  // Can V represent KERN_SUCCESS?
232  if (!State->isNull(V).isConstrainedFalse())
233  return true;
234 
235  SValBuilder &SVB = C.getSValBuilder();
236  ASTContext &ACtx = C.getASTContext();
237 
238  // Can V represent MIG_NO_REPLY?
239  static const int MigNoReply = -305;
240  V = SVB.evalEQ(C.getState(), V, SVB.makeIntVal(MigNoReply, ACtx.IntTy));
241  if (!State->isNull(V).isConstrainedTrue())
242  return true;
243 
244  // If none of the above, it's definitely an error.
245  return false;
246 }
247 
// Common handler for return statements and function ends. Emits a report when
// all of the following hold: the analysis is in the top frame, that frame is a
// MIG routine (see isInMIGCall), a return statement is available, the
// ReleasedParameter flag is set in the current state, and the returned value
// cannot be a success code (see mayBeSuccess).
248 void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const {
249  // It is very unlikely that a MIG callback will be called from anywhere
250  // within the project under analysis and the caller isn't itself a routine
251  // that follows the MIG calling convention. Therefore we're safe to believe
252  // that it's always the top frame that is of interest. There's a slight chance
253  // that the user would want to enforce the MIG calling convention upon
254  // a random routine in the middle of nowhere, but given that the convention is
255  // fairly weird and hard to follow in the first place, there's relatively
256  // little motivation to spread it this way.
257  if (!C.inTopFrame())
258  return;
259 
260  if (!isInMIGCall(C))
261  return;
262 
263  // We know that the function is non-void, but what if the return statement
264  // is not there in the code? It's not a compile error, we should not crash.
265  if (!RS)
266  return;
267 
 // Nothing to report unless a deallocation was recorded on this path.
268  ProgramStateRef State = C.getState();
269  if (!State->get<ReleasedParameter>())
270  return;
271 
 // Returning a value that may denote success is allowed by the convention.
272  SVal V = C.getSVal(RS);
273  if (mayBeSuccess(V, C))
274  return;
275 
276  ExplodedNode *N = C.generateErrorNode();
277  if (!N)
278  return;
279 
280  auto R = std::make_unique<PathSensitiveBugReport>(
281  BT,
282  "MIG callback fails with error after deallocating argument value. "
283  "This is a use-after-free vulnerability because the caller will try to "
284  "deallocate it again",
285  N);
286 
287  R->addRange(RS->getSourceRange());
 // NOTE(review): the opening of the next call (original line 288) is missing
 // from this listing; presumably it is 'bugreporter::trackExpressionValue(',
 // tracking the returned expression -- confirm against the upstream source.
289  N, RS->getRetValue(), *R,
290  {bugreporter::TrackingKind::Thorough, /*EnableNullFPSuppression=*/false});
291  C.emitReport(std::move(R));
292 }
293 
// Registers MIGChecker with the given checker manager.
294 void ento::registerMIGChecker(CheckerManager &Mgr) {
295  Mgr.registerChecker<MIGChecker>();
296 }
297 
// Always returns true; the manager argument is not consulted.
298 bool ento::shouldRegisterMIGChecker(const CheckerManager &mgr) {
299  return true;
300 }
clang::ReturnStmt::getRetValue
Expr * getRetValue()
Definition: Stmt.h:2797
clang::LocationContext
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Definition: AnalysisDeclContext.h:215
CallDescription.h
clang::LocationContext::getStackFrame
const StackFrameContext * getStackFrame() const
Definition: AnalysisDeclContext.cpp:463
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::Decl::hasAttr
bool hasAttr() const
Definition: DeclBase.h:549
clang::Stmt::getSourceRange
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:324
clang::ento::ProgramStateRef
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
Definition: ProgramState_Fwd.h:37
Attr.h
AttributeLangSupport::C
@ C
Definition: SemaDeclAttr.cpp:55
clang::AnyCall::forDecl
static Optional< AnyCall > forDecl(const Decl *D)
If D is a callable (Objective-C method or a function), return a constructed AnyCall object.
Definition: AnyCall.h:133
clang::ParmVarDecl
Represents a parameter to a function.
Definition: Decl.h:1680
clang::ento::SymbolRef
const SymExpr * SymbolRef
Definition: SymExpr.h:111
llvm::Optional
Definition: LLVM.h:40
clang::StackFrameContext
It represents a stack frame of the call stack (based on CallEvent).
Definition: AnalysisDeclContext.h:299
clang::index::SymbolRole::Call
@ Call
REGISTER_TRAIT_WITH_PROGRAMSTATE
#define REGISTER_TRAIT_WITH_PROGRAMSTATE(Name, Type)
Declares a program state trait for type Type called Name, and introduce a type named NameTy.
Definition: ProgramStateTrait.h:34
CallEvent.h
AnyCall.h
CALL
#define CALL(required_args, deallocated_arg,...)
V
#define V(N, I)
Definition: ASTContext.h:3176
mayBeSuccess
static bool mayBeSuccess(SVal V, CheckerContext &C)
Definition: MIGChecker.cpp:228
BuiltinCheckerRegistration.h
CheckerManager.h
clang::ASTContext
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:208
llvm::SmallString
Definition: LLVM.h:37
REGISTER_SET_WITH_PROGRAMSTATE
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Definition: ProgramStateTrait.h:112
isInMIGCall
static bool isInMIGCall(CheckerContext &C)
Definition: MIGChecker.cpp:147
clang::LocationContext::getParent
const LocationContext * getParent() const
It might return null.
Definition: AnalysisDeclContext.h:247
clang::ento::bugreporter::trackExpressionValue
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
clang::ento::categories::MemoryError
const char *const MemoryError
Definition: CommonBugCategories.cpp:20
false
#define false
Definition: stdbool.h:22
BugType.h
getOriginParam
static const ParmVarDecl * getOriginParam(SVal V, CheckerContext &C, bool IncludeBaseRegions=false)
Definition: MIGChecker.cpp:119
clang::ASTContext::IntTy
CanQualType IntTy
Definition: ASTContext.h:1105
clang::Decl
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:83
State
LineState State
Definition: UnwrappedLineFormatter.cpp:1126
CheckerContext.h
Checker.h
clang
Definition: CalledOnceCheck.h:17
clang::LocationContext::getDecl
const Decl * getDecl() const
Definition: AnalysisDeclContext.h:251
clang::ento::ObjKind::OS
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
clang::ReturnStmt
ReturnStmt - This represents a return, optionally of an expression: return; return 4;.
Definition: Stmt.h:2764
clang::NamedDecl::getName
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:274