clang API Documentation
00001 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This defines CStringChecker, which is an assortment of checks on calls 00011 // to functions in <string.h>. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "ClangSACheckers.h" 00016 #include "InterCheckerAPI.h" 00017 #include "clang/StaticAnalyzer/Core/Checker.h" 00018 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 00019 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 00020 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 00021 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 00022 #include "llvm/ADT/SmallString.h" 00023 #include "llvm/ADT/STLExtras.h" 00024 #include "llvm/ADT/StringSwitch.h" 00025 00026 using namespace clang; 00027 using namespace ento; 00028 00029 namespace { 00030 class CStringChecker : public Checker< eval::Call, 00031 check::PreStmt<DeclStmt>, 00032 check::LiveSymbols, 00033 check::DeadSymbols, 00034 check::RegionChanges 00035 > { 00036 mutable OwningPtr<BugType> BT_Null, 00037 BT_Bounds, 00038 BT_Overlap, 00039 BT_NotCString, 00040 BT_AdditionOverflow; 00041 00042 mutable const char *CurrentFunctionDescription; 00043 00044 public: 00045 /// The filter is used to filter out the diagnostics which are not enabled by 00046 /// the user. 00047 struct CStringChecksFilter { 00048 DefaultBool CheckCStringNullArg; 00049 DefaultBool CheckCStringOutOfBounds; 00050 DefaultBool CheckCStringBufferOverlap; 00051 DefaultBool CheckCStringNotNullTerm; 00052 }; 00053 00054 CStringChecksFilter Filter; 00055 00056 static void *getTag() { static int tag; return &tag; } 00057 00058 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 00059 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 00060 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 00061 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 00062 bool wantsRegionChangeUpdate(ProgramStateRef state) const; 00063 00064 ProgramStateRef 00065 checkRegionChanges(ProgramStateRef state, 00066 const StoreManager::InvalidatedSymbols *, 00067 ArrayRef<const MemRegion *> ExplicitRegions, 00068 ArrayRef<const MemRegion *> Regions, 00069 const CallOrObjCMessage *Call) const; 00070 00071 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 00072 const CallExpr *) const; 00073 00074 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 00075 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 00076 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 00077 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 00078 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 00079 ProgramStateRef state, 00080 const Expr *Size, 00081 const Expr *Source, 00082 const Expr *Dest, 00083 bool Restricted = false, 00084 bool IsMempcpy = false) const; 00085 00086 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 00087 00088 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 00089 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 00090 void evalstrLengthCommon(CheckerContext &C, 00091 const CallExpr *CE, 00092 bool IsStrnlen = false) const; 00093 00094 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 00095 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 00096 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 00097 void evalStrcpyCommon(CheckerContext &C, 00098 const CallExpr *CE, 00099 bool returnEnd, 00100 bool isBounded, 00101 bool isAppending) const; 00102 00103 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 00104 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 00105 00106 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 00107 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 00108 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 00109 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 00110 void evalStrcmpCommon(CheckerContext &C, 00111 const CallExpr *CE, 00112 bool isBounded = false, 00113 bool ignoreCase = false) const; 00114 00115 // Utility methods 00116 std::pair<ProgramStateRef , ProgramStateRef > 00117 static assumeZero(CheckerContext &C, 00118 ProgramStateRef state, SVal V, QualType Ty); 00119 00120 static ProgramStateRef setCStringLength(ProgramStateRef state, 00121 const MemRegion *MR, 00122 SVal strLength); 00123 static SVal getCStringLengthForRegion(CheckerContext &C, 00124 ProgramStateRef &state, 00125 const Expr *Ex, 00126 const MemRegion *MR, 00127 bool hypothetical); 00128 SVal getCStringLength(CheckerContext &C, 00129 ProgramStateRef &state, 00130 const Expr *Ex, 00131 SVal Buf, 00132 bool hypothetical = false) const; 00133 00134 const StringLiteral *getCStringLiteral(CheckerContext &C, 00135 ProgramStateRef &state, 00136 const Expr *expr, 00137 SVal val) const; 00138 00139 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 00140 ProgramStateRef state, 00141 const Expr *Ex, SVal V); 00142 00143 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 00144 const MemRegion *MR); 00145 00146 // Re-usable checks 00147 ProgramStateRef checkNonNull(CheckerContext &C, 00148 ProgramStateRef state, 00149 const Expr *S, 00150 SVal l) const; 00151 ProgramStateRef CheckLocation(CheckerContext &C, 00152 ProgramStateRef state, 00153 const Expr *S, 00154 SVal l, 00155 const char *message = NULL) const; 00156 ProgramStateRef CheckBufferAccess(CheckerContext &C, 00157 ProgramStateRef state, 00158 const Expr *Size, 00159 const Expr *FirstBuf, 00160 const Expr *SecondBuf, 00161 const char *firstMessage = NULL, 00162 const char *secondMessage = NULL, 00163 bool WarnAboutSize = false) const; 00164 00165 ProgramStateRef CheckBufferAccess(CheckerContext &C, 00166 ProgramStateRef state, 00167 const Expr *Size, 00168 const Expr *Buf, 00169 const char *message = NULL, 00170 bool WarnAboutSize = false) const { 00171 // This is a convenience override. 00172 return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL, 00173 WarnAboutSize); 00174 } 00175 ProgramStateRef CheckOverlap(CheckerContext &C, 00176 ProgramStateRef state, 00177 const Expr *Size, 00178 const Expr *First, 00179 const Expr *Second) const; 00180 void emitOverlapBug(CheckerContext &C, 00181 ProgramStateRef state, 00182 const Stmt *First, 00183 const Stmt *Second) const; 00184 00185 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 00186 ProgramStateRef state, 00187 NonLoc left, 00188 NonLoc right) const; 00189 }; 00190 00191 class CStringLength { 00192 public: 00193 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 00194 }; 00195 } //end anonymous namespace 00196 00197 namespace clang { 00198 namespace ento { 00199 template <> 00200 struct ProgramStateTrait<CStringLength> 00201 : public ProgramStatePartialTrait<CStringLength::EntryMap> { 00202 static void *GDMIndex() { return CStringChecker::getTag(); } 00203 }; 00204 } 00205 } 00206 00207 //===----------------------------------------------------------------------===// 00208 // Individual checks and utility methods. 00209 //===----------------------------------------------------------------------===// 00210 00211 std::pair<ProgramStateRef , ProgramStateRef > 00212 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 00213 QualType Ty) { 00214 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 00215 if (!val) 00216 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 00217 00218 SValBuilder &svalBuilder = C.getSValBuilder(); 00219 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 00220 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 00221 } 00222 00223 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 00224 ProgramStateRef state, 00225 const Expr *S, SVal l) const { 00226 // If a previous check has failed, propagate the failure. 00227 if (!state) 00228 return NULL; 00229 00230 ProgramStateRef stateNull, stateNonNull; 00231 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 00232 00233 if (stateNull && !stateNonNull) { 00234 if (!Filter.CheckCStringNullArg) 00235 return NULL; 00236 00237 ExplodedNode *N = C.generateSink(stateNull); 00238 if (!N) 00239 return NULL; 00240 00241 if (!BT_Null) 00242 BT_Null.reset(new BuiltinBug("Unix API", 00243 "Null pointer argument in call to byte string function")); 00244 00245 SmallString<80> buf; 00246 llvm::raw_svector_ostream os(buf); 00247 assert(CurrentFunctionDescription); 00248 os << "Null pointer argument in call to " << CurrentFunctionDescription; 00249 00250 // Generate a report for this bug. 00251 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 00252 BugReport *report = new BugReport(*BT, os.str(), N); 00253 00254 report->addRange(S->getSourceRange()); 00255 report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, S, 00256 report)); 00257 C.EmitReport(report); 00258 return NULL; 00259 } 00260 00261 // From here on, assume that the value is non-null. 00262 assert(stateNonNull); 00263 return stateNonNull; 00264 } 00265 00266 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 00267 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 00268 ProgramStateRef state, 00269 const Expr *S, SVal l, 00270 const char *warningMsg) const { 00271 // If a previous check has failed, propagate the failure. 00272 if (!state) 00273 return NULL; 00274 00275 // Check for out of bound array element access. 00276 const MemRegion *R = l.getAsRegion(); 00277 if (!R) 00278 return state; 00279 00280 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 00281 if (!ER) 00282 return state; 00283 00284 assert(ER->getValueType() == C.getASTContext().CharTy && 00285 "CheckLocation should only be called with char* ElementRegions"); 00286 00287 // Get the size of the array. 00288 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 00289 SValBuilder &svalBuilder = C.getSValBuilder(); 00290 SVal Extent = 00291 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 00292 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 00293 00294 // Get the index of the accessed element. 00295 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 00296 00297 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); 00298 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); 00299 if (StOutBound && !StInBound) { 00300 ExplodedNode *N = C.generateSink(StOutBound); 00301 if (!N) 00302 return NULL; 00303 00304 if (!BT_Bounds) { 00305 BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", 00306 "Byte string function accesses out-of-bound array element")); 00307 } 00308 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 00309 00310 // Generate a report for this bug. 00311 BugReport *report; 00312 if (warningMsg) { 00313 report = new BugReport(*BT, warningMsg, N); 00314 } else { 00315 assert(CurrentFunctionDescription); 00316 assert(CurrentFunctionDescription[0] != '\0'); 00317 00318 SmallString<80> buf; 00319 llvm::raw_svector_ostream os(buf); 00320 os << (char)toupper(CurrentFunctionDescription[0]) 00321 << &CurrentFunctionDescription[1] 00322 << " accesses out-of-bound array element"; 00323 report = new BugReport(*BT, os.str(), N); 00324 } 00325 00326 // FIXME: It would be nice to eventually make this diagnostic more clear, 00327 // e.g., by referencing the original declaration or by saying *why* this 00328 // reference is outside the range. 00329 00330 report->addRange(S->getSourceRange()); 00331 C.EmitReport(report); 00332 return NULL; 00333 } 00334 00335 // Array bound check succeeded. From this point forward the array bound 00336 // should always succeed. 00337 return StInBound; 00338 } 00339 00340 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, 00341 ProgramStateRef state, 00342 const Expr *Size, 00343 const Expr *FirstBuf, 00344 const Expr *SecondBuf, 00345 const char *firstMessage, 00346 const char *secondMessage, 00347 bool WarnAboutSize) const { 00348 // If a previous check has failed, propagate the failure. 00349 if (!state) 00350 return NULL; 00351 00352 SValBuilder &svalBuilder = C.getSValBuilder(); 00353 ASTContext &Ctx = svalBuilder.getContext(); 00354 const LocationContext *LCtx = C.getLocationContext(); 00355 00356 QualType sizeTy = Size->getType(); 00357 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 00358 00359 // Check that the first buffer is non-null. 00360 SVal BufVal = state->getSVal(FirstBuf, LCtx); 00361 state = checkNonNull(C, state, FirstBuf, BufVal); 00362 if (!state) 00363 return NULL; 00364 00365 // If out-of-bounds checking is turned off, skip the rest. 00366 if (!Filter.CheckCStringOutOfBounds) 00367 return state; 00368 00369 // Get the access length and make sure it is known. 00370 // FIXME: This assumes the caller has already checked that the access length 00371 // is positive. And that it's unsigned. 00372 SVal LengthVal = state->getSVal(Size, LCtx); 00373 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 00374 if (!Length) 00375 return state; 00376 00377 // Compute the offset of the last element to be accessed: size-1. 00378 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 00379 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 00380 *Length, One, sizeTy)); 00381 00382 // Check that the first buffer is sufficiently long. 00383 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 00384 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 00385 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); 00386 00387 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 00388 LastOffset, PtrTy); 00389 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); 00390 00391 // If the buffer isn't large enough, abort. 00392 if (!state) 00393 return NULL; 00394 } 00395 00396 // If there's a second buffer, check it as well. 00397 if (SecondBuf) { 00398 BufVal = state->getSVal(SecondBuf, LCtx); 00399 state = checkNonNull(C, state, SecondBuf, BufVal); 00400 if (!state) 00401 return NULL; 00402 00403 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 00404 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 00405 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); 00406 00407 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 00408 LastOffset, PtrTy); 00409 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); 00410 } 00411 } 00412 00413 // Large enough or not, return this state! 00414 return state; 00415 } 00416 00417 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 00418 ProgramStateRef state, 00419 const Expr *Size, 00420 const Expr *First, 00421 const Expr *Second) const { 00422 if (!Filter.CheckCStringBufferOverlap) 00423 return state; 00424 00425 // Do a simple check for overlap: if the two arguments are from the same 00426 // buffer, see if the end of the first is greater than the start of the second 00427 // or vice versa. 00428 00429 // If a previous check has failed, propagate the failure. 00430 if (!state) 00431 return NULL; 00432 00433 ProgramStateRef stateTrue, stateFalse; 00434 00435 // Get the buffer values and make sure they're known locations. 00436 const LocationContext *LCtx = C.getLocationContext(); 00437 SVal firstVal = state->getSVal(First, LCtx); 00438 SVal secondVal = state->getSVal(Second, LCtx); 00439 00440 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 00441 if (!firstLoc) 00442 return state; 00443 00444 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 00445 if (!secondLoc) 00446 return state; 00447 00448 // Are the two values the same? 00449 SValBuilder &svalBuilder = C.getSValBuilder(); 00450 llvm::tie(stateTrue, stateFalse) = 00451 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 00452 00453 if (stateTrue && !stateFalse) { 00454 // If the values are known to be equal, that's automatically an overlap. 00455 emitOverlapBug(C, stateTrue, First, Second); 00456 return NULL; 00457 } 00458 00459 // assume the two expressions are not equal. 00460 assert(stateFalse); 00461 state = stateFalse; 00462 00463 // Which value comes first? 00464 QualType cmpTy = svalBuilder.getConditionType(); 00465 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 00466 *firstLoc, *secondLoc, cmpTy); 00467 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 00468 if (!reverseTest) 00469 return state; 00470 00471 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 00472 if (stateTrue) { 00473 if (stateFalse) { 00474 // If we don't know which one comes first, we can't perform this test. 00475 return state; 00476 } else { 00477 // Switch the values so that firstVal is before secondVal. 00478 Loc *tmpLoc = firstLoc; 00479 firstLoc = secondLoc; 00480 secondLoc = tmpLoc; 00481 00482 // Switch the Exprs as well, so that they still correspond. 00483 const Expr *tmpExpr = First; 00484 First = Second; 00485 Second = tmpExpr; 00486 } 00487 } 00488 00489 // Get the length, and make sure it too is known. 00490 SVal LengthVal = state->getSVal(Size, LCtx); 00491 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 00492 if (!Length) 00493 return state; 00494 00495 // Convert the first buffer's start address to char*. 00496 // Bail out if the cast fails. 00497 ASTContext &Ctx = svalBuilder.getContext(); 00498 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 00499 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 00500 First->getType()); 00501 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 00502 if (!FirstStartLoc) 00503 return state; 00504 00505 // Compute the end of the first buffer. Bail out if THAT fails. 00506 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 00507 *FirstStartLoc, *Length, CharPtrTy); 00508 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 00509 if (!FirstEndLoc) 00510 return state; 00511 00512 // Is the end of the first buffer past the start of the second buffer? 00513 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 00514 *FirstEndLoc, *secondLoc, cmpTy); 00515 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 00516 if (!OverlapTest) 00517 return state; 00518 00519 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 00520 00521 if (stateTrue && !stateFalse) { 00522 // Overlap! 00523 emitOverlapBug(C, stateTrue, First, Second); 00524 return NULL; 00525 } 00526 00527 // assume the two expressions don't overlap. 00528 assert(stateFalse); 00529 return stateFalse; 00530 } 00531 00532 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 00533 const Stmt *First, const Stmt *Second) const { 00534 ExplodedNode *N = C.generateSink(state); 00535 if (!N) 00536 return; 00537 00538 if (!BT_Overlap) 00539 BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); 00540 00541 // Generate a report for this bug. 00542 BugReport *report = 00543 new BugReport(*BT_Overlap, 00544 "Arguments must not be overlapping buffers", N); 00545 report->addRange(First->getSourceRange()); 00546 report->addRange(Second->getSourceRange()); 00547 00548 C.EmitReport(report); 00549 } 00550 00551 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 00552 ProgramStateRef state, 00553 NonLoc left, 00554 NonLoc right) const { 00555 // If out-of-bounds checking is turned off, skip the rest. 00556 if (!Filter.CheckCStringOutOfBounds) 00557 return state; 00558 00559 // If a previous check has failed, propagate the failure. 00560 if (!state) 00561 return NULL; 00562 00563 SValBuilder &svalBuilder = C.getSValBuilder(); 00564 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 00565 00566 QualType sizeTy = svalBuilder.getContext().getSizeType(); 00567 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 00568 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 00569 00570 SVal maxMinusRight; 00571 if (isa<nonloc::ConcreteInt>(right)) { 00572 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 00573 sizeTy); 00574 } else { 00575 // Try switching the operands. (The order of these two assignments is 00576 // important!) 00577 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 00578 sizeTy); 00579 left = right; 00580 } 00581 00582 if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) { 00583 QualType cmpTy = svalBuilder.getConditionType(); 00584 // If left > max - right, we have an overflow. 00585 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 00586 *maxMinusRightNL, cmpTy); 00587 00588 ProgramStateRef stateOverflow, stateOkay; 00589 llvm::tie(stateOverflow, stateOkay) = 00590 state->assume(cast<DefinedOrUnknownSVal>(willOverflow)); 00591 00592 if (stateOverflow && !stateOkay) { 00593 // We have an overflow. Emit a bug report. 00594 ExplodedNode *N = C.generateSink(stateOverflow); 00595 if (!N) 00596 return NULL; 00597 00598 if (!BT_AdditionOverflow) 00599 BT_AdditionOverflow.reset(new BuiltinBug("API", 00600 "Sum of expressions causes overflow")); 00601 00602 // This isn't a great error message, but this should never occur in real 00603 // code anyway -- you'd have to create a buffer longer than a size_t can 00604 // represent, which is sort of a contradiction. 00605 const char *warning = 00606 "This expression will create a string whose length is too big to " 00607 "be represented as a size_t"; 00608 00609 // Generate a report for this bug. 00610 BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N); 00611 C.EmitReport(report); 00612 00613 return NULL; 00614 } 00615 00616 // From now on, assume an overflow didn't occur. 00617 assert(stateOkay); 00618 state = stateOkay; 00619 } 00620 00621 return state; 00622 } 00623 00624 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 00625 const MemRegion *MR, 00626 SVal strLength) { 00627 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 00628 00629 MR = MR->StripCasts(); 00630 00631 switch (MR->getKind()) { 00632 case MemRegion::StringRegionKind: 00633 // FIXME: This can happen if we strcpy() into a string region. This is 00634 // undefined [C99 6.4.5p6], but we should still warn about it. 00635 return state; 00636 00637 case MemRegion::SymbolicRegionKind: 00638 case MemRegion::AllocaRegionKind: 00639 case MemRegion::VarRegionKind: 00640 case MemRegion::FieldRegionKind: 00641 case MemRegion::ObjCIvarRegionKind: 00642 // These are the types we can currently track string lengths for. 00643 break; 00644 00645 case MemRegion::ElementRegionKind: 00646 // FIXME: Handle element regions by upper-bounding the parent region's 00647 // string length. 00648 return state; 00649 00650 default: 00651 // Other regions (mostly non-data) can't have a reliable C string length. 00652 // For now, just ignore the change. 00653 // FIXME: These are rare but not impossible. We should output some kind of 00654 // warning for things like strcpy((char[]){'a', 0}, "b"); 00655 return state; 00656 } 00657 00658 if (strLength.isUnknown()) 00659 return state->remove<CStringLength>(MR); 00660 00661 return state->set<CStringLength>(MR, strLength); 00662 } 00663 00664 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 00665 ProgramStateRef &state, 00666 const Expr *Ex, 00667 const MemRegion *MR, 00668 bool hypothetical) { 00669 if (!hypothetical) { 00670 // If there's a recorded length, go ahead and return it. 00671 const SVal *Recorded = state->get<CStringLength>(MR); 00672 if (Recorded) 00673 return *Recorded; 00674 } 00675 00676 // Otherwise, get a new symbol and update the state. 00677 unsigned Count = C.getCurrentBlockCount(); 00678 SValBuilder &svalBuilder = C.getSValBuilder(); 00679 QualType sizeTy = svalBuilder.getContext().getSizeType(); 00680 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 00681 MR, Ex, sizeTy, Count); 00682 00683 if (!hypothetical) 00684 state = state->set<CStringLength>(MR, strLength); 00685 00686 return strLength; 00687 } 00688 00689 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 00690 const Expr *Ex, SVal Buf, 00691 bool hypothetical) const { 00692 const MemRegion *MR = Buf.getAsRegion(); 00693 if (!MR) { 00694 // If we can't get a region, see if it's something we /know/ isn't a 00695 // C string. In the context of locations, the only time we can issue such 00696 // a warning is for labels. 00697 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 00698 if (!Filter.CheckCStringNotNullTerm) 00699 return UndefinedVal(); 00700 00701 if (ExplodedNode *N = C.addTransition(state)) { 00702 if (!BT_NotCString) 00703 BT_NotCString.reset(new BuiltinBug("Unix API", 00704 "Argument is not a null-terminated string.")); 00705 00706 SmallString<120> buf; 00707 llvm::raw_svector_ostream os(buf); 00708 assert(CurrentFunctionDescription); 00709 os << "Argument to " << CurrentFunctionDescription 00710 << " is the address of the label '" << Label->getLabel()->getName() 00711 << "', which is not a null-terminated string"; 00712 00713 // Generate a report for this bug. 00714 BugReport *report = new BugReport(*BT_NotCString, 00715 os.str(), N); 00716 00717 report->addRange(Ex->getSourceRange()); 00718 C.EmitReport(report); 00719 } 00720 return UndefinedVal(); 00721 00722 } 00723 00724 // If it's not a region and not a label, give up. 00725 return UnknownVal(); 00726 } 00727 00728 // If we have a region, strip casts from it and see if we can figure out 00729 // its length. For anything we can't figure out, just return UnknownVal. 00730 MR = MR->StripCasts(); 00731 00732 switch (MR->getKind()) { 00733 case MemRegion::StringRegionKind: { 00734 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 00735 // so we can assume that the byte length is the correct C string length. 00736 SValBuilder &svalBuilder = C.getSValBuilder(); 00737 QualType sizeTy = svalBuilder.getContext().getSizeType(); 00738 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 00739 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 00740 } 00741 case MemRegion::SymbolicRegionKind: 00742 case MemRegion::AllocaRegionKind: 00743 case MemRegion::VarRegionKind: 00744 case MemRegion::FieldRegionKind: 00745 case MemRegion::ObjCIvarRegionKind: 00746 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 00747 case MemRegion::CompoundLiteralRegionKind: 00748 // FIXME: Can we track this? Is it necessary? 00749 return UnknownVal(); 00750 case MemRegion::ElementRegionKind: 00751 // FIXME: How can we handle this? It's not good enough to subtract the 00752 // offset from the base string length; consider "123\x00567" and &a[5]. 00753 return UnknownVal(); 00754 default: 00755 // Other regions (mostly non-data) can't have a reliable C string length. 00756 // In this case, an error is emitted and UndefinedVal is returned. 00757 // The caller should always be prepared to handle this case. 00758 if (!Filter.CheckCStringNotNullTerm) 00759 return UndefinedVal(); 00760 00761 if (ExplodedNode *N = C.addTransition(state)) { 00762 if (!BT_NotCString) 00763 BT_NotCString.reset(new BuiltinBug("Unix API", 00764 "Argument is not a null-terminated string.")); 00765 00766 SmallString<120> buf; 00767 llvm::raw_svector_ostream os(buf); 00768 00769 assert(CurrentFunctionDescription); 00770 os << "Argument to " << CurrentFunctionDescription << " is "; 00771 00772 if (SummarizeRegion(os, C.getASTContext(), MR)) 00773 os << ", which is not a null-terminated string"; 00774 else 00775 os << "not a null-terminated string"; 00776 00777 // Generate a report for this bug. 00778 BugReport *report = new BugReport(*BT_NotCString, 00779 os.str(), N); 00780 00781 report->addRange(Ex->getSourceRange()); 00782 C.EmitReport(report); 00783 } 00784 00785 return UndefinedVal(); 00786 } 00787 } 00788 00789 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 00790 ProgramStateRef &state, const Expr *expr, SVal val) const { 00791 00792 // Get the memory region pointed to by the val. 00793 const MemRegion *bufRegion = val.getAsRegion(); 00794 if (!bufRegion) 00795 return NULL; 00796 00797 // Strip casts off the memory region. 00798 bufRegion = bufRegion->StripCasts(); 00799 00800 // Cast the memory region to a string region. 00801 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 00802 if (!strRegion) 00803 return NULL; 00804 00805 // Return the actual string in the string region. 00806 return strRegion->getStringLiteral(); 00807 } 00808 00809 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 00810 ProgramStateRef state, 00811 const Expr *E, SVal V) { 00812 Loc *L = dyn_cast<Loc>(&V); 00813 if (!L) 00814 return state; 00815 00816 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 00817 // some assumptions about the value that CFRefCount can't. Even so, it should 00818 // probably be refactored. 00819 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 00820 const MemRegion *R = MR->getRegion()->StripCasts(); 00821 00822 // Are we dealing with an ElementRegion? If so, we should be invalidating 00823 // the super-region. 00824 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 00825 R = ER->getSuperRegion(); 00826 // FIXME: What about layers of ElementRegions? 00827 } 00828 00829 // Invalidate this region. 00830 unsigned Count = C.getCurrentBlockCount(); 00831 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 00832 return state->invalidateRegions(R, E, Count, LCtx); 00833 } 00834 00835 // If we have a non-region value by chance, just remove the binding. 00836 // FIXME: is this necessary or correct? This handles the non-Region 00837 // cases. Is it ever valid to store to these? 00838 return state->unbindLoc(*L); 00839 } 00840 00841 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 00842 const MemRegion *MR) { 00843 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); 00844 00845 switch (MR->getKind()) { 00846 case MemRegion::FunctionTextRegionKind: { 00847 const FunctionDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); 00848 if (FD) 00849 os << "the address of the function '" << *FD << '\''; 00850 else 00851 os << "the address of a function"; 00852 return true; 00853 } 00854 case MemRegion::BlockTextRegionKind: 00855 os << "block text"; 00856 return true; 00857 case MemRegion::BlockDataRegionKind: 00858 os << "a block"; 00859 return true; 00860 case MemRegion::CXXThisRegionKind: 00861 case MemRegion::CXXTempObjectRegionKind: 00862 os << "a C++ temp object of type " << TVR->getValueType().getAsString(); 00863 return true; 00864 case MemRegion::VarRegionKind: 00865 os << "a variable of type" << TVR->getValueType().getAsString(); 00866 return true; 00867 case MemRegion::FieldRegionKind: 00868 os << "a field of type " << TVR->getValueType().getAsString(); 00869 return true; 00870 case MemRegion::ObjCIvarRegionKind: 00871 os << "an instance variable of type " << TVR->getValueType().getAsString(); 00872 return true; 00873 default: 00874 return false; 00875 } 00876 } 00877 00878 //===----------------------------------------------------------------------===// 00879 // evaluation of individual function calls. 00880 //===----------------------------------------------------------------------===// 00881 00882 void CStringChecker::evalCopyCommon(CheckerContext &C, 00883 const CallExpr *CE, 00884 ProgramStateRef state, 00885 const Expr *Size, const Expr *Dest, 00886 const Expr *Source, bool Restricted, 00887 bool IsMempcpy) const { 00888 CurrentFunctionDescription = "memory copy function"; 00889 00890 // See if the size argument is zero. 00891 const LocationContext *LCtx = C.getLocationContext(); 00892 SVal sizeVal = state->getSVal(Size, LCtx); 00893 QualType sizeTy = Size->getType(); 00894 00895 ProgramStateRef stateZeroSize, stateNonZeroSize; 00896 llvm::tie(stateZeroSize, stateNonZeroSize) = 00897 assumeZero(C, state, sizeVal, sizeTy); 00898 00899 // Get the value of the Dest. 00900 SVal destVal = state->getSVal(Dest, LCtx); 00901 00902 // If the size is zero, there won't be any actual memory access, so 00903 // just bind the return value to the destination buffer and return. 00904 if (stateZeroSize && !stateNonZeroSize) { 00905 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 00906 C.addTransition(stateZeroSize); 00907 return; 00908 } 00909 00910 // If the size can be nonzero, we have to check the other arguments. 00911 if (stateNonZeroSize) { 00912 state = stateNonZeroSize; 00913 00914 // Ensure the destination is not null. If it is NULL there will be a 00915 // NULL pointer dereference. 00916 state = checkNonNull(C, state, Dest, destVal); 00917 if (!state) 00918 return; 00919 00920 // Get the value of the Src. 00921 SVal srcVal = state->getSVal(Source, LCtx); 00922 00923 // Ensure the source is not null. If it is NULL there will be a 00924 // NULL pointer dereference. 00925 state = checkNonNull(C, state, Source, srcVal); 00926 if (!state) 00927 return; 00928 00929 // Ensure the accesses are valid and that the buffers do not overlap. 00930 const char * const writeWarning = 00931 "Memory copy function overflows destination buffer"; 00932 state = CheckBufferAccess(C, state, Size, Dest, Source, 00933 writeWarning, /* sourceWarning = */ NULL); 00934 if (Restricted) 00935 state = CheckOverlap(C, state, Size, Dest, Source); 00936 00937 if (!state) 00938 return; 00939 00940 // If this is mempcpy, get the byte after the last byte copied and 00941 // bind the expr. 00942 if (IsMempcpy) { 00943 loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal); 00944 assert(destRegVal && "Destination should be a known MemRegionVal here"); 00945 00946 // Get the length to copy. 00947 NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal); 00948 00949 if (lenValNonLoc) { 00950 // Get the byte after the last byte copied. 00951 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 00952 *destRegVal, 00953 *lenValNonLoc, 00954 Dest->getType()); 00955 00956 // The byte after the last byte copied is the return value. 00957 state = state->BindExpr(CE, LCtx, lastElement); 00958 } else { 00959 // If we don't know how much we copied, we can at least 00960 // conjure a return value for later. 00961 unsigned Count = C.getCurrentBlockCount(); 00962 SVal result = 00963 C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count); 00964 state = state->BindExpr(CE, LCtx, result); 00965 } 00966 00967 } else { 00968 // All other copies return the destination buffer. 00969 // (Well, bcopy() has a void return type, but this won't hurt.) 00970 state = state->BindExpr(CE, LCtx, destVal); 00971 } 00972 00973 // Invalidate the destination. 00974 // FIXME: Even if we can't perfectly model the copy, we should see if we 00975 // can use LazyCompoundVals to copy the source values into the destination. 00976 // This would probably remove any existing bindings past the end of the 00977 // copied region, but that's still an improvement over blank invalidation. 00978 state = InvalidateBuffer(C, state, Dest, 00979 state->getSVal(Dest, C.getLocationContext())); 00980 C.addTransition(state); 00981 } 00982 } 00983 00984 00985 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 00986 if (CE->getNumArgs() < 3) 00987 return; 00988 00989 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 00990 // The return value is the address of the destination buffer. 00991 const Expr *Dest = CE->getArg(0); 00992 ProgramStateRef state = C.getState(); 00993 00994 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 00995 } 00996 00997 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 00998 if (CE->getNumArgs() < 3) 00999 return; 01000 01001 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 01002 // The return value is a pointer to the byte following the last written byte. 01003 const Expr *Dest = CE->getArg(0); 01004 ProgramStateRef state = C.getState(); 01005 01006 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 01007 } 01008 01009 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 01010 if (CE->getNumArgs() < 3) 01011 return; 01012 01013 // void *memmove(void *dst, const void *src, size_t n); 01014 // The return value is the address of the destination buffer. 01015 const Expr *Dest = CE->getArg(0); 01016 ProgramStateRef state = C.getState(); 01017 01018 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 01019 } 01020 01021 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 01022 if (CE->getNumArgs() < 3) 01023 return; 01024 01025 // void bcopy(const void *src, void *dst, size_t n); 01026 evalCopyCommon(C, CE, C.getState(), 01027 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 01028 } 01029 01030 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 01031 if (CE->getNumArgs() < 3) 01032 return; 01033 01034 // int memcmp(const void *s1, const void *s2, size_t n); 01035 CurrentFunctionDescription = "memory comparison function"; 01036 01037 const Expr *Left = CE->getArg(0); 01038 const Expr *Right = CE->getArg(1); 01039 const Expr *Size = CE->getArg(2); 01040 01041 ProgramStateRef state = C.getState(); 01042 SValBuilder &svalBuilder = C.getSValBuilder(); 01043 01044 // See if the size argument is zero. 01045 const LocationContext *LCtx = C.getLocationContext(); 01046 SVal sizeVal = state->getSVal(Size, LCtx); 01047 QualType sizeTy = Size->getType(); 01048 01049 ProgramStateRef stateZeroSize, stateNonZeroSize; 01050 llvm::tie(stateZeroSize, stateNonZeroSize) = 01051 assumeZero(C, state, sizeVal, sizeTy); 01052 01053 // If the size can be zero, the result will be 0 in that case, and we don't 01054 // have to check either of the buffers. 01055 if (stateZeroSize) { 01056 state = stateZeroSize; 01057 state = state->BindExpr(CE, LCtx, 01058 svalBuilder.makeZeroVal(CE->getType())); 01059 C.addTransition(state); 01060 } 01061 01062 // If the size can be nonzero, we have to check the other arguments. 01063 if (stateNonZeroSize) { 01064 state = stateNonZeroSize; 01065 // If we know the two buffers are the same, we know the result is 0. 01066 // First, get the two buffers' addresses. Another checker will have already 01067 // made sure they're not undefined. 01068 DefinedOrUnknownSVal LV = 01069 cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx)); 01070 DefinedOrUnknownSVal RV = 01071 cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx)); 01072 01073 // See if they are the same. 01074 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 01075 ProgramStateRef StSameBuf, StNotSameBuf; 01076 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 01077 01078 // If the two arguments might be the same buffer, we know the result is 0, 01079 // and we only need to check one size. 01080 if (StSameBuf) { 01081 state = StSameBuf; 01082 state = CheckBufferAccess(C, state, Size, Left); 01083 if (state) { 01084 state = StSameBuf->BindExpr(CE, LCtx, 01085 svalBuilder.makeZeroVal(CE->getType())); 01086 C.addTransition(state); 01087 } 01088 } 01089 01090 // If the two arguments might be different buffers, we have to check the 01091 // size of both of them. 01092 if (StNotSameBuf) { 01093 state = StNotSameBuf; 01094 state = CheckBufferAccess(C, state, Size, Left, Right); 01095 if (state) { 01096 // The return value is the comparison result, which we don't know. 01097 unsigned Count = C.getCurrentBlockCount(); 01098 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count); 01099 state = state->BindExpr(CE, LCtx, CmpV); 01100 C.addTransition(state); 01101 } 01102 } 01103 } 01104 } 01105 01106 void CStringChecker::evalstrLength(CheckerContext &C, 01107 const CallExpr *CE) const { 01108 if (CE->getNumArgs() < 1) 01109 return; 01110 01111 // size_t strlen(const char *s); 01112 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 01113 } 01114 01115 void CStringChecker::evalstrnLength(CheckerContext &C, 01116 const CallExpr *CE) const { 01117 if (CE->getNumArgs() < 2) 01118 return; 01119 01120 // size_t strnlen(const char *s, size_t maxlen); 01121 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 01122 } 01123 01124 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 01125 bool IsStrnlen) const { 01126 CurrentFunctionDescription = "string length function"; 01127 ProgramStateRef state = C.getState(); 01128 const LocationContext *LCtx = C.getLocationContext(); 01129 01130 if (IsStrnlen) { 01131 const Expr *maxlenExpr = CE->getArg(1); 01132 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 01133 01134 ProgramStateRef stateZeroSize, stateNonZeroSize; 01135 llvm::tie(stateZeroSize, stateNonZeroSize) = 01136 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 01137 01138 // If the size can be zero, the result will be 0 in that case, and we don't 01139 // have to check the string itself. 01140 if (stateZeroSize) { 01141 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 01142 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 01143 C.addTransition(stateZeroSize); 01144 } 01145 01146 // If the size is GUARANTEED to be zero, we're done! 01147 if (!stateNonZeroSize) 01148 return; 01149 01150 // Otherwise, record the assumption that the size is nonzero. 01151 state = stateNonZeroSize; 01152 } 01153 01154 // Check that the string argument is non-null. 01155 const Expr *Arg = CE->getArg(0); 01156 SVal ArgVal = state->getSVal(Arg, LCtx); 01157 01158 state = checkNonNull(C, state, Arg, ArgVal); 01159 01160 if (!state) 01161 return; 01162 01163 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 01164 01165 // If the argument isn't a valid C string, there's no valid state to 01166 // transition to. 01167 if (strLength.isUndef()) 01168 return; 01169 01170 DefinedOrUnknownSVal result = UnknownVal(); 01171 01172 // If the check is for strnlen() then bind the return value to no more than 01173 // the maxlen value. 01174 if (IsStrnlen) { 01175 QualType cmpTy = C.getSValBuilder().getConditionType(); 01176 01177 // It's a little unfortunate to be getting this again, 01178 // but it's not that expensive... 01179 const Expr *maxlenExpr = CE->getArg(1); 01180 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 01181 01182 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 01183 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 01184 01185 if (strLengthNL && maxlenValNL) { 01186 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 01187 01188 // Check if the strLength is greater than the maxlen. 01189 llvm::tie(stateStringTooLong, stateStringNotTooLong) = 01190 state->assume(cast<DefinedOrUnknownSVal> 01191 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 01192 *strLengthNL, 01193 *maxlenValNL, 01194 cmpTy))); 01195 01196 if (stateStringTooLong && !stateStringNotTooLong) { 01197 // If the string is longer than maxlen, return maxlen. 01198 result = *maxlenValNL; 01199 } else if (stateStringNotTooLong && !stateStringTooLong) { 01200 // If the string is shorter than maxlen, return its length. 01201 result = *strLengthNL; 01202 } 01203 } 01204 01205 if (result.isUnknown()) { 01206 // If we don't have enough information for a comparison, there's 01207 // no guarantee the full string length will actually be returned. 01208 // All we know is the return value is the min of the string length 01209 // and the limit. This is better than nothing. 01210 unsigned Count = C.getCurrentBlockCount(); 01211 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count); 01212 NonLoc *resultNL = cast<NonLoc>(&result); 01213 01214 if (strLengthNL) { 01215 state = state->assume(cast<DefinedOrUnknownSVal> 01216 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 01217 *resultNL, 01218 *strLengthNL, 01219 cmpTy)), true); 01220 } 01221 01222 if (maxlenValNL) { 01223 state = state->assume(cast<DefinedOrUnknownSVal> 01224 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 01225 *resultNL, 01226 *maxlenValNL, 01227 cmpTy)), true); 01228 } 01229 } 01230 01231 } else { 01232 // This is a plain strlen(), not strnlen(). 01233 result = cast<DefinedOrUnknownSVal>(strLength); 01234 01235 // If we don't know the length of the string, conjure a return 01236 // value, so it can be used in constraints, at least. 01237 if (result.isUnknown()) { 01238 unsigned Count = C.getCurrentBlockCount(); 01239 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count); 01240 } 01241 } 01242 01243 // Bind the return value. 01244 assert(!result.isUnknown() && "Should have conjured a value by now"); 01245 state = state->BindExpr(CE, LCtx, result); 01246 C.addTransition(state); 01247 } 01248 01249 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 01250 if (CE->getNumArgs() < 2) 01251 return; 01252 01253 // char *strcpy(char *restrict dst, const char *restrict src); 01254 evalStrcpyCommon(C, CE, 01255 /* returnEnd = */ false, 01256 /* isBounded = */ false, 01257 /* isAppending = */ false); 01258 } 01259 01260 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 01261 if (CE->getNumArgs() < 3) 01262 return; 01263 01264 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 01265 evalStrcpyCommon(C, CE, 01266 /* returnEnd = */ false, 01267 /* isBounded = */ true, 01268 /* isAppending = */ false); 01269 } 01270 01271 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 01272 if (CE->getNumArgs() < 2) 01273 return; 01274 01275 // char *stpcpy(char *restrict dst, const char *restrict src); 01276 evalStrcpyCommon(C, CE, 01277 /* returnEnd = */ true, 01278 /* isBounded = */ false, 01279 /* isAppending = */ false); 01280 } 01281 01282 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 01283 if (CE->getNumArgs() < 2) 01284 return; 01285 01286 //char *strcat(char *restrict s1, const char *restrict s2); 01287 evalStrcpyCommon(C, CE, 01288 /* returnEnd = */ false, 01289 /* isBounded = */ false, 01290 /* isAppending = */ true); 01291 } 01292 01293 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 01294 if (CE->getNumArgs() < 3) 01295 return; 01296 01297 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 01298 evalStrcpyCommon(C, CE, 01299 /* returnEnd = */ false, 01300 /* isBounded = */ true, 01301 /* isAppending = */ true); 01302 } 01303 01304 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 01305 bool returnEnd, bool isBounded, 01306 bool isAppending) const { 01307 CurrentFunctionDescription = "string copy function"; 01308 ProgramStateRef state = C.getState(); 01309 const LocationContext *LCtx = C.getLocationContext(); 01310 01311 // Check that the destination is non-null. 01312 const Expr *Dst = CE->getArg(0); 01313 SVal DstVal = state->getSVal(Dst, LCtx); 01314 01315 state = checkNonNull(C, state, Dst, DstVal); 01316 if (!state) 01317 return; 01318 01319 // Check that the source is non-null. 01320 const Expr *srcExpr = CE->getArg(1); 01321 SVal srcVal = state->getSVal(srcExpr, LCtx); 01322 state = checkNonNull(C, state, srcExpr, srcVal); 01323 if (!state) 01324 return; 01325 01326 // Get the string length of the source. 01327 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 01328 01329 // If the source isn't a valid C string, give up. 01330 if (strLength.isUndef()) 01331 return; 01332 01333 SValBuilder &svalBuilder = C.getSValBuilder(); 01334 QualType cmpTy = svalBuilder.getConditionType(); 01335 QualType sizeTy = svalBuilder.getContext().getSizeType(); 01336 01337 // These two values allow checking two kinds of errors: 01338 // - actual overflows caused by a source that doesn't fit in the destination 01339 // - potential overflows caused by a bound that could exceed the destination 01340 SVal amountCopied = UnknownVal(); 01341 SVal maxLastElementIndex = UnknownVal(); 01342 const char *boundWarning = NULL; 01343 01344 // If the function is strncpy, strncat, etc... it is bounded. 01345 if (isBounded) { 01346 // Get the max number of characters to copy. 01347 const Expr *lenExpr = CE->getArg(2); 01348 SVal lenVal = state->getSVal(lenExpr, LCtx); 01349 01350 // Protect against misdeclared strncpy(). 01351 lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); 01352 01353 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 01354 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 01355 01356 // If we know both values, we might be able to figure out how much 01357 // we're copying. 01358 if (strLengthNL && lenValNL) { 01359 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong; 01360 01361 // Check if the max number to copy is less than the length of the src. 01362 // If the bound is equal to the source length, strncpy won't null- 01363 // terminate the result! 01364 llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = 01365 state->assume(cast<DefinedOrUnknownSVal> 01366 (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, 01367 *lenValNL, cmpTy))); 01368 01369 if (stateSourceTooLong && !stateSourceNotTooLong) { 01370 // Max number to copy is less than the length of the src, so the actual 01371 // strLength copied is the max number arg. 01372 state = stateSourceTooLong; 01373 amountCopied = lenVal; 01374 01375 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 01376 // The source buffer entirely fits in the bound. 01377 state = stateSourceNotTooLong; 01378 amountCopied = strLength; 01379 } 01380 } 01381 01382 // We still want to know if the bound is known to be too large. 01383 if (lenValNL) { 01384 if (isAppending) { 01385 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) 01386 01387 // Get the string length of the destination. If the destination is 01388 // memory that can't have a string length, we shouldn't be copying 01389 // into it anyway. 01390 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 01391 if (dstStrLength.isUndef()) 01392 return; 01393 01394 if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) { 01395 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, 01396 *lenValNL, 01397 *dstStrLengthNL, 01398 sizeTy); 01399 boundWarning = "Size argument is greater than the free space in the " 01400 "destination buffer"; 01401 } 01402 01403 } else { 01404 // For strncpy, this is just checking that lenVal <= sizeof(dst) 01405 // (Yes, strncpy and strncat differ in how they treat termination. 01406 // strncat ALWAYS terminates, but strncpy doesn't.) 01407 01408 // We need a special case for when the copy size is zero, in which 01409 // case strncpy will do no work at all. Our bounds check uses n-1 01410 // as the last element accessed, so n == 0 is problematic. 01411 ProgramStateRef StateZeroSize, StateNonZeroSize; 01412 llvm::tie(StateZeroSize, StateNonZeroSize) = 01413 assumeZero(C, state, *lenValNL, sizeTy); 01414 01415 // If the size is known to be zero, we're done. 01416 if (StateZeroSize && !StateNonZeroSize) { 01417 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal); 01418 C.addTransition(StateZeroSize); 01419 return; 01420 } 01421 01422 // Otherwise, go ahead and figure out the last element we'll touch. 01423 // We don't record the non-zero assumption here because we can't 01424 // be sure. We won't warn on a possible zero. 01425 NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 01426 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, 01427 one, sizeTy); 01428 boundWarning = "Size argument is greater than the length of the " 01429 "destination buffer"; 01430 } 01431 } 01432 01433 // If we couldn't pin down the copy length, at least bound it. 01434 // FIXME: We should actually run this code path for append as well, but 01435 // right now it creates problems with constraints (since we can end up 01436 // trying to pass constraints from symbol to symbol). 01437 if (amountCopied.isUnknown() && !isAppending) { 01438 // Try to get a "hypothetical" string length symbol, which we can later 01439 // set as a real value if that turns out to be the case. 01440 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 01441 assert(!amountCopied.isUndef()); 01442 01443 if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) { 01444 if (lenValNL) { 01445 // amountCopied <= lenVal 01446 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 01447 *amountCopiedNL, 01448 *lenValNL, 01449 cmpTy); 01450 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound), 01451 true); 01452 if (!state) 01453 return; 01454 } 01455 01456 if (strLengthNL) { 01457 // amountCopied <= strlen(source) 01458 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 01459 *amountCopiedNL, 01460 *strLengthNL, 01461 cmpTy); 01462 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc), 01463 true); 01464 if (!state) 01465 return; 01466 } 01467 } 01468 } 01469 01470 } else { 01471 // The function isn't bounded. The amount copied should match the length 01472 // of the source buffer. 01473 amountCopied = strLength; 01474 } 01475 01476 assert(state); 01477 01478 // This represents the number of characters copied into the destination 01479 // buffer. (It may not actually be the strlen if the destination buffer 01480 // is not terminated.) 01481 SVal finalStrLength = UnknownVal(); 01482 01483 // If this is an appending function (strcat, strncat...) then set the 01484 // string length to strlen(src) + strlen(dst) since the buffer will 01485 // ultimately contain both. 01486 if (isAppending) { 01487 // Get the string length of the destination. If the destination is memory 01488 // that can't have a string length, we shouldn't be copying into it anyway. 01489 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 01490 if (dstStrLength.isUndef()) 01491 return; 01492 01493 NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied); 01494 NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength); 01495 01496 // If we know both string lengths, we might know the final string length. 01497 if (srcStrLengthNL && dstStrLengthNL) { 01498 // Make sure the two lengths together don't overflow a size_t. 01499 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 01500 if (!state) 01501 return; 01502 01503 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 01504 *dstStrLengthNL, sizeTy); 01505 } 01506 01507 // If we couldn't get a single value for the final string length, 01508 // we can at least bound it by the individual lengths. 01509 if (finalStrLength.isUnknown()) { 01510 // Try to get a "hypothetical" string length symbol, which we can later 01511 // set as a real value if that turns out to be the case. 01512 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 01513 assert(!finalStrLength.isUndef()); 01514 01515 if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) { 01516 if (srcStrLengthNL) { 01517 // finalStrLength >= srcStrLength 01518 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 01519 *finalStrLengthNL, 01520 *srcStrLengthNL, 01521 cmpTy); 01522 state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult), 01523 true); 01524 if (!state) 01525 return; 01526 } 01527 01528 if (dstStrLengthNL) { 01529 // finalStrLength >= dstStrLength 01530 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 01531 *finalStrLengthNL, 01532 *dstStrLengthNL, 01533 cmpTy); 01534 state = state->assume(cast<DefinedOrUnknownSVal>(destInResult), 01535 true); 01536 if (!state) 01537 return; 01538 } 01539 } 01540 } 01541 01542 } else { 01543 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 01544 // the final string length will match the input string length. 01545 finalStrLength = amountCopied; 01546 } 01547 01548 // The final result of the function will either be a pointer past the last 01549 // copied element, or a pointer to the start of the destination buffer. 01550 SVal Result = (returnEnd ? UnknownVal() : DstVal); 01551 01552 assert(state); 01553 01554 // If the destination is a MemRegion, try to check for a buffer overflow and 01555 // record the new string length. 01556 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 01557 QualType ptrTy = Dst->getType(); 01558 01559 // If we have an exact value on a bounded copy, use that to check for 01560 // overflows, rather than our estimate about how much is actually copied. 01561 if (boundWarning) { 01562 if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) { 01563 SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 01564 *maxLastNL, ptrTy); 01565 state = CheckLocation(C, state, CE->getArg(2), maxLastElement, 01566 boundWarning); 01567 if (!state) 01568 return; 01569 } 01570 } 01571 01572 // Then, if the final length is known... 01573 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) { 01574 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 01575 *knownStrLength, ptrTy); 01576 01577 // ...and we haven't checked the bound, we'll check the actual copy. 01578 if (!boundWarning) { 01579 const char * const warningMsg = 01580 "String copy function overflows destination buffer"; 01581 state = CheckLocation(C, state, Dst, lastElement, warningMsg); 01582 if (!state) 01583 return; 01584 } 01585 01586 // If this is a stpcpy-style copy, the last element is the return value. 01587 if (returnEnd) 01588 Result = lastElement; 01589 } 01590 01591 // Invalidate the destination. This must happen before we set the C string 01592 // length because invalidation will clear the length. 01593 // FIXME: Even if we can't perfectly model the copy, we should see if we 01594 // can use LazyCompoundVals to copy the source values into the destination. 01595 // This would probably remove any existing bindings past the end of the 01596 // string, but that's still an improvement over blank invalidation. 01597 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 01598 01599 // Set the C string length of the destination, if we know it. 01600 if (isBounded && !isAppending) { 01601 // strncpy is annoying in that it doesn't guarantee to null-terminate 01602 // the result string. If the original string didn't fit entirely inside 01603 // the bound (including the null-terminator), we don't know how long the 01604 // result is. 01605 if (amountCopied != strLength) 01606 finalStrLength = UnknownVal(); 01607 } 01608 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 01609 } 01610 01611 assert(state); 01612 01613 // If this is a stpcpy-style copy, but we were unable to check for a buffer 01614 // overflow, we still need a result. Conjure a return value. 01615 if (returnEnd && Result.isUnknown()) { 01616 unsigned Count = C.getCurrentBlockCount(); 01617 Result = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count); 01618 } 01619 01620 // Set the return value. 01621 state = state->BindExpr(CE, LCtx, Result); 01622 C.addTransition(state); 01623 } 01624 01625 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 01626 if (CE->getNumArgs() < 2) 01627 return; 01628 01629 //int strcmp(const char *s1, const char *s2); 01630 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 01631 } 01632 01633 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 01634 if (CE->getNumArgs() < 3) 01635 return; 01636 01637 //int strncmp(const char *s1, const char *s2, size_t n); 01638 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 01639 } 01640 01641 void CStringChecker::evalStrcasecmp(CheckerContext &C, 01642 const CallExpr *CE) const { 01643 if (CE->getNumArgs() < 2) 01644 return; 01645 01646 //int strcasecmp(const char *s1, const char *s2); 01647 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 01648 } 01649 01650 void CStringChecker::evalStrncasecmp(CheckerContext &C, 01651 const CallExpr *CE) const { 01652 if (CE->getNumArgs() < 3) 01653 return; 01654 01655 //int strncasecmp(const char *s1, const char *s2, size_t n); 01656 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); 01657 } 01658 01659 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 01660 bool isBounded, bool ignoreCase) const { 01661 CurrentFunctionDescription = "string comparison function"; 01662 ProgramStateRef state = C.getState(); 01663 const LocationContext *LCtx = C.getLocationContext(); 01664 01665 // Check that the first string is non-null 01666 const Expr *s1 = CE->getArg(0); 01667 SVal s1Val = state->getSVal(s1, LCtx); 01668 state = checkNonNull(C, state, s1, s1Val); 01669 if (!state) 01670 return; 01671 01672 // Check that the second string is non-null. 01673 const Expr *s2 = CE->getArg(1); 01674 SVal s2Val = state->getSVal(s2, LCtx); 01675 state = checkNonNull(C, state, s2, s2Val); 01676 if (!state) 01677 return; 01678 01679 // Get the string length of the first string or give up. 01680 SVal s1Length = getCStringLength(C, state, s1, s1Val); 01681 if (s1Length.isUndef()) 01682 return; 01683 01684 // Get the string length of the second string or give up. 01685 SVal s2Length = getCStringLength(C, state, s2, s2Val); 01686 if (s2Length.isUndef()) 01687 return; 01688 01689 // If we know the two buffers are the same, we know the result is 0. 01690 // First, get the two buffers' addresses. Another checker will have already 01691 // made sure they're not undefined. 01692 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val); 01693 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val); 01694 01695 // See if they are the same. 01696 SValBuilder &svalBuilder = C.getSValBuilder(); 01697 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 01698 ProgramStateRef StSameBuf, StNotSameBuf; 01699 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 01700 01701 // If the two arguments might be the same buffer, we know the result is 0, 01702 // and we only need to check one size. 01703 if (StSameBuf) { 01704 StSameBuf = StSameBuf->BindExpr(CE, LCtx, 01705 svalBuilder.makeZeroVal(CE->getType())); 01706 C.addTransition(StSameBuf); 01707 01708 // If the two arguments are GUARANTEED to be the same, we're done! 01709 if (!StNotSameBuf) 01710 return; 01711 } 01712 01713 assert(StNotSameBuf); 01714 state = StNotSameBuf; 01715 01716 // At this point we can go about comparing the two buffers. 01717 // For now, we only do this if they're both known string literals. 01718 01719 // Attempt to extract string literals from both expressions. 01720 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 01721 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 01722 bool canComputeResult = false; 01723 01724 if (s1StrLiteral && s2StrLiteral) { 01725 StringRef s1StrRef = s1StrLiteral->getString(); 01726 StringRef s2StrRef = s2StrLiteral->getString(); 01727 01728 if (isBounded) { 01729 // Get the max number of characters to compare. 01730 const Expr *lenExpr = CE->getArg(2); 01731 SVal lenVal = state->getSVal(lenExpr, LCtx); 01732 01733 // If the length is known, we can get the right substrings. 01734 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { 01735 // Create substrings of each to compare the prefix. 01736 s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); 01737 s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); 01738 canComputeResult = true; 01739 } 01740 } else { 01741 // This is a normal, unbounded strcmp. 01742 canComputeResult = true; 01743 } 01744 01745 if (canComputeResult) { 01746 // Real strcmp stops at null characters. 01747 size_t s1Term = s1StrRef.find('\0'); 01748 if (s1Term != StringRef::npos) 01749 s1StrRef = s1StrRef.substr(0, s1Term); 01750 01751 size_t s2Term = s2StrRef.find('\0'); 01752 if (s2Term != StringRef::npos) 01753 s2StrRef = s2StrRef.substr(0, s2Term); 01754 01755 // Use StringRef's comparison methods to compute the actual result. 01756 int result; 01757 01758 if (ignoreCase) { 01759 // Compare string 1 to string 2 the same way strcasecmp() does. 01760 result = s1StrRef.compare_lower(s2StrRef); 01761 } else { 01762 // Compare string 1 to string 2 the same way strcmp() does. 01763 result = s1StrRef.compare(s2StrRef); 01764 } 01765 01766 // Build the SVal of the comparison and bind the return value. 01767 SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); 01768 state = state->BindExpr(CE, LCtx, resultVal); 01769 } 01770 } 01771 01772 if (!canComputeResult) { 01773 // Conjure a symbolic value. It's the best we can do. 01774 unsigned Count = C.getCurrentBlockCount(); 01775 SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count); 01776 state = state->BindExpr(CE, LCtx, resultVal); 01777 } 01778 01779 // Record this as a possible path. 01780 C.addTransition(state); 01781 } 01782 01783 //===----------------------------------------------------------------------===// 01784 // The driver method, and other Checker callbacks. 01785 //===----------------------------------------------------------------------===// 01786 01787 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 01788 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 01789 01790 if (!FDecl) 01791 return false; 01792 01793 FnCheck evalFunction = 0; 01794 if (C.isCLibraryFunction(FDecl, "memcpy")) 01795 evalFunction = &CStringChecker::evalMemcpy; 01796 else if (C.isCLibraryFunction(FDecl, "mempcpy")) 01797 evalFunction = &CStringChecker::evalMempcpy; 01798 else if (C.isCLibraryFunction(FDecl, "memcmp")) 01799 evalFunction = &CStringChecker::evalMemcmp; 01800 else if (C.isCLibraryFunction(FDecl, "memmove")) 01801 evalFunction = &CStringChecker::evalMemmove; 01802 else if (C.isCLibraryFunction(FDecl, "strcpy")) 01803 evalFunction = &CStringChecker::evalStrcpy; 01804 else if (C.isCLibraryFunction(FDecl, "strncpy")) 01805 evalFunction = &CStringChecker::evalStrncpy; 01806 else if (C.isCLibraryFunction(FDecl, "stpcpy")) 01807 evalFunction = &CStringChecker::evalStpcpy; 01808 else if (C.isCLibraryFunction(FDecl, "strcat")) 01809 evalFunction = &CStringChecker::evalStrcat; 01810 else if (C.isCLibraryFunction(FDecl, "strncat")) 01811 evalFunction = &CStringChecker::evalStrncat; 01812 else if (C.isCLibraryFunction(FDecl, "strlen")) 01813 evalFunction = &CStringChecker::evalstrLength; 01814 else if (C.isCLibraryFunction(FDecl, "strnlen")) 01815 evalFunction = &CStringChecker::evalstrnLength; 01816 else if (C.isCLibraryFunction(FDecl, "strcmp")) 01817 evalFunction = &CStringChecker::evalStrcmp; 01818 else if (C.isCLibraryFunction(FDecl, "strncmp")) 01819 evalFunction = &CStringChecker::evalStrncmp; 01820 else if (C.isCLibraryFunction(FDecl, "strcasecmp")) 01821 evalFunction = &CStringChecker::evalStrcasecmp; 01822 else if (C.isCLibraryFunction(FDecl, "strncasecmp")) 01823 evalFunction = &CStringChecker::evalStrncasecmp; 01824 else if (C.isCLibraryFunction(FDecl, "bcopy")) 01825 evalFunction = &CStringChecker::evalBcopy; 01826 else if (C.isCLibraryFunction(FDecl, "bcmp")) 01827 evalFunction = &CStringChecker::evalMemcmp; 01828 01829 // If the callee isn't a string function, let another checker handle it. 01830 if (!evalFunction) 01831 return false; 01832 01833 // Make sure each function sets its own description. 01834 // (But don't bother in a release build.) 01835 assert(!(CurrentFunctionDescription = NULL)); 01836 01837 // Check and evaluate the call. 01838 (this->*evalFunction)(C, CE); 01839 01840 // If the evaluate call resulted in no change, chain to the next eval call 01841 // handler. 01842 // Note, the custom CString evaluation calls assume that basic safety 01843 // properties are held. However, if the user chooses to turn off some of these 01844 // checks, we ignore the issues and leave the call evaluation to a generic 01845 // handler. 01846 if (!C.isDifferent()) 01847 return false; 01848 01849 return true; 01850 } 01851 01852 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 01853 // Record string length for char a[] = "abc"; 01854 ProgramStateRef state = C.getState(); 01855 01856 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 01857 I != E; ++I) { 01858 const VarDecl *D = dyn_cast<VarDecl>(*I); 01859 if (!D) 01860 continue; 01861 01862 // FIXME: Handle array fields of structs. 01863 if (!D->getType()->isArrayType()) 01864 continue; 01865 01866 const Expr *Init = D->getInit(); 01867 if (!Init) 01868 continue; 01869 if (!isa<StringLiteral>(Init)) 01870 continue; 01871 01872 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 01873 const MemRegion *MR = VarLoc.getAsRegion(); 01874 if (!MR) 01875 continue; 01876 01877 SVal StrVal = state->getSVal(Init, C.getLocationContext()); 01878 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 01879 DefinedOrUnknownSVal strLength 01880 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 01881 01882 state = state->set<CStringLength>(MR, strLength); 01883 } 01884 01885 C.addTransition(state); 01886 } 01887 01888 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const { 01889 CStringLength::EntryMap Entries = state->get<CStringLength>(); 01890 return !Entries.isEmpty(); 01891 } 01892 01893 ProgramStateRef 01894 CStringChecker::checkRegionChanges(ProgramStateRef state, 01895 const StoreManager::InvalidatedSymbols *, 01896 ArrayRef<const MemRegion *> ExplicitRegions, 01897 ArrayRef<const MemRegion *> Regions, 01898 const CallOrObjCMessage *Call) const { 01899 CStringLength::EntryMap Entries = state->get<CStringLength>(); 01900 if (Entries.isEmpty()) 01901 return state; 01902 01903 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 01904 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 01905 01906 // First build sets for the changed regions and their super-regions. 01907 for (ArrayRef<const MemRegion *>::iterator 01908 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 01909 const MemRegion *MR = *I; 01910 Invalidated.insert(MR); 01911 01912 SuperRegions.insert(MR); 01913 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 01914 MR = SR->getSuperRegion(); 01915 SuperRegions.insert(MR); 01916 } 01917 } 01918 01919 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 01920 01921 // Then loop over the entries in the current state. 01922 for (CStringLength::EntryMap::iterator I = Entries.begin(), 01923 E = Entries.end(); I != E; ++I) { 01924 const MemRegion *MR = I.getKey(); 01925 01926 // Is this entry for a super-region of a changed region? 01927 if (SuperRegions.count(MR)) { 01928 Entries = F.remove(Entries, MR); 01929 continue; 01930 } 01931 01932 // Is this entry for a sub-region of a changed region? 01933 const MemRegion *Super = MR; 01934 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 01935 Super = SR->getSuperRegion(); 01936 if (Invalidated.count(Super)) { 01937 Entries = F.remove(Entries, MR); 01938 break; 01939 } 01940 } 01941 } 01942 01943 return state->set<CStringLength>(Entries); 01944 } 01945 01946 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 01947 SymbolReaper &SR) const { 01948 // Mark all symbols in our string length map as valid. 01949 CStringLength::EntryMap Entries = state->get<CStringLength>(); 01950 01951 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 01952 I != E; ++I) { 01953 SVal Len = I.getData(); 01954 01955 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 01956 se = Len.symbol_end(); si != se; ++si) 01957 SR.markInUse(*si); 01958 } 01959 } 01960 01961 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 01962 CheckerContext &C) const { 01963 if (!SR.hasDeadSymbols()) 01964 return; 01965 01966 ProgramStateRef state = C.getState(); 01967 CStringLength::EntryMap Entries = state->get<CStringLength>(); 01968 if (Entries.isEmpty()) 01969 return; 01970 01971 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 01972 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 01973 I != E; ++I) { 01974 SVal Len = I.getData(); 01975 if (SymbolRef Sym = Len.getAsSymbol()) { 01976 if (SR.isDead(Sym)) 01977 Entries = F.remove(Entries, I.getKey()); 01978 } 01979 } 01980 01981 state = state->set<CStringLength>(Entries); 01982 C.addTransition(state); 01983 } 01984 01985 #define REGISTER_CHECKER(name) \ 01986 void ento::register##name(CheckerManager &mgr) {\ 01987 static CStringChecker *TheChecker = 0; \ 01988 if (TheChecker == 0) \ 01989 TheChecker = mgr.registerChecker<CStringChecker>(); \ 01990 TheChecker->Filter.Check##name = true; \ 01991 } 01992 01993 REGISTER_CHECKER(CStringNullArg) 01994 REGISTER_CHECKER(CStringOutOfBounds) 01995 REGISTER_CHECKER(CStringBufferOverlap) 01996 REGISTER_CHECKER(CStringNotNullTerm) 01997 01998 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) { 01999 registerCStringNullArg(Mgr); 02000 }