clang API Documentation

CStringChecker.cpp
Go to the documentation of this file.
00001 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This defines CStringChecker, which is an assortment of checks on calls
00011 // to functions in <string.h>.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "ClangSACheckers.h"
00016 #include "InterCheckerAPI.h"
00017 #include "clang/StaticAnalyzer/Core/Checker.h"
00018 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
00019 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
00020 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
00021 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
00022 #include "llvm/ADT/SmallString.h"
00023 #include "llvm/ADT/STLExtras.h"
00024 #include "llvm/ADT/StringSwitch.h"
00025 
00026 using namespace clang;
00027 using namespace ento;
00028 
00029 namespace {
00030 class CStringChecker : public Checker< eval::Call,
00031                                          check::PreStmt<DeclStmt>,
00032                                          check::LiveSymbols,
00033                                          check::DeadSymbols,
00034                                          check::RegionChanges
00035                                          > {
00036   mutable OwningPtr<BugType> BT_Null,
00037                              BT_Bounds,
00038                              BT_Overlap,
00039                              BT_NotCString,
00040                              BT_AdditionOverflow;
00041 
00042   mutable const char *CurrentFunctionDescription;
00043 
00044 public:
00045   /// The filter is used to filter out the diagnostics which are not enabled by
00046   /// the user.
00047   struct CStringChecksFilter {
00048     DefaultBool CheckCStringNullArg;
00049     DefaultBool CheckCStringOutOfBounds;
00050     DefaultBool CheckCStringBufferOverlap;
00051     DefaultBool CheckCStringNotNullTerm;
00052   };
00053 
00054   CStringChecksFilter Filter;
00055 
00056   static void *getTag() { static int tag; return &tag; }
00057 
00058   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
00059   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
00060   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
00061   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
00062   bool wantsRegionChangeUpdate(ProgramStateRef state) const;
00063 
00064   ProgramStateRef 
00065     checkRegionChanges(ProgramStateRef state,
00066                        const StoreManager::InvalidatedSymbols *,
00067                        ArrayRef<const MemRegion *> ExplicitRegions,
00068                        ArrayRef<const MemRegion *> Regions,
00069                        const CallOrObjCMessage *Call) const;
00070 
00071   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
00072                                           const CallExpr *) const;
00073 
00074   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
00075   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
00076   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
00077   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
00078   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
00079                       ProgramStateRef state,
00080                       const Expr *Size,
00081                       const Expr *Source,
00082                       const Expr *Dest,
00083                       bool Restricted = false,
00084                       bool IsMempcpy = false) const;
00085 
00086   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
00087 
00088   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
00089   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
00090   void evalstrLengthCommon(CheckerContext &C,
00091                            const CallExpr *CE, 
00092                            bool IsStrnlen = false) const;
00093 
00094   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
00095   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
00096   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
00097   void evalStrcpyCommon(CheckerContext &C,
00098                         const CallExpr *CE,
00099                         bool returnEnd,
00100                         bool isBounded,
00101                         bool isAppending) const;
00102 
00103   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
00104   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
00105 
00106   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
00107   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
00108   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
00109   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
00110   void evalStrcmpCommon(CheckerContext &C,
00111                         const CallExpr *CE,
00112                         bool isBounded = false,
00113                         bool ignoreCase = false) const;
00114 
00115   // Utility methods
00116   std::pair<ProgramStateRef , ProgramStateRef >
00117   static assumeZero(CheckerContext &C,
00118                     ProgramStateRef state, SVal V, QualType Ty);
00119 
00120   static ProgramStateRef setCStringLength(ProgramStateRef state,
00121                                               const MemRegion *MR,
00122                                               SVal strLength);
00123   static SVal getCStringLengthForRegion(CheckerContext &C,
00124                                         ProgramStateRef &state,
00125                                         const Expr *Ex,
00126                                         const MemRegion *MR,
00127                                         bool hypothetical);
00128   SVal getCStringLength(CheckerContext &C,
00129                         ProgramStateRef &state,
00130                         const Expr *Ex,
00131                         SVal Buf,
00132                         bool hypothetical = false) const;
00133 
00134   const StringLiteral *getCStringLiteral(CheckerContext &C, 
00135                                          ProgramStateRef &state,
00136                                          const Expr *expr,  
00137                                          SVal val) const;
00138 
00139   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
00140                                               ProgramStateRef state,
00141                                               const Expr *Ex, SVal V);
00142 
00143   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
00144                               const MemRegion *MR);
00145 
00146   // Re-usable checks
00147   ProgramStateRef checkNonNull(CheckerContext &C,
00148                                    ProgramStateRef state,
00149                                    const Expr *S,
00150                                    SVal l) const;
00151   ProgramStateRef CheckLocation(CheckerContext &C,
00152                                     ProgramStateRef state,
00153                                     const Expr *S,
00154                                     SVal l,
00155                                     const char *message = NULL) const;
00156   ProgramStateRef CheckBufferAccess(CheckerContext &C,
00157                                         ProgramStateRef state,
00158                                         const Expr *Size,
00159                                         const Expr *FirstBuf,
00160                                         const Expr *SecondBuf,
00161                                         const char *firstMessage = NULL,
00162                                         const char *secondMessage = NULL,
00163                                         bool WarnAboutSize = false) const;
00164 
00165   ProgramStateRef CheckBufferAccess(CheckerContext &C,
00166                                         ProgramStateRef state,
00167                                         const Expr *Size,
00168                                         const Expr *Buf,
00169                                         const char *message = NULL,
00170                                         bool WarnAboutSize = false) const {
00171     // This is a convenience override.
00172     return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
00173                              WarnAboutSize);
00174   }
00175   ProgramStateRef CheckOverlap(CheckerContext &C,
00176                                    ProgramStateRef state,
00177                                    const Expr *Size,
00178                                    const Expr *First,
00179                                    const Expr *Second) const;
00180   void emitOverlapBug(CheckerContext &C,
00181                       ProgramStateRef state,
00182                       const Stmt *First,
00183                       const Stmt *Second) const;
00184 
00185   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
00186                                             ProgramStateRef state,
00187                                             NonLoc left,
00188                                             NonLoc right) const;
00189 };
00190 
00191 class CStringLength {
00192 public:
00193   typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
00194 };
00195 } //end anonymous namespace
00196 
00197 namespace clang {
00198 namespace ento {
00199   template <>
00200   struct ProgramStateTrait<CStringLength> 
00201     : public ProgramStatePartialTrait<CStringLength::EntryMap> {
00202     static void *GDMIndex() { return CStringChecker::getTag(); }
00203   };
00204 }
00205 }
00206 
00207 //===----------------------------------------------------------------------===//
00208 // Individual checks and utility methods.
00209 //===----------------------------------------------------------------------===//
00210 
00211 std::pair<ProgramStateRef , ProgramStateRef >
00212 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
00213                            QualType Ty) {
00214   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
00215   if (!val)
00216     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
00217 
00218   SValBuilder &svalBuilder = C.getSValBuilder();
00219   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
00220   return state->assume(svalBuilder.evalEQ(state, *val, zero));
00221 }
00222 
00223 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
00224                                             ProgramStateRef state,
00225                                             const Expr *S, SVal l) const {
00226   // If a previous check has failed, propagate the failure.
00227   if (!state)
00228     return NULL;
00229 
00230   ProgramStateRef stateNull, stateNonNull;
00231   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
00232 
00233   if (stateNull && !stateNonNull) {
00234     if (!Filter.CheckCStringNullArg)
00235       return NULL;
00236 
00237     ExplodedNode *N = C.generateSink(stateNull);
00238     if (!N)
00239       return NULL;
00240 
00241     if (!BT_Null)
00242       BT_Null.reset(new BuiltinBug("Unix API",
00243         "Null pointer argument in call to byte string function"));
00244 
00245     SmallString<80> buf;
00246     llvm::raw_svector_ostream os(buf);
00247     assert(CurrentFunctionDescription);
00248     os << "Null pointer argument in call to " << CurrentFunctionDescription;
00249 
00250     // Generate a report for this bug.
00251     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
00252     BugReport *report = new BugReport(*BT, os.str(), N);
00253 
00254     report->addRange(S->getSourceRange());
00255     report->addVisitor(bugreporter::getTrackNullOrUndefValueVisitor(N, S,
00256                                                                     report));
00257     C.EmitReport(report);
00258     return NULL;
00259   }
00260 
00261   // From here on, assume that the value is non-null.
00262   assert(stateNonNull);
00263   return stateNonNull;
00264 }
00265 
00266 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
00267 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
00268                                              ProgramStateRef state,
00269                                              const Expr *S, SVal l,
00270                                              const char *warningMsg) const {
00271   // If a previous check has failed, propagate the failure.
00272   if (!state)
00273     return NULL;
00274 
00275   // Check for out of bound array element access.
00276   const MemRegion *R = l.getAsRegion();
00277   if (!R)
00278     return state;
00279 
00280   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
00281   if (!ER)
00282     return state;
00283 
00284   assert(ER->getValueType() == C.getASTContext().CharTy &&
00285     "CheckLocation should only be called with char* ElementRegions");
00286 
00287   // Get the size of the array.
00288   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
00289   SValBuilder &svalBuilder = C.getSValBuilder();
00290   SVal Extent = 
00291     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
00292   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
00293 
00294   // Get the index of the accessed element.
00295   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
00296 
00297   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
00298   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
00299   if (StOutBound && !StInBound) {
00300     ExplodedNode *N = C.generateSink(StOutBound);
00301     if (!N)
00302       return NULL;
00303 
00304     if (!BT_Bounds) {
00305       BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
00306         "Byte string function accesses out-of-bound array element"));
00307     }
00308     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
00309 
00310     // Generate a report for this bug.
00311     BugReport *report;
00312     if (warningMsg) {
00313       report = new BugReport(*BT, warningMsg, N);
00314     } else {
00315       assert(CurrentFunctionDescription);
00316       assert(CurrentFunctionDescription[0] != '\0');
00317 
00318       SmallString<80> buf;
00319       llvm::raw_svector_ostream os(buf);
00320       os << (char)toupper(CurrentFunctionDescription[0])
00321          << &CurrentFunctionDescription[1]
00322          << " accesses out-of-bound array element";
00323       report = new BugReport(*BT, os.str(), N);      
00324     }
00325 
00326     // FIXME: It would be nice to eventually make this diagnostic more clear,
00327     // e.g., by referencing the original declaration or by saying *why* this
00328     // reference is outside the range.
00329 
00330     report->addRange(S->getSourceRange());
00331     C.EmitReport(report);
00332     return NULL;
00333   }
00334   
00335   // Array bound check succeeded.  From this point forward the array bound
00336   // should always succeed.
00337   return StInBound;
00338 }
00339 
00340 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
00341                                                  ProgramStateRef state,
00342                                                  const Expr *Size,
00343                                                  const Expr *FirstBuf,
00344                                                  const Expr *SecondBuf,
00345                                                  const char *firstMessage,
00346                                                  const char *secondMessage,
00347                                                  bool WarnAboutSize) const {
00348   // If a previous check has failed, propagate the failure.
00349   if (!state)
00350     return NULL;
00351 
00352   SValBuilder &svalBuilder = C.getSValBuilder();
00353   ASTContext &Ctx = svalBuilder.getContext();
00354   const LocationContext *LCtx = C.getLocationContext();
00355 
00356   QualType sizeTy = Size->getType();
00357   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
00358 
00359   // Check that the first buffer is non-null.
00360   SVal BufVal = state->getSVal(FirstBuf, LCtx);
00361   state = checkNonNull(C, state, FirstBuf, BufVal);
00362   if (!state)
00363     return NULL;
00364 
00365   // If out-of-bounds checking is turned off, skip the rest.
00366   if (!Filter.CheckCStringOutOfBounds)
00367     return state;
00368 
00369   // Get the access length and make sure it is known.
00370   // FIXME: This assumes the caller has already checked that the access length
00371   // is positive. And that it's unsigned.
00372   SVal LengthVal = state->getSVal(Size, LCtx);
00373   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
00374   if (!Length)
00375     return state;
00376 
00377   // Compute the offset of the last element to be accessed: size-1.
00378   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
00379   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
00380                                                     *Length, One, sizeTy));
00381 
00382   // Check that the first buffer is sufficiently long.
00383   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
00384   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
00385     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
00386 
00387     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
00388                                           LastOffset, PtrTy);
00389     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
00390 
00391     // If the buffer isn't large enough, abort.
00392     if (!state)
00393       return NULL;
00394   }
00395 
00396   // If there's a second buffer, check it as well.
00397   if (SecondBuf) {
00398     BufVal = state->getSVal(SecondBuf, LCtx);
00399     state = checkNonNull(C, state, SecondBuf, BufVal);
00400     if (!state)
00401       return NULL;
00402 
00403     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
00404     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
00405       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
00406 
00407       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
00408                                             LastOffset, PtrTy);
00409       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
00410     }
00411   }
00412 
00413   // Large enough or not, return this state!
00414   return state;
00415 }
00416 
00417 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
00418                                             ProgramStateRef state,
00419                                             const Expr *Size,
00420                                             const Expr *First,
00421                                             const Expr *Second) const {
00422   if (!Filter.CheckCStringBufferOverlap)
00423     return state;
00424 
00425   // Do a simple check for overlap: if the two arguments are from the same
00426   // buffer, see if the end of the first is greater than the start of the second
00427   // or vice versa.
00428 
00429   // If a previous check has failed, propagate the failure.
00430   if (!state)
00431     return NULL;
00432 
00433   ProgramStateRef stateTrue, stateFalse;
00434 
00435   // Get the buffer values and make sure they're known locations.
00436   const LocationContext *LCtx = C.getLocationContext();
00437   SVal firstVal = state->getSVal(First, LCtx);
00438   SVal secondVal = state->getSVal(Second, LCtx);
00439 
00440   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
00441   if (!firstLoc)
00442     return state;
00443 
00444   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
00445   if (!secondLoc)
00446     return state;
00447 
00448   // Are the two values the same?
00449   SValBuilder &svalBuilder = C.getSValBuilder();  
00450   llvm::tie(stateTrue, stateFalse) =
00451     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
00452 
00453   if (stateTrue && !stateFalse) {
00454     // If the values are known to be equal, that's automatically an overlap.
00455     emitOverlapBug(C, stateTrue, First, Second);
00456     return NULL;
00457   }
00458 
00459   // assume the two expressions are not equal.
00460   assert(stateFalse);
00461   state = stateFalse;
00462 
00463   // Which value comes first?
00464   QualType cmpTy = svalBuilder.getConditionType();
00465   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
00466                                          *firstLoc, *secondLoc, cmpTy);
00467   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
00468   if (!reverseTest)
00469     return state;
00470 
00471   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
00472   if (stateTrue) {
00473     if (stateFalse) {
00474       // If we don't know which one comes first, we can't perform this test.
00475       return state;
00476     } else {
00477       // Switch the values so that firstVal is before secondVal.
00478       Loc *tmpLoc = firstLoc;
00479       firstLoc = secondLoc;
00480       secondLoc = tmpLoc;
00481 
00482       // Switch the Exprs as well, so that they still correspond.
00483       const Expr *tmpExpr = First;
00484       First = Second;
00485       Second = tmpExpr;
00486     }
00487   }
00488 
00489   // Get the length, and make sure it too is known.
00490   SVal LengthVal = state->getSVal(Size, LCtx);
00491   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
00492   if (!Length)
00493     return state;
00494 
00495   // Convert the first buffer's start address to char*.
00496   // Bail out if the cast fails.
00497   ASTContext &Ctx = svalBuilder.getContext();
00498   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
00499   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 
00500                                          First->getType());
00501   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
00502   if (!FirstStartLoc)
00503     return state;
00504 
00505   // Compute the end of the first buffer. Bail out if THAT fails.
00506   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
00507                                  *FirstStartLoc, *Length, CharPtrTy);
00508   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
00509   if (!FirstEndLoc)
00510     return state;
00511 
00512   // Is the end of the first buffer past the start of the second buffer?
00513   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
00514                                 *FirstEndLoc, *secondLoc, cmpTy);
00515   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
00516   if (!OverlapTest)
00517     return state;
00518 
00519   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
00520 
00521   if (stateTrue && !stateFalse) {
00522     // Overlap!
00523     emitOverlapBug(C, stateTrue, First, Second);
00524     return NULL;
00525   }
00526 
00527   // assume the two expressions don't overlap.
00528   assert(stateFalse);
00529   return stateFalse;
00530 }
00531 
00532 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
00533                                   const Stmt *First, const Stmt *Second) const {
00534   ExplodedNode *N = C.generateSink(state);
00535   if (!N)
00536     return;
00537 
00538   if (!BT_Overlap)
00539     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
00540 
00541   // Generate a report for this bug.
00542   BugReport *report = 
00543     new BugReport(*BT_Overlap,
00544       "Arguments must not be overlapping buffers", N);
00545   report->addRange(First->getSourceRange());
00546   report->addRange(Second->getSourceRange());
00547 
00548   C.EmitReport(report);
00549 }
00550 
00551 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
00552                                                      ProgramStateRef state,
00553                                                      NonLoc left,
00554                                                      NonLoc right) const {
00555   // If out-of-bounds checking is turned off, skip the rest.
00556   if (!Filter.CheckCStringOutOfBounds)
00557     return state;
00558 
00559   // If a previous check has failed, propagate the failure.
00560   if (!state)
00561     return NULL;
00562 
00563   SValBuilder &svalBuilder = C.getSValBuilder();
00564   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
00565 
00566   QualType sizeTy = svalBuilder.getContext().getSizeType();
00567   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
00568   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
00569 
00570   SVal maxMinusRight;
00571   if (isa<nonloc::ConcreteInt>(right)) {
00572     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
00573                                                  sizeTy);
00574   } else {
00575     // Try switching the operands. (The order of these two assignments is
00576     // important!)
00577     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 
00578                                             sizeTy);
00579     left = right;
00580   }
00581 
00582   if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
00583     QualType cmpTy = svalBuilder.getConditionType();
00584     // If left > max - right, we have an overflow.
00585     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
00586                                                 *maxMinusRightNL, cmpTy);
00587 
00588     ProgramStateRef stateOverflow, stateOkay;
00589     llvm::tie(stateOverflow, stateOkay) =
00590       state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
00591 
00592     if (stateOverflow && !stateOkay) {
00593       // We have an overflow. Emit a bug report.
00594       ExplodedNode *N = C.generateSink(stateOverflow);
00595       if (!N)
00596         return NULL;
00597 
00598       if (!BT_AdditionOverflow)
00599         BT_AdditionOverflow.reset(new BuiltinBug("API",
00600           "Sum of expressions causes overflow"));
00601 
00602       // This isn't a great error message, but this should never occur in real
00603       // code anyway -- you'd have to create a buffer longer than a size_t can
00604       // represent, which is sort of a contradiction.
00605       const char *warning =
00606         "This expression will create a string whose length is too big to "
00607         "be represented as a size_t";
00608 
00609       // Generate a report for this bug.
00610       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
00611       C.EmitReport(report);        
00612 
00613       return NULL;
00614     }
00615 
00616     // From now on, assume an overflow didn't occur.
00617     assert(stateOkay);
00618     state = stateOkay;
00619   }
00620 
00621   return state;
00622 }
00623 
00624 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
00625                                                 const MemRegion *MR,
00626                                                 SVal strLength) {
00627   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
00628 
00629   MR = MR->StripCasts();
00630 
00631   switch (MR->getKind()) {
00632   case MemRegion::StringRegionKind:
00633     // FIXME: This can happen if we strcpy() into a string region. This is
00634     // undefined [C99 6.4.5p6], but we should still warn about it.
00635     return state;
00636 
00637   case MemRegion::SymbolicRegionKind:
00638   case MemRegion::AllocaRegionKind:
00639   case MemRegion::VarRegionKind:
00640   case MemRegion::FieldRegionKind:
00641   case MemRegion::ObjCIvarRegionKind:
00642     // These are the types we can currently track string lengths for.
00643     break;
00644 
00645   case MemRegion::ElementRegionKind:
00646     // FIXME: Handle element regions by upper-bounding the parent region's
00647     // string length.
00648     return state;
00649 
00650   default:
00651     // Other regions (mostly non-data) can't have a reliable C string length.
00652     // For now, just ignore the change.
00653     // FIXME: These are rare but not impossible. We should output some kind of
00654     // warning for things like strcpy((char[]){'a', 0}, "b");
00655     return state;
00656   }
00657 
00658   if (strLength.isUnknown())
00659     return state->remove<CStringLength>(MR);
00660 
00661   return state->set<CStringLength>(MR, strLength);
00662 }
00663 
00664 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
00665                                                ProgramStateRef &state,
00666                                                const Expr *Ex,
00667                                                const MemRegion *MR,
00668                                                bool hypothetical) {
00669   if (!hypothetical) {
00670     // If there's a recorded length, go ahead and return it.
00671     const SVal *Recorded = state->get<CStringLength>(MR);
00672     if (Recorded)
00673       return *Recorded;
00674   }
00675   
00676   // Otherwise, get a new symbol and update the state.
00677   unsigned Count = C.getCurrentBlockCount();
00678   SValBuilder &svalBuilder = C.getSValBuilder();
00679   QualType sizeTy = svalBuilder.getContext().getSizeType();
00680   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
00681                                                     MR, Ex, sizeTy, Count);
00682 
00683   if (!hypothetical)
00684     state = state->set<CStringLength>(MR, strLength);
00685 
00686   return strLength;
00687 }
00688 
00689 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
00690                                       const Expr *Ex, SVal Buf,
00691                                       bool hypothetical) const {
00692   const MemRegion *MR = Buf.getAsRegion();
00693   if (!MR) {
00694     // If we can't get a region, see if it's something we /know/ isn't a
00695     // C string. In the context of locations, the only time we can issue such
00696     // a warning is for labels.
00697     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
00698       if (!Filter.CheckCStringNotNullTerm)
00699         return UndefinedVal();
00700 
00701       if (ExplodedNode *N = C.addTransition(state)) {
00702         if (!BT_NotCString)
00703           BT_NotCString.reset(new BuiltinBug("Unix API",
00704             "Argument is not a null-terminated string."));
00705 
00706         SmallString<120> buf;
00707         llvm::raw_svector_ostream os(buf);
00708         assert(CurrentFunctionDescription);
00709         os << "Argument to " << CurrentFunctionDescription
00710            << " is the address of the label '" << Label->getLabel()->getName()
00711            << "', which is not a null-terminated string";
00712 
00713         // Generate a report for this bug.
00714         BugReport *report = new BugReport(*BT_NotCString,
00715                                                           os.str(), N);
00716 
00717         report->addRange(Ex->getSourceRange());
00718         C.EmitReport(report);        
00719       }
00720       return UndefinedVal();
00721 
00722     }
00723 
00724     // If it's not a region and not a label, give up.
00725     return UnknownVal();
00726   }
00727 
00728   // If we have a region, strip casts from it and see if we can figure out
00729   // its length. For anything we can't figure out, just return UnknownVal.
00730   MR = MR->StripCasts();
00731 
00732   switch (MR->getKind()) {
00733   case MemRegion::StringRegionKind: {
00734     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
00735     // so we can assume that the byte length is the correct C string length.
00736     SValBuilder &svalBuilder = C.getSValBuilder();
00737     QualType sizeTy = svalBuilder.getContext().getSizeType();
00738     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
00739     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
00740   }
00741   case MemRegion::SymbolicRegionKind:
00742   case MemRegion::AllocaRegionKind:
00743   case MemRegion::VarRegionKind:
00744   case MemRegion::FieldRegionKind:
00745   case MemRegion::ObjCIvarRegionKind:
00746     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
00747   case MemRegion::CompoundLiteralRegionKind:
00748     // FIXME: Can we track this? Is it necessary?
00749     return UnknownVal();
00750   case MemRegion::ElementRegionKind:
00751     // FIXME: How can we handle this? It's not good enough to subtract the
00752     // offset from the base string length; consider "123\x00567" and &a[5].
00753     return UnknownVal();
00754   default:
00755     // Other regions (mostly non-data) can't have a reliable C string length.
00756     // In this case, an error is emitted and UndefinedVal is returned.
00757     // The caller should always be prepared to handle this case.
00758     if (!Filter.CheckCStringNotNullTerm)
00759       return UndefinedVal();
00760 
00761     if (ExplodedNode *N = C.addTransition(state)) {
00762       if (!BT_NotCString)
00763         BT_NotCString.reset(new BuiltinBug("Unix API",
00764           "Argument is not a null-terminated string."));
00765 
00766       SmallString<120> buf;
00767       llvm::raw_svector_ostream os(buf);
00768 
00769       assert(CurrentFunctionDescription);
00770       os << "Argument to " << CurrentFunctionDescription << " is ";
00771 
00772       if (SummarizeRegion(os, C.getASTContext(), MR))
00773         os << ", which is not a null-terminated string";
00774       else
00775         os << "not a null-terminated string";
00776 
00777       // Generate a report for this bug.
00778       BugReport *report = new BugReport(*BT_NotCString,
00779                                                         os.str(), N);
00780 
00781       report->addRange(Ex->getSourceRange());
00782       C.EmitReport(report);        
00783     }
00784 
00785     return UndefinedVal();
00786   }
00787 }
00788 
00789 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
00790   ProgramStateRef &state, const Expr *expr, SVal val) const {
00791 
00792   // Get the memory region pointed to by the val.
00793   const MemRegion *bufRegion = val.getAsRegion();
00794   if (!bufRegion)
00795     return NULL; 
00796 
00797   // Strip casts off the memory region.
00798   bufRegion = bufRegion->StripCasts();
00799 
00800   // Cast the memory region to a string region.
00801   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
00802   if (!strRegion)
00803     return NULL; 
00804 
00805   // Return the actual string in the string region.
00806   return strRegion->getStringLiteral();
00807 }
00808 
00809 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
00810                                                 ProgramStateRef state,
00811                                                 const Expr *E, SVal V) {
00812   Loc *L = dyn_cast<Loc>(&V);
00813   if (!L)
00814     return state;
00815 
00816   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
00817   // some assumptions about the value that CFRefCount can't. Even so, it should
00818   // probably be refactored.
00819   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
00820     const MemRegion *R = MR->getRegion()->StripCasts();
00821 
00822     // Are we dealing with an ElementRegion?  If so, we should be invalidating
00823     // the super-region.
00824     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
00825       R = ER->getSuperRegion();
00826       // FIXME: What about layers of ElementRegions?
00827     }
00828 
00829     // Invalidate this region.
00830     unsigned Count = C.getCurrentBlockCount();
00831     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
00832     return state->invalidateRegions(R, E, Count, LCtx);
00833   }
00834 
00835   // If we have a non-region value by chance, just remove the binding.
00836   // FIXME: is this necessary or correct? This handles the non-Region
00837   //  cases.  Is it ever valid to store to these?
00838   return state->unbindLoc(*L);
00839 }
00840 
00841 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
00842                                      const MemRegion *MR) {
00843   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
00844 
00845   switch (MR->getKind()) {
00846   case MemRegion::FunctionTextRegionKind: {
00847     const FunctionDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
00848     if (FD)
00849       os << "the address of the function '" << *FD << '\'';
00850     else
00851       os << "the address of a function";
00852     return true;
00853   }
00854   case MemRegion::BlockTextRegionKind:
00855     os << "block text";
00856     return true;
00857   case MemRegion::BlockDataRegionKind:
00858     os << "a block";
00859     return true;
00860   case MemRegion::CXXThisRegionKind:
00861   case MemRegion::CXXTempObjectRegionKind:
00862     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
00863     return true;
00864   case MemRegion::VarRegionKind:
00865     os << "a variable of type" << TVR->getValueType().getAsString();
00866     return true;
00867   case MemRegion::FieldRegionKind:
00868     os << "a field of type " << TVR->getValueType().getAsString();
00869     return true;
00870   case MemRegion::ObjCIvarRegionKind:
00871     os << "an instance variable of type " << TVR->getValueType().getAsString();
00872     return true;
00873   default:
00874     return false;
00875   }
00876 }
00877 
00878 //===----------------------------------------------------------------------===//
00879 // evaluation of individual function calls.
00880 //===----------------------------------------------------------------------===//
00881 
00882 void CStringChecker::evalCopyCommon(CheckerContext &C, 
00883                                     const CallExpr *CE,
00884                                     ProgramStateRef state,
00885                                     const Expr *Size, const Expr *Dest,
00886                                     const Expr *Source, bool Restricted,
00887                                     bool IsMempcpy) const {
00888   CurrentFunctionDescription = "memory copy function";
00889 
00890   // See if the size argument is zero.
00891   const LocationContext *LCtx = C.getLocationContext();
00892   SVal sizeVal = state->getSVal(Size, LCtx);
00893   QualType sizeTy = Size->getType();
00894 
00895   ProgramStateRef stateZeroSize, stateNonZeroSize;
00896   llvm::tie(stateZeroSize, stateNonZeroSize) =
00897     assumeZero(C, state, sizeVal, sizeTy);
00898 
00899   // Get the value of the Dest.
00900   SVal destVal = state->getSVal(Dest, LCtx);
00901 
00902   // If the size is zero, there won't be any actual memory access, so
00903   // just bind the return value to the destination buffer and return.
00904   if (stateZeroSize && !stateNonZeroSize) {
00905     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
00906     C.addTransition(stateZeroSize);
00907     return;
00908   }
00909 
00910   // If the size can be nonzero, we have to check the other arguments.
00911   if (stateNonZeroSize) {
00912     state = stateNonZeroSize;
00913 
00914     // Ensure the destination is not null. If it is NULL there will be a
00915     // NULL pointer dereference.
00916     state = checkNonNull(C, state, Dest, destVal);
00917     if (!state)
00918       return;
00919 
00920     // Get the value of the Src.
00921     SVal srcVal = state->getSVal(Source, LCtx);
00922     
00923     // Ensure the source is not null. If it is NULL there will be a
00924     // NULL pointer dereference.
00925     state = checkNonNull(C, state, Source, srcVal);
00926     if (!state)
00927       return;
00928 
00929     // Ensure the accesses are valid and that the buffers do not overlap.
00930     const char * const writeWarning =
00931       "Memory copy function overflows destination buffer";
00932     state = CheckBufferAccess(C, state, Size, Dest, Source,
00933                               writeWarning, /* sourceWarning = */ NULL);
00934     if (Restricted)
00935       state = CheckOverlap(C, state, Size, Dest, Source);
00936 
00937     if (!state)
00938       return;
00939 
00940     // If this is mempcpy, get the byte after the last byte copied and 
00941     // bind the expr.
00942     if (IsMempcpy) {
00943       loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
00944       assert(destRegVal && "Destination should be a known MemRegionVal here");
00945       
00946       // Get the length to copy.
00947       NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
00948       
00949       if (lenValNonLoc) {
00950         // Get the byte after the last byte copied.
00951         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 
00952                                                           *destRegVal,
00953                                                           *lenValNonLoc, 
00954                                                           Dest->getType());
00955       
00956         // The byte after the last byte copied is the return value.
00957         state = state->BindExpr(CE, LCtx, lastElement);
00958       } else {
00959         // If we don't know how much we copied, we can at least
00960         // conjure a return value for later.
00961         unsigned Count = C.getCurrentBlockCount();
00962         SVal result =
00963           C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count);
00964         state = state->BindExpr(CE, LCtx, result);
00965       }
00966 
00967     } else {
00968       // All other copies return the destination buffer.
00969       // (Well, bcopy() has a void return type, but this won't hurt.)
00970       state = state->BindExpr(CE, LCtx, destVal);
00971     }
00972 
00973     // Invalidate the destination.
00974     // FIXME: Even if we can't perfectly model the copy, we should see if we
00975     // can use LazyCompoundVals to copy the source values into the destination.
00976     // This would probably remove any existing bindings past the end of the
00977     // copied region, but that's still an improvement over blank invalidation.
00978     state = InvalidateBuffer(C, state, Dest,
00979                              state->getSVal(Dest, C.getLocationContext()));
00980     C.addTransition(state);
00981   }
00982 }
00983 
00984 
00985 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
00986   if (CE->getNumArgs() < 3)
00987     return;
00988 
00989   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
00990   // The return value is the address of the destination buffer.
00991   const Expr *Dest = CE->getArg(0);
00992   ProgramStateRef state = C.getState();
00993 
00994   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
00995 }
00996 
00997 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
00998   if (CE->getNumArgs() < 3)
00999     return;
01000 
01001   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
01002   // The return value is a pointer to the byte following the last written byte.
01003   const Expr *Dest = CE->getArg(0);
01004   ProgramStateRef state = C.getState();
01005   
01006   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
01007 }
01008 
01009 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
01010   if (CE->getNumArgs() < 3)
01011     return;
01012 
01013   // void *memmove(void *dst, const void *src, size_t n);
01014   // The return value is the address of the destination buffer.
01015   const Expr *Dest = CE->getArg(0);
01016   ProgramStateRef state = C.getState();
01017 
01018   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
01019 }
01020 
01021 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
01022   if (CE->getNumArgs() < 3)
01023     return;
01024 
01025   // void bcopy(const void *src, void *dst, size_t n);
01026   evalCopyCommon(C, CE, C.getState(), 
01027                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
01028 }
01029 
01030 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
01031   if (CE->getNumArgs() < 3)
01032     return;
01033 
01034   // int memcmp(const void *s1, const void *s2, size_t n);
01035   CurrentFunctionDescription = "memory comparison function";
01036 
01037   const Expr *Left = CE->getArg(0);
01038   const Expr *Right = CE->getArg(1);
01039   const Expr *Size = CE->getArg(2);
01040 
01041   ProgramStateRef state = C.getState();
01042   SValBuilder &svalBuilder = C.getSValBuilder();
01043 
01044   // See if the size argument is zero.
01045   const LocationContext *LCtx = C.getLocationContext();
01046   SVal sizeVal = state->getSVal(Size, LCtx);
01047   QualType sizeTy = Size->getType();
01048 
01049   ProgramStateRef stateZeroSize, stateNonZeroSize;
01050   llvm::tie(stateZeroSize, stateNonZeroSize) =
01051     assumeZero(C, state, sizeVal, sizeTy);
01052 
01053   // If the size can be zero, the result will be 0 in that case, and we don't
01054   // have to check either of the buffers.
01055   if (stateZeroSize) {
01056     state = stateZeroSize;
01057     state = state->BindExpr(CE, LCtx,
01058                             svalBuilder.makeZeroVal(CE->getType()));
01059     C.addTransition(state);
01060   }
01061 
01062   // If the size can be nonzero, we have to check the other arguments.
01063   if (stateNonZeroSize) {
01064     state = stateNonZeroSize;
01065     // If we know the two buffers are the same, we know the result is 0.
01066     // First, get the two buffers' addresses. Another checker will have already
01067     // made sure they're not undefined.
01068     DefinedOrUnknownSVal LV =
01069       cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx));
01070     DefinedOrUnknownSVal RV =
01071       cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx));
01072 
01073     // See if they are the same.
01074     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
01075     ProgramStateRef StSameBuf, StNotSameBuf;
01076     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
01077 
01078     // If the two arguments might be the same buffer, we know the result is 0,
01079     // and we only need to check one size.
01080     if (StSameBuf) {
01081       state = StSameBuf;
01082       state = CheckBufferAccess(C, state, Size, Left);
01083       if (state) {
01084         state = StSameBuf->BindExpr(CE, LCtx,
01085                                     svalBuilder.makeZeroVal(CE->getType()));
01086         C.addTransition(state);
01087       }
01088     }
01089 
01090     // If the two arguments might be different buffers, we have to check the
01091     // size of both of them.
01092     if (StNotSameBuf) {
01093       state = StNotSameBuf;
01094       state = CheckBufferAccess(C, state, Size, Left, Right);
01095       if (state) {
01096         // The return value is the comparison result, which we don't know.
01097         unsigned Count = C.getCurrentBlockCount();
01098         SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count);
01099         state = state->BindExpr(CE, LCtx, CmpV);
01100         C.addTransition(state);
01101       }
01102     }
01103   }
01104 }
01105 
01106 void CStringChecker::evalstrLength(CheckerContext &C,
01107                                    const CallExpr *CE) const {
01108   if (CE->getNumArgs() < 1)
01109     return;
01110 
01111   // size_t strlen(const char *s);
01112   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
01113 }
01114 
01115 void CStringChecker::evalstrnLength(CheckerContext &C,
01116                                     const CallExpr *CE) const {
01117   if (CE->getNumArgs() < 2)
01118     return;
01119 
01120   // size_t strnlen(const char *s, size_t maxlen);
01121   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
01122 }
01123 
01124 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
01125                                          bool IsStrnlen) const {
01126   CurrentFunctionDescription = "string length function";
01127   ProgramStateRef state = C.getState();
01128   const LocationContext *LCtx = C.getLocationContext();
01129 
01130   if (IsStrnlen) {
01131     const Expr *maxlenExpr = CE->getArg(1);
01132     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
01133 
01134     ProgramStateRef stateZeroSize, stateNonZeroSize;
01135     llvm::tie(stateZeroSize, stateNonZeroSize) =
01136       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
01137 
01138     // If the size can be zero, the result will be 0 in that case, and we don't
01139     // have to check the string itself.
01140     if (stateZeroSize) {
01141       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
01142       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
01143       C.addTransition(stateZeroSize);
01144     }
01145 
01146     // If the size is GUARANTEED to be zero, we're done!
01147     if (!stateNonZeroSize)
01148       return;
01149 
01150     // Otherwise, record the assumption that the size is nonzero.
01151     state = stateNonZeroSize;
01152   }
01153 
01154   // Check that the string argument is non-null.
01155   const Expr *Arg = CE->getArg(0);
01156   SVal ArgVal = state->getSVal(Arg, LCtx);
01157 
01158   state = checkNonNull(C, state, Arg, ArgVal);
01159 
01160   if (!state)
01161     return;
01162 
01163   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
01164 
01165   // If the argument isn't a valid C string, there's no valid state to
01166   // transition to.
01167   if (strLength.isUndef())
01168     return;
01169 
01170   DefinedOrUnknownSVal result = UnknownVal();
01171 
01172   // If the check is for strnlen() then bind the return value to no more than
01173   // the maxlen value.
01174   if (IsStrnlen) {
01175     QualType cmpTy = C.getSValBuilder().getConditionType();
01176 
01177     // It's a little unfortunate to be getting this again,
01178     // but it's not that expensive...
01179     const Expr *maxlenExpr = CE->getArg(1);
01180     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
01181 
01182     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
01183     NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
01184 
01185     if (strLengthNL && maxlenValNL) {
01186       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
01187 
01188       // Check if the strLength is greater than the maxlen.
01189       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
01190         state->assume(cast<DefinedOrUnknownSVal>
01191                       (C.getSValBuilder().evalBinOpNN(state, BO_GT, 
01192                                                       *strLengthNL,
01193                                                       *maxlenValNL,
01194                                                       cmpTy)));
01195 
01196       if (stateStringTooLong && !stateStringNotTooLong) {
01197         // If the string is longer than maxlen, return maxlen.
01198         result = *maxlenValNL;
01199       } else if (stateStringNotTooLong && !stateStringTooLong) {
01200         // If the string is shorter than maxlen, return its length.
01201         result = *strLengthNL;
01202       }
01203     }
01204 
01205     if (result.isUnknown()) {
01206       // If we don't have enough information for a comparison, there's
01207       // no guarantee the full string length will actually be returned.
01208       // All we know is the return value is the min of the string length
01209       // and the limit. This is better than nothing.
01210       unsigned Count = C.getCurrentBlockCount();
01211       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count);
01212       NonLoc *resultNL = cast<NonLoc>(&result);
01213 
01214       if (strLengthNL) {
01215         state = state->assume(cast<DefinedOrUnknownSVal>
01216                               (C.getSValBuilder().evalBinOpNN(state, BO_LE, 
01217                                                               *resultNL,
01218                                                               *strLengthNL,
01219                                                               cmpTy)), true);
01220       }
01221       
01222       if (maxlenValNL) {
01223         state = state->assume(cast<DefinedOrUnknownSVal>
01224                               (C.getSValBuilder().evalBinOpNN(state, BO_LE, 
01225                                                               *resultNL,
01226                                                               *maxlenValNL,
01227                                                               cmpTy)), true);
01228       }
01229     }
01230 
01231   } else {
01232     // This is a plain strlen(), not strnlen().
01233     result = cast<DefinedOrUnknownSVal>(strLength);
01234 
01235     // If we don't know the length of the string, conjure a return
01236     // value, so it can be used in constraints, at least.
01237     if (result.isUnknown()) {
01238       unsigned Count = C.getCurrentBlockCount();
01239       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, LCtx, Count);
01240     }
01241   }
01242 
01243   // Bind the return value.
01244   assert(!result.isUnknown() && "Should have conjured a value by now");
01245   state = state->BindExpr(CE, LCtx, result);
01246   C.addTransition(state);
01247 }
01248 
01249 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
01250   if (CE->getNumArgs() < 2)
01251     return;
01252 
01253   // char *strcpy(char *restrict dst, const char *restrict src);
01254   evalStrcpyCommon(C, CE, 
01255                    /* returnEnd = */ false, 
01256                    /* isBounded = */ false,
01257                    /* isAppending = */ false);
01258 }
01259 
01260 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
01261   if (CE->getNumArgs() < 3)
01262     return;
01263 
01264   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
01265   evalStrcpyCommon(C, CE, 
01266                    /* returnEnd = */ false, 
01267                    /* isBounded = */ true,
01268                    /* isAppending = */ false);
01269 }
01270 
01271 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
01272   if (CE->getNumArgs() < 2)
01273     return;
01274 
01275   // char *stpcpy(char *restrict dst, const char *restrict src);
01276   evalStrcpyCommon(C, CE, 
01277                    /* returnEnd = */ true, 
01278                    /* isBounded = */ false,
01279                    /* isAppending = */ false);
01280 }
01281 
01282 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
01283   if (CE->getNumArgs() < 2)
01284     return;
01285 
01286   //char *strcat(char *restrict s1, const char *restrict s2);
01287   evalStrcpyCommon(C, CE, 
01288                    /* returnEnd = */ false, 
01289                    /* isBounded = */ false,
01290                    /* isAppending = */ true);
01291 }
01292 
01293 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
01294   if (CE->getNumArgs() < 3)
01295     return;
01296 
01297   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
01298   evalStrcpyCommon(C, CE, 
01299                    /* returnEnd = */ false, 
01300                    /* isBounded = */ true,
01301                    /* isAppending = */ true);
01302 }
01303 
01304 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
01305                                       bool returnEnd, bool isBounded,
01306                                       bool isAppending) const {
01307   CurrentFunctionDescription = "string copy function";
01308   ProgramStateRef state = C.getState();
01309   const LocationContext *LCtx = C.getLocationContext();
01310 
01311   // Check that the destination is non-null.
01312   const Expr *Dst = CE->getArg(0);
01313   SVal DstVal = state->getSVal(Dst, LCtx);
01314 
01315   state = checkNonNull(C, state, Dst, DstVal);
01316   if (!state)
01317     return;
01318 
01319   // Check that the source is non-null.
01320   const Expr *srcExpr = CE->getArg(1);
01321   SVal srcVal = state->getSVal(srcExpr, LCtx);
01322   state = checkNonNull(C, state, srcExpr, srcVal);
01323   if (!state)
01324     return;
01325 
01326   // Get the string length of the source.
01327   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
01328 
01329   // If the source isn't a valid C string, give up.
01330   if (strLength.isUndef())
01331     return;
01332 
01333   SValBuilder &svalBuilder = C.getSValBuilder();
01334   QualType cmpTy = svalBuilder.getConditionType();
01335   QualType sizeTy = svalBuilder.getContext().getSizeType();
01336 
01337   // These two values allow checking two kinds of errors:
01338   // - actual overflows caused by a source that doesn't fit in the destination
01339   // - potential overflows caused by a bound that could exceed the destination
01340   SVal amountCopied = UnknownVal();
01341   SVal maxLastElementIndex = UnknownVal();
01342   const char *boundWarning = NULL;
01343 
01344   // If the function is strncpy, strncat, etc... it is bounded.
01345   if (isBounded) {
01346     // Get the max number of characters to copy.
01347     const Expr *lenExpr = CE->getArg(2);
01348     SVal lenVal = state->getSVal(lenExpr, LCtx);
01349 
01350     // Protect against misdeclared strncpy().
01351     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
01352 
01353     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
01354     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
01355 
01356     // If we know both values, we might be able to figure out how much
01357     // we're copying.
01358     if (strLengthNL && lenValNL) {
01359       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
01360 
01361       // Check if the max number to copy is less than the length of the src.
01362       // If the bound is equal to the source length, strncpy won't null-
01363       // terminate the result!
01364       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
01365         state->assume(cast<DefinedOrUnknownSVal>
01366                       (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL,
01367                                                *lenValNL, cmpTy)));
01368 
01369       if (stateSourceTooLong && !stateSourceNotTooLong) {
01370         // Max number to copy is less than the length of the src, so the actual
01371         // strLength copied is the max number arg.
01372         state = stateSourceTooLong;
01373         amountCopied = lenVal;
01374 
01375       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
01376         // The source buffer entirely fits in the bound.
01377         state = stateSourceNotTooLong;
01378         amountCopied = strLength;
01379       }
01380     }
01381 
01382     // We still want to know if the bound is known to be too large.
01383     if (lenValNL) {
01384       if (isAppending) {
01385         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
01386 
01387         // Get the string length of the destination. If the destination is
01388         // memory that can't have a string length, we shouldn't be copying
01389         // into it anyway.
01390         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
01391         if (dstStrLength.isUndef())
01392           return;
01393 
01394         if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) {
01395           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
01396                                                         *lenValNL,
01397                                                         *dstStrLengthNL,
01398                                                         sizeTy);
01399           boundWarning = "Size argument is greater than the free space in the "
01400                          "destination buffer";
01401         }
01402 
01403       } else {
01404         // For strncpy, this is just checking that lenVal <= sizeof(dst)
01405         // (Yes, strncpy and strncat differ in how they treat termination.
01406         // strncat ALWAYS terminates, but strncpy doesn't.)
01407 
01408         // We need a special case for when the copy size is zero, in which
01409         // case strncpy will do no work at all. Our bounds check uses n-1
01410         // as the last element accessed, so n == 0 is problematic.
01411         ProgramStateRef StateZeroSize, StateNonZeroSize;
01412         llvm::tie(StateZeroSize, StateNonZeroSize) =
01413           assumeZero(C, state, *lenValNL, sizeTy);
01414 
01415         // If the size is known to be zero, we're done.
01416         if (StateZeroSize && !StateNonZeroSize) {
01417           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
01418           C.addTransition(StateZeroSize);
01419           return;
01420         }
01421 
01422         // Otherwise, go ahead and figure out the last element we'll touch.
01423         // We don't record the non-zero assumption here because we can't
01424         // be sure. We won't warn on a possible zero.
01425         NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
01426         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
01427                                                       one, sizeTy);
01428         boundWarning = "Size argument is greater than the length of the "
01429                        "destination buffer";
01430       }
01431     }
01432 
01433     // If we couldn't pin down the copy length, at least bound it.
01434     // FIXME: We should actually run this code path for append as well, but
01435     // right now it creates problems with constraints (since we can end up
01436     // trying to pass constraints from symbol to symbol).
01437     if (amountCopied.isUnknown() && !isAppending) {
01438       // Try to get a "hypothetical" string length symbol, which we can later
01439       // set as a real value if that turns out to be the case.
01440       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
01441       assert(!amountCopied.isUndef());
01442 
01443       if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
01444         if (lenValNL) {
01445           // amountCopied <= lenVal
01446           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
01447                                                              *amountCopiedNL,
01448                                                              *lenValNL,
01449                                                              cmpTy);
01450           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
01451                                 true);
01452           if (!state)
01453             return;
01454         }
01455 
01456         if (strLengthNL) {
01457           // amountCopied <= strlen(source)
01458           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
01459                                                            *amountCopiedNL,
01460                                                            *strLengthNL,
01461                                                            cmpTy);
01462           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
01463                                 true);
01464           if (!state)
01465             return;
01466         }
01467       }
01468     }
01469 
01470   } else {
01471     // The function isn't bounded. The amount copied should match the length
01472     // of the source buffer.
01473     amountCopied = strLength;
01474   }
01475 
01476   assert(state);
01477 
01478   // This represents the number of characters copied into the destination
01479   // buffer. (It may not actually be the strlen if the destination buffer
01480   // is not terminated.)
01481   SVal finalStrLength = UnknownVal();
01482 
01483   // If this is an appending function (strcat, strncat...) then set the
01484   // string length to strlen(src) + strlen(dst) since the buffer will
01485   // ultimately contain both.
01486   if (isAppending) {
01487     // Get the string length of the destination. If the destination is memory
01488     // that can't have a string length, we shouldn't be copying into it anyway.
01489     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
01490     if (dstStrLength.isUndef())
01491       return;
01492 
01493     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
01494     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
01495     
01496     // If we know both string lengths, we might know the final string length.
01497     if (srcStrLengthNL && dstStrLengthNL) {
01498       // Make sure the two lengths together don't overflow a size_t.
01499       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
01500       if (!state)
01501         return;
01502 
01503       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 
01504                                                *dstStrLengthNL, sizeTy);
01505     }
01506 
01507     // If we couldn't get a single value for the final string length,
01508     // we can at least bound it by the individual lengths.
01509     if (finalStrLength.isUnknown()) {
01510       // Try to get a "hypothetical" string length symbol, which we can later
01511       // set as a real value if that turns out to be the case.
01512       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
01513       assert(!finalStrLength.isUndef());
01514 
01515       if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
01516         if (srcStrLengthNL) {
01517           // finalStrLength >= srcStrLength
01518           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
01519                                                         *finalStrLengthNL,
01520                                                         *srcStrLengthNL,
01521                                                         cmpTy);
01522           state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
01523                                 true);
01524           if (!state)
01525             return;
01526         }
01527 
01528         if (dstStrLengthNL) {
01529           // finalStrLength >= dstStrLength
01530           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
01531                                                       *finalStrLengthNL,
01532                                                       *dstStrLengthNL,
01533                                                       cmpTy);
01534           state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
01535                                 true);
01536           if (!state)
01537             return;
01538         }
01539       }
01540     }
01541 
01542   } else {
01543     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
01544     // the final string length will match the input string length.
01545     finalStrLength = amountCopied;
01546   }
01547 
01548   // The final result of the function will either be a pointer past the last
01549   // copied element, or a pointer to the start of the destination buffer.
01550   SVal Result = (returnEnd ? UnknownVal() : DstVal);
01551 
01552   assert(state);
01553 
01554   // If the destination is a MemRegion, try to check for a buffer overflow and
01555   // record the new string length.
01556   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
01557     QualType ptrTy = Dst->getType();
01558 
01559     // If we have an exact value on a bounded copy, use that to check for
01560     // overflows, rather than our estimate about how much is actually copied.
01561     if (boundWarning) {
01562       if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) {
01563         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
01564                                                       *maxLastNL, ptrTy);
01565         state = CheckLocation(C, state, CE->getArg(2), maxLastElement, 
01566                               boundWarning);
01567         if (!state)
01568           return;
01569       }
01570     }
01571 
01572     // Then, if the final length is known...
01573     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
01574       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
01575                                                  *knownStrLength, ptrTy);
01576 
01577       // ...and we haven't checked the bound, we'll check the actual copy.
01578       if (!boundWarning) {
01579         const char * const warningMsg =
01580           "String copy function overflows destination buffer";
01581         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
01582         if (!state)
01583           return;
01584       }
01585 
01586       // If this is a stpcpy-style copy, the last element is the return value.
01587       if (returnEnd)
01588         Result = lastElement;
01589     }
01590 
01591     // Invalidate the destination. This must happen before we set the C string
01592     // length because invalidation will clear the length.
01593     // FIXME: Even if we can't perfectly model the copy, we should see if we
01594     // can use LazyCompoundVals to copy the source values into the destination.
01595     // This would probably remove any existing bindings past the end of the
01596     // string, but that's still an improvement over blank invalidation.
01597     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
01598 
01599     // Set the C string length of the destination, if we know it.
01600     if (isBounded && !isAppending) {
01601       // strncpy is annoying in that it doesn't guarantee to null-terminate
01602       // the result string. If the original string didn't fit entirely inside
01603       // the bound (including the null-terminator), we don't know how long the
01604       // result is.
01605       if (amountCopied != strLength)
01606         finalStrLength = UnknownVal();
01607     }
01608     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
01609   }
01610 
01611   assert(state);
01612 
01613   // If this is a stpcpy-style copy, but we were unable to check for a buffer
01614   // overflow, we still need a result. Conjure a return value.
01615   if (returnEnd && Result.isUnknown()) {
01616     unsigned Count = C.getCurrentBlockCount();
01617     Result = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count);
01618   }
01619 
01620   // Set the return value.
01621   state = state->BindExpr(CE, LCtx, Result);
01622   C.addTransition(state);
01623 }
01624 
01625 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
01626   if (CE->getNumArgs() < 2)
01627     return;
01628 
01629   //int strcmp(const char *s1, const char *s2);
01630   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
01631 }
01632 
01633 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
01634   if (CE->getNumArgs() < 3)
01635     return;
01636 
01637   //int strncmp(const char *s1, const char *s2, size_t n);
01638   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
01639 }
01640 
01641 void CStringChecker::evalStrcasecmp(CheckerContext &C, 
01642                                     const CallExpr *CE) const {
01643   if (CE->getNumArgs() < 2)
01644     return;
01645 
01646   //int strcasecmp(const char *s1, const char *s2);
01647   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
01648 }
01649 
01650 void CStringChecker::evalStrncasecmp(CheckerContext &C, 
01651                                      const CallExpr *CE) const {
01652   if (CE->getNumArgs() < 3)
01653     return;
01654 
01655   //int strncasecmp(const char *s1, const char *s2, size_t n);
01656   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
01657 }
01658 
01659 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
01660                                       bool isBounded, bool ignoreCase) const {
01661   CurrentFunctionDescription = "string comparison function";
01662   ProgramStateRef state = C.getState();
01663   const LocationContext *LCtx = C.getLocationContext();
01664 
01665   // Check that the first string is non-null
01666   const Expr *s1 = CE->getArg(0);
01667   SVal s1Val = state->getSVal(s1, LCtx);
01668   state = checkNonNull(C, state, s1, s1Val);
01669   if (!state)
01670     return;
01671 
01672   // Check that the second string is non-null.
01673   const Expr *s2 = CE->getArg(1);
01674   SVal s2Val = state->getSVal(s2, LCtx);
01675   state = checkNonNull(C, state, s2, s2Val);
01676   if (!state)
01677     return;
01678 
01679   // Get the string length of the first string or give up.
01680   SVal s1Length = getCStringLength(C, state, s1, s1Val);
01681   if (s1Length.isUndef())
01682     return;
01683 
01684   // Get the string length of the second string or give up.
01685   SVal s2Length = getCStringLength(C, state, s2, s2Val);
01686   if (s2Length.isUndef())
01687     return;
01688 
01689   // If we know the two buffers are the same, we know the result is 0.
01690   // First, get the two buffers' addresses. Another checker will have already
01691   // made sure they're not undefined.
01692   DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
01693   DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
01694 
01695   // See if they are the same.
01696   SValBuilder &svalBuilder = C.getSValBuilder();
01697   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
01698   ProgramStateRef StSameBuf, StNotSameBuf;
01699   llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
01700 
01701   // If the two arguments might be the same buffer, we know the result is 0,
01702   // and we only need to check one size.
01703   if (StSameBuf) {
01704     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
01705                                     svalBuilder.makeZeroVal(CE->getType()));
01706     C.addTransition(StSameBuf);
01707 
01708     // If the two arguments are GUARANTEED to be the same, we're done!
01709     if (!StNotSameBuf)
01710       return;
01711   }
01712 
01713   assert(StNotSameBuf);
01714   state = StNotSameBuf;
01715 
01716   // At this point we can go about comparing the two buffers.
01717   // For now, we only do this if they're both known string literals.
01718 
01719   // Attempt to extract string literals from both expressions.
01720   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
01721   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
01722   bool canComputeResult = false;
01723 
01724   if (s1StrLiteral && s2StrLiteral) {
01725     StringRef s1StrRef = s1StrLiteral->getString();
01726     StringRef s2StrRef = s2StrLiteral->getString();
01727 
01728     if (isBounded) {
01729       // Get the max number of characters to compare.
01730       const Expr *lenExpr = CE->getArg(2);
01731       SVal lenVal = state->getSVal(lenExpr, LCtx);
01732 
01733       // If the length is known, we can get the right substrings.
01734       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
01735         // Create substrings of each to compare the prefix.
01736         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
01737         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
01738         canComputeResult = true;
01739       }
01740     } else {
01741       // This is a normal, unbounded strcmp.
01742       canComputeResult = true;
01743     }
01744 
01745     if (canComputeResult) {
01746       // Real strcmp stops at null characters.
01747       size_t s1Term = s1StrRef.find('\0');
01748       if (s1Term != StringRef::npos)
01749         s1StrRef = s1StrRef.substr(0, s1Term);
01750 
01751       size_t s2Term = s2StrRef.find('\0');
01752       if (s2Term != StringRef::npos)
01753         s2StrRef = s2StrRef.substr(0, s2Term);
01754 
01755       // Use StringRef's comparison methods to compute the actual result.
01756       int result;
01757 
01758       if (ignoreCase) {
01759         // Compare string 1 to string 2 the same way strcasecmp() does.
01760         result = s1StrRef.compare_lower(s2StrRef);
01761       } else {
01762         // Compare string 1 to string 2 the same way strcmp() does.
01763         result = s1StrRef.compare(s2StrRef);
01764       }
01765 
01766       // Build the SVal of the comparison and bind the return value.
01767       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
01768       state = state->BindExpr(CE, LCtx, resultVal);
01769     }
01770   }
01771 
01772   if (!canComputeResult) {
01773     // Conjure a symbolic value. It's the best we can do.
01774     unsigned Count = C.getCurrentBlockCount();
01775     SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, LCtx, Count);
01776     state = state->BindExpr(CE, LCtx, resultVal);
01777   }
01778 
01779   // Record this as a possible path.
01780   C.addTransition(state);
01781 }
01782 
01783 //===----------------------------------------------------------------------===//
01784 // The driver method, and other Checker callbacks.
01785 //===----------------------------------------------------------------------===//
01786 
01787 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
01788   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
01789 
01790   if (!FDecl)
01791     return false;
01792 
01793   FnCheck evalFunction = 0;
01794   if (C.isCLibraryFunction(FDecl, "memcpy"))
01795     evalFunction =  &CStringChecker::evalMemcpy;
01796   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
01797     evalFunction =  &CStringChecker::evalMempcpy;
01798   else if (C.isCLibraryFunction(FDecl, "memcmp"))
01799     evalFunction =  &CStringChecker::evalMemcmp;
01800   else if (C.isCLibraryFunction(FDecl, "memmove"))
01801     evalFunction =  &CStringChecker::evalMemmove;
01802   else if (C.isCLibraryFunction(FDecl, "strcpy"))
01803     evalFunction =  &CStringChecker::evalStrcpy;
01804   else if (C.isCLibraryFunction(FDecl, "strncpy"))
01805     evalFunction =  &CStringChecker::evalStrncpy;
01806   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
01807     evalFunction =  &CStringChecker::evalStpcpy;
01808   else if (C.isCLibraryFunction(FDecl, "strcat"))
01809     evalFunction =  &CStringChecker::evalStrcat;
01810   else if (C.isCLibraryFunction(FDecl, "strncat"))
01811     evalFunction =  &CStringChecker::evalStrncat;
01812   else if (C.isCLibraryFunction(FDecl, "strlen"))
01813     evalFunction =  &CStringChecker::evalstrLength;
01814   else if (C.isCLibraryFunction(FDecl, "strnlen"))
01815     evalFunction =  &CStringChecker::evalstrnLength;
01816   else if (C.isCLibraryFunction(FDecl, "strcmp"))
01817     evalFunction =  &CStringChecker::evalStrcmp;
01818   else if (C.isCLibraryFunction(FDecl, "strncmp"))
01819     evalFunction =  &CStringChecker::evalStrncmp;
01820   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
01821     evalFunction =  &CStringChecker::evalStrcasecmp;
01822   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
01823     evalFunction =  &CStringChecker::evalStrncasecmp;
01824   else if (C.isCLibraryFunction(FDecl, "bcopy"))
01825     evalFunction =  &CStringChecker::evalBcopy;
01826   else if (C.isCLibraryFunction(FDecl, "bcmp"))
01827     evalFunction =  &CStringChecker::evalMemcmp;
01828   
01829   // If the callee isn't a string function, let another checker handle it.
01830   if (!evalFunction)
01831     return false;
01832 
01833   // Make sure each function sets its own description.
01834   // (But don't bother in a release build.)
01835   assert(!(CurrentFunctionDescription = NULL));
01836 
01837   // Check and evaluate the call.
01838   (this->*evalFunction)(C, CE);
01839 
01840   // If the evaluate call resulted in no change, chain to the next eval call
01841   // handler.
01842   // Note, the custom CString evaluation calls assume that basic safety
01843   // properties are held. However, if the user chooses to turn off some of these
01844   // checks, we ignore the issues and leave the call evaluation to a generic
01845   // handler.
01846   if (!C.isDifferent())
01847     return false;
01848 
01849   return true;
01850 }
01851 
01852 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
01853   // Record string length for char a[] = "abc";
01854   ProgramStateRef state = C.getState();
01855 
01856   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
01857        I != E; ++I) {
01858     const VarDecl *D = dyn_cast<VarDecl>(*I);
01859     if (!D)
01860       continue;
01861 
01862     // FIXME: Handle array fields of structs.
01863     if (!D->getType()->isArrayType())
01864       continue;
01865 
01866     const Expr *Init = D->getInit();
01867     if (!Init)
01868       continue;
01869     if (!isa<StringLiteral>(Init))
01870       continue;
01871 
01872     Loc VarLoc = state->getLValue(D, C.getLocationContext());
01873     const MemRegion *MR = VarLoc.getAsRegion();
01874     if (!MR)
01875       continue;
01876 
01877     SVal StrVal = state->getSVal(Init, C.getLocationContext());
01878     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
01879     DefinedOrUnknownSVal strLength
01880       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
01881 
01882     state = state->set<CStringLength>(MR, strLength);
01883   }
01884 
01885   C.addTransition(state);
01886 }
01887 
01888 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
01889   CStringLength::EntryMap Entries = state->get<CStringLength>();
01890   return !Entries.isEmpty();
01891 }
01892 
01893 ProgramStateRef 
01894 CStringChecker::checkRegionChanges(ProgramStateRef state,
01895                                    const StoreManager::InvalidatedSymbols *,
01896                                    ArrayRef<const MemRegion *> ExplicitRegions,
01897                                    ArrayRef<const MemRegion *> Regions,
01898                                    const CallOrObjCMessage *Call) const {
01899   CStringLength::EntryMap Entries = state->get<CStringLength>();
01900   if (Entries.isEmpty())
01901     return state;
01902 
01903   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
01904   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
01905 
01906   // First build sets for the changed regions and their super-regions.
01907   for (ArrayRef<const MemRegion *>::iterator
01908        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
01909     const MemRegion *MR = *I;
01910     Invalidated.insert(MR);
01911 
01912     SuperRegions.insert(MR);
01913     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
01914       MR = SR->getSuperRegion();
01915       SuperRegions.insert(MR);
01916     }
01917   }
01918 
01919   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
01920 
01921   // Then loop over the entries in the current state.
01922   for (CStringLength::EntryMap::iterator I = Entries.begin(),
01923        E = Entries.end(); I != E; ++I) {
01924     const MemRegion *MR = I.getKey();
01925 
01926     // Is this entry for a super-region of a changed region?
01927     if (SuperRegions.count(MR)) {
01928       Entries = F.remove(Entries, MR);
01929       continue;
01930     }
01931 
01932     // Is this entry for a sub-region of a changed region?
01933     const MemRegion *Super = MR;
01934     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
01935       Super = SR->getSuperRegion();
01936       if (Invalidated.count(Super)) {
01937         Entries = F.remove(Entries, MR);
01938         break;
01939       }
01940     }
01941   }
01942 
01943   return state->set<CStringLength>(Entries);
01944 }
01945 
01946 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
01947                                       SymbolReaper &SR) const {
01948   // Mark all symbols in our string length map as valid.
01949   CStringLength::EntryMap Entries = state->get<CStringLength>();
01950 
01951   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
01952        I != E; ++I) {
01953     SVal Len = I.getData();
01954 
01955     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
01956                                   se = Len.symbol_end(); si != se; ++si)
01957       SR.markInUse(*si);
01958   }
01959 }
01960 
01961 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
01962                                       CheckerContext &C) const {
01963   if (!SR.hasDeadSymbols())
01964     return;
01965 
01966   ProgramStateRef state = C.getState();
01967   CStringLength::EntryMap Entries = state->get<CStringLength>();
01968   if (Entries.isEmpty())
01969     return;
01970 
01971   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
01972   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
01973        I != E; ++I) {
01974     SVal Len = I.getData();
01975     if (SymbolRef Sym = Len.getAsSymbol()) {
01976       if (SR.isDead(Sym))
01977         Entries = F.remove(Entries, I.getKey());
01978     }
01979   }
01980 
01981   state = state->set<CStringLength>(Entries);
01982   C.addTransition(state);
01983 }
01984 
01985 #define REGISTER_CHECKER(name) \
01986 void ento::register##name(CheckerManager &mgr) {\
01987   static CStringChecker *TheChecker = 0; \
01988   if (TheChecker == 0) \
01989     TheChecker = mgr.registerChecker<CStringChecker>(); \
01990   TheChecker->Filter.Check##name = true; \
01991 }
01992 
01993 REGISTER_CHECKER(CStringNullArg)
01994 REGISTER_CHECKER(CStringOutOfBounds)
01995 REGISTER_CHECKER(CStringBufferOverlap)
01996 REGISTER_CHECKER(CStringNotNullTerm)
01997 
01998 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
01999   registerCStringNullArg(Mgr);
02000 }