clang API Documentation

RangeConstraintManager.cpp
Go to the documentation of this file.
00001 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file defines RangeConstraintManager, a class that tracks simple
00011 //  equality and inequality constraints on symbolic values of ProgramState.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "SimpleConstraintManager.h"
00016 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
00017 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
00018 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
00019 #include "llvm/Support/Debug.h"
00020 #include "llvm/ADT/FoldingSet.h"
00021 #include "llvm/ADT/ImmutableSet.h"
00022 #include "llvm/Support/raw_ostream.h"
00023 
00024 using namespace clang;
00025 using namespace ento;
00026 
00027 namespace { class ConstraintRange {}; } // Empty tag class: it only names the
      // ProgramStateTrait<ConstraintRange> specialization defined in this file.
00028 static int ConstraintRangeIndex = 0; // Only its address is used in this file,
      // as the value returned by that specialization's GDMIndex().
00029 
00030 /// A Range represents the closed range [from, to].  The caller must
00031 /// guarantee that from <= to.  Note that Range is immutable, so as not
00032 /// to subvert RangeSet's immutability.
00033 namespace {
00034 class Range : public std::pair<const llvm::APSInt*,
00035                                                 const llvm::APSInt*> {
        // 'first' points at the lower bound and 'second' at the upper bound. Only
        // the addresses are stored, so the referenced APSInts must stay alive for
        // as long as the Range is used.
00036 public:
        /// Records the addresses of 'from' and 'to'; asserts from <= to.
00037   Range(const llvm::APSInt &from, const llvm::APSInt &to)
00038     : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) {
00039     assert(from <= to);
00040   }
        /// Returns true if 'v' lies within the range, both bounds included.
00041   bool Includes(const llvm::APSInt &v) const {
00042     return *first <= v && v <= *second;
00043   }
        /// Lower bound of the range.
00044   const llvm::APSInt &From() const {
00045     return *first;
00046   }
        /// Upper bound of the range.
00047   const llvm::APSInt &To() const {
00048     return *second;
00049   }
        /// Returns the lower bound if both bounds are the very same APSInt object,
        /// otherwise NULL.  The comparison is on addresses, not on values.
        // NOTE(review): this presumably relies on equal values sharing one object
        // (RangeSet obtains new bounds through BasicValueFactory::getValue);
        // confirm that the factory uniques APSInts.
00050   const llvm::APSInt *getConcreteValue() const {
00051     return &From() == &To() ? &From() : NULL;
00052   }
00053 
        /// Adds the two bound addresses (not the bound values) to 'ID'.
00054   void Profile(llvm::FoldingSetNodeID &ID) const {
00055     ID.AddPointer(&From());
00056     ID.AddPointer(&To());
00057   }
00058 };
00059 
00060 
00061 class RangeTrait : public llvm::ImutContainerInfo<Range> {
      // Supplies the ordering used by the ImmutableSet<Range, RangeTrait> that
      // RangeSet is built on.
00062 public:
00063   // When comparing if one Range is less than another, we should compare
00064   // the actual APSInt values instead of their pointers.  This keeps the order
00065   // consistent (instead of comparing by pointer values) and can potentially
00066   // be used to speed up some of the operations in RangeSet.
        //
        // The order is lexicographic on (lower bound, upper bound): lhs is less if
        // its lower bound is smaller, or if neither lower bound is smaller than the
        // other and its upper bound is smaller.
00067   static inline bool isLess(key_type_ref lhs, key_type_ref rhs) {
00068     return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) &&
00069                                        *lhs.second < *rhs.second);
00070   }
00071 };
00072 
00073 /// RangeSet contains a set of ranges. If the set is empty, then
00074 ///  the value of a symbol is overly constrained and there are no
00075 ///  possible values for that symbol.
00076 class RangeSet {
00077   typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet;
00078   PrimRangeSet ranges; // no need to make const, since it is an
00079                        // ImmutableSet - this allows default operator=
00080                        // to work.
00081 public:
00082   typedef PrimRangeSet::Factory Factory;
00083   typedef PrimRangeSet::iterator iterator;
00084 
        /// Wraps an already-built PrimRangeSet.  The constructor is not explicit;
        /// Intersect() relies on that when it returns a PrimRangeSet directly.
00085   RangeSet(PrimRangeSet RS) : ranges(RS) {}
00086 
        /// Iteration over the contained ranges.
00087   iterator begin() const { return ranges.begin(); }
00088   iterator end() const { return ranges.end(); }
00089 
        /// Returns true if the set holds no range at all.
00090   bool isEmpty() const { return ranges.isEmpty(); }
00091 
00092   /// Construct a new RangeSet representing '{ [from, to] }'.
00093   RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
00094     : ranges(F.add(F.getEmptySet(), Range(from, to))) {}
00095 
00096   /// Profile - Generates a hash profile of this RangeSet for use
00097   ///  by FoldingSet.
00098   void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }
00099 
00100   /// getConcreteValue - If a symbol is constrained to equal a specific integer
00101   ///  constant then this method returns that value.  Otherwise, it returns
00102   ///  NULL.  A value is only reported when the set is a singleton and that
00103   ///  single Range itself reports a concrete value.
00103   const llvm::APSInt* getConcreteValue() const {
00104     return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0;
00105   }
00106 
00107 private:
        /// Adds to 'newRanges' the intersection of [Lower, Upper] with each range
        /// from 'i' up to 'e'.  Requires Lower <= Upper in practice, since a Range
        /// built from both asserts it.  New bounds are obtained from 'BV' and
        /// new sets from 'F'.
        ///
        /// 'i' is taken by reference and is left on the range where iteration
        /// stopped.  After a 'break' it has not been advanced, so a later call
        /// with the same iterators examines that same range again; Intersect()
        /// depends on this for wrap-around ranges.
00108   void IntersectInRange(BasicValueFactory &BV, Factory &F,
00109                         const llvm::APSInt &Lower,
00110                         const llvm::APSInt &Upper,
00111                         PrimRangeSet &newRanges,
00112                         PrimRangeSet::iterator &i,
00113                         PrimRangeSet::iterator &e) const {
00114     // There are six cases for each range R in the set:
00115     //   1. R is entirely before the intersection range.
00116     //   2. R is entirely after the intersection range.
00117     //   3. R contains the entire intersection range.
00118     //   4. R starts before the intersection range and ends in the middle.
00119     //   5. R starts in the middle of the intersection range and ends after it.
00120     //   6. R is entirely contained in the intersection range.
00121     // These correspond to each of the conditions below.
00122     for (/* i = begin(), e = end() */; i != e; ++i) {
            // Case 1: nothing to add, move on to the next range.
00123       if (i->To() < Lower) {
00124         continue;
00125       }
            // Case 2: stop scanning.
            // NOTE(review): stopping here assumes the set is iterated in ascending
            // order, so that no later range can overlap [Lower, Upper]; confirm.
00126       if (i->From() > Upper) {
00127         break;
00128       }
00129 
00130       if (i->Includes(Lower)) {
00131         if (i->Includes(Upper)) {
                // Case 3: the result is exactly [Lower, Upper]; nothing else can
                // contribute, so stop.
00132           newRanges = F.add(newRanges, Range(BV.getValue(Lower),
00133                                              BV.getValue(Upper)));
00134           break;
00135         } else
                // Case 4: keep the tail of R, [Lower, R.To()].
00136           newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To()));
00137       } else {
00138         if (i->Includes(Upper)) {
                // Case 5: keep the head of R, [R.From(), Upper], then stop.
00139           newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper)));
00140           break;
00141         } else
                // Case 6: R lies inside [Lower, Upper]; keep it unchanged.
00142           newRanges = F.add(newRanges, *i);
00143       }
00144     }
00145   }
00146 
        /// Returns the lower bound of the first range in iteration order.
        /// Asserts that the set is not empty.
00147   const llvm::APSInt &getMinValue() const {
00148     assert(!isEmpty());
00149     return ranges.begin()->From();
00150   }
00151 
        /// Adjusts 'Lower' and 'Upper' in place so that they describe the same
        /// (possibly wrap-around) range within the symbol's type, which is taken
        /// from getMinValue().  Returns false when no value of that type can lie
        /// in the range; otherwise true.  Because getMinValue() asserts on an
        /// empty set, this must only be called on a non-empty RangeSet.
        // NOTE(review): Type.apply(X) presumably converts X in place to the
        // symbol's type; confirm against APSIntType.
00152   bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
00153     // This function has nine cases, the cartesian product of range-testing
00154     // both the upper and lower bounds against the symbol's type.
00155     // Each case requires a different pinning operation.
00156     // The function returns false if the described range is entirely outside
00157     // the range of values for the associated symbol.
00158     APSIntType Type(getMinValue());
00159     APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower);
00160     APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper);
00161 
00162     switch (LowerTest) {
00163     case APSIntType::RTR_Below:
00164       switch (UpperTest) {
00165       case APSIntType::RTR_Below:
00166         // The entire range is outside the symbol's set of possible values.
00167         // If this is a conventionally-ordered range, the state is infeasible.
00168         if (Lower < Upper)
00169           return false;
00170 
00171         // However, if the range wraps around, it spans all possible values.
00172         Lower = Type.getMinValue();
00173         Upper = Type.getMaxValue();
00174         break;
00175       case APSIntType::RTR_Within:
00176         // The range starts below what's possible but ends within it. Pin.
00177         Lower = Type.getMinValue();
00178         Type.apply(Upper);
00179         break;
00180       case APSIntType::RTR_Above:
00181         // The range spans all possible values for the symbol. Pin.
00182         Lower = Type.getMinValue();
00183         Upper = Type.getMaxValue();
00184         break;
00185       }
00186       break;
00187     case APSIntType::RTR_Within:
00188       switch (UpperTest) {
00189       case APSIntType::RTR_Below:
00190         // The range wraps around, but all lower values are not possible.
00191         Type.apply(Lower);
00192         Upper = Type.getMaxValue();
00193         break;
00194       case APSIntType::RTR_Within:
00195         // The range may or may not wrap around, but both limits are valid.
00196         Type.apply(Lower);
00197         Type.apply(Upper);
00198         break;
00199       case APSIntType::RTR_Above:
00200         // The range starts within what's possible but ends above it. Pin.
00201         Type.apply(Lower);
00202         Upper = Type.getMaxValue();
00203         break;
00204       }
00205       break;
00206     case APSIntType::RTR_Above:
00207       switch (UpperTest) {
00208       case APSIntType::RTR_Below:
00209         // The range wraps but is outside the symbol's set of possible values.
00210         return false;
00211       case APSIntType::RTR_Within:
00212         // The range starts above what's possible but ends within it (wrap).
00213         Lower = Type.getMinValue();
00214         Type.apply(Upper);
00215         break;
00216       case APSIntType::RTR_Above:
00217         // The entire range is outside the symbol's set of possible values.
00218         // If this is a conventionally-ordered range, the state is infeasible.
00219         if (Lower < Upper)
00220           return false;
00221 
00222         // However, if the range wraps around, it spans all possible values.
00223         Lower = Type.getMinValue();
00224         Upper = Type.getMaxValue();
00225         break;
00226       }
00227       break;
00228     }
00229 
00230     return true;
00231   }
00232 
00233 public:
00234   // Returns a set containing the values in the receiving set, intersected with
00235   // the closed range [Lower, Upper]. Unlike the Range type, this range uses
00236   // modular arithmetic, corresponding to the common treatment of C integer
00237   // overflow. Thus, if the Lower bound is greater than the Upper bound, the
00238   // range is taken to wrap around. This is equivalent to taking the
00239   // intersection with the two ranges [Min, Upper] and [Lower, Max],
00240   // or, alternatively, /removing/ all integers between Upper and Lower.
        //
        // Lower and Upper are taken by value because pin() rewrites them.  If pin()
        // reports that the range cannot contain any value, the empty set is
        // returned.  The receiving set must not be empty, since pin() asserts that.
00241   RangeSet Intersect(BasicValueFactory &BV, Factory &F,
00242                      llvm::APSInt Lower, llvm::APSInt Upper) const {
00243     if (!pin(Lower, Upper))
00244       return F.getEmptySet();
00245 
00246     PrimRangeSet newRanges = F.getEmptySet();
00247 
00248     PrimRangeSet::iterator i = begin(), e = end();
00249     if (Lower <= Upper)
00250       IntersectInRange(BV, F, Lower, Upper, newRanges, i, e);
00251     else {
00252       // The order of the next two statements is important!
00253       // IntersectInRange() does not reset the iteration state for i and e.
00254       // Therefore, the lower range must be handled first.
00255       IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e);
00256       IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e);
00257     }
00258 
00259     return newRanges;
00260   }
00261 
        /// Writes the set to 'os' as "{ [a, b], [c, d] }", bounds in base 10.
        /// An empty set is written as "{  }".
00262   void print(raw_ostream &os) const {
00263     bool isFirst = true;
00264     os << "{ ";
00265     for (iterator i = begin(), e = end(); i != e; ++i) {
            // Emit the separator before every range except the first.
00266       if (isFirst)
00267         isFirst = false;
00268       else
00269         os << ", ";
00270 
00271       os << '[' << i->From().toString(10) << ", " << i->To().toString(10)
00272          << ']';
00273     }
00274     os << " }";
00275   }
00276 
        /// Equality is delegated to the underlying ImmutableSet comparison.
00277   bool operator==(const RangeSet &other) const {
00278     return ranges == other.ranges;
00279   }
00280 };
00281 } // end anonymous namespace
00282 
00283 typedef llvm::ImmutableMap<SymbolRef,RangeSet> ConstraintRangeTy; // Maps each
      // constrained symbol to the RangeSet of values it may still take.
00284 
00285 namespace clang {
00286 namespace ento {
      // Registers ConstraintRangeTy as the data stored under the ConstraintRange
      // tag, so the rest of this file can use state->get<ConstraintRange>() and
      // state->set<ConstraintRange>().
00287 template<>
00288 struct ProgramStateTrait<ConstraintRange>
00289   : public ProgramStatePartialTrait<ConstraintRangeTy> {
        // Returns the address of the file-static ConstraintRangeIndex.
00290   static inline void *GDMIndex() { return &ConstraintRangeIndex; }
00291 };
00292 }
00293 }
00294 
00295 namespace {
00296 class RangeConstraintManager : public SimpleConstraintManager{
        // Returns the RangeSet recorded for 'sym' in 'state', or a set covering the
        // whole value range of the symbol's type when nothing is recorded.
00297   RangeSet GetRange(ProgramStateRef state, SymbolRef sym);
00298 public:
00299   RangeConstraintManager(SubEngine &subengine, BasicValueFactory &BVF)
00300     : SimpleConstraintManager(subengine, BVF) {}
00301 
        // The six assumeSymXX methods below each add the constraint
        // "sym + Adjustment XX Int" to 'state'.  As defined later in this file,
        // each returns NULL when the constraint cannot hold, and otherwise either
        // the unchanged state or a state with a narrowed RangeSet for 'sym'.
00302   ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym,
00303                              const llvm::APSInt& Int,
00304                              const llvm::APSInt& Adjustment);
00305 
00306   ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym,
00307                              const llvm::APSInt& Int,
00308                              const llvm::APSInt& Adjustment);
00309 
00310   ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym,
00311                              const llvm::APSInt& Int,
00312                              const llvm::APSInt& Adjustment);
00313 
00314   ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym,
00315                              const llvm::APSInt& Int,
00316                              const llvm::APSInt& Adjustment);
00317 
00318   ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym,
00319                              const llvm::APSInt& Int,
00320                              const llvm::APSInt& Adjustment);
00321 
00322   ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym,
00323                              const llvm::APSInt& Int,
00324                              const llvm::APSInt& Adjustment);
00325 
        // Returns the single value 'sym' is constrained to in 'St', or NULL if no
        // such value is known.
00326   const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const;
00327 
00328   // FIXME: Refactor into SimpleConstraintManager?
        // True only when getSymVal() yields a value and that value equals V; an
        // unknown value is reported as "not equal".
00329   bool isEqual(ProgramStateRef St, SymbolRef sym, const llvm::APSInt& V) const {
00330     const llvm::APSInt *i = getSymVal(St, sym);
00331     return i ? *i == V : false;
00332   }
00333 
        // Returns a state whose constraint map no longer mentions symbols that
        // SymReaper reports as maybe dead.
00334   ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper);
00335 
        // Writes the constraint map of 'St' to 'Out'; 'nl' and 'sep' are emitted
        // as line break and section separator.
00336   void print(ProgramStateRef St, raw_ostream &Out,
00337              const char* nl, const char *sep);
00338 
00339 private:
00340   RangeSet::Factory F; // Factory handed to every RangeSet construction and
                             // Intersect() call made by this manager.
00341 };
00342 
00343 } // end anonymous namespace
00344 
00345 ConstraintManager *
00346 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine &Eng) {
        // Heap-allocates a RangeConstraintManager bound to 'Eng' and to the
        // BasicValueFactory of 'StMgr'.
        // NOTE(review): nothing here releases the object; the caller presumably
        // takes ownership of the returned pointer. Confirm at the call site.
00347   return new RangeConstraintManager(Eng, StMgr.getBasicVals());
00348 }
00349 
00350 const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St,
00351                                                       SymbolRef sym) const {
        // Look up the RangeSet recorded for 'sym'.  Without an entry the value is
        // unknown (NULL); with one, the answer is whatever the RangeSet reports as
        // its concrete value, which may also be NULL.
00352   const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym);
00353   return T ? T->getConcreteValue() : NULL;
00354 }
00355 
00356 /// Scan all symbols referenced by the constraints. If SymReaper reports a
00357 /// symbol as maybe dead, remove its constraint from the returned state.
00358 ProgramStateRef 
00359 RangeConstraintManager::removeDeadBindings(ProgramStateRef state,
00360                                            SymbolReaper& SymReaper) {
00361 
00362   ConstraintRangeTy CR = state->get<ConstraintRange>();
00363   ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>();
00364 
        // The iterators are taken from the map as it was on entry; CR is rebound
        // to a new map on every removal.
        // NOTE(review): this relies on the entry map staying valid while CR is
        // rebound, presumably because 'state' still holds it. Confirm.
00365   for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) {
00366     SymbolRef sym = I.getKey();
00367     if (SymReaper.maybeDead(sym))
00368       CR = CRFactory.remove(CR, sym);
00369   }
00370 
        // The map is stored back even when no symbol was removed.
00371   return state->set<ConstraintRange>(CR);
00372 }
00373 
00374 RangeSet
00375 RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) {
        // Prefer the RangeSet already recorded for this symbol, if any.
00376   if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym))
00377     return *V;
00378 
00379   // Lazily generate a new RangeSet representing all possible values for the
00380   // given symbol type.
        // The generated set is only returned; this function does not store it in
        // 'state'.
00381   BasicValueFactory &BV = getBasicVals();
00382   QualType T = sym->getType(BV.getContext());
00383   return RangeSet(F, BV.getMinValue(T), BV.getMaxValue(T));
00384 }
00385 
00386 //===------------------------------------------------------------------------===
00387 // assumeSymX methods: public interface for RangeConstraintManager.
00388 //===------------------------------------------------------------------------===/
00389 
00390 // The syntax for ranges below is mathematical, using [x, y] for closed ranges
00391 // and (x, y) for open ranges. These ranges are modular, corresponding with
00392 // a common treatment of C integer overflow. This means that these methods
00393 // do not have to worry about overflow; RangeSet::Intersect can handle such a
00394 // "wraparound" range.
00395 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
00396 // UINT_MAX, 0, 1, and 2.
00397 
00398 ProgramStateRef 
00399 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
00400                                     const llvm::APSInt &Int,
00401                                     const llvm::APSInt &Adjustment) {
        // Assume "Sym + Adjustment != Int".  Returns NULL if that leaves no
        // possible value for Sym, otherwise a state with the narrowed range.
00402   // Before we do any real work, see if the value can even show up.
        // If Int is not representable in Adjustment's type, the inequality holds
        // trivially and the state is returned unchanged.
00403   APSIntType AdjustmentType(Adjustment);
00404   if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
00405     return St;
00406 
        // After these four statements Lower is Int-Adjustment-1 and Upper is
        // Int-Adjustment+1.
00407   llvm::APSInt Lower = AdjustmentType.convert(Int) - Adjustment;
00408   llvm::APSInt Upper = Lower;
00409   --Lower;
00410   ++Upper;
00411 
00412   // [Int-Adjustment+1, Int-Adjustment-1]
00413   // Notice that the lower bound is greater than the upper bound.
        // Upper is deliberately passed as the lower bound and Lower as the upper
        // bound: the resulting wrap-around range excludes only Int-Adjustment.
00414   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Upper, Lower);
00415   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
00416 }
00417 
00418 ProgramStateRef 
00419 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
00420                                     const llvm::APSInt &Int,
00421                                     const llvm::APSInt &Adjustment) {
        // Assume "Sym + Adjustment == Int".  Returns NULL if that is impossible,
        // otherwise a state in which Sym is narrowed to the single value
        // Int-Adjustment (intersected with what was already known).
00422   // Before we do any real work, see if the value can even show up.
        // If Int is not representable in Adjustment's type, equality can never
        // hold, so the assumption is infeasible.
00423   APSIntType AdjustmentType(Adjustment);
00424   if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
00425     return NULL;
00426 
00427   // [Int-Adjustment, Int-Adjustment]
00428   llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
00429   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt);
00430   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
00431 }
00432 
00433 ProgramStateRef 
00434 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
00435                                     const llvm::APSInt &Int,
00436                                     const llvm::APSInt &Adjustment) {
        // Assume "Sym + Adjustment < Int".  Returns NULL if infeasible, the
        // unchanged state if the assumption adds no information, otherwise a state
        // with Sym narrowed to [Min-Adjustment, Int-Adjustment-1].
00437   // Before we do any real work, see if the value can even show up.
        // Int below the type's range: nothing can be smaller, so infeasible.
        // Int above the type's range: every value is smaller, so nothing to add.
00438   APSIntType AdjustmentType(Adjustment);
00439   switch (AdjustmentType.testInRange(Int)) {
00440   case APSIntType::RTR_Below:
00441     return NULL;
00442   case APSIntType::RTR_Within:
00443     break;
00444   case APSIntType::RTR_Above:
00445     return St;
00446   }
00447 
00448   // Special case for Int == Min. This is always false.
00449   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
00450   llvm::APSInt Min = AdjustmentType.getMinValue();
00451   if (ComparisonVal == Min)
00452     return NULL;
00453 
        // Strict comparison: the upper bound is one below Int-Adjustment.
00454   llvm::APSInt Lower = Min-Adjustment;
00455   llvm::APSInt Upper = ComparisonVal-Adjustment;
00456   --Upper;
00457 
00458   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
00459   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
00460 }
00461 
00462 ProgramStateRef 
00463 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
00464                                     const llvm::APSInt &Int,
00465                                     const llvm::APSInt &Adjustment) {
        // Assume "Sym + Adjustment > Int".  Returns NULL if infeasible, the
        // unchanged state if the assumption adds no information, otherwise a state
        // with Sym narrowed to [Int-Adjustment+1, Max-Adjustment].
00466   // Before we do any real work, see if the value can even show up.
        // Int below the type's range: every value is greater, so nothing to add.
        // Int above the type's range: nothing can be greater, so infeasible.
00467   APSIntType AdjustmentType(Adjustment);
00468   switch (AdjustmentType.testInRange(Int)) {
00469   case APSIntType::RTR_Below:
00470     return St;
00471   case APSIntType::RTR_Within:
00472     break;
00473   case APSIntType::RTR_Above:
00474     return NULL;
00475   }
00476 
00477   // Special case for Int == Max. This is always false.
00478   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
00479   llvm::APSInt Max = AdjustmentType.getMaxValue();
00480   if (ComparisonVal == Max)
00481     return NULL;
00482 
        // Strict comparison: the lower bound is one above Int-Adjustment.
00483   llvm::APSInt Lower = ComparisonVal-Adjustment;
00484   llvm::APSInt Upper = Max-Adjustment;
00485   ++Lower;
00486 
00487   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
00488   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
00489 }
00490 
00491 ProgramStateRef 
00492 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
00493                                     const llvm::APSInt &Int,
00494                                     const llvm::APSInt &Adjustment) {
        // Assume "Sym + Adjustment >= Int".  Returns NULL if infeasible, the
        // unchanged state if the assumption adds no information, otherwise a state
        // with Sym narrowed to [Int-Adjustment, Max-Adjustment].
00495   // Before we do any real work, see if the value can even show up.
        // Int below the type's range: every value qualifies, so nothing to add.
        // Int above the type's range: no value qualifies, so infeasible.
00496   APSIntType AdjustmentType(Adjustment);
00497   switch (AdjustmentType.testInRange(Int)) {
00498   case APSIntType::RTR_Below:
00499     return St;
00500   case APSIntType::RTR_Within:
00501     break;
00502   case APSIntType::RTR_Above:
00503     return NULL;
00504   }
00505 
00506   // Special case for Int == Min. This is always feasible.
00507   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
00508   llvm::APSInt Min = AdjustmentType.getMinValue();
00509   if (ComparisonVal == Min)
00510     return St;
00511 
        // Non-strict comparison: Int-Adjustment itself is the lower bound.
00512   llvm::APSInt Max = AdjustmentType.getMaxValue();
00513   llvm::APSInt Lower = ComparisonVal-Adjustment;
00514   llvm::APSInt Upper = Max-Adjustment;
00515 
00516   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
00517   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
00518 }
00519 
00520 ProgramStateRef 
00521 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
00522                                     const llvm::APSInt &Int,
00523                                     const llvm::APSInt &Adjustment) {
        // Assume "Sym + Adjustment <= Int".  Returns NULL if infeasible, the
        // unchanged state if the assumption adds no information, otherwise a state
        // with Sym narrowed to [Min-Adjustment, Int-Adjustment].
00524   // Before we do any real work, see if the value can even show up.
        // Int below the type's range: no value qualifies, so infeasible.
        // Int above the type's range: every value qualifies, so nothing to add.
00525   APSIntType AdjustmentType(Adjustment);
00526   switch (AdjustmentType.testInRange(Int)) {
00527   case APSIntType::RTR_Below:
00528     return NULL;
00529   case APSIntType::RTR_Within:
00530     break;
00531   case APSIntType::RTR_Above:
00532     return St;
00533   }
00534 
00535   // Special case for Int == Max. This is always feasible.
00536   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
00537   llvm::APSInt Max = AdjustmentType.getMaxValue();
00538   if (ComparisonVal == Max)
00539     return St;
00540 
        // Non-strict comparison: Int-Adjustment itself is the upper bound.
00541   llvm::APSInt Min = AdjustmentType.getMinValue();
00542   llvm::APSInt Lower = Min-Adjustment;
00543   llvm::APSInt Upper = ComparisonVal-Adjustment;
00544 
00545   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
00546   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
00547 }
00548 
00549 //===------------------------------------------------------------------------===
00550 // Pretty-printing.
00551 //===------------------------------------------------------------------------===/
00552 
00553 void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out,
00554                                    const char* nl, const char *sep) {
        // Dumps the constraint map of 'St' to 'Out'.  'nl' is written wherever a
        // line break is wanted and 'sep' in front of the section heading.
00555 
00556   ConstraintRangeTy Ranges = St->get<ConstraintRange>();
00557 
        // With no constraints recorded, print a one-line notice and stop.
00558   if (Ranges.isEmpty()) {
00559     Out << nl << sep << "Ranges are empty." << nl;
00560     return;
00561   }
00562 
        // Otherwise print a heading followed by one "<symbol> : <range set>" line
        // per entry, using RangeSet::print for the right-hand side.
00563   Out << nl << sep << "Ranges of symbol values:";
00564   for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){
00565     Out << nl << ' ' << I.getKey() << " : ";
00566     I.getData().print(Out);
00567   }
00568   Out << nl;
00569 }