25#include "llvm/ADT/APSInt.h"
26#include "llvm/Support/FormatVariadic.h"
27#include "llvm/Support/raw_ostream.h"
43 const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
47 const MemRegion *SubscriptBaseReg =
C.getSVal(ASE->getBase()).getAsRegion();
48 if (!SubscriptBaseReg)
63static std::optional<QualType> determineElementType(
const Expr *E,
65 const auto *ASE = getAsCleanArraySubscriptExpr(E,
C);
69 return ASE->getType();
72static std::optional<int64_t>
73determineElementSize(
const std::optional<QualType>
T,
const CheckerContext &
C) {
76 return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
79class StateUpdateReporter {
80 const MemSpaceRegion *Space;
82 const NonLoc ByteOffsetVal;
83 const std::optional<QualType> ElementType;
84 const std::optional<int64_t> ElementSize;
85 bool AssumedNonNegative =
false;
86 std::optional<NonLoc> AssumedUpperBound = std::nullopt;
89 StateUpdateReporter(
const SubRegion *R, NonLoc ByteOffsVal,
const Expr *E,
91 : Space(R->getMemorySpace(
C.getState())), Reg(R),
92 ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(E,
C)),
93 ElementSize(determineElementSize(ElementType,
C)) {}
/// Remember that the analysis assumed the offset to be non-negative; this
/// sets the AssumedNonNegative flag that the note-generating code consults.
95 void recordNonNegativeAssumption() { AssumedNonNegative =
true; }
96 void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
97 AssumedUpperBound = UpperBoundVal;
/// Returns true if recordNonNegativeAssumption() has been called on this
/// reporter (the flag starts out as false).
100 bool assumedNonNegative() {
return AssumedNonNegative; }
102 const NoteTag *createNoteTag(CheckerContext &
C)
const;
105 std::string getMessage(PathSensitiveBugReport &BR)
const;
121 static bool providesInformationAboutInteresting(
SymbolRef Sym,
122 PathSensitiveBugReport &BR);
123 static bool providesInformationAboutInteresting(SVal SV,
124 PathSensitiveBugReport &BR) {
125 return providesInformationAboutInteresting(SV.
getAsSymbol(), BR);
130 std::string Short, Full;
/// Classifies the out-of-bounds offset that is being reported.
/// The enumerators are converted to int and used as indices into the
/// Adjectives and Prepositions tables, so their order must match the order
/// of the strings in those tables.
133enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
/// Adjective phrases used in diagnostics, one per BadOffsetKind enumerator.
/// asAdjective() indexes this table with static_cast<int>(Problem), so the
/// entries must stay in the same order as the enumerators
/// (Negative, Overflowing, Indeterminate).
135constexpr llvm::StringLiteral Adjectives[] = {
"a negative",
"an overflowing",
136 "a negative or overflowing"};
137static StringRef asAdjective(BadOffsetKind Problem) {
138 return Adjectives[
static_cast<int>(Problem)];
141constexpr llvm::StringLiteral Prepositions[] = {
"preceding",
"after the end of",
143static StringRef asPreposition(BadOffsetKind Problem) {
144 return Prepositions[
static_cast<int>(Problem)];
154class ArrayBoundChecker :
public Checker<check::PostStmt<ArraySubscriptExpr>,
155 check::PostStmt<UnaryOperator>,
156 check::PostStmt<MemberExpr>> {
157 BugType BT{
this,
"Out-of-bound access"};
160 void performCheck(
const Expr *E, CheckerContext &
C)
const;
162 void reportOOB(CheckerContext &
C,
ProgramStateRef ErrorState, Messages Msgs,
163 NonLoc Offset, std::optional<NonLoc> Extent,
164 bool IsTaintBug =
false)
const;
166 static void markPartsInteresting(PathSensitiveBugReport &BR,
170 static bool isFromCtypeMacro(
const Expr *E, ASTContext &AC);
172 static bool isOffsetObviouslyNonnegative(
const Expr *E, CheckerContext &
C);
174 static bool isIdiomaticPastTheEndPtr(
const Expr *E,
ProgramStateRef State,
175 NonLoc Offset, NonLoc Limit,
177 static bool isInAddressOf(
const Stmt *S, ASTContext &AC);
180 void checkPostStmt(
const ArraySubscriptExpr *E, CheckerContext &
C)
const {
183 void checkPostStmt(
const UnaryOperator *E, CheckerContext &
C)
const {
187 void checkPostStmt(
const MemberExpr *E, CheckerContext &
C)
const {
200static std::optional<std::pair<const SubRegion *, NonLoc>>
212 dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
230 auto Delta = EvalBinOp(BO_Mul, *Index, Size);
235 Offset = EvalBinOp(BO_Add, *Offset, *Delta);
242 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
246 return std::make_pair(OwnerRegion, *Offset);
271static std::pair<NonLoc, nonloc::ConcreteInt>
274 const llvm::APSInt &extentVal = extent.
getValue();
276 if (SymVal && SymVal->isExpression()) {
277 if (
const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
279 switch (SIE->getOpcode()) {
283 if ((extentVal % constant) != 0)
284 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
288 svalBuilder.
makeIntVal(extentVal / constant), svalBuilder);
292 svalBuilder.
makeIntVal(extentVal - constant), svalBuilder);
299 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
304 return MaxV && MaxV->isNegative();
309 return T->isUnsignedIntegerType();
319static std::pair<ProgramStateRef, ProgramStateRef>
323 std::tie(
Value, Threshold) =
337 return {
nullptr, State};
340 return {State,
nullptr};
345 return {
nullptr, State};
359 auto BelowThreshold =
364 return State->assume(*BelowThreshold);
366 return {
nullptr,
nullptr};
377 if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
378 return formatv(
"the field '{0}'", Name);
379 return "the unnamed field";
383 return "the memory returned by 'alloca'";
386 return "the heap area";
389 return "the string literal";
396 return ConcreteVal->getValue()->tryExtValue();
410 std::optional<int64_t> &Val2, int64_t Divisor) {
413 const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
414 const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
415 if (Val1HasRemainder || Val2HasRemainder)
427 std::optional<NonLoc> Extent,
SVal Location,
428 BadOffsetKind Problem) {
430 const auto *EReg = Location.getAsRegion()->getAs<
ElementRegion>();
431 assert(EReg &&
"this checker only handles element access");
432 QualType ElemType = EReg->getElementType();
439 bool UseByteOffsets = !
tryDividePair(OffsetN, ExtentN, ElemSize);
440 const char *OffsetOrIndex = UseByteOffsets ?
"byte offset" :
"index";
443 llvm::raw_svector_ostream Out(Buf);
445 if (OffsetN && !ExtentN && !UseByteOffsets) {
451 Out <<
"'" << ElemType.
getAsString() <<
"' element in ";
453 Out << RegName <<
" at ";
455 if (Problem == BadOffsetKind::Negative)
457 Out << OffsetOrIndex <<
" " << *OffsetN;
459 Out << asAdjective(Problem) <<
" " << OffsetOrIndex;
462 Out <<
", while it holds only ";
470 Out <<
" '" << ElemType.
getAsString() <<
"' element";
476 return {formatv(
"Out of bound access to memory {0} {1}",
477 asPreposition(Problem), RegName),
482 const SubRegion *Region,
const char *OffsetName,
483 bool AlsoMentionUnderflow) {
485 return {formatv(
"Potential out of bound access to {0} with tainted {1}",
486 RegName, OffsetName),
487 formatv(
"Access of {0} with a tainted {1} that may be {2}too large",
489 AlsoMentionUnderflow ?
"negative or " :
"")};
494 if (!AssumedNonNegative && !AssumedUpperBound)
497 return C.getNoteTag([*
this](PathSensitiveBugReport &BR) -> std::string {
498 return getMessage(BR);
502std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR)
const {
503 bool ShouldReportNonNegative = AssumedNonNegative;
504 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
505 if (AssumedUpperBound &&
506 providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
510 ShouldReportNonNegative =
false;
520 const bool UseIndex =
521 ElementSize &&
tryDividePair(OffsetN, ExtentN, *ElementSize);
523 SmallString<256> Buf;
524 llvm::raw_svector_ostream
Out(Buf);
529 Out <<
"'" << OffsetN <<
"' ";
530 }
else if (AssumedUpperBound) {
531 Out <<
"byte offset ";
533 Out <<
"'" << OffsetN <<
"' ";
539 if (ShouldReportNonNegative) {
540 Out <<
" non-negative";
542 if (AssumedUpperBound) {
543 if (ShouldReportNonNegative)
545 Out <<
" less than ";
547 Out << *ExtentN <<
", ";
548 if (UseIndex && ElementType)
549 Out <<
"the number of '" << ElementType->getAsString()
552 Out <<
"the extent of ";
555 return std::string(
Out.str());
558bool StateUpdateReporter::providesInformationAboutInteresting(
559 SymbolRef Sym, PathSensitiveBugReport &BR) {
576void ArrayBoundChecker::performCheck(
const Expr *E, CheckerContext &
C)
const {
577 const SVal Location =
C.getSVal(E);
584 if (isFromCtypeMacro(E,
C.getASTContext()))
588 SValBuilder &SVB =
C.getSValBuilder();
590 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
596 auto [Reg, ByteOffset] = *RawOffset;
600 StateUpdateReporter SUR(Reg, ByteOffset, E,
C);
603 const MemSpaceRegion *Space = Reg->getMemorySpace(State);
615 if (PrecedesLowerBound) {
618 if (isOffsetObviouslyNonnegative(E,
C)) {
635 if (!WithinLowerBound) {
644 if (!WithinLowerBound) {
647 ByteOffset, std::nullopt,
648 Location, BadOffsetKind::Negative);
649 reportOOB(
C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
654 SUR.recordNonNegativeAssumption();
661 if (WithinLowerBound)
662 State = WithinLowerBound;
667 if (
auto KnownSize =
Size.getAs<NonLoc>()) {
673 bool AlsoMentionUnderflow = SUR.assumedNonNegative();
675 auto [WithinUpperBound, ExceedsUpperBound] =
678 if (ExceedsUpperBound) {
680 if (!WithinUpperBound) {
684 if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
686 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(
C));
690 BadOffsetKind Problem = AlsoMentionUnderflow
691 ? BadOffsetKind::Indeterminate
692 : BadOffsetKind::Overflowing;
695 *KnownSize, Location, Problem);
696 reportOOB(
C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
706 const char *OffsetName =
"offset";
707 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
708 if (
isTainted(State, ASE->getIdx(),
C.getLocationContext()))
709 OffsetName =
"index";
712 getTaintMsgs(Space, Reg, OffsetName, AlsoMentionUnderflow);
713 reportOOB(
C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
719 SUR.recordUpperBoundAssumption(*KnownSize);
725 if (WithinUpperBound)
726 State = WithinUpperBound;
730 C.addTransition(State, SUR.createNoteTag(
C));
733void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
735 NonLoc Val,
bool MarkTaint) {
756void ArrayBoundChecker::reportOOB(CheckerContext &
C,
ProgramStateRef ErrorState,
757 Messages Msgs, NonLoc Offset,
758 std::optional<NonLoc> Extent,
759 bool IsTaintBug )
const {
761 ExplodedNode *ErrorNode =
C.generateErrorNode(ErrorState);
765 auto BR = std::make_unique<PathSensitiveBugReport>(
766 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
781 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
783 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
785 C.emitReport(std::move(BR));
788bool ArrayBoundChecker::isFromCtypeMacro(
const Expr *E, ASTContext &ACtx) {
796 if (MacroName.size() < 7 || MacroName[0] !=
'i' || MacroName[1] !=
's')
799 return ((MacroName ==
"isalnum") || (MacroName ==
"isalpha") ||
800 (MacroName ==
"isblank") || (MacroName ==
"isdigit") ||
801 (MacroName ==
"isgraph") || (MacroName ==
"islower") ||
802 (MacroName ==
"isnctrl") || (MacroName ==
"isprint") ||
803 (MacroName ==
"ispunct") || (MacroName ==
"isspace") ||
804 (MacroName ==
"isupper") || (MacroName ==
"isxdigit"));
807bool ArrayBoundChecker::isOffsetObviouslyNonnegative(
const Expr *E,
809 const ArraySubscriptExpr *ASE = getAsCleanArraySubscriptExpr(E,
C);
815bool ArrayBoundChecker::isInAddressOf(
const Stmt *S, ASTContext &ACtx) {
818 const DynTypedNodeList Parents = ParentCtx.
getParents(*S);
821 S = Parents[0].get<Stmt>();
822 }
while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
823 const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
824 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
827bool ArrayBoundChecker::isIdiomaticPastTheEndPtr(
const Expr *E,
829 NonLoc Offset, NonLoc Limit,
833 State, Offset, Limit,
C.getSValBuilder(),
true);
834 return EqualsToThreshold && !NotEqualToThreshold;
839void ento::registerArrayBoundChecker(CheckerManager &mgr) {
843bool ento::shouldRegisterArrayBoundChecker(
const CheckerManager &mgr) {
static std::pair< ProgramStateRef, ProgramStateRef > compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, SValBuilder &SVB, bool CheckEquality=false)
static std::string getRegionName(const MemSpaceRegion *Space, const SubRegion *Region)
static std::optional< std::pair< const SubRegion *, NonLoc > > computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location)
For a given Location that can be represented as a symbolic expression Arr[Idx] (or perhaps Arr[Idx1][...
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value)
static std::optional< int64_t > getConcreteValue(NonLoc SV)
static Messages getTaintMsgs(const MemSpaceRegion *Space, const SubRegion *Region, const char *OffsetName, bool AlsoMentionUnderflow)
static bool isUnsigned(SValBuilder &SVB, NonLoc Value)
static Messages getNonTaintMsgs(const ASTContext &ACtx, const MemSpaceRegion *Space, const SubRegion *Region, NonLoc Offset, std::optional< NonLoc > Extent, SVal Location, BadOffsetKind Problem)
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
static bool tryDividePair(std::optional< int64_t > &Val1, std::optional< int64_t > &Val2, int64_t Divisor)
Try to divide Val1 and Val2 (in place) by Divisor and return true if it can be performed (Divisor is ...
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
ParentMapContext & getParentMapContext()
Returns the dynamic AST node parent map context.
const LangOptions & getLangOpts() const
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
This represents one expression.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
DynTypedNodeList getParents(const NodeT &Node)
Returns the parents of the given node (within the traversal scope).
A (possibly-)qualified type.
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
SourceLocation getBeginLoc() const LLVM_READONLY
bool isUnsignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is unsigned or an enumeration type whose underlying ...
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
A record of the "type" of an APSInt, used for conversions.
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
CHECKER * registerChecker(AT &&...Args)
Register a single-part checker (derived from Checker): construct its singleton instance,...
Simple checker classes that implement one frontend (i.e.
ElementRegion is used to represent both array elements and casts.
QualType getElementType() const
MemRegion - The root abstract class for all memory regions.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
std::string getDescriptiveName(bool UseQuotes=true) const
Get descriptive name for memory region.
const RegionTy * getAs() const
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
NonLoc makeArrayIndex(uint64_t idx)
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
QualType getConditionType() const
virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0
Tries to get the maximal possible (integer) value of a given SVal.
NonLoc makeZeroArrayIndex()
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
SubRegion - A region that subsets another larger region.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
llvm::iterator_range< symbol_iterator > symbols() const
Value representing integer constant.
APSIntPtr getValue() const
Represents symbolic expression that isn't a location.
const char *const TaintedData
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
BinarySymExprImpl< const SymExpr *, APSIntPtr, SymExpr::Kind::SymIntExprKind > SymIntExpr
Represents a symbolic expression like 'x' + 3.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
const FunctionProtoType * T