25#include "llvm/ADT/SmallString.h"
26#include "llvm/Support/FormatVariadic.h"
27#include "llvm/Support/raw_ostream.h"
41static std::optional<QualType> determineElementType(
const Expr *
E,
43 const auto *ASE = dyn_cast<ArraySubscriptExpr>(
E);
47 const MemRegion *SubscriptBaseReg =
C.getSVal(ASE->getBase()).getAsRegion();
48 if (!SubscriptBaseReg)
53 if (isa<ElementRegion>(SubscriptBaseReg->
StripCasts()))
56 return ASE->getType();
59static std::optional<int64_t>
60determineElementSize(
const std::optional<QualType>
T,
const CheckerContext &
C) {
63 return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
66class StateUpdateReporter {
68 const NonLoc ByteOffsetVal;
69 const std::optional<QualType> ElementType;
70 const std::optional<int64_t> ElementSize;
71 bool AssumedNonNegative =
false;
72 std::optional<NonLoc> AssumedUpperBound = std::nullopt;
77 : Reg(R), ByteOffsetVal(ByteOffsVal),
78 ElementType(determineElementType(
E,
C)),
79 ElementSize(determineElementSize(ElementType,
C)) {}
81 void recordNonNegativeAssumption() { AssumedNonNegative =
true; }
82 void recordUpperBoundAssumption(
NonLoc UpperBoundVal) {
83 AssumedUpperBound = UpperBoundVal;
86 bool assumedNonNegative() {
return AssumedNonNegative; }
107 static bool providesInformationAboutInteresting(
SymbolRef Sym,
109 static bool providesInformationAboutInteresting(
SVal SV,
111 return providesInformationAboutInteresting(SV.
getAsSymbol(), BR);
126class ArrayBoundCheckerV2 :
public Checker<check::PostStmt<ArraySubscriptExpr>,
127 check::PostStmt<UnaryOperator>,
128 check::PostStmt<MemberExpr>> {
129 BugType BT{
this,
"Out-of-bound access"};
135 NonLoc Offset, std::optional<NonLoc> Extent,
136 bool IsTaintBug =
false)
const;
154 if (
E->getOpcode() == UO_Deref)
159 performCheck(
E->getBase(),
C);
170static std::optional<std::pair<const SubRegion *, NonLoc>>
182 dyn_cast_or_null<ElementRegion>(Location.
getAsRegion());
200 auto Delta = EvalBinOp(BO_Mul, *Index, Size);
205 Offset = EvalBinOp(BO_Add, *Offset, *Delta);
212 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
216 return std::make_pair(OwnerRegion, *Offset);
241static std::pair<NonLoc, nonloc::ConcreteInt>
245 if (SymVal && SymVal->isExpression()) {
246 if (
const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
247 llvm::APSInt constant =
249 switch (SIE->getOpcode()) {
253 if ((extent.
getValue() % constant) != 0)
254 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
270 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
275 return MaxV && MaxV->isNegative();
290static std::pair<ProgramStateRef, ProgramStateRef>
307 return {
nullptr, State};
310 return {State,
nullptr};
315 return {
nullptr, State};
329 auto BelowThreshold =
334 return State->assume(*BelowThreshold);
336 return {
nullptr,
nullptr};
346 if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
347 return formatv(
"the field '{0}'", Name);
348 return "the unnamed field";
351 if (isa<AllocaRegion>(Region))
352 return "the memory returned by 'alloca'";
354 if (isa<SymbolicRegion>(Region) &&
356 return "the heap area";
358 if (isa<StringRegion>(Region))
359 return "the string literal";
366 return ConcreteVal->getValue().tryExtValue();
379 OffsetStr = formatv(
" {0}", ConcreteOffset);
382 formatv(
"Out of bound access to memory preceding {0}", RegName),
383 formatv(
"Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
391 std::optional<int64_t> &Val2, int64_t Divisor) {
394 const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
395 const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
396 if (!Val1HasRemainder && !Val2HasRemainder) {
408 bool AlsoMentionUnderflow) {
411 assert(EReg &&
"this checker only handles element access");
412 QualType ElemType = EReg->getElementType();
419 bool UseByteOffsets = !
tryDividePair(OffsetN, ExtentN, ElemSize);
420 const char *OffsetOrIndex = UseByteOffsets ?
"byte offset" :
"index";
423 llvm::raw_svector_ostream Out(Buf);
425 if (!ExtentN && !UseByteOffsets)
426 Out <<
"'" << ElemType.
getAsString() <<
"' element in ";
427 Out << RegName <<
" at ";
428 if (AlsoMentionUnderflow) {
429 Out <<
"a negative or overflowing " << OffsetOrIndex;
430 }
else if (OffsetN) {
431 Out << OffsetOrIndex <<
" " << *OffsetN;
433 Out <<
"an overflowing " << OffsetOrIndex;
436 Out <<
", while it holds only ";
444 Out <<
" '" << ElemType.
getAsString() <<
"' element";
450 return {formatv(
"Out of bound access to memory {0} {1}",
451 AlsoMentionUnderflow ?
"around" :
"after the end of",
457 bool AlsoMentionUnderflow) {
459 return {formatv(
"Potential out of bound access to {0} with tainted {1}",
460 RegName, OffsetName),
461 formatv(
"Access of {0} with a tainted {1} that may be {2}too large",
463 AlsoMentionUnderflow ?
"negative or " :
"")};
468 if (!AssumedNonNegative && !AssumedUpperBound)
472 return getMessage(BR);
477 bool ShouldReportNonNegative = AssumedNonNegative;
478 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
479 if (AssumedUpperBound &&
480 providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
484 ShouldReportNonNegative =
false;
494 const bool UseIndex =
495 ElementSize &&
tryDividePair(OffsetN, ExtentN, *ElementSize);
498 llvm::raw_svector_ostream Out(Buf);
503 Out <<
"'" << OffsetN <<
"' ";
504 }
else if (AssumedUpperBound) {
505 Out <<
"byte offset ";
507 Out <<
"'" << OffsetN <<
"' ";
513 if (ShouldReportNonNegative) {
514 Out <<
" non-negative";
516 if (AssumedUpperBound) {
517 if (ShouldReportNonNegative)
519 Out <<
" less than ";
521 Out << *ExtentN <<
", ";
522 if (UseIndex && ElementType)
523 Out <<
"the number of '" << ElementType->getAsString()
526 Out <<
"the extent of ";
529 return std::string(Out.str());
532bool StateUpdateReporter::providesInformationAboutInteresting(
544 if (isa<SymSymExpr>(PartSym))
551 const SVal Location =
C.getSVal(
E);
558 if (isFromCtypeMacro(
E,
C.getASTContext()))
564 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
570 auto [Reg, ByteOffset] = *RawOffset;
574 StateUpdateReporter SUR(Reg, ByteOffset,
E,
C);
578 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
589 if (PrecedesLowerBound) {
591 if (!WithinLowerBound) {
594 reportOOB(
C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
599 SUR.recordNonNegativeAssumption();
605 if (WithinLowerBound)
606 State = WithinLowerBound;
617 bool AlsoMentionUnderflow = SUR.assumedNonNegative();
619 auto [WithinUpperBound, ExceedsUpperBound] =
622 if (ExceedsUpperBound) {
624 if (!WithinUpperBound) {
628 if (isIdiomaticPastTheEndPtr(
E, ExceedsUpperBound, ByteOffset,
630 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(
C));
636 Location, AlsoMentionUnderflow);
637 reportOOB(
C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
647 const char *OffsetName =
"offset";
648 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(
E))
649 if (
isTainted(State, ASE->getIdx(),
C.getLocationContext()))
650 OffsetName =
"index";
652 Messages Msgs =
getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
653 reportOOB(
C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
659 SUR.recordUpperBoundAssumption(*KnownSize);
665 if (WithinUpperBound)
666 State = WithinUpperBound;
670 C.addTransition(State, SUR.createNoteTag(
C));
675 NonLoc Val,
bool MarkTaint) {
698 NonLoc Offset, std::optional<NonLoc> Extent,
699 bool IsTaintBug )
const {
705 auto BR = std::make_unique<PathSensitiveBugReport>(
706 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
721 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
723 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
725 C.emitReport(std::move(BR));
728bool ArrayBoundCheckerV2::isFromCtypeMacro(
const Stmt *S,
ASTContext &ACtx) {
730 if (!
Loc.isMacroID())
736 if (MacroName.size() < 7 || MacroName[0] !=
'i' || MacroName[1] !=
's')
739 return ((MacroName ==
"isalnum") || (MacroName ==
"isalpha") ||
740 (MacroName ==
"isblank") || (MacroName ==
"isdigit") ||
741 (MacroName ==
"isgraph") || (MacroName ==
"islower") ||
742 (MacroName ==
"isnctrl") || (MacroName ==
"isprint") ||
743 (MacroName ==
"ispunct") || (MacroName ==
"isspace") ||
744 (MacroName ==
"isupper") || (MacroName ==
"isxdigit"));
747bool ArrayBoundCheckerV2::isInAddressOf(
const Stmt *S,
ASTContext &ACtx) {
753 S = Parents[0].get<
Stmt>();
754 }
while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
755 const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
756 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
759bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(
const Expr *
E,
763 if (isa<ArraySubscriptExpr>(
E) && isInAddressOf(
E,
C.getASTContext())) {
765 State, Offset, Limit,
C.getSValBuilder(),
true);
766 return EqualsToThreshold && !NotEqualToThreshold;
775bool ento::shouldRegisterArrayBoundCheckerV2(
const CheckerManager &mgr) {
static std::pair< ProgramStateRef, ProgramStateRef > compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, SValBuilder &SVB, bool CheckEquality=false)
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, NonLoc Offset, NonLoc Extent, SVal Location, bool AlsoMentionUnderflow)
static std::optional< std::pair< const SubRegion *, NonLoc > > computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location)
For a given Location that can be represented as a symbolic expression Arr[Idx] (or perhaps Arr[Idx1][...
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName, bool AlsoMentionUnderflow)
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value)
static std::string getRegionName(const SubRegion *Region)
static std::optional< int64_t > getConcreteValue(NonLoc SV)
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset)
static bool isUnsigned(SValBuilder &SVB, NonLoc Value)
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
static bool tryDividePair(std::optional< int64_t > &Val1, std::optional< int64_t > &Val2, int64_t Divisor)
Try to divide Val1 and Val2 (in place) by Divisor and return true if it can be performed (Divisor is ...
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
ParentMapContext & getParentMapContext()
Returns the dynamic AST node parent map context.
const LangOptions & getLangOpts() const
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Container for either a single DynTypedNode or for an ArrayRef to DynTypedNode.
This represents one expression.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
DynTypedNodeList getParents(const NodeT &Node)
Returns the parents of the given node (within the traversal scope).
A (possibly-)qualified type.
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Encodes a location in the source.
Stmt - This represents one statement.
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
A record of the "type" of an APSInt, used for conversions.
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Template implementation for all binary symbolic expressions.
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
ElementRegion is used to represent both array elements and casts.
QualType getElementType() const
MemRegion - The root abstract class for all memory regions.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
std::string getDescriptiveName(bool UseQuotes=true) const
Get descriptive name for memory region.
const RegionTy * getAs() const
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
NonLoc makeArrayIndex(uint64_t idx)
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
QualType getConditionType() const
virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0
Tries to get the maximal possible (integer) value of a given SVal.
NonLoc makeZeroArrayIndex()
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
const MemRegion * getAsRegion() const
SubRegion - A region that subsets another larger region.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
llvm::iterator_range< symbol_iterator > symbols() const
Value representing integer constant.
const llvm::APSInt & getValue() const
Represents symbolic expression that isn't a location.
const char *const TaintedData
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T