24#include "llvm/ADT/SmallString.h"
25#include "llvm/Support/FormatVariadic.h"
26#include "llvm/Support/raw_ostream.h"
// Kinds of out-of-bounds report this checker distinguishes. The enumerator
// values double as indices into the ShortMsgTemplates table in getShortMsg(),
// so the declaration order here must stay in sync with that table.
35enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
37class ArrayBoundCheckerV2 :
38 public Checker<check::Location> {
39 BugType BT{
this,
"Out-of-bound access"};
43 NonLoc Offset, std::string RegName, std::string Msg)
const;
48 void checkLocation(
SVal l,
bool isLoad,
const Stmt *S,
58static std::optional<std::pair<const SubRegion *, NonLoc>>
70 dyn_cast_or_null<ElementRegion>(Location.
getAsRegion());
88 auto Delta = EvalBinOp(BO_Mul, *Index, Size);
93 Offset = EvalBinOp(BO_Add, *Offset, *Delta);
100 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
104 return std::make_pair(OwnerRegion, *Offset);
115static std::pair<NonLoc, nonloc::ConcreteInt>
119 if (SymVal && SymVal->isExpression()) {
120 if (
const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
121 llvm::APSInt constant =
123 switch (SIE->getOpcode()) {
127 if ((extent.
getValue() % constant) != 0)
128 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
144 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
152static std::pair<ProgramStateRef, ProgramStateRef>
167 return {
nullptr, State};
170 auto BelowThreshold =
174 return State->assume(*BelowThreshold);
176 return {
nullptr,
nullptr};
186 if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
187 return formatv(
"the field '{0}'", Name);
188 return "the unnamed field";
191 if (isa<AllocaRegion>(Region))
192 return "the memory returned by 'alloca'";
194 if (isa<SymbolicRegion>(Region) &&
196 return "the heap area";
198 if (isa<StringRegion>(Region))
199 return "the string literal";
206 return ConcreteVal->getValue().tryExtValue();
// Returns the short, one-line message for an out-of-bounds report of the
// given Kind, with RegName substituted into the message template.
211static std::string
getShortMsg(OOB_Kind Kind, std::string RegName) {
// One template per OOB_Kind enumerator, listed in declaration order
// (OOB_Precedes, OOB_Exceeds, OOB_Taint); the table is indexed by Kind.
212 static const char *ShortMsgTemplates[] = {
213 "Out of bound access to memory preceding {0}",
214 "Out of bound access to memory after the end of {0}",
215 "Potential out of bound access to {0} with tainted offset"};
// formatv replaces the {0} placeholder with RegName.
217 return formatv(ShortMsgTemplates[Kind], RegName);
222 llvm::raw_svector_ostream Out(Buf);
223 Out <<
"Access of " << RegName <<
" at negative byte offset";
225 Out <<
' ' << ConcreteIdx->getValue();
226 return std::string(Buf);
231 assert(EReg &&
"this checker only handles element access");
232 QualType ElemType = EReg->getElementType();
237 bool UseByteOffsets =
true;
239 const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
240 const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
241 if (!OffsetHasRemainder && !ExtentHasRemainder) {
242 UseByteOffsets =
false;
244 *OffsetN /= ElemSize;
246 *ExtentN /= ElemSize;
251 llvm::raw_svector_ostream Out(Buf);
253 if (!ExtentN && !UseByteOffsets)
254 Out <<
"'" << ElemType.
getAsString() <<
"' element in ";
255 Out << RegName <<
" at ";
257 Out << (UseByteOffsets ?
"byte offset " :
"index ") << *OffsetN;
259 Out <<
"an overflowing " << (UseByteOffsets ?
"byte offset" :
"index");
262 Out <<
", while it holds only ";
270 Out <<
" '" << ElemType.
getAsString() <<
"' element";
276 return std::string(Buf);
280 llvm::raw_svector_ostream Out(Buf);
281 Out <<
"Access of " << RegName
282 <<
" with a tainted offset that may be too large";
283 return std::string(Buf);
286void ArrayBoundCheckerV2::checkLocation(
SVal Location,
bool IsLoad,
305 if (isFromCtypeMacro(LoadS,
C.getASTContext()))
311 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
317 auto [Reg, ByteOffset] = *RawOffset;
321 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
332 if (PrecedesLowerBound && !WithinLowerBound) {
336 reportOOB(
C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
340 if (WithinLowerBound)
341 State = WithinLowerBound;
347 auto [WithinUpperBound, ExceedsUpperBound] =
350 if (ExceedsUpperBound) {
351 if (!WithinUpperBound) {
354 std::string Msg =
getExceedsMsg(
C.getASTContext(), RegName, ByteOffset,
355 *KnownSize, Location);
356 reportOOB(
C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
363 reportOOB(
C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
368 if (WithinUpperBound)
369 State = WithinUpperBound;
372 C.addTransition(State);
377 NonLoc Offset, std::string RegName,
378 std::string Msg)
const {
386 auto BR = std::make_unique<PathSensitiveBugReport>(
387 Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);
390 if (Kind == OOB_Taint)
392 BR->markInteresting(Sym);
394 C.emitReport(std::move(BR));
397bool ArrayBoundCheckerV2::isFromCtypeMacro(
const Stmt *S,
ASTContext &ACtx) {
399 if (!
Loc.isMacroID())
405 if (MacroName.size() < 7 || MacroName[0] !=
'i' || MacroName[1] !=
's')
408 return ((MacroName ==
"isalnum") || (MacroName ==
"isalpha") ||
409 (MacroName ==
"isblank") || (MacroName ==
"isdigit") ||
410 (MacroName ==
"isgraph") || (MacroName ==
"islower") ||
411 (MacroName ==
"isnctrl") || (MacroName ==
"isprint") ||
412 (MacroName ==
"ispunct") || (MacroName ==
"isspace") ||
413 (MacroName ==
"isupper") || (MacroName ==
"isxdigit"));
420bool ento::shouldRegisterArrayBoundCheckerV2(
const CheckerManager &mgr) {
static std::optional< std::pair< const SubRegion *, NonLoc > > computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location)
For a given Location that can be represented as a symbolic expression Arr[Idx] (or perhaps Arr[Idx1][...
static std::string getPrecedesMsg(std::string RegName, NonLoc Offset)
static std::string getRegionName(const SubRegion *Region)
static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName, NonLoc Offset, NonLoc Extent, SVal Location)
static std::optional< int64_t > getConcreteValue(NonLoc SV)
static std::string getTaintMsg(std::string RegName)
static std::string getShortMsg(OOB_Kind Kind, std::string RegName)
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
static std::pair< ProgramStateRef, ProgramStateRef > compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, SValBuilder &SVB)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
const LangOptions & getLangOpts() const
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
A (possibly-)qualified type.
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Encodes a location in the source.
Stmt - This represents one statement.
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
A record of the "type" of an APSInt, used for conversions.
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Template implementation for all binary symbolic expressions.
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
ElementRegion is used to represent both array elements and casts.
QualType getElementType() const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
std::string getDescriptiveName(bool UseQuotes=true) const
Get descriptive name for memory region.
const RegionTy * getAs() const
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
NonLoc makeArrayIndex(uint64_t idx)
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non-location operands.
QualType getConditionType() const
NonLoc makeZeroArrayIndex()
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
const MemRegion * getAsRegion() const
SubRegion - A region that subsets another larger region.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Value representing integer constant.
const llvm::APSInt & getValue() const
Represents symbolic expression that isn't a location.
const char *const TaintedData
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)