clang 22.0.0git
CStringChecker.cpp
Go to the documentation of this file.
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <functional>
34#include <optional>
35
36using namespace clang;
37using namespace ento;
38using namespace std::placeholders;
39
40namespace {
41struct AnyArgExpr {
42 const Expr *Expression;
43 unsigned ArgumentIndex;
44};
45struct SourceArgExpr : AnyArgExpr {};
46struct DestinationArgExpr : AnyArgExpr {};
47struct SizeArgExpr : AnyArgExpr {};
48
49using ErrorMessage = SmallString<128>;
50enum class AccessKind { write, read };
51
52static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
53 AccessKind Access) {
54 ErrorMessage Message;
55 llvm::raw_svector_ostream Os(Message);
56
57 // Function classification like: Memory copy function
58 Os << toUppercase(FunctionDescription.front())
59 << &FunctionDescription.data()[1];
60
61 if (Access == AccessKind::write) {
62 Os << " overflows the destination buffer";
63 } else { // read access
64 Os << " accesses out-of-bound array element";
65 }
66
67 return Message;
68}
69
/// Which concatenation function (if any) a shared copy routine is modeling.
enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };

/// Character width handled by a modeled function: `char` or `wchar_t`.
enum class CharKind { Regular = 0, Wide };

// Short aliases for the two character kinds.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
75
76static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
77 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
78 : Ctx.WideCharTy);
79}
80
81class CStringChecker
82 : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
83 check::LiveSymbols, check::DeadSymbols,
84 check::RegionChanges> {
85 mutable const char *CurrentFunctionDescription = nullptr;
86
87public:
88 // FIXME: The bug types emitted by this checker family have confused garbage
89 // in their Description and Category fields (e.g. `categories::UnixAPI` is
90 // passed as the description in several cases and `uninitialized` is mistyped
91 // as `unitialized`). This should be cleaned up.
92 CheckerFrontendWithBugType NullArg{categories::UnixAPI};
93 CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
94 CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
95 "Improper arguments"};
96 CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
97 CheckerFrontendWithBugType UninitializedRead{
98 "Accessing unitialized/garbage values"};
99
100 StringRef getDebugTag() const override { return "MallocChecker"; }
101
102 static void *getTag() { static int tag; return &tag; }
103
104 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
105 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
106 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
107 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
108
110 checkRegionChanges(ProgramStateRef state,
111 const InvalidatedSymbols *,
112 ArrayRef<const MemRegion *> ExplicitRegions,
113 ArrayRef<const MemRegion *> Regions,
114 const LocationContext *LCtx,
115 const CallEvent *Call) const;
116
117 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
118 const CallEvent &)>;
119
120 CallDescriptionMap<FnCheck> Callbacks = {
121 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
122 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
123 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
124 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
125 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
126 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
127 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
128 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
129 {{CDM::CLibrary, {"memcmp"}, 3},
130 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
131 {{CDM::CLibrary, {"wmemcmp"}, 3},
132 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
133 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
134 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
135 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
136 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
137 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
138 &CStringChecker::evalMemset},
139 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
140 // FIXME: C23 introduces 'memset_explicit', maybe also model that
141 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
142 &CStringChecker::evalStrcpy},
143 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
144 &CStringChecker::evalStrncpy},
145 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
146 &CStringChecker::evalStpcpy},
147 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
148 &CStringChecker::evalStrlcpy},
149 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
150 &CStringChecker::evalStrcat},
151 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
152 &CStringChecker::evalStrncat},
153 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
154 &CStringChecker::evalStrlcat},
155 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
156 &CStringChecker::evalstrLength},
157 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
158 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
159 &CStringChecker::evalstrnLength},
160 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
161 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
162 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
163 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
164 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
165 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
166 {{CDM::CLibrary, {"strxfrm"}, 3}, &CStringChecker::evalStrxfrm},
167 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
168 {{CDM::CLibrary, {"bcmp"}, 3},
169 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
170 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
171 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
172 &CStringChecker::evalBzero},
173
174 // When recognizing calls to the following variadic functions, we accept
175 // any number of arguments in the call (std::nullopt = accept any
176 // number), but check that in the declaration there are 2 and 3
177 // parameters respectively. (Note that the parameter count does not
178 // include the "...". Calls where the number of arguments is too small
179 // will be discarded by the callback.)
180 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
181 &CStringChecker::evalSprintf},
182 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
183 &CStringChecker::evalSnprintf},
184 };
185
186 // These require a bit of special handling.
187 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
188 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
189
190 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
191 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
192 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
193 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
194 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
195 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
196 ProgramStateRef state, SizeArgExpr Size,
197 DestinationArgExpr Dest, SourceArgExpr Source,
198 bool Restricted, bool IsMempcpy, CharKind CK) const;
199
200 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201
202 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
203 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
204 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
205 bool IsStrnlen = false) const;
206
207 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
208 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
209 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
210 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
211 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
212 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
213 bool returnPtr = true) const;
214
215 void evalStrxfrm(CheckerContext &C, const CallEvent &Call) const;
216
217 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
218 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
220
221 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
222 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
223 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
224 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
226 bool IsBounded = false, bool IgnoreCase = false) const;
227
228 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
229
230 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
231 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
232 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
233 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
234 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
235
236 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
237 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
238 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
239 bool IsBounded) const;
240
241 // Utility methods
242 std::pair<ProgramStateRef , ProgramStateRef >
243 static assumeZero(CheckerContext &C,
244 ProgramStateRef state, SVal V, QualType Ty);
245
246 static ProgramStateRef setCStringLength(ProgramStateRef state,
247 const MemRegion *MR,
248 SVal strLength);
249 static SVal getCStringLengthForRegion(CheckerContext &C,
250 ProgramStateRef &state,
251 const Expr *Ex,
252 const MemRegion *MR,
253 bool hypothetical);
254 static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
255
256 SVal getCStringLength(CheckerContext &C,
257 ProgramStateRef &state,
258 const Expr *Ex,
259 SVal Buf,
260 bool hypothetical = false) const;
261
262 const StringLiteral *getCStringLiteral(CheckerContext &C,
263 ProgramStateRef &state,
264 const Expr *expr,
265 SVal val) const;
266
267 /// Invalidate the destination buffer determined by characters copied.
268 static ProgramStateRef
269 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
270 const Expr *BufE, ConstCFGElementRef Elem,
271 SVal BufV, SVal SizeV, QualType SizeTy);
272
273 /// Operation never overflows, do not invalidate the super region.
274 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
275 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
276
277 /// We do not know whether the operation can overflow (e.g. size is unknown),
278 /// invalidate the super region and escape related pointers.
279 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
280 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
281
282 /// Invalidate the source buffer for escaping pointers.
283 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
286 SVal BufV);
287
288 /// @param InvalidationTraitOperations Determine how to invlidate the
289 /// MemRegion by setting the invalidation traits. Return true to cause pointer
290 /// escape, or false otherwise.
291 static ProgramStateRef invalidateBufferAux(
292 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
293 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
294 const MemRegion *)>
295 InvalidationTraitOperations);
296
297 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
298 const MemRegion *MR);
299
300 static bool memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
301 SVal CharE, const Expr *Size, CheckerContext &C,
302 ProgramStateRef &State);
303
304 // Re-usable checks
305 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
306 AnyArgExpr Arg, SVal l) const;
307 // Check whether the origin region behind \p Element (like the actual array
308 // region \p Element is from) is initialized.
309 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
310 AnyArgExpr Buffer, SVal Element, SVal Size) const;
311 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
312 AnyArgExpr Buffer, SVal Element,
313 AccessKind Access,
314 CharKind CK = CharKind::Regular) const;
315 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
316 AnyArgExpr Buffer, SizeArgExpr Size,
317 AccessKind Access,
318 CharKind CK = CharKind::Regular) const;
319 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
320 SizeArgExpr Size, AnyArgExpr First,
321 AnyArgExpr Second,
322 CharKind CK = CharKind::Regular) const;
323 void emitOverlapBug(CheckerContext &C,
324 ProgramStateRef state,
325 const Stmt *First,
326 const Stmt *Second) const;
327
328 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
329 StringRef WarningMsg) const;
330 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
331 const Stmt *S, StringRef WarningMsg) const;
332 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
333 const Stmt *S, StringRef WarningMsg) const;
334 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
335 const Expr *E, const MemRegion *R,
336 StringRef Msg) const;
337 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
338 ProgramStateRef state,
339 NonLoc left,
340 NonLoc right) const;
341
342 // Return true if the destination buffer of the copy function may be in bound.
343 // Expects SVal of Size to be positive and unsigned.
344 // Expects SVal of FirstBuf to be a FieldRegion.
345 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
346 SVal BufVal, QualType BufTy, SVal LengthVal,
347 QualType LengthTy);
348};
349
350} //end anonymous namespace
351
// Declares a program-state map named CStringLength whose keys are
// `const MemRegion *` and whose values are `SVal`.
// NOTE(review): judging by the name and by setCStringLength(), the value is
// presumably the tracked length of the C string stored in the region.
352REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
353
354//===----------------------------------------------------------------------===//
355// Individual checks and utility methods.
356//===----------------------------------------------------------------------===//
357
358std::pair<ProgramStateRef, ProgramStateRef>
359CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
360 QualType Ty) {
361 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
362 if (!val)
363 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
364
365 SValBuilder &svalBuilder = C.getSValBuilder();
366 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
367 return State->assume(svalBuilder.evalEQ(State, *val, zero));
368}
369
370ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
371 ProgramStateRef State,
372 AnyArgExpr Arg, SVal l) const {
373 // If a previous check has failed, propagate the failure.
374 if (!State)
375 return nullptr;
376
377 ProgramStateRef stateNull, stateNonNull;
378 std::tie(stateNull, stateNonNull) =
379 assumeZero(C, State, l, Arg.Expression->getType());
380
381 if (stateNull && !stateNonNull) {
382 if (NullArg.isEnabled()) {
383 SmallString<80> buf;
384 llvm::raw_svector_ostream OS(buf);
385 assert(CurrentFunctionDescription);
386 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
387 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
388 << CurrentFunctionDescription;
389
390 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
391 }
392 return nullptr;
393 }
394
395 // From here on, assume that the value is non-null.
396 assert(stateNonNull);
397 return stateNonNull;
398}
399
// Returns the index of element region ER for the given character kind, or an
// empty optional when it cannot be provided:
//  - CharKind::Regular: ER's own index, provided its value type is `char`.
//  - otherwise: ER's index multiplied by WideSize (evaluated in size_t),
//    provided its value type is `wchar_t` and the product is not Unknown.
// NOTE(review): two lines of this function are absent from this listing (the
// embedded numbering skips 402 and 416): the declaration of `SVB` and the
// first line of the `WideSize` initializer. Presumably `SVB` is the state's
// SValBuilder and `WideSize` is the size of wchar_t in chars — confirm
// against the original file.
400static std::optional<NonLoc> getIndex(ProgramStateRef State,
401 const ElementRegion *ER, CharKind CK) {
403 ASTContext &Ctx = SVB.getContext();
404
405 if (CK == CharKind::Regular) {
// An element of any other type is not indexed in `char` units.
406 if (ER->getValueType() != Ctx.CharTy)
407 return {};
408 return ER->getIndex();
409 }
410
411 if (ER->getValueType() != Ctx.WideCharTy)
412 return {};
413
414 QualType SizeTy = Ctx.getSizeType();
415 NonLoc WideSize =
417 SizeTy)
418 .castAs<NonLoc>();
// Scale the element index by WideSize.
419 SVal Offset =
420 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
421 if (Offset.isUnknown())
422 return {};
423 return Offset.castAs<NonLoc>();
424}
425
426// Basically 1 -> 1st, 12 -> 12th, etc.
427static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
428 Os << Idx << llvm::getOrdinalSuffix(Idx);
429}
430
431ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
432 ProgramStateRef State,
433 AnyArgExpr Buffer, SVal Element,
434 SVal Size) const {
435
436 // If a previous check has failed, propagate the failure.
437 if (!State)
438 return nullptr;
439
440 const MemRegion *R = Element.getAsRegion();
441 const auto *ER = dyn_cast_or_null<ElementRegion>(R);
442 if (!ER)
443 return State;
444
445 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
446 if (!SuperR)
447 return State;
448
449 // FIXME: We ought to able to check objects as well. Maybe
450 // UninitializedObjectChecker could help?
451 if (!SuperR->getValueType()->isArrayType())
452 return State;
453
454 SValBuilder &SVB = C.getSValBuilder();
455 ASTContext &Ctx = SVB.getContext();
456
457 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
458 const NonLoc Zero = SVB.makeZeroArrayIndex();
459
460 std::optional<Loc> FirstElementVal =
461 State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
462 if (!FirstElementVal)
463 return State;
464
465 // Ensure that we wouldn't read uninitialized value.
466 if (UninitializedRead.isEnabled() &&
467 State->getSVal(*FirstElementVal).isUndef()) {
468 llvm::SmallString<258> Buf;
469 llvm::raw_svector_ostream OS(Buf);
470 OS << "The first element of the ";
471 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
472 OS << " argument is undefined";
473 emitUninitializedReadBug(C, State, Buffer.Expression,
474 FirstElementVal->getAsRegion(), OS.str());
475 return nullptr;
476 }
477
478 // We won't check whether the entire region is fully initialized -- lets just
479 // check that the first and the last element is. So, onto checking the last
480 // element:
481 const QualType IdxTy = SVB.getArrayIndexType();
482
483 NonLoc ElemSize =
484 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
485 .castAs<NonLoc>();
486
487 // FIXME: Check that the size arg to the cstring function is divisible by
488 // size of the actual element type?
489
490 // The type of the argument to the cstring function is either char or wchar,
491 // but thats not the type of the original array (or memory region).
492 // Suppose the following:
493 // int t[5];
494 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
495 // When checking whether t is fully initialized, we see it as char array of
496 // size sizeof(int)*5. If we check the last element as a character, we read
497 // the last byte of an integer, which will be undefined. But just because
498 // that value is undefined, it doesn't mean that the element is uninitialized!
499 // For this reason, we need to retrieve the actual last element with the
500 // correct type.
501
502 // Divide the size argument to the cstring function by the actual element
503 // type. This value will be size of the array, or the index to the
504 // past-the-end element.
505 std::optional<NonLoc> Offset =
506 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
507 IdxTy)
508 .getAs<NonLoc>();
509
510 // Retrieve the index of the last element.
511 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
512 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
513
514 if (!Offset)
515 return State;
516
517 SVal LastElementVal =
518 State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
519 if (!isa<Loc>(LastElementVal))
520 return State;
521
522 if (UninitializedRead.isEnabled() &&
523 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
524 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
525 // If we can't get emit a sensible last element index, just bail out --
526 // prefer to emit nothing in favour of emitting garbage quality reports.
527 if (!IdxInt) {
528 C.addSink();
529 return nullptr;
530 }
531 llvm::SmallString<258> Buf;
532 llvm::raw_svector_ostream OS(Buf);
533 OS << "The last accessed element (at index ";
534 OS << IdxInt->getExtValue();
535 OS << ") in the ";
536 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
537 OS << " argument is undefined";
538 emitUninitializedReadBug(C, State, Buffer.Expression,
539 LastElementVal.getAsRegion(), OS.str());
540 return nullptr;
541 }
542 return State;
543}
544// FIXME: The root of this logic was copied from the old checker
545// alpha.security.ArrayBound (which is removed within this commit).
546// It should be refactored to use the different, more sophisticated bounds
547// checking logic used by the new checker ``security.ArrayBound``.
548ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
549 ProgramStateRef state,
550 AnyArgExpr Buffer, SVal Element,
551 AccessKind Access,
552 CharKind CK) const {
553
554 // If a previous check has failed, propagate the failure.
555 if (!state)
556 return nullptr;
557
558 // Check for out of bound array element access.
559 const MemRegion *R = Element.getAsRegion();
560 if (!R)
561 return state;
562
563 const auto *ER = dyn_cast<ElementRegion>(R);
564 if (!ER)
565 return state;
566
567 // Get the index of the accessed element.
568 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
569 if (!Idx)
570 return state;
571
572 // Get the size of the array.
573 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
574 DefinedOrUnknownSVal Size =
575 getDynamicExtent(state, superReg, C.getSValBuilder());
576
577 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
578 if (StOutBound && !StInBound) {
579 if (!OutOfBounds.isEnabled())
580 return nullptr;
581
582 ErrorMessage Message =
583 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
584 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
585 return nullptr;
586 }
587
588 // Array bound check succeeded. From this point forward the array bound
589 // should always succeed.
590 return StInBound;
591}
592
594CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
595 AnyArgExpr Buffer, SizeArgExpr Size,
596 AccessKind Access, CharKind CK) const {
597 // If a previous check has failed, propagate the failure.
598 if (!State)
599 return nullptr;
600
601 SValBuilder &svalBuilder = C.getSValBuilder();
602 ASTContext &Ctx = svalBuilder.getContext();
603
604 QualType SizeTy = Size.Expression->getType();
605 QualType PtrTy = getCharPtrType(Ctx, CK);
606
607 // Check that the first buffer is non-null.
608 SVal BufVal = C.getSVal(Buffer.Expression);
609 State = checkNonNull(C, State, Buffer, BufVal);
610 if (!State)
611 return nullptr;
612
613 // If out-of-bounds checking is turned off, skip the rest.
614 if (!OutOfBounds.isEnabled())
615 return State;
616
617 SVal BufStart =
618 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
619
620 // Check if the first byte of the buffer is accessible.
621 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
622
623 if (!State)
624 return nullptr;
625
626 // Get the access length and make sure it is known.
627 // FIXME: This assumes the caller has already checked that the access length
628 // is positive. And that it's unsigned.
629 SVal LengthVal = C.getSVal(Size.Expression);
630 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
631 if (!Length)
632 return State;
633
634 // Compute the offset of the last element to be accessed: size-1.
635 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
636 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
637 if (Offset.isUnknown())
638 return nullptr;
639 NonLoc LastOffset = Offset.castAs<NonLoc>();
640
641 // Check that the first buffer is sufficiently long.
642 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
643
644 SVal BufEnd =
645 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
646 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
647 if (Access == AccessKind::read)
648 State = checkInit(C, State, Buffer, BufEnd, *Length);
649
650 // If the buffer isn't large enough, abort.
651 if (!State)
652 return nullptr;
653 }
654
655 // Large enough or not, return this state!
656 return State;
657}
658
659ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
660 ProgramStateRef state,
661 SizeArgExpr Size, AnyArgExpr First,
662 AnyArgExpr Second,
663 CharKind CK) const {
664 if (!BufferOverlap.isEnabled())
665 return state;
666
667 // Do a simple check for overlap: if the two arguments are from the same
668 // buffer, see if the end of the first is greater than the start of the second
669 // or vice versa.
670
671 // If a previous check has failed, propagate the failure.
672 if (!state)
673 return nullptr;
674
675 ProgramStateRef stateTrue, stateFalse;
676
677 if (!First.Expression->getType()->isAnyPointerType() ||
678 !Second.Expression->getType()->isAnyPointerType())
679 return state;
680
681 // Assume different address spaces cannot overlap.
682 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
683 Second.Expression->getType()->getPointeeType().getAddressSpace())
684 return state;
685
686 // Get the buffer values and make sure they're known locations.
687 const LocationContext *LCtx = C.getLocationContext();
688 SVal firstVal = state->getSVal(First.Expression, LCtx);
689 SVal secondVal = state->getSVal(Second.Expression, LCtx);
690
691 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
692 if (!firstLoc)
693 return state;
694
695 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
696 if (!secondLoc)
697 return state;
698
699 // Are the two values the same?
700 SValBuilder &svalBuilder = C.getSValBuilder();
701 std::tie(stateTrue, stateFalse) =
702 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
703
704 if (stateTrue && !stateFalse) {
705 // If the values are known to be equal, that's automatically an overlap.
706 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
707 return nullptr;
708 }
709
710 // assume the two expressions are not equal.
711 assert(stateFalse);
712 state = stateFalse;
713
714 // Which value comes first?
715 QualType cmpTy = svalBuilder.getConditionType();
716 SVal reverse =
717 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
718 std::optional<DefinedOrUnknownSVal> reverseTest =
719 reverse.getAs<DefinedOrUnknownSVal>();
720 if (!reverseTest)
721 return state;
722
723 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
724 if (stateTrue) {
725 if (stateFalse) {
726 // If we don't know which one comes first, we can't perform this test.
727 return state;
728 } else {
729 // Switch the values so that firstVal is before secondVal.
730 std::swap(firstLoc, secondLoc);
731
732 // Switch the Exprs as well, so that they still correspond.
733 std::swap(First, Second);
734 }
735 }
736
737 // Get the length, and make sure it too is known.
738 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
739 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
740 if (!Length)
741 return state;
742
743 // Convert the first buffer's start address to char*.
744 // Bail out if the cast fails.
745 ASTContext &Ctx = svalBuilder.getContext();
746 QualType CharPtrTy = getCharPtrType(Ctx, CK);
747 SVal FirstStart =
748 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
749 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
750 if (!FirstStartLoc)
751 return state;
752
753 // Compute the end of the first buffer. Bail out if THAT fails.
754 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
755 *Length, CharPtrTy);
756 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
757 if (!FirstEndLoc)
758 return state;
759
760 // Is the end of the first buffer past the start of the second buffer?
761 SVal Overlap =
762 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
763 std::optional<DefinedOrUnknownSVal> OverlapTest =
764 Overlap.getAs<DefinedOrUnknownSVal>();
765 if (!OverlapTest)
766 return state;
767
768 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
769
770 if (stateTrue && !stateFalse) {
771 // Overlap!
772 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
773 return nullptr;
774 }
775
776 // assume the two expressions don't overlap.
777 assert(stateFalse);
778 return stateFalse;
779}
780
781void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
782 const Stmt *First, const Stmt *Second) const {
783 ExplodedNode *N = C.generateErrorNode(state);
784 if (!N)
785 return;
786
787 // Generate a report for this bug.
788 auto report = std::make_unique<PathSensitiveBugReport>(
789 BufferOverlap, "Arguments must not be overlapping buffers", N);
790 report->addRange(First->getSourceRange());
791 report->addRange(Second->getSourceRange());
792
793 C.emitReport(std::move(report));
794}
795
// Emits a NullArg report with message WarningMsg on an error node generated
// from State, highlighting the source range of S. Does nothing if no error
// node could be generated.
796void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
797 const Stmt *S, StringRef WarningMsg) const {
798 if (ExplodedNode *N = C.generateErrorNode(State)) {
799 auto Report =
800 std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
801 Report->addRange(S->getSourceRange());
// NOTE(review): the embedded numbering jumps from 802 to 804, so the statement
// guarded by this `if` appears to be missing from the listing; as shown here,
// emitReport would be the guarded statement. Confirm against the original
// file.
802 if (const auto *Ex = dyn_cast<Expr>(S))
804 C.emitReport(std::move(Report));
805 }
806}
807
// Emits an UninitializedRead report with message Msg on an error node
// generated from State. The report carries an extra note, the source range of
// E, and a NoStoreFuncVisitor for region R (which is cast to SubRegion, so R
// must be one). Does nothing if no error node could be generated.
808void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
809 ProgramStateRef State,
810 const Expr *E, const MemRegion *R,
811 StringRef Msg) const {
812 if (ExplodedNode *N = C.generateErrorNode(State)) {
813 auto Report =
814 std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
// Only the first and last elements are inspected by the caller, hence the note.
815 Report->addNote("Other elements might also be undefined",
816 Report->getLocation());
817 Report->addRange(E->getSourceRange());
// NOTE(review): the embedded numbering skips 818 here, so one statement
// appears to be missing from the listing. Confirm against the original file.
819 Report->addVisitor<NoStoreFuncVisitor>(R->castAs<SubRegion>());
820 C.emitReport(std::move(Report));
821 }
822}
823
824void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
825 ProgramStateRef State, const Stmt *S,
826 StringRef WarningMsg) const {
827 if (ExplodedNode *N = C.generateErrorNode(State)) {
828 // FIXME: It would be nice to eventually make this diagnostic more clear,
829 // e.g., by referencing the original declaration or by saying *why* this
830 // reference is outside the range.
831 auto Report =
832 std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
833 Report->addRange(S->getSourceRange());
834 C.emitReport(std::move(Report));
835 }
836}
837
838void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
839 const Stmt *S,
840 StringRef WarningMsg) const {
841 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
842 auto Report =
843 std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
844
845 Report->addRange(S->getSourceRange());
846 C.emitReport(std::move(Report));
847 }
848}
849
850ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
851 ProgramStateRef state,
852 NonLoc left,
853 NonLoc right) const {
854 // If out-of-bounds checking is turned off, skip the rest.
855 if (!OutOfBounds.isEnabled())
856 return state;
857
858 // If a previous check has failed, propagate the failure.
859 if (!state)
860 return nullptr;
861
862 SValBuilder &svalBuilder = C.getSValBuilder();
863 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
864
865 QualType sizeTy = svalBuilder.getContext().getSizeType();
866 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
867 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
868
869 SVal maxMinusRight;
870 if (isa<nonloc::ConcreteInt>(right)) {
871 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
872 sizeTy);
873 } else {
874 // Try switching the operands. (The order of these two assignments is
875 // important!)
876 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
877 sizeTy);
878 left = right;
879 }
880
881 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
882 QualType cmpTy = svalBuilder.getConditionType();
883 // If left > max - right, we have an overflow.
884 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
885 *maxMinusRightNL, cmpTy);
886
887 auto [StateOverflow, StateOkay] =
888 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
889
890 if (StateOverflow && !StateOkay) {
891 // On this path the analyzer is convinced that the addition of these two
892 // values would overflow `size_t` which must be caused by the inaccuracy
893 // of our modeling because this method is called in situations where the
894 // summands are size/length values which are much less than SIZE_MAX. To
895 // avoid false positives let's just sink this invalid path.
896 C.addSink(StateOverflow);
897 return nullptr;
898 }
899
900 // From now on, assume an overflow didn't occur.
901 assert(StateOkay);
902 state = StateOkay;
903 }
904
905 return state;
906}
907
908ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
909 const MemRegion *MR,
910 SVal strLength) {
911 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
912
913 MR = MR->StripCasts();
914
915 switch (MR->getKind()) {
916 case MemRegion::StringRegionKind:
917 // FIXME: This can happen if we strcpy() into a string region. This is
918 // undefined [C99 6.4.5p6], but we should still warn about it.
919 return state;
920
921 case MemRegion::SymbolicRegionKind:
922 case MemRegion::AllocaRegionKind:
923 case MemRegion::NonParamVarRegionKind:
924 case MemRegion::ParamVarRegionKind:
925 case MemRegion::FieldRegionKind:
926 case MemRegion::ObjCIvarRegionKind:
927 // These are the types we can currently track string lengths for.
928 break;
929
930 case MemRegion::ElementRegionKind:
931 // FIXME: Handle element regions by upper-bounding the parent region's
932 // string length.
933 return state;
934
935 default:
936 // Other regions (mostly non-data) can't have a reliable C string length.
937 // For now, just ignore the change.
938 // FIXME: These are rare but not impossible. We should output some kind of
939 // warning for things like strcpy((char[]){'a', 0}, "b");
940 return state;
941 }
942
943 if (strLength.isUnknown())
944 return state->remove<CStringLength>(MR);
945
946 return state->set<CStringLength>(MR, strLength);
947}
948
949SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
950 ProgramStateRef &state,
951 const Expr *Ex,
952 const MemRegion *MR,
953 bool hypothetical) {
954 if (!hypothetical) {
955 // If there's a recorded length, go ahead and return it.
956 const SVal *Recorded = state->get<CStringLength>(MR);
957 if (Recorded)
958 return *Recorded;
959 }
960
961 // Otherwise, get a new symbol and update the state.
962 SValBuilder &svalBuilder = C.getSValBuilder();
963 QualType sizeTy = svalBuilder.getContext().getSizeType();
964 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
965 MR, Ex, sizeTy,
966 C.getLocationContext(),
967 C.blockCount());
968
969 if (!hypothetical) {
970 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
971 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
972 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
973 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
974 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
975 std::optional<APSIntPtr> maxLengthInt =
976 BVF.evalAPSInt(BO_Div, maxValInt, fourInt);
977 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
978 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
979 svalBuilder.getConditionType());
980 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
981 }
982 state = state->set<CStringLength>(MR, strLength);
983 }
984
985 return strLength;
986}
987
988const StringLiteral *
989CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
990 switch (MR->getKind()) {
991 case MemRegion::StringRegionKind:
992 return cast<StringRegion>(MR)->getStringLiteral();
993 case MemRegion::NonParamVarRegionKind:
994 if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
995 Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
996 return dyn_cast_or_null<StringLiteral>(Decl->getInit());
997 return nullptr;
998 default:
999 return nullptr;
1000 }
1001}
1002
1003SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1004 const Expr *Ex, SVal Buf,
1005 bool hypothetical) const {
1006 const MemRegion *MR = Buf.getAsRegion();
1007 if (!MR) {
1008 // If we can't get a region, see if it's something we /know/ isn't a
1009 // C string. In the context of locations, the only time we can issue such
1010 // a warning is for labels.
1011 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1012 if (NotNullTerm.isEnabled()) {
1013 SmallString<120> buf;
1014 llvm::raw_svector_ostream os(buf);
1015 assert(CurrentFunctionDescription);
1016 os << "Argument to " << CurrentFunctionDescription
1017 << " is the address of the label '" << Label->getLabel()->getName()
1018 << "', which is not a null-terminated string";
1019
1020 emitNotCStringBug(C, state, Ex, os.str());
1021 }
1022 return UndefinedVal();
1023 }
1024
1025 // If it's not a region and not a label, give up.
1026 return UnknownVal();
1027 }
1028
1029 // If we have a region, strip casts from it and see if we can figure out
1030 // its length. For anything we can't figure out, just return UnknownVal.
1031 MR = MR->StripCasts();
1032
1033 if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
1034 // If we have a global constant with a string literal initializer,
1035 // compute the initializer's length.
1036 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1037 // so we can assume that the byte length is the correct C string length.
1038 // FIXME: Embedded null characters are not handled.
1039 SValBuilder &SVB = C.getSValBuilder();
1040 return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
1041 }
1042
1043 switch (MR->getKind()) {
1044 case MemRegion::StringRegionKind:
1045 case MemRegion::NonParamVarRegionKind:
1046 case MemRegion::SymbolicRegionKind:
1047 case MemRegion::AllocaRegionKind:
1048 case MemRegion::ParamVarRegionKind:
1049 case MemRegion::FieldRegionKind:
1050 case MemRegion::ObjCIvarRegionKind:
1051 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1052 case MemRegion::CompoundLiteralRegionKind:
1053 // FIXME: Can we track this? Is it necessary?
1054 return UnknownVal();
1055 case MemRegion::ElementRegionKind: {
1056 // If an offset into the string literal is used, use the original length
1057 // minus the offset.
1058 // FIXME: Embedded null characters are not handled.
1059 const ElementRegion *ER = cast<ElementRegion>(MR);
1060 const SubRegion *SuperReg =
1062 const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
1063 if (!StrLit)
1064 return UnknownVal();
1065 SValBuilder &SVB = C.getSValBuilder();
1066 NonLoc Idx = ER->getIndex();
1067 QualType SizeTy = SVB.getContext().getSizeType();
1068 NonLoc LengthVal =
1069 SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>();
1070 if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
1071 SVB.getConditionType())
1072 .castAs<DefinedOrUnknownSVal>(),
1073 true))
1074 return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy);
1075 return UnknownVal();
1076 }
1077 default:
1078 // Other regions (mostly non-data) can't have a reliable C string length.
1079 // In this case, an error is emitted and UndefinedVal is returned.
1080 // The caller should always be prepared to handle this case.
1081 if (NotNullTerm.isEnabled()) {
1082 SmallString<120> buf;
1083 llvm::raw_svector_ostream os(buf);
1084
1085 assert(CurrentFunctionDescription);
1086 os << "Argument to " << CurrentFunctionDescription << " is ";
1087
1088 if (SummarizeRegion(os, C.getASTContext(), MR))
1089 os << ", which is not a null-terminated string";
1090 else
1091 os << "not a null-terminated string";
1092
1093 emitNotCStringBug(C, state, Ex, os.str());
1094 }
1095 return UndefinedVal();
1096 }
1097}
1098
1099const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1100 ProgramStateRef &state, const Expr *expr, SVal val) const {
1101 // FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
1102
1103 // Get the memory region pointed to by the val.
1104 const MemRegion *bufRegion = val.getAsRegion();
1105 if (!bufRegion)
1106 return nullptr;
1107
1108 // Strip casts off the memory region.
1109 bufRegion = bufRegion->StripCasts();
1110
1111 // Cast the memory region to a string region.
1112 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1113 if (!strRegion)
1114 return nullptr;
1115
1116 // Return the actual string in the string region.
1117 return strRegion->getStringLiteral();
1118}
1119
1120bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1121 SVal BufVal, QualType BufTy,
1122 SVal LengthVal, QualType LengthTy) {
1123 // If we do not know that the buffer is long enough we return 'true'.
1124 // Otherwise the parent region of this field region would also get
1125 // invalidated, which would lead to warnings based on an unknown state.
1126
1127 if (LengthVal.isUnknown())
1128 return false;
1129
1130 // Originally copied from CheckBufferAccess and CheckLocation.
1131 SValBuilder &SB = C.getSValBuilder();
1132 ASTContext &Ctx = C.getASTContext();
1133
1134 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1135
1136 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1137 if (!Length)
1138 return true; // cf top comment.
1139
1140 // Compute the offset of the last element to be accessed: size-1.
1141 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1142 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1143 if (Offset.isUnknown())
1144 return true; // cf top comment
1145 NonLoc LastOffset = Offset.castAs<NonLoc>();
1146
1147 // Check that the first buffer is sufficiently long.
1148 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1149 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1150 if (!BufLoc)
1151 return true; // cf top comment.
1152
1153 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1154
1155 // Check for out of bound array element access.
1156 const MemRegion *R = BufEnd.getAsRegion();
1157 if (!R)
1158 return true; // cf top comment.
1159
1160 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1161 if (!ER)
1162 return true; // cf top comment.
1163
1164 // Support library functions defined with non-default address spaces
1165 assert(ER->getValueType()->getCanonicalTypeUnqualified() ==
1166 C.getASTContext().CharTy &&
1167 "isFirstBufInBound should only be called with char* ElementRegions");
1168
1169 // Get the size of the array.
1170 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1171 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1172
1173 // Get the index of the accessed element.
1174 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1175
1176 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1177
1178 return static_cast<bool>(StInBound);
1179}
1180
1181ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1182 CheckerContext &C, ProgramStateRef S, const Expr *BufE,
1183 ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) {
1184 auto InvalidationTraitOperations =
1185 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1186 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1187 // If destination buffer is a field region and access is in bound, do
1188 // not invalidate its super region.
1189 if (MemRegion::FieldRegionKind == R->getKind() &&
1190 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1191 ITraits.setTrait(
1192 R,
1194 }
1195 return false;
1196 };
1197
1198 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1199}
1200
1202CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1203 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1204 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1205 const MemRegion *R) {
1206 return isa<FieldRegion>(R);
1207 };
1208
1209 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1210}
1211
1212ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1213 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1214 auto InvalidationTraitOperations =
1215 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1216 if (MemRegion::FieldRegionKind == R->getKind())
1217 ITraits.setTrait(
1218 R,
1220 return false;
1221 };
1222
1223 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1224}
1225
1226ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1228 ConstCFGElementRef Elem,
1229 SVal BufV) {
1230 auto InvalidationTraitOperations =
1231 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1232 ITraits.setTrait(
1233 R->getBaseRegion(),
1235 ITraits.setTrait(R,
1237 return true;
1238 };
1239
1240 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1241}
1242
1243ProgramStateRef CStringChecker::invalidateBufferAux(
1244 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
1245 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1246 const MemRegion *)>
1247 InvalidationTraitOperations) {
1248 std::optional<Loc> L = V.getAs<Loc>();
1249 if (!L)
1250 return State;
1251
1252 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1253 // some assumptions about the value that CFRefCount can't. Even so, it should
1254 // probably be refactored.
1255 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1256 const MemRegion *R = MR->getRegion()->StripCasts();
1257
1258 // Are we dealing with an ElementRegion? If so, we should be invalidating
1259 // the super-region.
1260 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1261 R = ER->getSuperRegion();
1262 // FIXME: What about layers of ElementRegions?
1263 }
1264
1265 // Invalidate this region.
1266 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1267 RegionAndSymbolInvalidationTraits ITraits;
1268 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1269
1270 return State->invalidateRegions(R, Elem, C.blockCount(), LCtx,
1271 CausesPointerEscape, nullptr, nullptr,
1272 &ITraits);
1273 }
1274
1275 // If we have a non-region value by chance, just remove the binding.
1276 // FIXME: is this necessary or correct? This handles the non-Region
1277 // cases. Is it ever valid to store to these?
1278 return State->killBinding(*L);
1279}
1280
1281bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1282 const MemRegion *MR) {
1283 switch (MR->getKind()) {
1284 case MemRegion::FunctionCodeRegionKind: {
1285 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1286 os << "the address of the function '" << *FD << '\'';
1287 else
1288 os << "the address of a function";
1289 return true;
1290 }
1291 case MemRegion::BlockCodeRegionKind:
1292 os << "block text";
1293 return true;
1294 case MemRegion::BlockDataRegionKind:
1295 os << "a block";
1296 return true;
1297 case MemRegion::CXXThisRegionKind:
1298 case MemRegion::CXXTempObjectRegionKind:
1299 os << "a C++ temp object of type "
1300 << cast<TypedValueRegion>(MR)->getValueType();
1301 return true;
1302 case MemRegion::NonParamVarRegionKind:
1303 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1304 return true;
1305 case MemRegion::ParamVarRegionKind:
1306 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1307 return true;
1308 case MemRegion::FieldRegionKind:
1309 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1310 return true;
1311 case MemRegion::ObjCIvarRegionKind:
1312 os << "an instance variable of type "
1313 << cast<TypedValueRegion>(MR)->getValueType();
1314 return true;
1315 default:
1316 return false;
1317 }
1318}
1319
1320bool CStringChecker::memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
1321 SVal CharVal, const Expr *Size,
1322 CheckerContext &C, ProgramStateRef &State) {
1323 SVal MemVal = C.getSVal(DstBuffer);
1324 SVal SizeVal = C.getSVal(Size);
1325 const MemRegion *MR = MemVal.getAsRegion();
1326 if (!MR)
1327 return false;
1328
1329 // We're about to model memset by producing a "default binding" in the Store.
1330 // Our current implementation - RegionStore - doesn't support default bindings
1331 // that don't cover the whole base region. So we should first get the offset
1332 // and the base region to figure out whether the offset of buffer is 0.
1333 RegionOffset Offset = MR->getAsOffset();
1334 const MemRegion *BR = Offset.getRegion();
1335
1336 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1337 if (!SizeNL)
1338 return false;
1339
1340 SValBuilder &svalBuilder = C.getSValBuilder();
1341 ASTContext &Ctx = C.getASTContext();
1342
1343 // void *memset(void *dest, int ch, size_t count);
1344 // For now we can only handle the case of offset is 0 and concrete char value.
1345 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1346 Offset.getOffset() == 0) {
1347 // Get the base region's size.
1348 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1349
1350 ProgramStateRef StateWholeReg, StateNotWholeReg;
1351 std::tie(StateWholeReg, StateNotWholeReg) =
1352 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1353
1354 // With the semantic of 'memset()', we should convert the CharVal to
1355 // unsigned char.
1356 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1357
1358 ProgramStateRef StateNullChar, StateNonNullChar;
1359 std::tie(StateNullChar, StateNonNullChar) =
1360 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1361
1362 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1363 !StateNonNullChar) {
1364 // If the 'memset()' acts on the whole region of destination buffer and
1365 // the value of the second argument of 'memset()' is zero, bind the second
1366 // argument's value to the destination buffer with 'default binding'.
1367 // FIXME: Since there is no perfect way to bind the non-zero character, we
1368 // can only deal with zero value here. In the future, we need to deal with
1369 // the binding of non-zero value in the case of whole region.
1370 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1371 C.getLocationContext());
1372 } else {
1373 // If the destination buffer's extent is not equal to the value of
1374 // third argument, just invalidate buffer.
1375 State = invalidateDestinationBufferBySize(
1376 C, State, DstBuffer, Elem, MemVal, SizeVal, Size->getType());
1377 }
1378
1379 if (StateNullChar && !StateNonNullChar) {
1380 // If the value of the second argument of 'memset()' is zero, set the
1381 // string length of destination buffer to 0 directly.
1382 State = setCStringLength(State, MR,
1383 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1384 } else if (!StateNullChar && StateNonNullChar) {
1385 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1386 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1387 C.getLocationContext(), C.blockCount());
1388
1389 // If the value of second argument is not zero, then the string length
1390 // is at least the size argument.
1391 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1392 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1393
1394 State = setCStringLength(
1395 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1396 MR, NewStrLen);
1397 }
1398 } else {
1399 // If the offset is not zero and char value is not concrete, we can do
1400 // nothing but invalidate the buffer.
1401 State = invalidateDestinationBufferBySize(C, State, DstBuffer, Elem, MemVal,
1402 SizeVal, Size->getType());
1403 }
1404 return true;
1405}
1406
1407//===----------------------------------------------------------------------===//
1408// evaluation of individual function calls.
1409//===----------------------------------------------------------------------===//
1410
1411void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1412 ProgramStateRef state, SizeArgExpr Size,
1413 DestinationArgExpr Dest,
1414 SourceArgExpr Source, bool Restricted,
1415 bool IsMempcpy, CharKind CK) const {
1416 CurrentFunctionDescription = "memory copy function";
1417
1418 // See if the size argument is zero.
1419 const LocationContext *LCtx = C.getLocationContext();
1420 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1421 QualType sizeTy = Size.Expression->getType();
1422
1423 ProgramStateRef stateZeroSize, stateNonZeroSize;
1424 std::tie(stateZeroSize, stateNonZeroSize) =
1425 assumeZero(C, state, sizeVal, sizeTy);
1426
1427 // Get the value of the Dest.
1428 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1429
1430 // If the size is zero, there won't be any actual memory access, so
1431 // just bind the return value to the destination buffer and return.
1432 if (stateZeroSize && !stateNonZeroSize) {
1433 stateZeroSize =
1434 stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1435 C.addTransition(stateZeroSize);
1436 return;
1437 }
1438
1439 // If the size can be nonzero, we have to check the other arguments.
1440 if (stateNonZeroSize) {
1441 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1442 // to the size of the destination buffer, then emit a warning
1443 // that an attacker may provoke a buffer overflow error.
1444 state = stateNonZeroSize;
1445
1446 // Ensure the destination is not null. If it is NULL there will be a
1447 // NULL pointer dereference.
1448 state = checkNonNull(C, state, Dest, destVal);
1449 if (!state)
1450 return;
1451
1452 // Get the value of the Src.
1453 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1454
1455 // Ensure the source is not null. If it is NULL there will be a
1456 // NULL pointer dereference.
1457 state = checkNonNull(C, state, Source, srcVal);
1458 if (!state)
1459 return;
1460
1461 // Ensure the accesses are valid and that the buffers do not overlap.
1462 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1463 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1464
1465 if (Restricted)
1466 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1467
1468 if (!state)
1469 return;
1470
1471 // If this is mempcpy, get the byte after the last byte copied and
1472 // bind the expr.
1473 if (IsMempcpy) {
1474 // Get the byte after the last byte copied.
1475 SValBuilder &SvalBuilder = C.getSValBuilder();
1476 ASTContext &Ctx = SvalBuilder.getContext();
1477 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1478 SVal DestRegCharVal =
1479 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1480 SVal lastElement = C.getSValBuilder().evalBinOp(
1481 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1482 // If we don't know how much we copied, we can at least
1483 // conjure a return value for later.
1484 if (lastElement.isUnknown())
1485 lastElement = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1486
1487 // The byte after the last byte copied is the return value.
1488 state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1489 } else {
1490 // All other copies return the destination buffer.
1491 // (Well, bcopy() has a void return type, but this won't hurt.)
1492 state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1493 }
1494
1495 // Invalidate the destination (regular invalidation without pointer-escaping
1496 // the address of the top-level region).
1497 // FIXME: Even if we can't perfectly model the copy, we should see if we
1498 // can use LazyCompoundVals to copy the source values into the destination.
1499 // This would probably remove any existing bindings past the end of the
1500 // copied region, but that's still an improvement over blank invalidation.
1501 state = invalidateDestinationBufferBySize(
1502 C, state, Dest.Expression, Call.getCFGElementRef(),
1503 C.getSVal(Dest.Expression), sizeVal, Size.Expression->getType());
1504
1505 // Invalidate the source (const-invalidation without const-pointer-escaping
1506 // the address of the top-level region).
1507 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(),
1508 C.getSVal(Source.Expression));
1509
1510 C.addTransition(state);
1511 }
1512}
1513
1514void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1515 CharKind CK) const {
1516 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1517 // The return value is the address of the destination buffer.
1518 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1519 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1520 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1521
1522 ProgramStateRef State = C.getState();
1523
1524 constexpr bool IsRestricted = true;
1525 constexpr bool IsMempcpy = false;
1526 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1527}
1528
1529void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1530 CharKind CK) const {
1531 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1532 // The return value is a pointer to the byte following the last written byte.
1533 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1534 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1535 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1536
1537 constexpr bool IsRestricted = true;
1538 constexpr bool IsMempcpy = true;
1539 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1540 IsMempcpy, CK);
1541}
1542
1543void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1544 CharKind CK) const {
1545 // void *memmove(void *dst, const void *src, size_t n);
1546 // The return value is the address of the destination buffer.
1547 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1548 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1549 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1550
1551 constexpr bool IsRestricted = false;
1552 constexpr bool IsMempcpy = false;
1553 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1554 IsMempcpy, CK);
1555}
1556
1557void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1558 // void bcopy(const void *src, void *dst, size_t n);
1559 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1560 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1561 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1562
1563 constexpr bool IsRestricted = false;
1564 constexpr bool IsMempcpy = false;
1565 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1566 IsMempcpy, CharKind::Regular);
1567}
1568
1569void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1570 CharKind CK) const {
1571 // int memcmp(const void *s1, const void *s2, size_t n);
1572 CurrentFunctionDescription = "memory comparison function";
1573
1574 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1575 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1576 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1577
1578 ProgramStateRef State = C.getState();
1579 SValBuilder &Builder = C.getSValBuilder();
1580 const LocationContext *LCtx = C.getLocationContext();
1581
1582 // See if the size argument is zero.
1583 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1584 QualType sizeTy = Size.Expression->getType();
1585
1586 ProgramStateRef stateZeroSize, stateNonZeroSize;
1587 std::tie(stateZeroSize, stateNonZeroSize) =
1588 assumeZero(C, State, sizeVal, sizeTy);
1589
1590 // If the size can be zero, the result will be 0 in that case, and we don't
1591 // have to check either of the buffers.
1592 if (stateZeroSize) {
1593 State = stateZeroSize;
1594 State = State->BindExpr(Call.getOriginExpr(), LCtx,
1595 Builder.makeZeroVal(Call.getResultType()));
1596 C.addTransition(State);
1597 }
1598
1599 // If the size can be nonzero, we have to check the other arguments.
1600 if (stateNonZeroSize) {
1601 State = stateNonZeroSize;
1602 // If we know the two buffers are the same, we know the result is 0.
1603 // First, get the two buffers' addresses. Another checker will have already
1604 // made sure they're not undefined.
1605 DefinedOrUnknownSVal LV =
1606 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1607 DefinedOrUnknownSVal RV =
1608 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1609
1610 // See if they are the same.
1611 ProgramStateRef SameBuffer, NotSameBuffer;
1612 std::tie(SameBuffer, NotSameBuffer) =
1613 State->assume(Builder.evalEQ(State, LV, RV));
1614
1615 // If the two arguments are the same buffer, we know the result is 0,
1616 // and we only need to check one size.
1617 if (SameBuffer && !NotSameBuffer) {
1618 State = SameBuffer;
1619 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1620 if (State) {
1621 State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1622 Builder.makeZeroVal(Call.getResultType()));
1623 C.addTransition(State);
1624 }
1625 return;
1626 }
1627
1628 // If the two arguments might be different buffers, we have to check
1629 // the size of both of them.
1630 assert(NotSameBuffer);
1631 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1632 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1633 if (State) {
1634 // The return value is the comparison result, which we don't know.
1635 SVal CmpV = Builder.conjureSymbolVal(Call, C.blockCount());
1636 State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1637 C.addTransition(State);
1638 }
1639 }
1640}
1641
1642void CStringChecker::evalstrLength(CheckerContext &C,
1643 const CallEvent &Call) const {
1644 // size_t strlen(const char *s);
1645 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1646}
1647
1648void CStringChecker::evalstrnLength(CheckerContext &C,
1649 const CallEvent &Call) const {
1650 // size_t strnlen(const char *s, size_t maxlen);
1651 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1652}
1653
1654void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1655 const CallEvent &Call,
1656 bool IsStrnlen) const {
1657 CurrentFunctionDescription = "string length function";
1658 ProgramStateRef state = C.getState();
1659 const LocationContext *LCtx = C.getLocationContext();
1660
1661 if (IsStrnlen) {
1662 const Expr *maxlenExpr = Call.getArgExpr(1);
1663 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1664
1665 ProgramStateRef stateZeroSize, stateNonZeroSize;
1666 std::tie(stateZeroSize, stateNonZeroSize) =
1667 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1668
1669 // If the size can be zero, the result will be 0 in that case, and we don't
1670 // have to check the string itself.
1671 if (stateZeroSize) {
1672 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1673 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1674 C.addTransition(stateZeroSize);
1675 }
1676
1677 // If the size is GUARANTEED to be zero, we're done!
1678 if (!stateNonZeroSize)
1679 return;
1680
1681 // Otherwise, record the assumption that the size is nonzero.
1682 state = stateNonZeroSize;
1683 }
1684
1685 // Check that the string argument is non-null.
1686 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1687 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1688 state = checkNonNull(C, state, Arg, ArgVal);
1689
1690 if (!state)
1691 return;
1692
1693 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1694
1695 // If the argument isn't a valid C string, there's no valid state to
1696 // transition to.
1697 if (strLength.isUndef())
1698 return;
1699
1700 DefinedOrUnknownSVal result = UnknownVal();
1701
1702 // If the check is for strnlen() then bind the return value to no more than
1703 // the maxlen value.
1704 if (IsStrnlen) {
1705 QualType cmpTy = C.getSValBuilder().getConditionType();
1706
1707 // It's a little unfortunate to be getting this again,
1708 // but it's not that expensive...
1709 const Expr *maxlenExpr = Call.getArgExpr(1);
1710 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1711
1712 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1713 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1714
1715 if (strLengthNL && maxlenValNL) {
1716 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1717
1718 // Check if the strLength is greater than the maxlen.
1719 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1720 C.getSValBuilder()
1721 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1722 .castAs<DefinedOrUnknownSVal>());
1723
1724 if (stateStringTooLong && !stateStringNotTooLong) {
1725 // If the string is longer than maxlen, return maxlen.
1726 result = *maxlenValNL;
1727 } else if (stateStringNotTooLong && !stateStringTooLong) {
1728 // If the string is shorter than maxlen, return its length.
1729 result = *strLengthNL;
1730 }
1731 }
1732
1733 if (result.isUnknown()) {
1734 // If we don't have enough information for a comparison, there's
1735 // no guarantee the full string length will actually be returned.
1736 // All we know is the return value is the min of the string length
1737 // and the limit. This is better than nothing.
1738 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1739 NonLoc resultNL = result.castAs<NonLoc>();
1740
1741 if (strLengthNL) {
1742 state = state->assume(C.getSValBuilder().evalBinOpNN(
1743 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1744 .castAs<DefinedOrUnknownSVal>(), true);
1745 }
1746
1747 if (maxlenValNL) {
1748 state = state->assume(C.getSValBuilder().evalBinOpNN(
1749 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1750 .castAs<DefinedOrUnknownSVal>(), true);
1751 }
1752 }
1753
1754 } else {
1755 // This is a plain strlen(), not strnlen().
1756 result = strLength.castAs<DefinedOrUnknownSVal>();
1757
1758 // If we don't know the length of the string, conjure a return
1759 // value, so it can be used in constraints, at least.
1760 if (result.isUnknown()) {
1761 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1762 }
1763 }
1764
1765 // Bind the return value.
1766 assert(!result.isUnknown() && "Should have conjured a value by now");
1767 state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1768 C.addTransition(state);
1769}
1770
1771void CStringChecker::evalStrcpy(CheckerContext &C,
1772 const CallEvent &Call) const {
1773 // char *strcpy(char *restrict dst, const char *restrict src);
1774 evalStrcpyCommon(C, Call,
1775 /* ReturnEnd = */ false,
1776 /* IsBounded = */ false,
1777 /* appendK = */ ConcatFnKind::none);
1778}
1779
1780void CStringChecker::evalStrncpy(CheckerContext &C,
1781 const CallEvent &Call) const {
1782 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1783 evalStrcpyCommon(C, Call,
1784 /* ReturnEnd = */ false,
1785 /* IsBounded = */ true,
1786 /* appendK = */ ConcatFnKind::none);
1787}
1788
1789void CStringChecker::evalStpcpy(CheckerContext &C,
1790 const CallEvent &Call) const {
1791 // char *stpcpy(char *restrict dst, const char *restrict src);
1792 evalStrcpyCommon(C, Call,
1793 /* ReturnEnd = */ true,
1794 /* IsBounded = */ false,
1795 /* appendK = */ ConcatFnKind::none);
1796}
1797
1798void CStringChecker::evalStrlcpy(CheckerContext &C,
1799 const CallEvent &Call) const {
1800 // size_t strlcpy(char *dest, const char *src, size_t size);
1801 evalStrcpyCommon(C, Call,
1802 /* ReturnEnd = */ true,
1803 /* IsBounded = */ true,
1804 /* appendK = */ ConcatFnKind::none,
1805 /* returnPtr = */ false);
1806}
1807
1808void CStringChecker::evalStrcat(CheckerContext &C,
1809 const CallEvent &Call) const {
1810 // char *strcat(char *restrict s1, const char *restrict s2);
1811 evalStrcpyCommon(C, Call,
1812 /* ReturnEnd = */ false,
1813 /* IsBounded = */ false,
1814 /* appendK = */ ConcatFnKind::strcat);
1815}
1816
1817void CStringChecker::evalStrncat(CheckerContext &C,
1818 const CallEvent &Call) const {
1819 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1820 evalStrcpyCommon(C, Call,
1821 /* ReturnEnd = */ false,
1822 /* IsBounded = */ true,
1823 /* appendK = */ ConcatFnKind::strcat);
1824}
1825
1826void CStringChecker::evalStrlcat(CheckerContext &C,
1827 const CallEvent &Call) const {
1828 // size_t strlcat(char *dst, const char *src, size_t size);
1829 // It will append at most size - strlen(dst) - 1 bytes,
1830 // NULL-terminating the result.
1831 evalStrcpyCommon(C, Call,
1832 /* ReturnEnd = */ false,
1833 /* IsBounded = */ true,
1834 /* appendK = */ ConcatFnKind::strlcat,
1835 /* returnPtr = */ false);
1836}
1837
// Shared model for the string copy and concatenation functions evaluated in
// this file: strcpy, strncpy, stpcpy, strlcpy, strcat, strncat and strlcat.
//
// Parameters:
//   ReturnEnd - for pointer-returning calls, evaluate the call to a pointer
//               to the end of the copied string (dst + final length) instead
//               of to dst itself.
//   IsBounded - the call has a third (size) argument limiting the operation.
//   appendK   - ConcatFnKind::none for copy functions, otherwise which
//               concatenation flavour is being modelled.
//   returnPtr - when false, the call evaluates to a length (strlcpy/strlcat
//               style) rather than to a pointer.
//
// In order, the function: checks both pointers for null, checks the buffers
// for overlap, estimates how many characters are copied, checks the
// destination for overflow, invalidates the destination and const-invalidates
// the source, records the destination's new string length when it is known,
// and finally binds the return value and adds the transition.
1838void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1839 bool ReturnEnd, bool IsBounded,
1840 ConcatFnKind appendK,
1841 bool returnPtr) const {
// The description is stored in a member before any check runs.
1842 if (appendK == ConcatFnKind::none)
1843 CurrentFunctionDescription = "string copy function";
1844 else
1845 CurrentFunctionDescription = "string concatenation function";
1846
1847 ProgramStateRef state = C.getState();
1848 const LocationContext *LCtx = C.getLocationContext();
1849
1850 // Check that the destination is non-null.
1851 DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1852 SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1853 state = checkNonNull(C, state, Dst, DstVal);
1854 if (!state)
1855 return;
1856
1857 // Check that the source is non-null.
1858 SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1859 SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1860 state = checkNonNull(C, state, srcExpr, srcVal);
1861 if (!state)
1862 return;
1863
1864 // Get the string length of the source.
1865 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1866 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1867
1868 // Get the string length of the destination buffer.
// This is computed for every variant, but it is only consumed further down
// by the appending variants (appendK != ConcatFnKind::none).
1869 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1870 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1871
1872 // If the source isn't a valid C string, give up.
1873 if (strLength.isUndef())
1874 return;
1875
1876 SValBuilder &svalBuilder = C.getSValBuilder();
1877 QualType cmpTy = svalBuilder.getConditionType();
1878 QualType sizeTy = svalBuilder.getContext().getSizeType();
1879
1880 // These two values allow checking two kinds of errors:
1881 // - actual overflows caused by a source that doesn't fit in the destination
1882 // - potential overflows caused by a bound that could exceed the destination
// A non-null boundWarning also records that the bound itself is checked, in
// which case the check based on the estimated copy length is skipped later.
1883 SVal amountCopied = UnknownVal();
1884 SVal maxLastElementIndex = UnknownVal();
1885 const char *boundWarning = nullptr;
1886
1887 // FIXME: Why do we choose the srcExpr if the access has no size?
1888 // Note that the 3rd argument of the call would be the size parameter.
1889 SizeArgExpr SrcExprAsSizeDummy = {
1890 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1891 state = CheckOverlap(
1892 C, state,
1893 (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1894 Dst, srcExpr);
1895
1896 if (!state)
1897 return;
1898
1899 // If the function is strncpy, strncat, etc... it is bounded.
1900 if (IsBounded) {
1901 // Get the max number of characters to copy.
1902 SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1903 SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1904
1905 // Protect against misdeclared strncpy().
1906 lenVal =
1907 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1908
1909 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1910
1911 // If we know both values, we might be able to figure out how much
1912 // we're copying.
1913 if (strLengthNL && lenValNL) {
1914 switch (appendK) {
1915 case ConcatFnKind::none:
1916 case ConcatFnKind::strcat: {
1917 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1918 // Check if the max number to copy is less than the length of the src.
1919 // If the bound is equal to the source length, strncpy won't null-
1920 // terminate the result!
1921 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1922 svalBuilder
1923 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1924 .castAs<DefinedOrUnknownSVal>());
1925
// When both outcomes are feasible, neither branch below is taken: state is
// left as it was and amountCopied stays unknown.
1926 if (stateSourceTooLong && !stateSourceNotTooLong) {
1927 // Max number to copy is less than the length of the src, so the
1928 // actual strLength copied is the max number arg.
1929 state = stateSourceTooLong;
1930 amountCopied = lenVal;
1931
1932 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1933 // The source buffer entirely fits in the bound.
1934 state = stateSourceNotTooLong;
1935 amountCopied = strLength;
1936 }
1937 break;
1938 }
1939 case ConcatFnKind::strlcat:
1940 if (!dstStrLengthNL)
1941 return;
1942
1943 // amountCopied = min (size - dstLen - 1 , srcLen)
1944 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1945 *dstStrLengthNL, sizeTy);
1946 if (!isa<NonLoc>(freeSpace))
1947 return;
1948 freeSpace =
1949 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1950 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1951 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1952
1953 // While unlikely, it is possible that the subtraction is
1954 // too complex to compute, let's check whether it succeeded.
1955 if (!freeSpaceNL)
1956 return;
1957 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1958 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1959
// The split below is only used to choose amountCopied; `state` itself is not
// replaced by TrueState or FalseState.
1960 ProgramStateRef TrueState, FalseState;
1961 std::tie(TrueState, FalseState) =
1962 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1963
1964 // srcStrLength <= size - dstStrLength -1
1965 if (TrueState && !FalseState) {
1966 amountCopied = strLength;
1967 }
1968
1969 // srcStrLength > size - dstStrLength -1
1970 if (!TrueState && FalseState) {
1971 amountCopied = freeSpace;
1972 }
1973
// amountCopied is still UnknownVal() on this path, so the assignment below
// only makes the "cannot tell" outcome explicit.
1974 if (TrueState && FalseState)
1975 amountCopied = UnknownVal();
1976 break;
1977 }
1978 }
1979 // We still want to know if the bound is known to be too large.
1980 if (lenValNL) {
1981 switch (appendK) {
1982 case ConcatFnKind::strcat:
1983 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1984
1985 // Get the string length of the destination. If the destination is
1986 // memory that can't have a string length, we shouldn't be copying
1987 // into it anyway.
1988 if (dstStrLength.isUndef())
1989 return;
1990
1991 if (dstStrLengthNL) {
1992 maxLastElementIndex = svalBuilder.evalBinOpNN(
1993 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
1994
1995 boundWarning = "Size argument is greater than the free space in the "
1996 "destination buffer";
1997 }
1998 break;
1999 case ConcatFnKind::none:
2000 case ConcatFnKind::strlcat:
2001 // For strncpy and strlcat, this is just checking
2002 // that lenVal <= sizeof(dst).
2003 // (Yes, strncpy and strncat differ in how they treat termination.
2004 // strncat ALWAYS terminates, but strncpy doesn't.)
2005
2006 // We need a special case for when the copy size is zero, in which
2007 // case strncpy will do no work at all. Our bounds check uses n-1
2008 // as the last element accessed, so n == 0 is problematic.
2009 ProgramStateRef StateZeroSize, StateNonZeroSize;
2010 std::tie(StateZeroSize, StateNonZeroSize) =
2011 assumeZero(C, state, *lenValNL, sizeTy);
2012
2013 // If the size is known to be zero, we're done.
// Only the return value is bound on this path; no buffer is invalidated.
2014 if (StateZeroSize && !StateNonZeroSize) {
2015 if (returnPtr) {
2016 StateZeroSize =
2017 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
2018 } else {
2019 if (appendK == ConcatFnKind::none) {
2020 // strlcpy returns strlen(src)
2021 StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
2022 LCtx, strLength);
2023 } else {
2024 // strlcat returns strlen(src) + strlen(dst)
2025 SVal retSize = svalBuilder.evalBinOp(
2026 state, BO_Add, strLength, dstStrLength, sizeTy);
2027 StateZeroSize =
2028 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2029 }
2030 }
2031 C.addTransition(StateZeroSize);
2032 return;
2033 }
2034
2035 // Otherwise, go ahead and figure out the last element we'll touch.
2036 // We don't record the non-zero assumption here because we can't
2037 // be sure. We won't warn on a possible zero.
2038 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2039 maxLastElementIndex =
2040 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2041 boundWarning = "Size argument is greater than the length of the "
2042 "destination buffer";
2043 break;
2044 }
2045 }
2046 } else {
2047 // The function isn't bounded. The amount copied should match the length
2048 // of the source buffer.
2049 amountCopied = strLength;
2050 }
2051
2052 assert(state);
2053
2054 // This represents the number of characters copied into the destination
2055 // buffer. (It may not actually be the strlen if the destination buffer
2056 // is not terminated.)
// strlRetVal is the value bound for the size-returning variants (strlcpy and
// strlcat); it stays unknown for the pointer-returning ones.
2057 SVal finalStrLength = UnknownVal();
2058 SVal strlRetVal = UnknownVal();
2059
2060 if (appendK == ConcatFnKind::none && !returnPtr) {
2061 // strlcpy returns strlen(src), the length of the source string.
2062 strlRetVal = strLength;
2063 }
2064
2065 // If this is an appending function (strcat, strncat...) then set the
2066 // string length to strlen(src) + strlen(dst) since the buffer will
2067 // ultimately contain both.
2068 if (appendK != ConcatFnKind::none) {
2069 // Get the string length of the destination. If the destination is memory
2070 // that can't have a string length, we shouldn't be copying into it anyway.
2071 if (dstStrLength.isUndef())
2072 return;
2073
2074 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2075 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2076 *dstStrLengthNL, sizeTy);
2077 }
2078
2079 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2080
2081 // If we know both string lengths, we might know the final string length.
2082 if (amountCopiedNL && dstStrLengthNL) {
2083 // Make sure the two lengths together don't overflow a size_t.
2084 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2085 if (!state)
2086 return;
2087
2088 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2089 *dstStrLengthNL, sizeTy);
2090 }
2091
2092 // If we couldn't get a single value for the final string length,
2093 // we can at least bound it by the individual lengths.
2094 if (finalStrLength.isUnknown()) {
2095 // Try to get a "hypothetical" string length symbol, which we can later
2096 // set as a real value if that turns out to be the case.
2097 finalStrLength =
2098 getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2099 assert(!finalStrLength.isUndef());
2100
2101 if (std::optional<NonLoc> finalStrLengthNL =
2102 finalStrLength.getAs<NonLoc>()) {
// NOTE(review): this block is only entered when appendK != ConcatFnKind::none,
// so the condition below cannot hold here; confirm whether it is dead code.
2103 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2104 // we overwrite dst string with the src
2105 // finalStrLength >= srcStrLength
2106 SVal sourceInResult = svalBuilder.evalBinOpNN(
2107 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2108 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2109 true);
2110 if (!state)
2111 return;
2112 }
2113
2114 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2115 // we extend the dst string with the src
2116 // finalStrLength >= dstStrLength
2117 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2118 *finalStrLengthNL,
2119 *dstStrLengthNL,
2120 cmpTy);
2121 state =
2122 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2123 if (!state)
2124 return;
2125 }
2126 }
2127 }
2128
2129 } else {
2130 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2131 // the final string length will match the input string length.
2132 finalStrLength = amountCopied;
2133 }
2134
2135 SVal Result;
2136
2137 if (returnPtr) {
2138 // The final result of the function will either be a pointer past the last
2139 // copied element, or a pointer to the start of the destination buffer.
// For ReturnEnd the pointer is filled in below once the final length is
// known; otherwise a value is conjured just before binding.
2140 Result = (ReturnEnd ? UnknownVal() : DstVal);
2141 } else {
2142 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2143 // strlcpy, strlcat
2144 Result = strlRetVal;
2145 else
2146 Result = finalStrLength;
2147 }
2148
2149 assert(state);
2150
2151 // If the destination is a MemRegion, try to check for a buffer overflow and
2152 // record the new string length.
2153 if (std::optional<loc::MemRegionVal> dstRegVal =
2154 DstVal.getAs<loc::MemRegionVal>()) {
2155 QualType ptrTy = Dst.Expression->getType();
2156
2157 // If we have an exact value on a bounded copy, use that to check for
2158 // overflows, rather than our estimate about how much is actually copied.
2159 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2160 SVal maxLastElement =
2161 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2162
2163 // Check if the first byte of the destination is writable.
2164 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2165 if (!state)
2166 return;
2167 // Check if the last byte of the destination is writable.
2168 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2169 if (!state)
2170 return;
2171 }
2172
2173 // Then, if the final length is known...
2174 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2175 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2176 *knownStrLength, ptrTy);
2177
2178 // ...and we haven't checked the bound, we'll check the actual copy.
2179 if (!boundWarning) {
2180 // Check if the first byte of the destination is writable.
2181 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2182 if (!state)
2183 return;
2184 // Check if the last byte of the destination is writable.
2185 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2186 if (!state)
2187 return;
2188 }
2189
2190 // If this is a stpcpy-style copy, the last element is the return value.
2191 if (returnPtr && ReturnEnd)
2192 Result = lastElement;
2193 }
2194
2195 // For bounded functions, amountCopied is the minimum of two values:
2196 // for ConcatFnKind::strlcat:
2197 // amountCopied = min (size - dstLen - 1 , srcLen)
2198 // for others:
2199 // amountCopied = min (srcLen, size)
2200 // So even if amountCopied is unknown, as long as one of the two values
2201 // cannot cause an out-of-bound access, the operation as a whole cannot
2202 // either. Detecting that avoids invalidating the super-region of a data
2203 // member in that situation.
2204 bool CouldAccessOutOfBound = true;
2205 if (IsBounded && amountCopied.isUnknown()) {
// Returns true when Val is missing or when isFirstBufInBound() reports that
// the destination is not in bound for Val.
2206 auto CouldAccessOutOfBoundForSVal =
2207 [&](std::optional<NonLoc> Val) -> bool {
2208 if (!Val)
2209 return true;
2210 return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2211 Dst.Expression->getType(), *Val,
2212 C.getASTContext().getSizeType());
2213 };
2214
// Try the source length first; fall back to the size argument only if the
// source length could not rule out an out-of-bound access.
2215 CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2216
2217 if (CouldAccessOutOfBound) {
2218 // Get the max number of characters to copy.
2219 const Expr *LenExpr = Call.getArgExpr(2);
2220 SVal LenVal = state->getSVal(LenExpr, LCtx);
2221
2222 // Protect against misdeclared strncpy().
2223 LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2224
2225 // Because analyzer doesn't handle expressions like `size -
2226 // dstLen - 1` very well, we roughly use `size` for
2227 // ConcatFnKind::strlcat here, same with other concat kinds.
2228 CouldAccessOutOfBound =
2229 CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2230 }
2231 }
2232
2233 // Invalidate the destination (regular invalidation without pointer-escaping
2234 // the address of the top-level region). This must happen before we set the
2235 // C string length because invalidation will clear the length.
2236 // FIXME: Even if we can't perfectly model the copy, we should see if we
2237 // can use LazyCompoundVals to copy the source values into the destination.
2238 // This would probably remove any existing bindings past the end of the
2239 // string, but that's still an improvement over blank invalidation.
2240 if (CouldAccessOutOfBound)
2241 state = invalidateDestinationBufferBySize(
2242 C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2243 amountCopied, C.getASTContext().getSizeType());
2244 else
2245 state = invalidateDestinationBufferNeverOverflows(
2246 C, state, Call.getCFGElementRef(), *dstRegVal);
2247
2248 // Invalidate the source (const-invalidation without const-pointer-escaping
2249 // the address of the top-level region).
2250 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(), srcVal);
2251
2252 // Set the C string length of the destination, if we know it.
2253 if (IsBounded && (appendK == ConcatFnKind::none)) {
2254 // strncpy is annoying in that it doesn't guarantee to null-terminate
2255 // the result string. If the original string didn't fit entirely inside
2256 // the bound (including the null-terminator), we don't know how long the
2257 // result is.
2258 if (amountCopied != strLength)
2259 finalStrLength = UnknownVal();
2260 }
2261 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2262 }
2263
2264 assert(state);
2265
2266 if (returnPtr) {
2267 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2268 // overflow, we still need a result. Conjure a return value.
2269 if (ReturnEnd && Result.isUnknown()) {
2270 Result = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2271 }
2272 }
2273 // Set the return value.
2274 state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2275 C.addTransition(state);
2276}
2277
2278void CStringChecker::evalStrxfrm(CheckerContext &C,
2279 const CallEvent &Call) const {
2280 // size_t strxfrm(char *dest, const char *src, size_t n);
2281 CurrentFunctionDescription = "locale transformation function";
2282
2283 ProgramStateRef State = C.getState();
2284 const LocationContext *LCtx = C.getLocationContext();
2285 SValBuilder &SVB = C.getSValBuilder();
2286
2287 // Get arguments
2288 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2289 SourceArgExpr Source = {{Call.getArgExpr(1), 1}};
2290 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2291
2292 // `src` can never be null
2293 SVal SrcVal = State->getSVal(Source.Expression, LCtx);
2294 State = checkNonNull(C, State, Source, SrcVal);
2295 if (!State)
2296 return;
2297
2298 // Buffer must not overlap
2299 State = CheckOverlap(C, State, Size, Dest, Source, CK_Regular);
2300 if (!State)
2301 return;
2302
2303 // The function returns an implementation-defined length needed for
2304 // transformation
2305 SVal RetVal = SVB.conjureSymbolVal(Call, C.blockCount());
2306
2307 auto BindReturnAndTransition = [&RetVal, &Call, LCtx,
2308 &C](ProgramStateRef State) {
2309 if (State) {
2310 State = State->BindExpr(Call.getOriginExpr(), LCtx, RetVal);
2311 C.addTransition(State);
2312 }
2313 };
2314
2315 // Check if size is zero
2316 SVal SizeVal = State->getSVal(Size.Expression, LCtx);
2317 QualType SizeTy = Size.Expression->getType();
2318
2319 auto [StateZeroSize, StateSizeNonZero] =
2320 assumeZero(C, State, SizeVal, SizeTy);
2321
2322 // We can't assume anything about size, just bind the return value and be done
2323 if (!StateZeroSize && !StateSizeNonZero)
2324 return BindReturnAndTransition(State);
2325
2326 // If `n` is 0, we just return the implementation defined length
2327 if (StateZeroSize && !StateSizeNonZero)
2328 return BindReturnAndTransition(StateZeroSize);
2329
2330 // If `n` is not 0, `dest` can not be null.
2331 SVal DestVal = StateSizeNonZero->getSVal(Dest.Expression, LCtx);
2332 StateSizeNonZero = checkNonNull(C, StateSizeNonZero, Dest, DestVal);
2333 if (!StateSizeNonZero)
2334 return;
2335
2336 // Check that we can write to the destination buffer
2337 StateSizeNonZero = CheckBufferAccess(C, StateSizeNonZero, Dest, Size,
2338 AccessKind::write, CK_Regular);
2339 if (!StateSizeNonZero)
2340 return;
2341
2342 // Success: return value < `n`
2343 // Failure: return value >= `n`
2344 auto ComparisonVal = SVB.evalBinOp(StateSizeNonZero, BO_LT, RetVal, SizeVal,
2345 SVB.getConditionType())
2346 .getAs<DefinedOrUnknownSVal>();
2347 if (!ComparisonVal) {
2348 // Fallback: invalidate the buffer.
2349 StateSizeNonZero = invalidateDestinationBufferBySize(
2350 C, StateSizeNonZero, Dest.Expression, Call.getCFGElementRef(), DestVal,
2351 SizeVal, Size.Expression->getType());
2352 return BindReturnAndTransition(StateSizeNonZero);
2353 }
2354
2355 auto [StateSuccess, StateFailure] = StateSizeNonZero->assume(*ComparisonVal);
2356
2357 if (StateSuccess) {
2358 // The transformation invalidated the buffer.
2359 StateSuccess = invalidateDestinationBufferBySize(
2360 C, StateSuccess, Dest.Expression, Call.getCFGElementRef(), DestVal,
2361 SizeVal, Size.Expression->getType());
2362 BindReturnAndTransition(StateSuccess);
2363 // Fallthrough: We also want to add a transition to the failure state below.
2364 }
2365
2366 if (StateFailure) {
2367 // `dest` buffer content is undefined
2368 if (auto DestLoc = DestVal.getAs<loc::MemRegionVal>()) {
2369 StateFailure = StateFailure->killBinding(*DestLoc);
2370 StateFailure =
2371 StateFailure->bindDefaultInitial(*DestLoc, UndefinedVal{}, LCtx);
2372 }
2373
2374 BindReturnAndTransition(StateFailure);
2375 }
2376}
2377
2378void CStringChecker::evalStrcmp(CheckerContext &C,
2379 const CallEvent &Call) const {
2380 //int strcmp(const char *s1, const char *s2);
2381 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2382}
2383
2384void CStringChecker::evalStrncmp(CheckerContext &C,
2385 const CallEvent &Call) const {
2386 //int strncmp(const char *s1, const char *s2, size_t n);
2387 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2388}
2389
2390void CStringChecker::evalStrcasecmp(CheckerContext &C,
2391 const CallEvent &Call) const {
2392 //int strcasecmp(const char *s1, const char *s2);
2393 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2394}
2395
2396void CStringChecker::evalStrncasecmp(CheckerContext &C,
2397 const CallEvent &Call) const {
2398 //int strncasecmp(const char *s1, const char *s2, size_t n);
2399 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2400}
2401
2402void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2403 bool IsBounded, bool IgnoreCase) const {
2404 CurrentFunctionDescription = "string comparison function";
2405 ProgramStateRef state = C.getState();
2406 const LocationContext *LCtx = C.getLocationContext();
2407
2408 // Check that the first string is non-null
2409 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2410 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2411 state = checkNonNull(C, state, Left, LeftVal);
2412 if (!state)
2413 return;
2414
2415 // Check that the second string is non-null.
2416 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2417 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2418 state = checkNonNull(C, state, Right, RightVal);
2419 if (!state)
2420 return;
2421
2422 // Get the string length of the first string or give up.
2423 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2424 if (LeftLength.isUndef())
2425 return;
2426
2427 // Get the string length of the second string or give up.
2428 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2429 if (RightLength.isUndef())
2430 return;
2431
2432 // If we know the two buffers are the same, we know the result is 0.
2433 // First, get the two buffers' addresses. Another checker will have already
2434 // made sure they're not undefined.
2435 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2436 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2437
2438 // See if they are the same.
2439 SValBuilder &svalBuilder = C.getSValBuilder();
2440 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2441 ProgramStateRef StSameBuf, StNotSameBuf;
2442 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2443
2444 // If the two arguments might be the same buffer, we know the result is 0,
2445 // and we only need to check one size.
2446 if (StSameBuf) {
2447 StSameBuf =
2448 StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2449 svalBuilder.makeZeroVal(Call.getResultType()));
2450 C.addTransition(StSameBuf);
2451
2452 // If the two arguments are GUARANTEED to be the same, we're done!
2453 if (!StNotSameBuf)
2454 return;
2455 }
2456
2457 assert(StNotSameBuf);
2458 state = StNotSameBuf;
2459
2460 // At this point we can go about comparing the two buffers.
2461 // For now, we only do this if they're both known string literals.
2462
2463 // Attempt to extract string literals from both expressions.
2464 const StringLiteral *LeftStrLiteral =
2465 getCStringLiteral(C, state, Left.Expression, LeftVal);
2466 const StringLiteral *RightStrLiteral =
2467 getCStringLiteral(C, state, Right.Expression, RightVal);
2468 bool canComputeResult = false;
2469 SVal resultVal = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2470
2471 if (LeftStrLiteral && RightStrLiteral) {
2472 StringRef LeftStrRef = LeftStrLiteral->getString();
2473 StringRef RightStrRef = RightStrLiteral->getString();
2474
2475 if (IsBounded) {
2476 // Get the max number of characters to compare.
2477 const Expr *lenExpr = Call.getArgExpr(2);
2478 SVal lenVal = state->getSVal(lenExpr, LCtx);
2479
2480 // If the length is known, we can get the right substrings.
2481 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2482 // Create substrings of each to compare the prefix.
2483 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2484 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2485 canComputeResult = true;
2486 }
2487 } else {
2488 // This is a normal, unbounded strcmp.
2489 canComputeResult = true;
2490 }
2491
2492 if (canComputeResult) {
2493 // Real strcmp stops at null characters.
2494 size_t s1Term = LeftStrRef.find('\0');
2495 if (s1Term != StringRef::npos)
2496 LeftStrRef = LeftStrRef.substr(0, s1Term);
2497
2498 size_t s2Term = RightStrRef.find('\0');
2499 if (s2Term != StringRef::npos)
2500 RightStrRef = RightStrRef.substr(0, s2Term);
2501
2502 // Use StringRef's comparison methods to compute the actual result.
2503 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2504 : LeftStrRef.compare(RightStrRef);
2505
2506 // The strcmp function returns an integer greater than, equal to, or less
2507 // than zero, [c11, p7.24.4.2].
2508 if (compareRes == 0) {
2509 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2510 }
2511 else {
2512 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2513 // Constrain strcmp's result range based on the result of StringRef's
2514 // comparison methods.
2515 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2516 SVal compareWithZero =
2517 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2518 svalBuilder.getConditionType());
2519 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2520 state = state->assume(compareWithZeroVal, true);
2521 }
2522 }
2523 }
2524
2525 state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2526
2527 // Record this as a possible path.
2528 C.addTransition(state);
2529}
2530
2531void CStringChecker::evalStrsep(CheckerContext &C,
2532 const CallEvent &Call) const {
2533 // char *strsep(char **stringp, const char *delim);
2534 // Verify whether the search string parameter matches the return type.
2535 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2536
2537 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2538 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2539 CharPtrTy.getUnqualifiedType())
2540 return;
2541
2542 CurrentFunctionDescription = "strsep()";
2543 ProgramStateRef State = C.getState();
2544 const LocationContext *LCtx = C.getLocationContext();
2545
2546 // Check that the search string pointer is non-null (though it may point to
2547 // a null string).
2548 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2549 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2550 if (!State)
2551 return;
2552
2553 // Check that the delimiter string is non-null.
2554 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2555 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2556 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2557 if (!State)
2558 return;
2559
2560 SValBuilder &SVB = C.getSValBuilder();
2561 SVal Result;
2562 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2563 // Get the current value of the search string pointer, as a char*.
2564 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2565
2566 // Invalidate the search string, representing the change of one delimiter
2567 // character to NUL.
2568 // As the replacement never overflows, do not invalidate its super region.
2569 State = invalidateDestinationBufferNeverOverflows(
2570 C, State, Call.getCFGElementRef(), Result);
2571
2572 // Overwrite the search string pointer. The new value is either an address
2573 // further along in the same string, or NULL if there are no more tokens.
2574 State = State->bindLoc(*SearchStrLoc,
2575 SVB.conjureSymbolVal(Call, C.blockCount(), getTag()),
2576 LCtx);
2577 } else {
2578 assert(SearchStrVal.isUnknown());
2579 // Conjure a symbolic value. It's the best we can do.
2580 Result = SVB.conjureSymbolVal(Call, C.blockCount());
2581 }
2582
2583 // Set the return value, and finish.
2584 State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2585 C.addTransition(State);
2586}
2587
2588// These should probably be moved into a C++ standard library checker.
2589void CStringChecker::evalStdCopy(CheckerContext &C,
2590 const CallEvent &Call) const {
2591 evalStdCopyCommon(C, Call);
2592}
2593
2594void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2595 const CallEvent &Call) const {
2596 evalStdCopyCommon(C, Call);
2597}
2598
2599void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2600 const CallEvent &Call) const {
2601 if (!Call.getArgExpr(2)->getType()->isPointerType())
2602 return;
2603
2604 ProgramStateRef State = C.getState();
2605
2606 const LocationContext *LCtx = C.getLocationContext();
2607
2608 // template <class _InputIterator, class _OutputIterator>
2609 // _OutputIterator
2610 // copy(_InputIterator __first, _InputIterator __last,
2611 // _OutputIterator __result)
2612
2613 // Invalidate the destination buffer
2614 const Expr *Dst = Call.getArgExpr(2);
2615 SVal DstVal = State->getSVal(Dst, LCtx);
2616 // FIXME: As we do not know how many items are copied, we also invalidate the
2617 // super region containing the target location.
2618 State = invalidateDestinationBufferAlwaysEscapeSuperRegion(
2619 C, State, Call.getCFGElementRef(), DstVal);
2620
2621 SValBuilder &SVB = C.getSValBuilder();
2622
2623 SVal ResultVal = SVB.conjureSymbolVal(Call, C.blockCount());
2624 State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2625
2626 C.addTransition(State);
2627}
2628
2629void CStringChecker::evalMemset(CheckerContext &C,
2630 const CallEvent &Call) const {
2631 // void *memset(void *s, int c, size_t n);
2632 CurrentFunctionDescription = "memory set function";
2633
2634 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2635 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2636 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2637
2638 ProgramStateRef State = C.getState();
2639
2640 // See if the size argument is zero.
2641 const LocationContext *LCtx = C.getLocationContext();
2642 SVal SizeVal = C.getSVal(Size.Expression);
2643 QualType SizeTy = Size.Expression->getType();
2644
2645 ProgramStateRef ZeroSize, NonZeroSize;
2646 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2647
2648 // Get the value of the memory area.
2649 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2650
2651 // If the size is zero, there won't be any actual memory access, so
2652 // just bind the return value to the buffer and return.
2653 if (ZeroSize && !NonZeroSize) {
2654 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2655 C.addTransition(ZeroSize);
2656 return;
2657 }
2658
2659 // Ensure the memory area is not null.
2660 // If it is NULL there will be a NULL pointer dereference.
2661 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2662 if (!State)
2663 return;
2664
2665 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2666 if (!State)
2667 return;
2668
2669 // According to the values of the arguments, bind the value of the second
2670 // argument to the destination buffer and set string length, or just
2671 // invalidate the destination buffer.
2672 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(),
2673 C.getSVal(CharE.Expression), Size.Expression, C, State))
2674 return;
2675
2676 State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2677 C.addTransition(State);
2678}
2679
2680void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2681 CurrentFunctionDescription = "memory clearance function";
2682
2683 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2684 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2685 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2686
2687 ProgramStateRef State = C.getState();
2688
2689 // See if the size argument is zero.
2690 SVal SizeVal = C.getSVal(Size.Expression);
2691 QualType SizeTy = Size.Expression->getType();
2692
2693 ProgramStateRef StateZeroSize, StateNonZeroSize;
2694 std::tie(StateZeroSize, StateNonZeroSize) =
2695 assumeZero(C, State, SizeVal, SizeTy);
2696
2697 // If the size is zero, there won't be any actual memory access,
2698 // In this case we just return.
2699 if (StateZeroSize && !StateNonZeroSize) {
2700 C.addTransition(StateZeroSize);
2701 return;
2702 }
2703
2704 // Get the value of the memory area.
2705 SVal MemVal = C.getSVal(Buffer.Expression);
2706
2707 // Ensure the memory area is not null.
2708 // If it is NULL there will be a NULL pointer dereference.
2709 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2710 if (!State)
2711 return;
2712
2713 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2714 if (!State)
2715 return;
2716
2717 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(), Zero,
2718 Size.Expression, C, State))
2719 return;
2720
2721 C.addTransition(State);
2722}
2723
2724void CStringChecker::evalSprintf(CheckerContext &C,
2725 const CallEvent &Call) const {
2726 CurrentFunctionDescription = "'sprintf'";
2727 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2728}
2729
2730void CStringChecker::evalSnprintf(CheckerContext &C,
2731 const CallEvent &Call) const {
2732 CurrentFunctionDescription = "'snprintf'";
2733 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2734}
2735
2736void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2737 bool IsBounded) const {
2738 ProgramStateRef State = C.getState();
2739 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2740 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2741
2742 const auto NumParams = Call.parameters().size();
2743 if (CE->getNumArgs() < NumParams) {
2744 // This is an invalid call, let's just ignore it.
2745 return;
2746 }
2747
2748 const auto AllArguments =
2749 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2750 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2751
2752 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2753 // We consider only string buffers
2754 if (const QualType type = ArgExpr->getType();
2755 !type->isAnyPointerType() ||
2756 !type->getPointeeType()->isAnyCharacterType())
2757 continue;
2758 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2759
2760 // Ensure the buffers do not overlap.
2761 SizeArgExpr SrcExprAsSizeDummy = {
2762 {Source.Expression, Source.ArgumentIndex}};
2763 State = CheckOverlap(
2764 C, State,
2765 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2766 Dest, Source);
2767 if (!State)
2768 return;
2769 }
2770
2771 C.addTransition(State);
2772}
2773
2774//===----------------------------------------------------------------------===//
2775// The driver method, and other Checker callbacks.
2776//===----------------------------------------------------------------------===//
2777
2778CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2779 CheckerContext &C) const {
2780 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2781 if (!CE)
2782 return nullptr;
2783
2784 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2785 if (!FD)
2786 return nullptr;
2787
2788 if (StdCopy.matches(Call))
2789 return &CStringChecker::evalStdCopy;
2790 if (StdCopyBackward.matches(Call))
2791 return &CStringChecker::evalStdCopyBackward;
2792
2793 // Pro-actively check that argument types are safe to do arithmetic upon.
2794 // We do not want to crash if someone accidentally passes a structure
2795 // into, say, a C++ overload of any of these functions. We could not check
2796 // that for std::copy because they may have arguments of other types.
2797 for (auto I : CE->arguments()) {
2798 QualType T = I->getType();
2800 return nullptr;
2801 }
2802
2803 const FnCheck *Callback = Callbacks.lookup(Call);
2804 if (Callback)
2805 return *Callback;
2806
2807 return nullptr;
2808}
2809
2810bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2811 FnCheck Callback = identifyCall(Call, C);
2812
2813 // If the callee isn't a string function, let another checker handle it.
2814 if (!Callback)
2815 return false;
2816
2817 // Check and evaluate the call.
2818 assert(isa<CallExpr>(Call.getOriginExpr()));
2819 Callback(this, C, Call);
2820
2821 // If the evaluate call resulted in no change, chain to the next eval call
2822 // handler.
2823 // Note, the custom CString evaluation calls assume that basic safety
2824 // properties are held. However, if the user chooses to turn off some of these
2825 // checks, we ignore the issues and leave the call evaluation to a generic
2826 // handler.
2827 return C.isDifferent();
2828}
2829
2830void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2831 // Record string length for char a[] = "abc";
2832 ProgramStateRef state = C.getState();
2833
2834 for (const auto *I : DS->decls()) {
2835 const VarDecl *D = dyn_cast<VarDecl>(I);
2836 if (!D)
2837 continue;
2838
2839 // FIXME: Handle array fields of structs.
2840 if (!D->getType()->isArrayType())
2841 continue;
2842
2843 const Expr *Init = D->getInit();
2844 if (!Init)
2845 continue;
2847 continue;
2848
2849 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2850 const MemRegion *MR = VarLoc.getAsRegion();
2851 if (!MR)
2852 continue;
2853
2854 SVal StrVal = C.getSVal(Init);
2855 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2856 DefinedOrUnknownSVal strLength =
2857 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2858
2859 state = state->set<CStringLength>(MR, strLength);
2860 }
2861
2862 C.addTransition(state);
2863}
2864
2866CStringChecker::checkRegionChanges(ProgramStateRef state,
2867 const InvalidatedSymbols *,
2868 ArrayRef<const MemRegion *> ExplicitRegions,
2869 ArrayRef<const MemRegion *> Regions,
2870 const LocationContext *LCtx,
2871 const CallEvent *Call) const {
2872 CStringLengthTy Entries = state->get<CStringLength>();
2873 if (Entries.isEmpty())
2874 return state;
2875
2876 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2877 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2878
2879 // First build sets for the changed regions and their super-regions.
2880 for (const MemRegion *MR : Regions) {
2881 Invalidated.insert(MR);
2882
2883 SuperRegions.insert(MR);
2884 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2885 MR = SR->getSuperRegion();
2886 SuperRegions.insert(MR);
2887 }
2888 }
2889
2890 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2891
2892 // Then loop over the entries in the current state.
2893 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2894 // Is this entry for a super-region of a changed region?
2895 if (SuperRegions.count(MR)) {
2896 Entries = F.remove(Entries, MR);
2897 continue;
2898 }
2899
2900 // Is this entry for a sub-region of a changed region?
2901 const MemRegion *Super = MR;
2902 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2903 Super = SR->getSuperRegion();
2904 if (Invalidated.count(Super)) {
2905 Entries = F.remove(Entries, MR);
2906 break;
2907 }
2908 }
2909 }
2910
2911 return state->set<CStringLength>(Entries);
2912}
2913
2914void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2915 SymbolReaper &SR) const {
2916 // Mark all symbols in our string length map as valid.
2917 CStringLengthTy Entries = state->get<CStringLength>();
2918
2919 for (SVal Len : llvm::make_second_range(Entries)) {
2920 for (SymbolRef Sym : Len.symbols())
2921 SR.markInUse(Sym);
2922 }
2923}
2924
2925void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2926 CheckerContext &C) const {
2927 ProgramStateRef state = C.getState();
2928 CStringLengthTy Entries = state->get<CStringLength>();
2929 if (Entries.isEmpty())
2930 return;
2931
2932 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2933 for (auto [Reg, Len] : Entries) {
2934 if (SymbolRef Sym = Len.getAsSymbol()) {
2935 if (SR.isDead(Sym))
2936 Entries = F.remove(Entries, Reg);
2937 }
2938 }
2939
2940 state = state->set<CStringLength>(Entries);
2941 C.addTransition(state);
2942}
2943
2944void ento::registerCStringModeling(CheckerManager &Mgr) {
2945 // Other checker relies on the modeling implemented in this checker family,
2946 // so this "modeling checker" can register the 'CStringChecker' backend for
2947 // its callbacks without enabling any of its frontends.
2948 Mgr.getChecker<CStringChecker>();
2949}
2950
2951bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
2952 return true;
2953}
2954
2955#define REGISTER_CHECKER(NAME) \
2956 void ento::registerCString##NAME(CheckerManager &Mgr) { \
2957 Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr); \
2958 } \
2959 \
2960 bool ento::shouldRegisterCString##NAME(const CheckerManager &) { \
2961 return true; \
2962 }
2963
2964REGISTER_CHECKER(NullArg)
2965REGISTER_CHECKER(OutOfBounds)
2966REGISTER_CHECKER(BufferOverlap)
2967REGISTER_CHECKER(NotNullTerm)
2968REGISTER_CHECKER(UninitializedRead)
#define V(N, I)
static std::optional< NonLoc > getIndex(ProgramStateRef State, const ElementRegion *ER, CharKind CK)
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx)
#define REGISTER_CHECKER(name)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType WideCharTy
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType CharTy
CanQualType IntTy
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType UnsignedCharTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
decl_range decls()
Definition Stmt.h:1659
QualType getType() const
Definition Expr.h:144
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8404
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition TypeBase.h:8372
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
unsigned getLength() const
Definition Expr.h:1909
StringRef getString() const
Definition Expr.h:1867
bool isArrayType() const
Definition TypeBase.h:8614
bool isPointerType() const
Definition TypeBase.h:8515
CanQualType getCanonicalTypeUnqualified() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition TypeBase.h:8989
bool isAnyPointerType() const
Definition TypeBase.h:8523
QualType getType() const
Definition Decl.h:723
const Expr * getInit() const
Definition Decl.h:1368
APSIntPtr getMaxValue(const llvm::APSInt &v)
std::optional< APSIntPtr > evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:584
CHECKER * getChecker(AT &&...Args)
If the the singleton instance of a checker class is not yet constructed, then construct it (with the ...
ElementRegion is used to represent both array elements and casts.
Definition MemRegion.h:1227
QualType getValueType() const override
Definition MemRegion.h:1249
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:98
LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy * castAs() const
Definition MemRegion.h:1424
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Kind getKind() const
Definition MemRegion.h:203
@ TK_PreserveContents
Tells that a region's contents is not changed.
Definition MemRegion.h:1672
@ TK_SuppressEscape
Suppress pointer-escaping of a region.
Definition MemRegion.h:1675
void setTrait(SymbolRef Sym, InvalidationKinds IK)
bool hasSymbolicOffset() const
Definition MemRegion.h:83
const MemRegion * getRegion() const
It might return null.
Definition MemRegion.h:81
int64_t getOffset() const
Definition MemRegion.h:85
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
BasicValueFactory & getBasicValueFactory()
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, const LocationContext *LCtx, unsigned count)
ProgramStateManager & getStateManager()
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
loc::MemRegionVal makeLoc(SymbolRef sym)
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, ConstCFGElementRef elem, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition SVals.h:56
bool isUndef() const
Definition SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
bool isValid() const
Definition SVals.h:111
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
bool isUnknown() const
Definition SVals.h:105
LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral * getStringLiteral() const
Definition MemRegion.h:873
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition MemRegion.h:487
llvm::iterator_range< symbol_iterator > symbols() const
Definition SymExpr.h:107
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
__inline void unsigned int _2
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
llvm::DenseSet< SymbolRef > InvalidatedSymbols
Definition Store.h:51
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
Definition SymExpr.h:133
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
std::variant< struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl > Decl
All declarations that can appear in a module declaration.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
CFGBlock::ConstCFGElementRef ConstCFGElementRef
Definition CFG.h:1199
@ Result
The result type of a method or function.
Definition TypeBase.h:905
const FunctionProtoType * T
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition CharInfo.h:233
U cast(CodeGen::Address addr)
Definition Address.h:327
int const char * function
Definition c++config.h:31