clang 22.0.0git
CStringChecker.cpp
Go to the documentation of this file.
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <functional>
34#include <optional>
35
36using namespace clang;
37using namespace ento;
38using namespace std::placeholders;
39
40namespace {
41struct AnyArgExpr {
42 const Expr *Expression;
43 unsigned ArgumentIndex;
44};
45struct SourceArgExpr : AnyArgExpr {};
46struct DestinationArgExpr : AnyArgExpr {};
47struct SizeArgExpr : AnyArgExpr {};
48
49using ErrorMessage = SmallString<128>;
50enum class AccessKind { write, read };
51
52static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
53 AccessKind Access) {
54 ErrorMessage Message;
55 llvm::raw_svector_ostream Os(Message);
56
57 // Function classification like: Memory copy function
58 Os << toUppercase(FunctionDescription.front())
59 << &FunctionDescription.data()[1];
60
61 if (Access == AccessKind::write) {
62 Os << " overflows the destination buffer";
63 } else { // read access
64 Os << " accesses out-of-bound array element";
65 }
66
67 return Message;
68}
69
/// Selects which concatenation function, if any, a shared routine models.
enum class ConcatFnKind {
  none = 0,
  strcat = 1,
  strlcat = 2,
};

/// Character width of the function being modeled: plain or wide characters.
enum class CharKind {
  Regular = 0,
  Wide,
};

// Short aliases used when binding callbacks.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
75
76static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
77 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
78 : Ctx.WideCharTy);
79}
80
81class CStringChecker
82 : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
83 check::LiveSymbols, check::DeadSymbols,
84 check::RegionChanges> {
85 mutable const char *CurrentFunctionDescription = nullptr;
86
87public:
88 // FIXME: The bug types emitted by this checker family have confused garbage
89 // in their Description and Category fields (e.g. `categories::UnixAPI` is
90 // passed as the description in several cases and `uninitialized` is mistyped
91 // as `unitialized`). This should be cleaned up.
92 CheckerFrontendWithBugType NullArg{categories::UnixAPI};
93 CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
94 CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
95 "Improper arguments"};
96 CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
97 CheckerFrontendWithBugType UninitializedRead{
98 "Accessing unitialized/garbage values"};
99
100 StringRef getDebugTag() const override { return "MallocChecker"; }
101
102 static void *getTag() { static int tag; return &tag; }
103
104 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
105 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
106 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
107 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
108
110 checkRegionChanges(ProgramStateRef state,
111 const InvalidatedSymbols *,
112 ArrayRef<const MemRegion *> ExplicitRegions,
113 ArrayRef<const MemRegion *> Regions,
114 const LocationContext *LCtx,
115 const CallEvent *Call) const;
116
117 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
118 const CallEvent &)>;
119
120 CallDescriptionMap<FnCheck> Callbacks = {
121 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
122 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
123 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
124 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
125 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
126 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
127 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
128 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
129 {{CDM::CLibrary, {"memcmp"}, 3},
130 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
131 {{CDM::CLibrary, {"wmemcmp"}, 3},
132 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
133 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
134 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
135 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
136 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
137 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
138 &CStringChecker::evalMemset},
139 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
140 // FIXME: C23 introduces 'memset_explicit', maybe also model that
141 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
142 &CStringChecker::evalStrcpy},
143 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
144 &CStringChecker::evalStrncpy},
145 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
146 &CStringChecker::evalStpcpy},
147 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
148 &CStringChecker::evalStrlcpy},
149 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
150 &CStringChecker::evalStrcat},
151 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
152 &CStringChecker::evalStrncat},
153 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
154 &CStringChecker::evalStrlcat},
155 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
156 &CStringChecker::evalstrLength},
157 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
158 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
159 &CStringChecker::evalstrnLength},
160 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
161 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
162 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
163 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
164 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
165 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
166 {{CDM::CLibrary, {"strxfrm"}, 3}, &CStringChecker::evalStrxfrm},
167 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
168 {{CDM::CLibrary, {"bcmp"}, 3},
169 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
170 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
171 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
172 &CStringChecker::evalBzero},
173
174 // When recognizing calls to the following variadic functions, we accept
175 // any number of arguments in the call (std::nullopt = accept any
176 // number), but check that in the declaration there are 2 and 3
177 // parameters respectively. (Note that the parameter count does not
178 // include the "...". Calls where the number of arguments is too small
179 // will be discarded by the callback.)
180 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
181 &CStringChecker::evalSprintf},
182 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
183 &CStringChecker::evalSnprintf},
184 };
185
186 // These require a bit of special handling.
187 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
188 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
189
190 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
191 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
192 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
193 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
194 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
195 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
196 ProgramStateRef state, SizeArgExpr Size,
197 DestinationArgExpr Dest, SourceArgExpr Source,
198 bool Restricted, bool IsMempcpy, CharKind CK) const;
199
200 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201
202 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
203 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
204 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
205 bool IsStrnlen = false) const;
206
207 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
208 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
209 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
210 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
211 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
212 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
213 bool returnPtr = true) const;
214
215 void evalStrxfrm(CheckerContext &C, const CallEvent &Call) const;
216
217 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
218 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
220
221 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
222 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
223 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
224 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
226 bool IsBounded = false, bool IgnoreCase = false) const;
227
228 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
229
230 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
231 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
232 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
233 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
234 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
235
236 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
237 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
238 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
239 bool IsBounded) const;
240
241 // Utility methods
242 std::pair<ProgramStateRef , ProgramStateRef >
243 static assumeZero(CheckerContext &C,
244 ProgramStateRef state, SVal V, QualType Ty);
245
246 static ProgramStateRef setCStringLength(ProgramStateRef state,
247 const MemRegion *MR,
248 SVal strLength);
249 static SVal getCStringLengthForRegion(CheckerContext &C,
250 ProgramStateRef &state,
251 const Expr *Ex,
252 const MemRegion *MR,
253 bool hypothetical);
254 SVal getCStringLength(CheckerContext &C,
255 ProgramStateRef &state,
256 const Expr *Ex,
257 SVal Buf,
258 bool hypothetical = false) const;
259
260 const StringLiteral *getCStringLiteral(CheckerContext &C,
261 ProgramStateRef &state,
262 const Expr *expr,
263 SVal val) const;
264
265 /// Invalidate the destination buffer determined by characters copied.
266 static ProgramStateRef
267 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
268 const Expr *BufE, ConstCFGElementRef Elem,
269 SVal BufV, SVal SizeV, QualType SizeTy);
270
271 /// Operation never overflows, do not invalidate the super region.
272 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
273 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
274
275 /// We do not know whether the operation can overflow (e.g. size is unknown),
276 /// invalidate the super region and escape related pointers.
277 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
278 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
279
280 /// Invalidate the source buffer for escaping pointers.
281 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
284 SVal BufV);
285
286 /// @param InvalidationTraitOperations Determine how to invlidate the
287 /// MemRegion by setting the invalidation traits. Return true to cause pointer
288 /// escape, or false otherwise.
289 static ProgramStateRef invalidateBufferAux(
290 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
291 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
292 const MemRegion *)>
293 InvalidationTraitOperations);
294
295 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
296 const MemRegion *MR);
297
298 static bool memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
299 SVal CharE, const Expr *Size, CheckerContext &C,
300 ProgramStateRef &State);
301
302 // Re-usable checks
303 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
304 AnyArgExpr Arg, SVal l) const;
305 // Check whether the origin region behind \p Element (like the actual array
306 // region \p Element is from) is initialized.
307 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
308 AnyArgExpr Buffer, SVal Element, SVal Size) const;
309 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
310 AnyArgExpr Buffer, SVal Element,
311 AccessKind Access,
312 CharKind CK = CharKind::Regular) const;
313 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
314 AnyArgExpr Buffer, SizeArgExpr Size,
315 AccessKind Access,
316 CharKind CK = CharKind::Regular) const;
317 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
318 SizeArgExpr Size, AnyArgExpr First,
319 AnyArgExpr Second,
320 CharKind CK = CharKind::Regular) const;
321 void emitOverlapBug(CheckerContext &C,
322 ProgramStateRef state,
323 const Stmt *First,
324 const Stmt *Second) const;
325
326 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
327 StringRef WarningMsg) const;
328 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
329 const Stmt *S, StringRef WarningMsg) const;
330 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
331 const Stmt *S, StringRef WarningMsg) const;
332 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
333 const Expr *E, const MemRegion *R,
334 StringRef Msg) const;
335 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
336 ProgramStateRef state,
337 NonLoc left,
338 NonLoc right) const;
339
340 // Return true if the destination buffer of the copy function may be in bound.
341 // Expects SVal of Size to be positive and unsigned.
342 // Expects SVal of FirstBuf to be a FieldRegion.
343 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
344 SVal BufVal, QualType BufTy, SVal LengthVal,
345 QualType LengthTy);
346};
347
348} //end anonymous namespace
349
// Registers a program-state map named CStringLength whose keys are
// `const MemRegion *` and whose values are `SVal`.
350REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
351
352//===----------------------------------------------------------------------===//
353// Individual checks and utility methods.
354//===----------------------------------------------------------------------===//
355
356std::pair<ProgramStateRef, ProgramStateRef>
357CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
358 QualType Ty) {
359 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
360 if (!val)
361 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
362
363 SValBuilder &svalBuilder = C.getSValBuilder();
364 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
365 return State->assume(svalBuilder.evalEQ(State, *val, zero));
366}
367
368ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
369 ProgramStateRef State,
370 AnyArgExpr Arg, SVal l) const {
371 // If a previous check has failed, propagate the failure.
372 if (!State)
373 return nullptr;
374
375 ProgramStateRef stateNull, stateNonNull;
376 std::tie(stateNull, stateNonNull) =
377 assumeZero(C, State, l, Arg.Expression->getType());
378
379 if (stateNull && !stateNonNull) {
380 if (NullArg.isEnabled()) {
381 SmallString<80> buf;
382 llvm::raw_svector_ostream OS(buf);
383 assert(CurrentFunctionDescription);
384 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
385 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
386 << CurrentFunctionDescription;
387
388 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
389 }
390 return nullptr;
391 }
392
393 // From here on, assume that the value is non-null.
394 assert(stateNonNull);
395 return stateNonNull;
396}
397
398static std::optional<NonLoc> getIndex(ProgramStateRef State,
399 const ElementRegion *ER, CharKind CK) {
401 ASTContext &Ctx = SVB.getContext();
402
403 if (CK == CharKind::Regular) {
404 if (ER->getValueType() != Ctx.CharTy)
405 return {};
406 return ER->getIndex();
407 }
408
409 if (ER->getValueType() != Ctx.WideCharTy)
410 return {};
411
412 QualType SizeTy = Ctx.getSizeType();
413 NonLoc WideSize =
415 SizeTy)
416 .castAs<NonLoc>();
417 SVal Offset =
418 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
419 if (Offset.isUnknown())
420 return {};
421 return Offset.castAs<NonLoc>();
422}
423
424// Basically 1 -> 1st, 12 -> 12th, etc.
425static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
426 Os << Idx << llvm::getOrdinalSuffix(Idx);
427}
428
429ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
430 ProgramStateRef State,
431 AnyArgExpr Buffer, SVal Element,
432 SVal Size) const {
433
434 // If a previous check has failed, propagate the failure.
435 if (!State)
436 return nullptr;
437
438 const MemRegion *R = Element.getAsRegion();
439 const auto *ER = dyn_cast_or_null<ElementRegion>(R);
440 if (!ER)
441 return State;
442
443 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
444 if (!SuperR)
445 return State;
446
447 // FIXME: We ought to able to check objects as well. Maybe
448 // UninitializedObjectChecker could help?
449 if (!SuperR->getValueType()->isArrayType())
450 return State;
451
452 SValBuilder &SVB = C.getSValBuilder();
453 ASTContext &Ctx = SVB.getContext();
454
455 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
456 const NonLoc Zero = SVB.makeZeroArrayIndex();
457
458 std::optional<Loc> FirstElementVal =
459 State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
460 if (!FirstElementVal)
461 return State;
462
463 // Ensure that we wouldn't read uninitialized value.
464 if (UninitializedRead.isEnabled() &&
465 State->getSVal(*FirstElementVal).isUndef()) {
466 llvm::SmallString<258> Buf;
467 llvm::raw_svector_ostream OS(Buf);
468 OS << "The first element of the ";
469 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
470 OS << " argument is undefined";
471 emitUninitializedReadBug(C, State, Buffer.Expression,
472 FirstElementVal->getAsRegion(), OS.str());
473 return nullptr;
474 }
475
476 // We won't check whether the entire region is fully initialized -- lets just
477 // check that the first and the last element is. So, onto checking the last
478 // element:
479 const QualType IdxTy = SVB.getArrayIndexType();
480
481 NonLoc ElemSize =
482 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
483 .castAs<NonLoc>();
484
485 // FIXME: Check that the size arg to the cstring function is divisible by
486 // size of the actual element type?
487
488 // The type of the argument to the cstring function is either char or wchar,
489 // but thats not the type of the original array (or memory region).
490 // Suppose the following:
491 // int t[5];
492 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
493 // When checking whether t is fully initialized, we see it as char array of
494 // size sizeof(int)*5. If we check the last element as a character, we read
495 // the last byte of an integer, which will be undefined. But just because
496 // that value is undefined, it doesn't mean that the element is uninitialized!
497 // For this reason, we need to retrieve the actual last element with the
498 // correct type.
499
500 // Divide the size argument to the cstring function by the actual element
501 // type. This value will be size of the array, or the index to the
502 // past-the-end element.
503 std::optional<NonLoc> Offset =
504 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
505 IdxTy)
506 .getAs<NonLoc>();
507
508 // Retrieve the index of the last element.
509 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
510 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
511
512 if (!Offset)
513 return State;
514
515 SVal LastElementVal =
516 State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
517 if (!isa<Loc>(LastElementVal))
518 return State;
519
520 if (UninitializedRead.isEnabled() &&
521 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
522 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
523 // If we can't get emit a sensible last element index, just bail out --
524 // prefer to emit nothing in favour of emitting garbage quality reports.
525 if (!IdxInt) {
526 C.addSink();
527 return nullptr;
528 }
529 llvm::SmallString<258> Buf;
530 llvm::raw_svector_ostream OS(Buf);
531 OS << "The last accessed element (at index ";
532 OS << IdxInt->getExtValue();
533 OS << ") in the ";
534 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
535 OS << " argument is undefined";
536 emitUninitializedReadBug(C, State, Buffer.Expression,
537 LastElementVal.getAsRegion(), OS.str());
538 return nullptr;
539 }
540 return State;
541}
542// FIXME: The root of this logic was copied from the old checker
543// alpha.security.ArrayBound (which is removed within this commit).
544// It should be refactored to use the different, more sophisticated bounds
545// checking logic used by the new checker ``security.ArrayBound``.
546ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
547 ProgramStateRef state,
548 AnyArgExpr Buffer, SVal Element,
549 AccessKind Access,
550 CharKind CK) const {
551
552 // If a previous check has failed, propagate the failure.
553 if (!state)
554 return nullptr;
555
556 // Check for out of bound array element access.
557 const MemRegion *R = Element.getAsRegion();
558 if (!R)
559 return state;
560
561 const auto *ER = dyn_cast<ElementRegion>(R);
562 if (!ER)
563 return state;
564
565 // Get the index of the accessed element.
566 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
567 if (!Idx)
568 return state;
569
570 // Get the size of the array.
571 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
572 DefinedOrUnknownSVal Size =
573 getDynamicExtent(state, superReg, C.getSValBuilder());
574
575 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
576 if (StOutBound && !StInBound) {
577 if (!OutOfBounds.isEnabled())
578 return nullptr;
579
580 ErrorMessage Message =
581 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
582 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
583 return nullptr;
584 }
585
586 // Array bound check succeeded. From this point forward the array bound
587 // should always succeed.
588 return StInBound;
589}
590
592CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
593 AnyArgExpr Buffer, SizeArgExpr Size,
594 AccessKind Access, CharKind CK) const {
595 // If a previous check has failed, propagate the failure.
596 if (!State)
597 return nullptr;
598
599 SValBuilder &svalBuilder = C.getSValBuilder();
600 ASTContext &Ctx = svalBuilder.getContext();
601
602 QualType SizeTy = Size.Expression->getType();
603 QualType PtrTy = getCharPtrType(Ctx, CK);
604
605 // Check that the first buffer is non-null.
606 SVal BufVal = C.getSVal(Buffer.Expression);
607 State = checkNonNull(C, State, Buffer, BufVal);
608 if (!State)
609 return nullptr;
610
611 // If out-of-bounds checking is turned off, skip the rest.
612 if (!OutOfBounds.isEnabled())
613 return State;
614
615 SVal BufStart =
616 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
617
618 // Check if the first byte of the buffer is accessible.
619 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
620
621 if (!State)
622 return nullptr;
623
624 // Get the access length and make sure it is known.
625 // FIXME: This assumes the caller has already checked that the access length
626 // is positive. And that it's unsigned.
627 SVal LengthVal = C.getSVal(Size.Expression);
628 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
629 if (!Length)
630 return State;
631
632 // Compute the offset of the last element to be accessed: size-1.
633 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
634 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
635 if (Offset.isUnknown())
636 return nullptr;
637 NonLoc LastOffset = Offset.castAs<NonLoc>();
638
639 // Check that the first buffer is sufficiently long.
640 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
641
642 SVal BufEnd =
643 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
644 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
645 if (Access == AccessKind::read)
646 State = checkInit(C, State, Buffer, BufEnd, *Length);
647
648 // If the buffer isn't large enough, abort.
649 if (!State)
650 return nullptr;
651 }
652
653 // Large enough or not, return this state!
654 return State;
655}
656
657ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
658 ProgramStateRef state,
659 SizeArgExpr Size, AnyArgExpr First,
660 AnyArgExpr Second,
661 CharKind CK) const {
662 if (!BufferOverlap.isEnabled())
663 return state;
664
665 // Do a simple check for overlap: if the two arguments are from the same
666 // buffer, see if the end of the first is greater than the start of the second
667 // or vice versa.
668
669 // If a previous check has failed, propagate the failure.
670 if (!state)
671 return nullptr;
672
673 ProgramStateRef stateTrue, stateFalse;
674
675 if (!First.Expression->getType()->isAnyPointerType() ||
676 !Second.Expression->getType()->isAnyPointerType())
677 return state;
678
679 // Assume different address spaces cannot overlap.
680 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
681 Second.Expression->getType()->getPointeeType().getAddressSpace())
682 return state;
683
684 // Get the buffer values and make sure they're known locations.
685 const LocationContext *LCtx = C.getLocationContext();
686 SVal firstVal = state->getSVal(First.Expression, LCtx);
687 SVal secondVal = state->getSVal(Second.Expression, LCtx);
688
689 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
690 if (!firstLoc)
691 return state;
692
693 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
694 if (!secondLoc)
695 return state;
696
697 // Are the two values the same?
698 SValBuilder &svalBuilder = C.getSValBuilder();
699 std::tie(stateTrue, stateFalse) =
700 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
701
702 if (stateTrue && !stateFalse) {
703 // If the values are known to be equal, that's automatically an overlap.
704 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
705 return nullptr;
706 }
707
708 // assume the two expressions are not equal.
709 assert(stateFalse);
710 state = stateFalse;
711
712 // Which value comes first?
713 QualType cmpTy = svalBuilder.getConditionType();
714 SVal reverse =
715 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
716 std::optional<DefinedOrUnknownSVal> reverseTest =
717 reverse.getAs<DefinedOrUnknownSVal>();
718 if (!reverseTest)
719 return state;
720
721 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
722 if (stateTrue) {
723 if (stateFalse) {
724 // If we don't know which one comes first, we can't perform this test.
725 return state;
726 } else {
727 // Switch the values so that firstVal is before secondVal.
728 std::swap(firstLoc, secondLoc);
729
730 // Switch the Exprs as well, so that they still correspond.
731 std::swap(First, Second);
732 }
733 }
734
735 // Get the length, and make sure it too is known.
736 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
737 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
738 if (!Length)
739 return state;
740
741 // Convert the first buffer's start address to char*.
742 // Bail out if the cast fails.
743 ASTContext &Ctx = svalBuilder.getContext();
744 QualType CharPtrTy = getCharPtrType(Ctx, CK);
745 SVal FirstStart =
746 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
747 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
748 if (!FirstStartLoc)
749 return state;
750
751 // Compute the end of the first buffer. Bail out if THAT fails.
752 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
753 *Length, CharPtrTy);
754 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
755 if (!FirstEndLoc)
756 return state;
757
758 // Is the end of the first buffer past the start of the second buffer?
759 SVal Overlap =
760 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
761 std::optional<DefinedOrUnknownSVal> OverlapTest =
762 Overlap.getAs<DefinedOrUnknownSVal>();
763 if (!OverlapTest)
764 return state;
765
766 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
767
768 if (stateTrue && !stateFalse) {
769 // Overlap!
770 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
771 return nullptr;
772 }
773
774 // assume the two expressions don't overlap.
775 assert(stateFalse);
776 return stateFalse;
777}
778
779void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
780 const Stmt *First, const Stmt *Second) const {
781 ExplodedNode *N = C.generateErrorNode(state);
782 if (!N)
783 return;
784
785 // Generate a report for this bug.
786 auto report = std::make_unique<PathSensitiveBugReport>(
787 BufferOverlap, "Arguments must not be overlapping buffers", N);
788 report->addRange(First->getSourceRange());
789 report->addRange(Second->getSourceRange());
790
791 C.emitReport(std::move(report));
792}
793
794void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
795 const Stmt *S, StringRef WarningMsg) const {
796 if (ExplodedNode *N = C.generateErrorNode(State)) {
797 auto Report =
798 std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
799 Report->addRange(S->getSourceRange());
800 if (const auto *Ex = dyn_cast<Expr>(S))
802 C.emitReport(std::move(Report));
803 }
804}
805
806void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
807 ProgramStateRef State,
808 const Expr *E, const MemRegion *R,
809 StringRef Msg) const {
810 if (ExplodedNode *N = C.generateErrorNode(State)) {
811 auto Report =
812 std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
813 Report->addNote("Other elements might also be undefined",
814 Report->getLocation());
815 Report->addRange(E->getSourceRange());
817 Report->addVisitor<NoStoreFuncVisitor>(R->castAs<SubRegion>());
818 C.emitReport(std::move(Report));
819 }
820}
821
822void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
823 ProgramStateRef State, const Stmt *S,
824 StringRef WarningMsg) const {
825 if (ExplodedNode *N = C.generateErrorNode(State)) {
826 // FIXME: It would be nice to eventually make this diagnostic more clear,
827 // e.g., by referencing the original declaration or by saying *why* this
828 // reference is outside the range.
829 auto Report =
830 std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
831 Report->addRange(S->getSourceRange());
832 C.emitReport(std::move(Report));
833 }
834}
835
836void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
837 const Stmt *S,
838 StringRef WarningMsg) const {
839 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
840 auto Report =
841 std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
842
843 Report->addRange(S->getSourceRange());
844 C.emitReport(std::move(Report));
845 }
846}
847
848ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
849 ProgramStateRef state,
850 NonLoc left,
851 NonLoc right) const {
852 // If out-of-bounds checking is turned off, skip the rest.
853 if (!OutOfBounds.isEnabled())
854 return state;
855
856 // If a previous check has failed, propagate the failure.
857 if (!state)
858 return nullptr;
859
860 SValBuilder &svalBuilder = C.getSValBuilder();
861 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
862
863 QualType sizeTy = svalBuilder.getContext().getSizeType();
864 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
865 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
866
867 SVal maxMinusRight;
868 if (isa<nonloc::ConcreteInt>(right)) {
869 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
870 sizeTy);
871 } else {
872 // Try switching the operands. (The order of these two assignments is
873 // important!)
874 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
875 sizeTy);
876 left = right;
877 }
878
879 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
880 QualType cmpTy = svalBuilder.getConditionType();
881 // If left > max - right, we have an overflow.
882 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
883 *maxMinusRightNL, cmpTy);
884
885 auto [StateOverflow, StateOkay] =
886 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
887
888 if (StateOverflow && !StateOkay) {
889 // On this path the analyzer is convinced that the addition of these two
890 // values would overflow `size_t` which must be caused by the inaccuracy
891 // of our modeling because this method is called in situations where the
892 // summands are size/length values which are much less than SIZE_MAX. To
893 // avoid false positives let's just sink this invalid path.
894 C.addSink(StateOverflow);
895 return nullptr;
896 }
897
898 // From now on, assume an overflow didn't occur.
899 assert(StateOkay);
900 state = StateOkay;
901 }
902
903 return state;
904}
905
906ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
907 const MemRegion *MR,
908 SVal strLength) {
909 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
910
911 MR = MR->StripCasts();
912
913 switch (MR->getKind()) {
914 case MemRegion::StringRegionKind:
915 // FIXME: This can happen if we strcpy() into a string region. This is
916 // undefined [C99 6.4.5p6], but we should still warn about it.
917 return state;
918
919 case MemRegion::SymbolicRegionKind:
920 case MemRegion::AllocaRegionKind:
921 case MemRegion::NonParamVarRegionKind:
922 case MemRegion::ParamVarRegionKind:
923 case MemRegion::FieldRegionKind:
924 case MemRegion::ObjCIvarRegionKind:
925 // These are the types we can currently track string lengths for.
926 break;
927
928 case MemRegion::ElementRegionKind:
929 // FIXME: Handle element regions by upper-bounding the parent region's
930 // string length.
931 return state;
932
933 default:
934 // Other regions (mostly non-data) can't have a reliable C string length.
935 // For now, just ignore the change.
936 // FIXME: These are rare but not impossible. We should output some kind of
937 // warning for things like strcpy((char[]){'a', 0}, "b");
938 return state;
939 }
940
941 if (strLength.isUnknown())
942 return state->remove<CStringLength>(MR);
943
944 return state->set<CStringLength>(MR, strLength);
945}
946
947SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
948 ProgramStateRef &state,
949 const Expr *Ex,
950 const MemRegion *MR,
951 bool hypothetical) {
952 if (!hypothetical) {
953 // If there's a recorded length, go ahead and return it.
954 const SVal *Recorded = state->get<CStringLength>(MR);
955 if (Recorded)
956 return *Recorded;
957 }
958
959 // Otherwise, get a new symbol and update the state.
960 SValBuilder &svalBuilder = C.getSValBuilder();
961 QualType sizeTy = svalBuilder.getContext().getSizeType();
962 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
963 MR, Ex, sizeTy,
964 C.getLocationContext(),
965 C.blockCount());
966
967 if (!hypothetical) {
968 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
969 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
970 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
971 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
972 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
973 std::optional<APSIntPtr> maxLengthInt =
974 BVF.evalAPSInt(BO_Div, maxValInt, fourInt);
975 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
976 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
977 svalBuilder.getConditionType());
978 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
979 }
980 state = state->set<CStringLength>(MR, strLength);
981 }
982
983 return strLength;
984}
985
986SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
987 const Expr *Ex, SVal Buf,
988 bool hypothetical) const {
989 const MemRegion *MR = Buf.getAsRegion();
990 if (!MR) {
991 // If we can't get a region, see if it's something we /know/ isn't a
992 // C string. In the context of locations, the only time we can issue such
993 // a warning is for labels.
994 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
995 if (NotNullTerm.isEnabled()) {
996 SmallString<120> buf;
997 llvm::raw_svector_ostream os(buf);
998 assert(CurrentFunctionDescription);
999 os << "Argument to " << CurrentFunctionDescription
1000 << " is the address of the label '" << Label->getLabel()->getName()
1001 << "', which is not a null-terminated string";
1002
1003 emitNotCStringBug(C, state, Ex, os.str());
1004 }
1005 return UndefinedVal();
1006 }
1007
1008 // If it's not a region and not a label, give up.
1009 return UnknownVal();
1010 }
1011
1012 // If we have a region, strip casts from it and see if we can figure out
1013 // its length. For anything we can't figure out, just return UnknownVal.
1014 MR = MR->StripCasts();
1015
1016 switch (MR->getKind()) {
1017 case MemRegion::StringRegionKind: {
1018 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1019 // so we can assume that the byte length is the correct C string length.
1020 SValBuilder &svalBuilder = C.getSValBuilder();
1021 QualType sizeTy = svalBuilder.getContext().getSizeType();
1022 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
1023 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
1024 }
1025 case MemRegion::NonParamVarRegionKind: {
1026 // If we have a global constant with a string literal initializer,
1027 // compute the initializer's length.
1028 const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1029 if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1030 if (const Expr *Init = Decl->getInit()) {
1031 if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
1032 SValBuilder &SvalBuilder = C.getSValBuilder();
1033 QualType SizeTy = SvalBuilder.getContext().getSizeType();
1034 return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
1035 }
1036 }
1037 }
1038 [[fallthrough]];
1039 }
1040 case MemRegion::SymbolicRegionKind:
1041 case MemRegion::AllocaRegionKind:
1042 case MemRegion::ParamVarRegionKind:
1043 case MemRegion::FieldRegionKind:
1044 case MemRegion::ObjCIvarRegionKind:
1045 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1046 case MemRegion::CompoundLiteralRegionKind:
1047 // FIXME: Can we track this? Is it necessary?
1048 return UnknownVal();
1049 case MemRegion::ElementRegionKind:
1050 // FIXME: How can we handle this? It's not good enough to subtract the
1051 // offset from the base string length; consider "123\x00567" and &a[5].
1052 return UnknownVal();
1053 default:
1054 // Other regions (mostly non-data) can't have a reliable C string length.
1055 // In this case, an error is emitted and UndefinedVal is returned.
1056 // The caller should always be prepared to handle this case.
1057 if (NotNullTerm.isEnabled()) {
1058 SmallString<120> buf;
1059 llvm::raw_svector_ostream os(buf);
1060
1061 assert(CurrentFunctionDescription);
1062 os << "Argument to " << CurrentFunctionDescription << " is ";
1063
1064 if (SummarizeRegion(os, C.getASTContext(), MR))
1065 os << ", which is not a null-terminated string";
1066 else
1067 os << "not a null-terminated string";
1068
1069 emitNotCStringBug(C, state, Ex, os.str());
1070 }
1071 return UndefinedVal();
1072 }
1073}
1074
1075const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1076 ProgramStateRef &state, const Expr *expr, SVal val) const {
1077
1078 // Get the memory region pointed to by the val.
1079 const MemRegion *bufRegion = val.getAsRegion();
1080 if (!bufRegion)
1081 return nullptr;
1082
1083 // Strip casts off the memory region.
1084 bufRegion = bufRegion->StripCasts();
1085
1086 // Cast the memory region to a string region.
1087 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1088 if (!strRegion)
1089 return nullptr;
1090
1091 // Return the actual string in the string region.
1092 return strRegion->getStringLiteral();
1093}
1094
1095bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1096 SVal BufVal, QualType BufTy,
1097 SVal LengthVal, QualType LengthTy) {
1098 // If we do not know that the buffer is long enough we return 'true'.
1099 // Otherwise the parent region of this field region would also get
1100 // invalidated, which would lead to warnings based on an unknown state.
1101
1102 if (LengthVal.isUnknown())
1103 return false;
1104
1105 // Originally copied from CheckBufferAccess and CheckLocation.
1106 SValBuilder &SB = C.getSValBuilder();
1107 ASTContext &Ctx = C.getASTContext();
1108
1109 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1110
1111 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1112 if (!Length)
1113 return true; // cf top comment.
1114
1115 // Compute the offset of the last element to be accessed: size-1.
1116 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1117 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1118 if (Offset.isUnknown())
1119 return true; // cf top comment
1120 NonLoc LastOffset = Offset.castAs<NonLoc>();
1121
1122 // Check that the first buffer is sufficiently long.
1123 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1124 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1125 if (!BufLoc)
1126 return true; // cf top comment.
1127
1128 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1129
1130 // Check for out of bound array element access.
1131 const MemRegion *R = BufEnd.getAsRegion();
1132 if (!R)
1133 return true; // cf top comment.
1134
1135 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1136 if (!ER)
1137 return true; // cf top comment.
1138
1139 // Support library functions defined with non-default address spaces
1140 assert(ER->getValueType()->getCanonicalTypeUnqualified() ==
1141 C.getASTContext().CharTy &&
1142 "isFirstBufInBound should only be called with char* ElementRegions");
1143
1144 // Get the size of the array.
1145 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1146 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1147
1148 // Get the index of the accessed element.
1149 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1150
1151 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1152
1153 return static_cast<bool>(StInBound);
1154}
1155
1156ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1157 CheckerContext &C, ProgramStateRef S, const Expr *BufE,
1158 ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) {
1159 auto InvalidationTraitOperations =
1160 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1161 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1162 // If destination buffer is a field region and access is in bound, do
1163 // not invalidate its super region.
1164 if (MemRegion::FieldRegionKind == R->getKind() &&
1165 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1166 ITraits.setTrait(
1167 R,
1169 }
1170 return false;
1171 };
1172
1173 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1174}
1175
1177CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1178 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1179 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1180 const MemRegion *R) {
1181 return isa<FieldRegion>(R);
1182 };
1183
1184 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1185}
1186
1187ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1188 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1189 auto InvalidationTraitOperations =
1190 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1191 if (MemRegion::FieldRegionKind == R->getKind())
1192 ITraits.setTrait(
1193 R,
1195 return false;
1196 };
1197
1198 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1199}
1200
1201ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1203 ConstCFGElementRef Elem,
1204 SVal BufV) {
1205 auto InvalidationTraitOperations =
1206 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1207 ITraits.setTrait(
1208 R->getBaseRegion(),
1210 ITraits.setTrait(R,
1212 return true;
1213 };
1214
1215 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1216}
1217
1218ProgramStateRef CStringChecker::invalidateBufferAux(
1219 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
1220 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1221 const MemRegion *)>
1222 InvalidationTraitOperations) {
1223 std::optional<Loc> L = V.getAs<Loc>();
1224 if (!L)
1225 return State;
1226
1227 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1228 // some assumptions about the value that CFRefCount can't. Even so, it should
1229 // probably be refactored.
1230 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1231 const MemRegion *R = MR->getRegion()->StripCasts();
1232
1233 // Are we dealing with an ElementRegion? If so, we should be invalidating
1234 // the super-region.
1235 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1236 R = ER->getSuperRegion();
1237 // FIXME: What about layers of ElementRegions?
1238 }
1239
1240 // Invalidate this region.
1241 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1242 RegionAndSymbolInvalidationTraits ITraits;
1243 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1244
1245 return State->invalidateRegions(R, Elem, C.blockCount(), LCtx,
1246 CausesPointerEscape, nullptr, nullptr,
1247 &ITraits);
1248 }
1249
1250 // If we have a non-region value by chance, just remove the binding.
1251 // FIXME: is this necessary or correct? This handles the non-Region
1252 // cases. Is it ever valid to store to these?
1253 return State->killBinding(*L);
1254}
1255
1256bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1257 const MemRegion *MR) {
1258 switch (MR->getKind()) {
1259 case MemRegion::FunctionCodeRegionKind: {
1260 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1261 os << "the address of the function '" << *FD << '\'';
1262 else
1263 os << "the address of a function";
1264 return true;
1265 }
1266 case MemRegion::BlockCodeRegionKind:
1267 os << "block text";
1268 return true;
1269 case MemRegion::BlockDataRegionKind:
1270 os << "a block";
1271 return true;
1272 case MemRegion::CXXThisRegionKind:
1273 case MemRegion::CXXTempObjectRegionKind:
1274 os << "a C++ temp object of type "
1275 << cast<TypedValueRegion>(MR)->getValueType();
1276 return true;
1277 case MemRegion::NonParamVarRegionKind:
1278 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1279 return true;
1280 case MemRegion::ParamVarRegionKind:
1281 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1282 return true;
1283 case MemRegion::FieldRegionKind:
1284 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1285 return true;
1286 case MemRegion::ObjCIvarRegionKind:
1287 os << "an instance variable of type "
1288 << cast<TypedValueRegion>(MR)->getValueType();
1289 return true;
1290 default:
1291 return false;
1292 }
1293}
1294
1295bool CStringChecker::memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
1296 SVal CharVal, const Expr *Size,
1297 CheckerContext &C, ProgramStateRef &State) {
1298 SVal MemVal = C.getSVal(DstBuffer);
1299 SVal SizeVal = C.getSVal(Size);
1300 const MemRegion *MR = MemVal.getAsRegion();
1301 if (!MR)
1302 return false;
1303
1304 // We're about to model memset by producing a "default binding" in the Store.
1305 // Our current implementation - RegionStore - doesn't support default bindings
1306 // that don't cover the whole base region. So we should first get the offset
1307 // and the base region to figure out whether the offset of buffer is 0.
1308 RegionOffset Offset = MR->getAsOffset();
1309 const MemRegion *BR = Offset.getRegion();
1310
1311 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1312 if (!SizeNL)
1313 return false;
1314
1315 SValBuilder &svalBuilder = C.getSValBuilder();
1316 ASTContext &Ctx = C.getASTContext();
1317
1318 // void *memset(void *dest, int ch, size_t count);
1319 // For now we can only handle the case of offset is 0 and concrete char value.
1320 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1321 Offset.getOffset() == 0) {
1322 // Get the base region's size.
1323 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1324
1325 ProgramStateRef StateWholeReg, StateNotWholeReg;
1326 std::tie(StateWholeReg, StateNotWholeReg) =
1327 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1328
1329 // With the semantic of 'memset()', we should convert the CharVal to
1330 // unsigned char.
1331 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1332
1333 ProgramStateRef StateNullChar, StateNonNullChar;
1334 std::tie(StateNullChar, StateNonNullChar) =
1335 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1336
1337 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1338 !StateNonNullChar) {
1339 // If the 'memset()' acts on the whole region of destination buffer and
1340 // the value of the second argument of 'memset()' is zero, bind the second
1341 // argument's value to the destination buffer with 'default binding'.
1342 // FIXME: Since there is no perfect way to bind the non-zero character, we
1343 // can only deal with zero value here. In the future, we need to deal with
1344 // the binding of non-zero value in the case of whole region.
1345 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1346 C.getLocationContext());
1347 } else {
1348 // If the destination buffer's extent is not equal to the value of
1349 // third argument, just invalidate buffer.
1350 State = invalidateDestinationBufferBySize(
1351 C, State, DstBuffer, Elem, MemVal, SizeVal, Size->getType());
1352 }
1353
1354 if (StateNullChar && !StateNonNullChar) {
1355 // If the value of the second argument of 'memset()' is zero, set the
1356 // string length of destination buffer to 0 directly.
1357 State = setCStringLength(State, MR,
1358 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1359 } else if (!StateNullChar && StateNonNullChar) {
1360 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1361 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1362 C.getLocationContext(), C.blockCount());
1363
1364 // If the value of second argument is not zero, then the string length
1365 // is at least the size argument.
1366 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1367 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1368
1369 State = setCStringLength(
1370 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1371 MR, NewStrLen);
1372 }
1373 } else {
1374 // If the offset is not zero and char value is not concrete, we can do
1375 // nothing but invalidate the buffer.
1376 State = invalidateDestinationBufferBySize(C, State, DstBuffer, Elem, MemVal,
1377 SizeVal, Size->getType());
1378 }
1379 return true;
1380}
1381
1382//===----------------------------------------------------------------------===//
1383// evaluation of individual function calls.
1384//===----------------------------------------------------------------------===//
1385
1386void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1387 ProgramStateRef state, SizeArgExpr Size,
1388 DestinationArgExpr Dest,
1389 SourceArgExpr Source, bool Restricted,
1390 bool IsMempcpy, CharKind CK) const {
1391 CurrentFunctionDescription = "memory copy function";
1392
1393 // See if the size argument is zero.
1394 const LocationContext *LCtx = C.getLocationContext();
1395 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1396 QualType sizeTy = Size.Expression->getType();
1397
1398 ProgramStateRef stateZeroSize, stateNonZeroSize;
1399 std::tie(stateZeroSize, stateNonZeroSize) =
1400 assumeZero(C, state, sizeVal, sizeTy);
1401
1402 // Get the value of the Dest.
1403 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1404
1405 // If the size is zero, there won't be any actual memory access, so
1406 // just bind the return value to the destination buffer and return.
1407 if (stateZeroSize && !stateNonZeroSize) {
1408 stateZeroSize =
1409 stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1410 C.addTransition(stateZeroSize);
1411 return;
1412 }
1413
1414 // If the size can be nonzero, we have to check the other arguments.
1415 if (stateNonZeroSize) {
1416 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1417 // to the size of the destination buffer, then emit a warning
1418 // that an attacker may provoke a buffer overflow error.
1419 state = stateNonZeroSize;
1420
1421 // Ensure the destination is not null. If it is NULL there will be a
1422 // NULL pointer dereference.
1423 state = checkNonNull(C, state, Dest, destVal);
1424 if (!state)
1425 return;
1426
1427 // Get the value of the Src.
1428 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1429
1430 // Ensure the source is not null. If it is NULL there will be a
1431 // NULL pointer dereference.
1432 state = checkNonNull(C, state, Source, srcVal);
1433 if (!state)
1434 return;
1435
1436 // Ensure the accesses are valid and that the buffers do not overlap.
1437 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1438 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1439
1440 if (Restricted)
1441 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1442
1443 if (!state)
1444 return;
1445
1446 // If this is mempcpy, get the byte after the last byte copied and
1447 // bind the expr.
1448 if (IsMempcpy) {
1449 // Get the byte after the last byte copied.
1450 SValBuilder &SvalBuilder = C.getSValBuilder();
1451 ASTContext &Ctx = SvalBuilder.getContext();
1452 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1453 SVal DestRegCharVal =
1454 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1455 SVal lastElement = C.getSValBuilder().evalBinOp(
1456 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1457 // If we don't know how much we copied, we can at least
1458 // conjure a return value for later.
1459 if (lastElement.isUnknown())
1460 lastElement = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1461
1462 // The byte after the last byte copied is the return value.
1463 state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1464 } else {
1465 // All other copies return the destination buffer.
1466 // (Well, bcopy() has a void return type, but this won't hurt.)
1467 state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1468 }
1469
1470 // Invalidate the destination (regular invalidation without pointer-escaping
1471 // the address of the top-level region).
1472 // FIXME: Even if we can't perfectly model the copy, we should see if we
1473 // can use LazyCompoundVals to copy the source values into the destination.
1474 // This would probably remove any existing bindings past the end of the
1475 // copied region, but that's still an improvement over blank invalidation.
1476 state = invalidateDestinationBufferBySize(
1477 C, state, Dest.Expression, Call.getCFGElementRef(),
1478 C.getSVal(Dest.Expression), sizeVal, Size.Expression->getType());
1479
1480 // Invalidate the source (const-invalidation without const-pointer-escaping
1481 // the address of the top-level region).
1482 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(),
1483 C.getSVal(Source.Expression));
1484
1485 C.addTransition(state);
1486 }
1487}
1488
1489void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1490 CharKind CK) const {
1491 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1492 // The return value is the address of the destination buffer.
1493 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1494 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1495 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1496
1497 ProgramStateRef State = C.getState();
1498
1499 constexpr bool IsRestricted = true;
1500 constexpr bool IsMempcpy = false;
1501 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1502}
1503
1504void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1505 CharKind CK) const {
1506 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1507 // The return value is a pointer to the byte following the last written byte.
1508 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1509 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1510 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1511
1512 constexpr bool IsRestricted = true;
1513 constexpr bool IsMempcpy = true;
1514 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1515 IsMempcpy, CK);
1516}
1517
1518void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1519 CharKind CK) const {
1520 // void *memmove(void *dst, const void *src, size_t n);
1521 // The return value is the address of the destination buffer.
1522 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1523 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1524 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1525
1526 constexpr bool IsRestricted = false;
1527 constexpr bool IsMempcpy = false;
1528 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1529 IsMempcpy, CK);
1530}
1531
1532void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1533 // void bcopy(const void *src, void *dst, size_t n);
1534 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1535 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1536 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1537
1538 constexpr bool IsRestricted = false;
1539 constexpr bool IsMempcpy = false;
1540 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1541 IsMempcpy, CharKind::Regular);
1542}
1543
1544void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1545 CharKind CK) const {
1546 // int memcmp(const void *s1, const void *s2, size_t n);
1547 CurrentFunctionDescription = "memory comparison function";
1548
1549 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1550 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1551 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1552
1553 ProgramStateRef State = C.getState();
1554 SValBuilder &Builder = C.getSValBuilder();
1555 const LocationContext *LCtx = C.getLocationContext();
1556
1557 // See if the size argument is zero.
1558 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1559 QualType sizeTy = Size.Expression->getType();
1560
1561 ProgramStateRef stateZeroSize, stateNonZeroSize;
1562 std::tie(stateZeroSize, stateNonZeroSize) =
1563 assumeZero(C, State, sizeVal, sizeTy);
1564
1565 // If the size can be zero, the result will be 0 in that case, and we don't
1566 // have to check either of the buffers.
1567 if (stateZeroSize) {
1568 State = stateZeroSize;
1569 State = State->BindExpr(Call.getOriginExpr(), LCtx,
1570 Builder.makeZeroVal(Call.getResultType()));
1571 C.addTransition(State);
1572 }
1573
1574 // If the size can be nonzero, we have to check the other arguments.
1575 if (stateNonZeroSize) {
1576 State = stateNonZeroSize;
1577 // If we know the two buffers are the same, we know the result is 0.
1578 // First, get the two buffers' addresses. Another checker will have already
1579 // made sure they're not undefined.
1580 DefinedOrUnknownSVal LV =
1581 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1582 DefinedOrUnknownSVal RV =
1583 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1584
1585 // See if they are the same.
1586 ProgramStateRef SameBuffer, NotSameBuffer;
1587 std::tie(SameBuffer, NotSameBuffer) =
1588 State->assume(Builder.evalEQ(State, LV, RV));
1589
1590 // If the two arguments are the same buffer, we know the result is 0,
1591 // and we only need to check one size.
1592 if (SameBuffer && !NotSameBuffer) {
1593 State = SameBuffer;
1594 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1595 if (State) {
1596 State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1597 Builder.makeZeroVal(Call.getResultType()));
1598 C.addTransition(State);
1599 }
1600 return;
1601 }
1602
1603 // If the two arguments might be different buffers, we have to check
1604 // the size of both of them.
1605 assert(NotSameBuffer);
1606 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1607 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1608 if (State) {
1609 // The return value is the comparison result, which we don't know.
1610 SVal CmpV = Builder.conjureSymbolVal(Call, C.blockCount());
1611 State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1612 C.addTransition(State);
1613 }
1614 }
1615}
1616
1617void CStringChecker::evalstrLength(CheckerContext &C,
1618 const CallEvent &Call) const {
1619 // size_t strlen(const char *s);
1620 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1621}
1622
1623void CStringChecker::evalstrnLength(CheckerContext &C,
1624 const CallEvent &Call) const {
1625 // size_t strnlen(const char *s, size_t maxlen);
1626 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1627}
1628
1629void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1630 const CallEvent &Call,
1631 bool IsStrnlen) const {
1632 CurrentFunctionDescription = "string length function";
1633 ProgramStateRef state = C.getState();
1634 const LocationContext *LCtx = C.getLocationContext();
1635
1636 if (IsStrnlen) {
1637 const Expr *maxlenExpr = Call.getArgExpr(1);
1638 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1639
1640 ProgramStateRef stateZeroSize, stateNonZeroSize;
1641 std::tie(stateZeroSize, stateNonZeroSize) =
1642 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1643
1644 // If the size can be zero, the result will be 0 in that case, and we don't
1645 // have to check the string itself.
1646 if (stateZeroSize) {
1647 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1648 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1649 C.addTransition(stateZeroSize);
1650 }
1651
1652 // If the size is GUARANTEED to be zero, we're done!
1653 if (!stateNonZeroSize)
1654 return;
1655
1656 // Otherwise, record the assumption that the size is nonzero.
1657 state = stateNonZeroSize;
1658 }
1659
1660 // Check that the string argument is non-null.
1661 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1662 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1663 state = checkNonNull(C, state, Arg, ArgVal);
1664
1665 if (!state)
1666 return;
1667
1668 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1669
1670 // If the argument isn't a valid C string, there's no valid state to
1671 // transition to.
1672 if (strLength.isUndef())
1673 return;
1674
1675 DefinedOrUnknownSVal result = UnknownVal();
1676
1677 // If the check is for strnlen() then bind the return value to no more than
1678 // the maxlen value.
1679 if (IsStrnlen) {
1680 QualType cmpTy = C.getSValBuilder().getConditionType();
1681
1682 // It's a little unfortunate to be getting this again,
1683 // but it's not that expensive...
1684 const Expr *maxlenExpr = Call.getArgExpr(1);
1685 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1686
1687 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1688 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1689
1690 if (strLengthNL && maxlenValNL) {
1691 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1692
1693 // Check if the strLength is greater than the maxlen.
1694 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1695 C.getSValBuilder()
1696 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1697 .castAs<DefinedOrUnknownSVal>());
1698
1699 if (stateStringTooLong && !stateStringNotTooLong) {
1700 // If the string is longer than maxlen, return maxlen.
1701 result = *maxlenValNL;
1702 } else if (stateStringNotTooLong && !stateStringTooLong) {
1703 // If the string is shorter than maxlen, return its length.
1704 result = *strLengthNL;
1705 }
1706 }
1707
1708 if (result.isUnknown()) {
1709 // If we don't have enough information for a comparison, there's
1710 // no guarantee the full string length will actually be returned.
1711 // All we know is the return value is the min of the string length
1712 // and the limit. This is better than nothing.
1713 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1714 NonLoc resultNL = result.castAs<NonLoc>();
1715
1716 if (strLengthNL) {
1717 state = state->assume(C.getSValBuilder().evalBinOpNN(
1718 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1719 .castAs<DefinedOrUnknownSVal>(), true);
1720 }
1721
1722 if (maxlenValNL) {
1723 state = state->assume(C.getSValBuilder().evalBinOpNN(
1724 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1725 .castAs<DefinedOrUnknownSVal>(), true);
1726 }
1727 }
1728
1729 } else {
1730 // This is a plain strlen(), not strnlen().
1731 result = strLength.castAs<DefinedOrUnknownSVal>();
1732
1733 // If we don't know the length of the string, conjure a return
1734 // value, so it can be used in constraints, at least.
1735 if (result.isUnknown()) {
1736 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1737 }
1738 }
1739
1740 // Bind the return value.
1741 assert(!result.isUnknown() && "Should have conjured a value by now");
1742 state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1743 C.addTransition(state);
1744}
1745
1746void CStringChecker::evalStrcpy(CheckerContext &C,
1747 const CallEvent &Call) const {
1748 // char *strcpy(char *restrict dst, const char *restrict src);
1749 evalStrcpyCommon(C, Call,
1750 /* ReturnEnd = */ false,
1751 /* IsBounded = */ false,
1752 /* appendK = */ ConcatFnKind::none);
1753}
1754
1755void CStringChecker::evalStrncpy(CheckerContext &C,
1756 const CallEvent &Call) const {
1757 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1758 evalStrcpyCommon(C, Call,
1759 /* ReturnEnd = */ false,
1760 /* IsBounded = */ true,
1761 /* appendK = */ ConcatFnKind::none);
1762}
1763
1764void CStringChecker::evalStpcpy(CheckerContext &C,
1765 const CallEvent &Call) const {
1766 // char *stpcpy(char *restrict dst, const char *restrict src);
1767 evalStrcpyCommon(C, Call,
1768 /* ReturnEnd = */ true,
1769 /* IsBounded = */ false,
1770 /* appendK = */ ConcatFnKind::none);
1771}
1772
1773void CStringChecker::evalStrlcpy(CheckerContext &C,
1774 const CallEvent &Call) const {
1775 // size_t strlcpy(char *dest, const char *src, size_t size);
1776 evalStrcpyCommon(C, Call,
1777 /* ReturnEnd = */ true,
1778 /* IsBounded = */ true,
1779 /* appendK = */ ConcatFnKind::none,
1780 /* returnPtr = */ false);
1781}
1782
1783void CStringChecker::evalStrcat(CheckerContext &C,
1784 const CallEvent &Call) const {
1785 // char *strcat(char *restrict s1, const char *restrict s2);
1786 evalStrcpyCommon(C, Call,
1787 /* ReturnEnd = */ false,
1788 /* IsBounded = */ false,
1789 /* appendK = */ ConcatFnKind::strcat);
1790}
1791
1792void CStringChecker::evalStrncat(CheckerContext &C,
1793 const CallEvent &Call) const {
1794 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1795 evalStrcpyCommon(C, Call,
1796 /* ReturnEnd = */ false,
1797 /* IsBounded = */ true,
1798 /* appendK = */ ConcatFnKind::strcat);
1799}
1800
1801void CStringChecker::evalStrlcat(CheckerContext &C,
1802 const CallEvent &Call) const {
1803 // size_t strlcat(char *dst, const char *src, size_t size);
1804 // It will append at most size - strlen(dst) - 1 bytes,
1805 // NULL-terminating the result.
1806 evalStrcpyCommon(C, Call,
1807 /* ReturnEnd = */ false,
1808 /* IsBounded = */ true,
1809 /* appendK = */ ConcatFnKind::strlcat,
1810 /* returnPtr = */ false);
1811}
1812
1813void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1814 bool ReturnEnd, bool IsBounded,
1815 ConcatFnKind appendK,
1816 bool returnPtr) const {
1817 if (appendK == ConcatFnKind::none)
1818 CurrentFunctionDescription = "string copy function";
1819 else
1820 CurrentFunctionDescription = "string concatenation function";
1821
1822 ProgramStateRef state = C.getState();
1823 const LocationContext *LCtx = C.getLocationContext();
1824
1825 // Check that the destination is non-null.
1826 DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1827 SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1828 state = checkNonNull(C, state, Dst, DstVal);
1829 if (!state)
1830 return;
1831
1832 // Check that the source is non-null.
1833 SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1834 SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1835 state = checkNonNull(C, state, srcExpr, srcVal);
1836 if (!state)
1837 return;
1838
1839 // Get the string length of the source.
1840 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1841 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1842
1843 // Get the string length of the destination buffer.
1844 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1845 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1846
1847 // If the source isn't a valid C string, give up.
1848 if (strLength.isUndef())
1849 return;
1850
1851 SValBuilder &svalBuilder = C.getSValBuilder();
1852 QualType cmpTy = svalBuilder.getConditionType();
1853 QualType sizeTy = svalBuilder.getContext().getSizeType();
1854
1855 // These two values allow checking two kinds of errors:
1856 // - actual overflows caused by a source that doesn't fit in the destination
1857 // - potential overflows caused by a bound that could exceed the destination
1858 SVal amountCopied = UnknownVal();
1859 SVal maxLastElementIndex = UnknownVal();
1860 const char *boundWarning = nullptr;
1861
1862 // FIXME: Why do we choose the srcExpr if the access has no size?
1863 // Note that the 3rd argument of the call would be the size parameter.
1864 SizeArgExpr SrcExprAsSizeDummy = {
1865 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1866 state = CheckOverlap(
1867 C, state,
1868 (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1869 Dst, srcExpr);
1870
1871 if (!state)
1872 return;
1873
1874 // If the function is strncpy, strncat, etc... it is bounded.
1875 if (IsBounded) {
1876 // Get the max number of characters to copy.
1877 SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1878 SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1879
1880 // Protect against misdeclared strncpy().
1881 lenVal =
1882 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1883
1884 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1885
1886 // If we know both values, we might be able to figure out how much
1887 // we're copying.
1888 if (strLengthNL && lenValNL) {
1889 switch (appendK) {
1890 case ConcatFnKind::none:
1891 case ConcatFnKind::strcat: {
1892 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1893 // Check if the max number to copy is less than the length of the src.
1894 // If the bound is equal to the source length, strncpy won't null-
1895 // terminate the result!
1896 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1897 svalBuilder
1898 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1899 .castAs<DefinedOrUnknownSVal>());
1900
1901 if (stateSourceTooLong && !stateSourceNotTooLong) {
1902 // Max number to copy is less than the length of the src, so the
1903 // actual strLength copied is the max number arg.
1904 state = stateSourceTooLong;
1905 amountCopied = lenVal;
1906
1907 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1908 // The source buffer entirely fits in the bound.
1909 state = stateSourceNotTooLong;
1910 amountCopied = strLength;
1911 }
1912 break;
1913 }
1914 case ConcatFnKind::strlcat:
1915 if (!dstStrLengthNL)
1916 return;
1917
1918 // amountCopied = min (size - dstLen - 1 , srcLen)
1919 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1920 *dstStrLengthNL, sizeTy);
1921 if (!isa<NonLoc>(freeSpace))
1922 return;
1923 freeSpace =
1924 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1925 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1926 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1927
1928 // While unlikely, it is possible that the subtraction is
1929 // too complex to compute, let's check whether it succeeded.
1930 if (!freeSpaceNL)
1931 return;
1932 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1933 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1934
1935 ProgramStateRef TrueState, FalseState;
1936 std::tie(TrueState, FalseState) =
1937 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1938
1939 // srcStrLength <= size - dstStrLength -1
1940 if (TrueState && !FalseState) {
1941 amountCopied = strLength;
1942 }
1943
1944 // srcStrLength > size - dstStrLength -1
1945 if (!TrueState && FalseState) {
1946 amountCopied = freeSpace;
1947 }
1948
1949 if (TrueState && FalseState)
1950 amountCopied = UnknownVal();
1951 break;
1952 }
1953 }
1954 // We still want to know if the bound is known to be too large.
1955 if (lenValNL) {
1956 switch (appendK) {
1957 case ConcatFnKind::strcat:
1958 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1959
1960 // Get the string length of the destination. If the destination is
1961 // memory that can't have a string length, we shouldn't be copying
1962 // into it anyway.
1963 if (dstStrLength.isUndef())
1964 return;
1965
1966 if (dstStrLengthNL) {
1967 maxLastElementIndex = svalBuilder.evalBinOpNN(
1968 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
1969
1970 boundWarning = "Size argument is greater than the free space in the "
1971 "destination buffer";
1972 }
1973 break;
1974 case ConcatFnKind::none:
1975 case ConcatFnKind::strlcat:
1976 // For strncpy and strlcat, this is just checking
1977 // that lenVal <= sizeof(dst).
1978 // (Yes, strncpy and strncat differ in how they treat termination.
1979 // strncat ALWAYS terminates, but strncpy doesn't.)
1980
1981 // We need a special case for when the copy size is zero, in which
1982 // case strncpy will do no work at all. Our bounds check uses n-1
1983 // as the last element accessed, so n == 0 is problematic.
1984 ProgramStateRef StateZeroSize, StateNonZeroSize;
1985 std::tie(StateZeroSize, StateNonZeroSize) =
1986 assumeZero(C, state, *lenValNL, sizeTy);
1987
1988 // If the size is known to be zero, we're done.
1989 if (StateZeroSize && !StateNonZeroSize) {
1990 if (returnPtr) {
1991 StateZeroSize =
1992 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
1993 } else {
1994 if (appendK == ConcatFnKind::none) {
1995 // strlcpy returns strlen(src)
1996 StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
1997 LCtx, strLength);
1998 } else {
1999 // strlcat returns strlen(src) + strlen(dst)
2000 SVal retSize = svalBuilder.evalBinOp(
2001 state, BO_Add, strLength, dstStrLength, sizeTy);
2002 StateZeroSize =
2003 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2004 }
2005 }
2006 C.addTransition(StateZeroSize);
2007 return;
2008 }
2009
2010 // Otherwise, go ahead and figure out the last element we'll touch.
2011 // We don't record the non-zero assumption here because we can't
2012 // be sure. We won't warn on a possible zero.
2013 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2014 maxLastElementIndex =
2015 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2016 boundWarning = "Size argument is greater than the length of the "
2017 "destination buffer";
2018 break;
2019 }
2020 }
2021 } else {
2022 // The function isn't bounded. The amount copied should match the length
2023 // of the source buffer.
2024 amountCopied = strLength;
2025 }
2026
2027 assert(state);
2028
2029 // This represents the number of characters copied into the destination
2030 // buffer. (It may not actually be the strlen if the destination buffer
2031 // is not terminated.)
2032 SVal finalStrLength = UnknownVal();
2033 SVal strlRetVal = UnknownVal();
2034
2035 if (appendK == ConcatFnKind::none && !returnPtr) {
2036 // strlcpy returns the sizeof(src)
2037 strlRetVal = strLength;
2038 }
2039
2040 // If this is an appending function (strcat, strncat...) then set the
2041 // string length to strlen(src) + strlen(dst) since the buffer will
2042 // ultimately contain both.
2043 if (appendK != ConcatFnKind::none) {
2044 // Get the string length of the destination. If the destination is memory
2045 // that can't have a string length, we shouldn't be copying into it anyway.
2046 if (dstStrLength.isUndef())
2047 return;
2048
2049 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2050 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2051 *dstStrLengthNL, sizeTy);
2052 }
2053
2054 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2055
2056 // If we know both string lengths, we might know the final string length.
2057 if (amountCopiedNL && dstStrLengthNL) {
2058 // Make sure the two lengths together don't overflow a size_t.
2059 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2060 if (!state)
2061 return;
2062
2063 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2064 *dstStrLengthNL, sizeTy);
2065 }
2066
2067 // If we couldn't get a single value for the final string length,
2068 // we can at least bound it by the individual lengths.
2069 if (finalStrLength.isUnknown()) {
2070 // Try to get a "hypothetical" string length symbol, which we can later
2071 // set as a real value if that turns out to be the case.
2072 finalStrLength =
2073 getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2074 assert(!finalStrLength.isUndef());
2075
2076 if (std::optional<NonLoc> finalStrLengthNL =
2077 finalStrLength.getAs<NonLoc>()) {
2078 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2079 // we overwrite dst string with the src
2080 // finalStrLength >= srcStrLength
2081 SVal sourceInResult = svalBuilder.evalBinOpNN(
2082 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2083 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2084 true);
2085 if (!state)
2086 return;
2087 }
2088
2089 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2090 // we extend the dst string with the src
2091 // finalStrLength >= dstStrLength
2092 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2093 *finalStrLengthNL,
2094 *dstStrLengthNL,
2095 cmpTy);
2096 state =
2097 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2098 if (!state)
2099 return;
2100 }
2101 }
2102 }
2103
2104 } else {
2105 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2106 // the final string length will match the input string length.
2107 finalStrLength = amountCopied;
2108 }
2109
2110 SVal Result;
2111
2112 if (returnPtr) {
2113 // The final result of the function will either be a pointer past the last
2114 // copied element, or a pointer to the start of the destination buffer.
2115 Result = (ReturnEnd ? UnknownVal() : DstVal);
2116 } else {
2117 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2118 //strlcpy, strlcat
2119 Result = strlRetVal;
2120 else
2121 Result = finalStrLength;
2122 }
2123
2124 assert(state);
2125
2126 // If the destination is a MemRegion, try to check for a buffer overflow and
2127 // record the new string length.
2128 if (std::optional<loc::MemRegionVal> dstRegVal =
2129 DstVal.getAs<loc::MemRegionVal>()) {
2130 QualType ptrTy = Dst.Expression->getType();
2131
2132 // If we have an exact value on a bounded copy, use that to check for
2133 // overflows, rather than our estimate about how much is actually copied.
2134 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2135 SVal maxLastElement =
2136 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2137
2138 // Check if the first byte of the destination is writable.
2139 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2140 if (!state)
2141 return;
2142 // Check if the last byte of the destination is writable.
2143 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2144 if (!state)
2145 return;
2146 }
2147
2148 // Then, if the final length is known...
2149 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2150 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2151 *knownStrLength, ptrTy);
2152
2153 // ...and we haven't checked the bound, we'll check the actual copy.
2154 if (!boundWarning) {
2155 // Check if the first byte of the destination is writable.
2156 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2157 if (!state)
2158 return;
2159 // Check if the last byte of the destination is writable.
2160 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2161 if (!state)
2162 return;
2163 }
2164
2165 // If this is a stpcpy-style copy, the last element is the return value.
2166 if (returnPtr && ReturnEnd)
2167 Result = lastElement;
2168 }
2169
2170 // For bounded method, amountCopied take the minimum of two values,
2171 // for ConcatFnKind::strlcat:
2172 // amountCopied = min (size - dstLen - 1 , srcLen)
2173 // for others:
2174 // amountCopied = min (srcLen, size)
2175 // So even if we don't know about amountCopied, as long as one of them will
2176 // not cause an out-of-bound access, the whole function's operation will not
2177 // too, that will avoid invalidating the superRegion of data member in that
2178 // situation.
2179 bool CouldAccessOutOfBound = true;
2180 if (IsBounded && amountCopied.isUnknown()) {
2181 auto CouldAccessOutOfBoundForSVal =
2182 [&](std::optional<NonLoc> Val) -> bool {
2183 if (!Val)
2184 return true;
2185 return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2186 Dst.Expression->getType(), *Val,
2187 C.getASTContext().getSizeType());
2188 };
2189
2190 CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2191
2192 if (CouldAccessOutOfBound) {
2193 // Get the max number of characters to copy.
2194 const Expr *LenExpr = Call.getArgExpr(2);
2195 SVal LenVal = state->getSVal(LenExpr, LCtx);
2196
2197 // Protect against misdeclared strncpy().
2198 LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2199
2200 // Because analyzer doesn't handle expressions like `size -
2201 // dstLen - 1` very well, we roughly use `size` for
2202 // ConcatFnKind::strlcat here, same with other concat kinds.
2203 CouldAccessOutOfBound =
2204 CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2205 }
2206 }
2207
2208 // Invalidate the destination (regular invalidation without pointer-escaping
2209 // the address of the top-level region). This must happen before we set the
2210 // C string length because invalidation will clear the length.
2211 // FIXME: Even if we can't perfectly model the copy, we should see if we
2212 // can use LazyCompoundVals to copy the source values into the destination.
2213 // This would probably remove any existing bindings past the end of the
2214 // string, but that's still an improvement over blank invalidation.
2215 if (CouldAccessOutOfBound)
2216 state = invalidateDestinationBufferBySize(
2217 C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2218 amountCopied, C.getASTContext().getSizeType());
2219 else
2220 state = invalidateDestinationBufferNeverOverflows(
2221 C, state, Call.getCFGElementRef(), *dstRegVal);
2222
2223 // Invalidate the source (const-invalidation without const-pointer-escaping
2224 // the address of the top-level region).
2225 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(), srcVal);
2226
2227 // Set the C string length of the destination, if we know it.
2228 if (IsBounded && (appendK == ConcatFnKind::none)) {
2229 // strncpy is annoying in that it doesn't guarantee to null-terminate
2230 // the result string. If the original string didn't fit entirely inside
2231 // the bound (including the null-terminator), we don't know how long the
2232 // result is.
2233 if (amountCopied != strLength)
2234 finalStrLength = UnknownVal();
2235 }
2236 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2237 }
2238
2239 assert(state);
2240
2241 if (returnPtr) {
2242 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2243 // overflow, we still need a result. Conjure a return value.
2244 if (ReturnEnd && Result.isUnknown()) {
2245 Result = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2246 }
2247 }
2248 // Set the return value.
2249 state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2250 C.addTransition(state);
2251}
2252
2253void CStringChecker::evalStrxfrm(CheckerContext &C,
2254 const CallEvent &Call) const {
2255 // size_t strxfrm(char *dest, const char *src, size_t n);
2256 CurrentFunctionDescription = "locale transformation function";
2257
2258 ProgramStateRef State = C.getState();
2259 const LocationContext *LCtx = C.getLocationContext();
2260 SValBuilder &SVB = C.getSValBuilder();
2261
2262 // Get arguments
2263 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2264 SourceArgExpr Source = {{Call.getArgExpr(1), 1}};
2265 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2266
2267 // `src` can never be null
2268 SVal SrcVal = State->getSVal(Source.Expression, LCtx);
2269 State = checkNonNull(C, State, Source, SrcVal);
2270 if (!State)
2271 return;
2272
2273 // Buffer must not overlap
2274 State = CheckOverlap(C, State, Size, Dest, Source, CK_Regular);
2275 if (!State)
2276 return;
2277
2278 // The function returns an implementation-defined length needed for
2279 // transformation
2280 SVal RetVal = SVB.conjureSymbolVal(Call, C.blockCount());
2281
2282 auto BindReturnAndTransition = [&RetVal, &Call, LCtx,
2283 &C](ProgramStateRef State) {
2284 if (State) {
2285 State = State->BindExpr(Call.getOriginExpr(), LCtx, RetVal);
2286 C.addTransition(State);
2287 }
2288 };
2289
2290 // Check if size is zero
2291 SVal SizeVal = State->getSVal(Size.Expression, LCtx);
2292 QualType SizeTy = Size.Expression->getType();
2293
2294 auto [StateZeroSize, StateSizeNonZero] =
2295 assumeZero(C, State, SizeVal, SizeTy);
2296
2297 // We can't assume anything about size, just bind the return value and be done
2298 if (!StateZeroSize && !StateSizeNonZero)
2299 return BindReturnAndTransition(State);
2300
2301 // If `n` is 0, we just return the implementation defined length
2302 if (StateZeroSize && !StateSizeNonZero)
2303 return BindReturnAndTransition(StateZeroSize);
2304
2305 // If `n` is not 0, `dest` can not be null.
2306 SVal DestVal = StateSizeNonZero->getSVal(Dest.Expression, LCtx);
2307 StateSizeNonZero = checkNonNull(C, StateSizeNonZero, Dest, DestVal);
2308 if (!StateSizeNonZero)
2309 return;
2310
2311 // Check that we can write to the destination buffer
2312 StateSizeNonZero = CheckBufferAccess(C, StateSizeNonZero, Dest, Size,
2313 AccessKind::write, CK_Regular);
2314 if (!StateSizeNonZero)
2315 return;
2316
2317 // Success: return value < `n`
2318 // Failure: return value >= `n`
2319 auto ComparisonVal = SVB.evalBinOp(StateSizeNonZero, BO_LT, RetVal, SizeVal,
2320 SVB.getConditionType())
2321 .getAs<DefinedOrUnknownSVal>();
2322 if (!ComparisonVal) {
2323 // Fallback: invalidate the buffer.
2324 StateSizeNonZero = invalidateDestinationBufferBySize(
2325 C, StateSizeNonZero, Dest.Expression, Call.getCFGElementRef(), DestVal,
2326 SizeVal, Size.Expression->getType());
2327 return BindReturnAndTransition(StateSizeNonZero);
2328 }
2329
2330 auto [StateSuccess, StateFailure] = StateSizeNonZero->assume(*ComparisonVal);
2331
2332 if (StateSuccess) {
2333 // The transformation invalidated the buffer.
2334 StateSuccess = invalidateDestinationBufferBySize(
2335 C, StateSuccess, Dest.Expression, Call.getCFGElementRef(), DestVal,
2336 SizeVal, Size.Expression->getType());
2337 BindReturnAndTransition(StateSuccess);
2338 // Fallthrough: We also want to add a transition to the failure state below.
2339 }
2340
2341 if (StateFailure) {
2342 // `dest` buffer content is undefined
2343 if (auto DestLoc = DestVal.getAs<loc::MemRegionVal>()) {
2344 StateFailure = StateFailure->killBinding(*DestLoc);
2345 StateFailure =
2346 StateFailure->bindDefaultInitial(*DestLoc, UndefinedVal{}, LCtx);
2347 }
2348
2349 BindReturnAndTransition(StateFailure);
2350 }
2351}
2352
2353void CStringChecker::evalStrcmp(CheckerContext &C,
2354 const CallEvent &Call) const {
2355 //int strcmp(const char *s1, const char *s2);
2356 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2357}
2358
2359void CStringChecker::evalStrncmp(CheckerContext &C,
2360 const CallEvent &Call) const {
2361 //int strncmp(const char *s1, const char *s2, size_t n);
2362 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2363}
2364
2365void CStringChecker::evalStrcasecmp(CheckerContext &C,
2366 const CallEvent &Call) const {
2367 //int strcasecmp(const char *s1, const char *s2);
2368 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2369}
2370
2371void CStringChecker::evalStrncasecmp(CheckerContext &C,
2372 const CallEvent &Call) const {
2373 //int strncasecmp(const char *s1, const char *s2, size_t n);
2374 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2375}
2376
2377void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2378 bool IsBounded, bool IgnoreCase) const {
2379 CurrentFunctionDescription = "string comparison function";
2380 ProgramStateRef state = C.getState();
2381 const LocationContext *LCtx = C.getLocationContext();
2382
2383 // Check that the first string is non-null
2384 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2385 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2386 state = checkNonNull(C, state, Left, LeftVal);
2387 if (!state)
2388 return;
2389
2390 // Check that the second string is non-null.
2391 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2392 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2393 state = checkNonNull(C, state, Right, RightVal);
2394 if (!state)
2395 return;
2396
2397 // Get the string length of the first string or give up.
2398 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2399 if (LeftLength.isUndef())
2400 return;
2401
2402 // Get the string length of the second string or give up.
2403 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2404 if (RightLength.isUndef())
2405 return;
2406
2407 // If we know the two buffers are the same, we know the result is 0.
2408 // First, get the two buffers' addresses. Another checker will have already
2409 // made sure they're not undefined.
2410 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2411 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2412
2413 // See if they are the same.
2414 SValBuilder &svalBuilder = C.getSValBuilder();
2415 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2416 ProgramStateRef StSameBuf, StNotSameBuf;
2417 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2418
2419 // If the two arguments might be the same buffer, we know the result is 0,
2420 // and we only need to check one size.
2421 if (StSameBuf) {
2422 StSameBuf =
2423 StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2424 svalBuilder.makeZeroVal(Call.getResultType()));
2425 C.addTransition(StSameBuf);
2426
2427 // If the two arguments are GUARANTEED to be the same, we're done!
2428 if (!StNotSameBuf)
2429 return;
2430 }
2431
2432 assert(StNotSameBuf);
2433 state = StNotSameBuf;
2434
2435 // At this point we can go about comparing the two buffers.
2436 // For now, we only do this if they're both known string literals.
2437
2438 // Attempt to extract string literals from both expressions.
2439 const StringLiteral *LeftStrLiteral =
2440 getCStringLiteral(C, state, Left.Expression, LeftVal);
2441 const StringLiteral *RightStrLiteral =
2442 getCStringLiteral(C, state, Right.Expression, RightVal);
2443 bool canComputeResult = false;
2444 SVal resultVal = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2445
2446 if (LeftStrLiteral && RightStrLiteral) {
2447 StringRef LeftStrRef = LeftStrLiteral->getString();
2448 StringRef RightStrRef = RightStrLiteral->getString();
2449
2450 if (IsBounded) {
2451 // Get the max number of characters to compare.
2452 const Expr *lenExpr = Call.getArgExpr(2);
2453 SVal lenVal = state->getSVal(lenExpr, LCtx);
2454
2455 // If the length is known, we can get the right substrings.
2456 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2457 // Create substrings of each to compare the prefix.
2458 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2459 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2460 canComputeResult = true;
2461 }
2462 } else {
2463 // This is a normal, unbounded strcmp.
2464 canComputeResult = true;
2465 }
2466
2467 if (canComputeResult) {
2468 // Real strcmp stops at null characters.
2469 size_t s1Term = LeftStrRef.find('\0');
2470 if (s1Term != StringRef::npos)
2471 LeftStrRef = LeftStrRef.substr(0, s1Term);
2472
2473 size_t s2Term = RightStrRef.find('\0');
2474 if (s2Term != StringRef::npos)
2475 RightStrRef = RightStrRef.substr(0, s2Term);
2476
2477 // Use StringRef's comparison methods to compute the actual result.
2478 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2479 : LeftStrRef.compare(RightStrRef);
2480
2481 // The strcmp function returns an integer greater than, equal to, or less
2482 // than zero, [c11, p7.24.4.2].
2483 if (compareRes == 0) {
2484 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2485 }
2486 else {
2487 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2488 // Constrain strcmp's result range based on the result of StringRef's
2489 // comparison methods.
2490 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2491 SVal compareWithZero =
2492 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2493 svalBuilder.getConditionType());
2494 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2495 state = state->assume(compareWithZeroVal, true);
2496 }
2497 }
2498 }
2499
2500 state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2501
2502 // Record this as a possible path.
2503 C.addTransition(state);
2504}
2505
2506void CStringChecker::evalStrsep(CheckerContext &C,
2507 const CallEvent &Call) const {
2508 // char *strsep(char **stringp, const char *delim);
2509 // Verify whether the search string parameter matches the return type.
2510 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2511
2512 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2513 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2514 CharPtrTy.getUnqualifiedType())
2515 return;
2516
2517 CurrentFunctionDescription = "strsep()";
2518 ProgramStateRef State = C.getState();
2519 const LocationContext *LCtx = C.getLocationContext();
2520
2521 // Check that the search string pointer is non-null (though it may point to
2522 // a null string).
2523 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2524 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2525 if (!State)
2526 return;
2527
2528 // Check that the delimiter string is non-null.
2529 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2530 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2531 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2532 if (!State)
2533 return;
2534
2535 SValBuilder &SVB = C.getSValBuilder();
2536 SVal Result;
2537 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2538 // Get the current value of the search string pointer, as a char*.
2539 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2540
2541 // Invalidate the search string, representing the change of one delimiter
2542 // character to NUL.
2543 // As the replacement never overflows, do not invalidate its super region.
2544 State = invalidateDestinationBufferNeverOverflows(
2545 C, State, Call.getCFGElementRef(), Result);
2546
2547 // Overwrite the search string pointer. The new value is either an address
2548 // further along in the same string, or NULL if there are no more tokens.
2549 State = State->bindLoc(*SearchStrLoc,
2550 SVB.conjureSymbolVal(Call, C.blockCount(), getTag()),
2551 LCtx);
2552 } else {
2553 assert(SearchStrVal.isUnknown());
2554 // Conjure a symbolic value. It's the best we can do.
2555 Result = SVB.conjureSymbolVal(Call, C.blockCount());
2556 }
2557
2558 // Set the return value, and finish.
2559 State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2560 C.addTransition(State);
2561}
2562
2563// These should probably be moved into a C++ standard library checker.
2564void CStringChecker::evalStdCopy(CheckerContext &C,
2565 const CallEvent &Call) const {
2566 evalStdCopyCommon(C, Call);
2567}
2568
2569void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2570 const CallEvent &Call) const {
2571 evalStdCopyCommon(C, Call);
2572}
2573
2574void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2575 const CallEvent &Call) const {
2576 if (!Call.getArgExpr(2)->getType()->isPointerType())
2577 return;
2578
2579 ProgramStateRef State = C.getState();
2580
2581 const LocationContext *LCtx = C.getLocationContext();
2582
2583 // template <class _InputIterator, class _OutputIterator>
2584 // _OutputIterator
2585 // copy(_InputIterator __first, _InputIterator __last,
2586 // _OutputIterator __result)
2587
2588 // Invalidate the destination buffer
2589 const Expr *Dst = Call.getArgExpr(2);
2590 SVal DstVal = State->getSVal(Dst, LCtx);
2591 // FIXME: As we do not know how many items are copied, we also invalidate the
2592 // super region containing the target location.
2593 State = invalidateDestinationBufferAlwaysEscapeSuperRegion(
2594 C, State, Call.getCFGElementRef(), DstVal);
2595
2596 SValBuilder &SVB = C.getSValBuilder();
2597
2598 SVal ResultVal = SVB.conjureSymbolVal(Call, C.blockCount());
2599 State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2600
2601 C.addTransition(State);
2602}
2603
2604void CStringChecker::evalMemset(CheckerContext &C,
2605 const CallEvent &Call) const {
2606 // void *memset(void *s, int c, size_t n);
2607 CurrentFunctionDescription = "memory set function";
2608
2609 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2610 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2611 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2612
2613 ProgramStateRef State = C.getState();
2614
2615 // See if the size argument is zero.
2616 const LocationContext *LCtx = C.getLocationContext();
2617 SVal SizeVal = C.getSVal(Size.Expression);
2618 QualType SizeTy = Size.Expression->getType();
2619
2620 ProgramStateRef ZeroSize, NonZeroSize;
2621 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2622
2623 // Get the value of the memory area.
2624 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2625
2626 // If the size is zero, there won't be any actual memory access, so
2627 // just bind the return value to the buffer and return.
2628 if (ZeroSize && !NonZeroSize) {
2629 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2630 C.addTransition(ZeroSize);
2631 return;
2632 }
2633
2634 // Ensure the memory area is not null.
2635 // If it is NULL there will be a NULL pointer dereference.
2636 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2637 if (!State)
2638 return;
2639
2640 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2641 if (!State)
2642 return;
2643
2644 // According to the values of the arguments, bind the value of the second
2645 // argument to the destination buffer and set string length, or just
2646 // invalidate the destination buffer.
2647 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(),
2648 C.getSVal(CharE.Expression), Size.Expression, C, State))
2649 return;
2650
2651 State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2652 C.addTransition(State);
2653}
2654
2655void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2656 CurrentFunctionDescription = "memory clearance function";
2657
2658 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2659 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2660 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2661
2662 ProgramStateRef State = C.getState();
2663
2664 // See if the size argument is zero.
2665 SVal SizeVal = C.getSVal(Size.Expression);
2666 QualType SizeTy = Size.Expression->getType();
2667
2668 ProgramStateRef StateZeroSize, StateNonZeroSize;
2669 std::tie(StateZeroSize, StateNonZeroSize) =
2670 assumeZero(C, State, SizeVal, SizeTy);
2671
2672 // If the size is zero, there won't be any actual memory access,
2673 // In this case we just return.
2674 if (StateZeroSize && !StateNonZeroSize) {
2675 C.addTransition(StateZeroSize);
2676 return;
2677 }
2678
2679 // Get the value of the memory area.
2680 SVal MemVal = C.getSVal(Buffer.Expression);
2681
2682 // Ensure the memory area is not null.
2683 // If it is NULL there will be a NULL pointer dereference.
2684 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2685 if (!State)
2686 return;
2687
2688 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2689 if (!State)
2690 return;
2691
2692 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(), Zero,
2693 Size.Expression, C, State))
2694 return;
2695
2696 C.addTransition(State);
2697}
2698
2699void CStringChecker::evalSprintf(CheckerContext &C,
2700 const CallEvent &Call) const {
2701 CurrentFunctionDescription = "'sprintf'";
2702 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2703}
2704
2705void CStringChecker::evalSnprintf(CheckerContext &C,
2706 const CallEvent &Call) const {
2707 CurrentFunctionDescription = "'snprintf'";
2708 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2709}
2710
2711void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2712 bool IsBounded) const {
2713 ProgramStateRef State = C.getState();
2714 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2715 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2716
2717 const auto NumParams = Call.parameters().size();
2718 if (CE->getNumArgs() < NumParams) {
2719 // This is an invalid call, let's just ignore it.
2720 return;
2721 }
2722
2723 const auto AllArguments =
2724 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2725 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2726
2727 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2728 // We consider only string buffers
2729 if (const QualType type = ArgExpr->getType();
2730 !type->isAnyPointerType() ||
2731 !type->getPointeeType()->isAnyCharacterType())
2732 continue;
2733 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2734
2735 // Ensure the buffers do not overlap.
2736 SizeArgExpr SrcExprAsSizeDummy = {
2737 {Source.Expression, Source.ArgumentIndex}};
2738 State = CheckOverlap(
2739 C, State,
2740 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2741 Dest, Source);
2742 if (!State)
2743 return;
2744 }
2745
2746 C.addTransition(State);
2747}
2748
2749//===----------------------------------------------------------------------===//
2750// The driver method, and other Checker callbacks.
2751//===----------------------------------------------------------------------===//
2752
2753CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2754 CheckerContext &C) const {
2755 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2756 if (!CE)
2757 return nullptr;
2758
2759 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2760 if (!FD)
2761 return nullptr;
2762
2763 if (StdCopy.matches(Call))
2764 return &CStringChecker::evalStdCopy;
2765 if (StdCopyBackward.matches(Call))
2766 return &CStringChecker::evalStdCopyBackward;
2767
2768 // Pro-actively check that argument types are safe to do arithmetic upon.
2769 // We do not want to crash if someone accidentally passes a structure
2770 // into, say, a C++ overload of any of these functions. We could not check
2771 // that for std::copy because they may have arguments of other types.
2772 for (auto I : CE->arguments()) {
2773 QualType T = I->getType();
2775 return nullptr;
2776 }
2777
2778 const FnCheck *Callback = Callbacks.lookup(Call);
2779 if (Callback)
2780 return *Callback;
2781
2782 return nullptr;
2783}
2784
2785bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2786 FnCheck Callback = identifyCall(Call, C);
2787
2788 // If the callee isn't a string function, let another checker handle it.
2789 if (!Callback)
2790 return false;
2791
2792 // Check and evaluate the call.
2793 assert(isa<CallExpr>(Call.getOriginExpr()));
2794 Callback(this, C, Call);
2795
2796 // If the evaluate call resulted in no change, chain to the next eval call
2797 // handler.
2798 // Note, the custom CString evaluation calls assume that basic safety
2799 // properties are held. However, if the user chooses to turn off some of these
2800 // checks, we ignore the issues and leave the call evaluation to a generic
2801 // handler.
2802 return C.isDifferent();
2803}
2804
2805void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2806 // Record string length for char a[] = "abc";
2807 ProgramStateRef state = C.getState();
2808
2809 for (const auto *I : DS->decls()) {
2810 const VarDecl *D = dyn_cast<VarDecl>(I);
2811 if (!D)
2812 continue;
2813
2814 // FIXME: Handle array fields of structs.
2815 if (!D->getType()->isArrayType())
2816 continue;
2817
2818 const Expr *Init = D->getInit();
2819 if (!Init)
2820 continue;
2822 continue;
2823
2824 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2825 const MemRegion *MR = VarLoc.getAsRegion();
2826 if (!MR)
2827 continue;
2828
2829 SVal StrVal = C.getSVal(Init);
2830 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2831 DefinedOrUnknownSVal strLength =
2832 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2833
2834 state = state->set<CStringLength>(MR, strLength);
2835 }
2836
2837 C.addTransition(state);
2838}
2839
2841CStringChecker::checkRegionChanges(ProgramStateRef state,
2842 const InvalidatedSymbols *,
2843 ArrayRef<const MemRegion *> ExplicitRegions,
2844 ArrayRef<const MemRegion *> Regions,
2845 const LocationContext *LCtx,
2846 const CallEvent *Call) const {
2847 CStringLengthTy Entries = state->get<CStringLength>();
2848 if (Entries.isEmpty())
2849 return state;
2850
2851 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2852 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2853
2854 // First build sets for the changed regions and their super-regions.
2855 for (const MemRegion *MR : Regions) {
2856 Invalidated.insert(MR);
2857
2858 SuperRegions.insert(MR);
2859 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2860 MR = SR->getSuperRegion();
2861 SuperRegions.insert(MR);
2862 }
2863 }
2864
2865 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2866
2867 // Then loop over the entries in the current state.
2868 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2869 // Is this entry for a super-region of a changed region?
2870 if (SuperRegions.count(MR)) {
2871 Entries = F.remove(Entries, MR);
2872 continue;
2873 }
2874
2875 // Is this entry for a sub-region of a changed region?
2876 const MemRegion *Super = MR;
2877 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2878 Super = SR->getSuperRegion();
2879 if (Invalidated.count(Super)) {
2880 Entries = F.remove(Entries, MR);
2881 break;
2882 }
2883 }
2884 }
2885
2886 return state->set<CStringLength>(Entries);
2887}
2888
2889void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2890 SymbolReaper &SR) const {
2891 // Mark all symbols in our string length map as valid.
2892 CStringLengthTy Entries = state->get<CStringLength>();
2893
2894 for (SVal Len : llvm::make_second_range(Entries)) {
2895 for (SymbolRef Sym : Len.symbols())
2896 SR.markInUse(Sym);
2897 }
2898}
2899
2900void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2901 CheckerContext &C) const {
2902 ProgramStateRef state = C.getState();
2903 CStringLengthTy Entries = state->get<CStringLength>();
2904 if (Entries.isEmpty())
2905 return;
2906
2907 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2908 for (auto [Reg, Len] : Entries) {
2909 if (SymbolRef Sym = Len.getAsSymbol()) {
2910 if (SR.isDead(Sym))
2911 Entries = F.remove(Entries, Reg);
2912 }
2913 }
2914
2915 state = state->set<CStringLength>(Entries);
2916 C.addTransition(state);
2917}
2918
2919void ento::registerCStringModeling(CheckerManager &Mgr) {
2920 // Other checker relies on the modeling implemented in this checker family,
2921 // so this "modeling checker" can register the 'CStringChecker' backend for
2922 // its callbacks without enabling any of its frontends.
2923 Mgr.getChecker<CStringChecker>();
2924}
2925
2926bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
2927 return true;
2928}
2929
2930#define REGISTER_CHECKER(NAME) \
2931 void ento::registerCString##NAME(CheckerManager &Mgr) { \
2932 Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr); \
2933 } \
2934 \
2935 bool ento::shouldRegisterCString##NAME(const CheckerManager &) { \
2936 return true; \
2937 }
2938
2939REGISTER_CHECKER(NullArg)
2940REGISTER_CHECKER(OutOfBounds)
2941REGISTER_CHECKER(BufferOverlap)
2942REGISTER_CHECKER(NotNullTerm)
2943REGISTER_CHECKER(UninitializedRead)
#define V(N, I)
static std::optional< NonLoc > getIndex(ProgramStateRef State, const ElementRegion *ER, CharKind CK)
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx)
#define REGISTER_CHECKER(name)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType WideCharTy
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType CharTy
CanQualType IntTy
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType UnsignedCharTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
decl_range decls()
Definition Stmt.h:1659
QualType getType() const
Definition Expr.h:144
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8413
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition TypeBase.h:8381
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:334
unsigned getLength() const
Definition Expr.h:1909
StringRef getString() const
Definition Expr.h:1867
bool isArrayType() const
Definition TypeBase.h:8623
bool isPointerType() const
Definition TypeBase.h:8524
CanQualType getCanonicalTypeUnqualified() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition TypeBase.h:8998
bool isAnyPointerType() const
Definition TypeBase.h:8532
QualType getType() const
Definition Decl.h:723
const Expr * getInit() const
Definition Decl.h:1368
APSIntPtr getMaxValue(const llvm::APSInt &v)
std::optional< APSIntPtr > evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:584
CHECKER * getChecker(AT &&...Args)
If the the singleton instance of a checker class is not yet constructed, then construct it (with the ...
ElementRegion is used to represent both array elements and casts.
Definition MemRegion.h:1227
QualType getValueType() const override
Definition MemRegion.h:1249
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:98
LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy * castAs() const
Definition MemRegion.h:1424
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Kind getKind() const
Definition MemRegion.h:203
@ TK_PreserveContents
Tells that a region's contents is not changed.
Definition MemRegion.h:1672
@ TK_SuppressEscape
Suppress pointer-escaping of a region.
Definition MemRegion.h:1675
void setTrait(SymbolRef Sym, InvalidationKinds IK)
bool hasSymbolicOffset() const
Definition MemRegion.h:83
const MemRegion * getRegion() const
It might return null.
Definition MemRegion.h:81
int64_t getOffset() const
Definition MemRegion.h:85
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
BasicValueFactory & getBasicValueFactory()
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, const LocationContext *LCtx, unsigned count)
ProgramStateManager & getStateManager()
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
loc::MemRegionVal makeLoc(SymbolRef sym)
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, ConstCFGElementRef elem, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition SVals.h:56
bool isUndef() const
Definition SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
bool isValid() const
Definition SVals.h:111
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
bool isUnknown() const
Definition SVals.h:105
LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral * getStringLiteral() const
Definition MemRegion.h:873
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition MemRegion.h:487
llvm::iterator_range< symbol_iterator > symbols() const
Definition SymExpr.h:107
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
__inline void unsigned int _2
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
llvm::DenseSet< SymbolRef > InvalidatedSymbols
Definition Store.h:51
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
Definition SymExpr.h:133
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
std::variant< struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl > Decl
All declarations that can appear in a module declaration.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
CFGBlock::ConstCFGElementRef ConstCFGElementRef
Definition CFG.h:1199
@ Result
The result type of a method or function.
Definition TypeBase.h:905
const FunctionProtoType * T
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition CharInfo.h:233
U cast(CodeGen::Address addr)
Definition Address.h:327
int const char * function
Definition c++config.h:31