clang 23.0.0git
CStringChecker.cpp
Go to the documentation of this file.
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <functional>
34#include <optional>
35
36using namespace clang;
37using namespace ento;
38using namespace std::placeholders;
39
40namespace {
41struct AnyArgExpr {
42 const Expr *Expression;
43 unsigned ArgumentIndex;
44};
45struct SourceArgExpr : AnyArgExpr {};
46struct DestinationArgExpr : AnyArgExpr {};
47struct SizeArgExpr : AnyArgExpr {};
48
49using ErrorMessage = SmallString<128>;
50enum class AccessKind { write, read };
51
52static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
53 AccessKind Access) {
54 ErrorMessage Message;
55 llvm::raw_svector_ostream Os(Message);
56
57 // Function classification like: Memory copy function
58 Os << toUppercase(FunctionDescription.front())
59 << &FunctionDescription.data()[1];
60
61 if (Access == AccessKind::write) {
62 Os << " overflows the destination buffer";
63 } else { // read access
64 Os << " accesses out-of-bound array element";
65 }
66
67 return Message;
68}
69
/// Concatenation flavour selector; `none` means "not a concatenation".
enum class ConcatFnKind {
  none = 0,
  strcat = 1,
  strlcat = 2,
};

/// Character width a function operates on: regular (`char`) or wide
/// (`wchar_t`) characters.
enum class CharKind {
  Regular = 0,
  Wide,
};

// Short aliases for the two CharKind values.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
75
76static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
77 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
78 : Ctx.WideCharTy);
79}
80
81class CStringChecker
82 : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
83 check::LiveSymbols, check::DeadSymbols,
84 check::RegionChanges> {
85 mutable const char *CurrentFunctionDescription = nullptr;
86
87public:
88 // FIXME: The bug types emitted by this checker family have confused garbage
89 // in their Description and Category fields (e.g. `categories::UnixAPI` is
90 // passed as the description in several cases and `uninitialized` is mistyped
91 // as `unitialized`). This should be cleaned up.
92 CheckerFrontendWithBugType NullArg{categories::UnixAPI};
93 CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
94 CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
95 "Improper arguments"};
96 CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
97 CheckerFrontendWithBugType UninitializedRead{
98 "Accessing unitialized/garbage values"};
99
100 StringRef getDebugTag() const override { return "MallocChecker"; }
101
102 static void *getTag() { static int tag; return &tag; }
103
104 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
105 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
106 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
107 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
108
110 checkRegionChanges(ProgramStateRef state,
111 const InvalidatedSymbols *,
112 ArrayRef<const MemRegion *> ExplicitRegions,
113 ArrayRef<const MemRegion *> Regions,
114 const LocationContext *LCtx,
115 const CallEvent *Call) const;
116
117 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
118 const CallEvent &)>;
119
120 CallDescriptionMap<FnCheck> Callbacks = {
121 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
122 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
123 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
124 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
125 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
126 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
127 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
128 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
129 {{CDM::CLibrary, {"memcmp"}, 3},
130 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
131 {{CDM::CLibrary, {"wmemcmp"}, 3},
132 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
133 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
134 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
135 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
136 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
137 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
138 &CStringChecker::evalMemset},
139 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
140 // FIXME: C23 introduces 'memset_explicit', maybe also model that
141 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
142 &CStringChecker::evalStrcpy},
143 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
144 &CStringChecker::evalStrncpy},
145 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
146 &CStringChecker::evalStpcpy},
147 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
148 &CStringChecker::evalStrlcpy},
149 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
150 &CStringChecker::evalStrcat},
151 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
152 &CStringChecker::evalStrncat},
153 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
154 &CStringChecker::evalStrlcat},
155 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
156 &CStringChecker::evalstrLength},
157 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
158 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
159 &CStringChecker::evalstrnLength},
160 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
161 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
162 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
163 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
164 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
165 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
166 {{CDM::CLibrary, {"strxfrm"}, 3}, &CStringChecker::evalStrxfrm},
167 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
168 {{CDM::CLibrary, {"bcmp"}, 3},
169 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
170 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
171 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
172 &CStringChecker::evalBzero},
173
174 // When recognizing calls to the following variadic functions, we accept
175 // any number of arguments in the call (std::nullopt = accept any
176 // number), but check that in the declaration there are 2 and 3
177 // parameters respectively. (Note that the parameter count does not
178 // include the "...". Calls where the number of arguments is too small
179 // will be discarded by the callback.)
180 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
181 &CStringChecker::evalSprintf},
182 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
183 &CStringChecker::evalSnprintf},
184 };
185
186 // These require a bit of special handling.
187 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
188 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
189
190 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
191 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
192 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
193 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
194 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
195 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
196 ProgramStateRef state, SizeArgExpr Size,
197 DestinationArgExpr Dest, SourceArgExpr Source,
198 bool Restricted, bool IsMempcpy, CharKind CK) const;
199
200 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201
202 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
203 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
204 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
205 bool IsStrnlen = false) const;
206
207 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
208 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
209 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
210 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
211 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
212 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
213 bool returnPtr = true) const;
214
215 void evalStrxfrm(CheckerContext &C, const CallEvent &Call) const;
216
217 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
218 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
220
221 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
222 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
223 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
224 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
226 bool IsBounded = false, bool IgnoreCase = false) const;
227
228 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
229
230 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
231 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
232 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
233 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
234 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
235
236 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
237 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
238 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
239 bool IsBounded) const;
240
241 // Utility methods
242 std::pair<ProgramStateRef , ProgramStateRef >
243 static assumeZero(CheckerContext &C,
244 ProgramStateRef state, SVal V, QualType Ty);
245
246 static ProgramStateRef setCStringLength(ProgramStateRef state,
247 const MemRegion *MR,
248 SVal strLength);
249 static SVal getCStringLengthForRegion(CheckerContext &C,
250 ProgramStateRef &state,
251 const Expr *Ex,
252 const MemRegion *MR,
253 bool hypothetical);
254 static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
255
256 SVal getCStringLength(CheckerContext &C,
257 ProgramStateRef &state,
258 const Expr *Ex,
259 SVal Buf,
260 bool hypothetical = false) const;
261
262 const StringLiteral *getCStringLiteral(CheckerContext &C,
263 ProgramStateRef &state,
264 const Expr *expr,
265 SVal val) const;
266
267 /// Invalidate the destination buffer determined by characters copied.
268 static ProgramStateRef
269 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
270 const Expr *BufE, ConstCFGElementRef Elem,
271 SVal BufV, SVal SizeV, QualType SizeTy);
272
273 /// Operation never overflows, do not invalidate the super region.
274 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
275 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
276
277 /// We do not know whether the operation can overflow (e.g. size is unknown),
278 /// invalidate the super region and escape related pointers.
279 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
280 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
281
282 /// Invalidate the source buffer for escaping pointers.
283 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
286 SVal BufV);
287
288 /// @param InvalidationTraitOperations Determine how to invlidate the
289 /// MemRegion by setting the invalidation traits. Return true to cause pointer
290 /// escape, or false otherwise.
291 static ProgramStateRef invalidateBufferAux(
292 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
293 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
294 const MemRegion *)>
295 InvalidationTraitOperations);
296
297 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
298 const MemRegion *MR);
299
300 static bool memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
301 SVal CharE, const Expr *Size, CheckerContext &C,
302 ProgramStateRef &State);
303
304 // Re-usable checks
305 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
306 AnyArgExpr Arg, SVal l) const;
307 // Check whether the origin region behind \p Element (like the actual array
308 // region \p Element is from) is initialized.
309 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
310 AnyArgExpr Buffer, SVal Element, SVal Size) const;
311 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
312 AnyArgExpr Buffer, SVal Element,
313 AccessKind Access,
314 CharKind CK = CharKind::Regular) const;
315 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
316 AnyArgExpr Buffer, SizeArgExpr Size,
317 AccessKind Access,
318 CharKind CK = CharKind::Regular) const;
319 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
320 SizeArgExpr Size, AnyArgExpr First,
321 AnyArgExpr Second,
322 CharKind CK = CharKind::Regular) const;
323 void emitOverlapBug(CheckerContext &C,
324 ProgramStateRef state,
325 const Stmt *First,
326 const Stmt *Second) const;
327
328 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
329 StringRef WarningMsg) const;
330 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
331 const Stmt *S, StringRef WarningMsg) const;
332 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
333 const Stmt *S, StringRef WarningMsg) const;
334 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
335 const Expr *E, const MemRegion *R,
336 StringRef Msg) const;
337 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
338 ProgramStateRef state,
339 NonLoc left,
340 NonLoc right) const;
341
342 // Return true if the destination buffer of the copy function may be in bound.
343 // Expects SVal of Size to be positive and unsigned.
344 // Expects SVal of FirstBuf to be a FieldRegion.
345 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
346 SVal BufVal, QualType BufTy, SVal LengthVal,
347 QualType LengthTy);
348};
349
350} //end anonymous namespace
351
352REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
353
354//===----------------------------------------------------------------------===//
355// Individual checks and utility methods.
356//===----------------------------------------------------------------------===//
357
358std::pair<ProgramStateRef, ProgramStateRef>
359CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
360 QualType Ty) {
361 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
362 if (!val)
363 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
364
365 SValBuilder &svalBuilder = C.getSValBuilder();
366 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
367 return State->assume(svalBuilder.evalEQ(State, *val, zero));
368}
369
370ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
371 ProgramStateRef State,
372 AnyArgExpr Arg, SVal l) const {
373 // If a previous check has failed, propagate the failure.
374 if (!State)
375 return nullptr;
376
377 ProgramStateRef stateNull, stateNonNull;
378 std::tie(stateNull, stateNonNull) =
379 assumeZero(C, State, l, Arg.Expression->getType());
380
381 if (stateNull && !stateNonNull) {
382 if (NullArg.isEnabled()) {
383 SmallString<80> buf;
384 llvm::raw_svector_ostream OS(buf);
385 assert(CurrentFunctionDescription);
386 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
387 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
388 << CurrentFunctionDescription;
389
390 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
391 }
392 return nullptr;
393 }
394
395 // From here on, assume that the value is non-null.
396 assert(stateNonNull);
397 return stateNonNull;
398}
399
400static std::optional<NonLoc> getIndex(ProgramStateRef State,
401 const ElementRegion *ER, CharKind CK) {
403 ASTContext &Ctx = SVB.getContext();
404
405 if (CK == CharKind::Regular) {
406 if (ER->getValueType() != Ctx.CharTy)
407 return {};
408 return ER->getIndex();
409 }
410
411 if (ER->getValueType() != Ctx.WideCharTy)
412 return {};
413
414 QualType SizeTy = Ctx.getSizeType();
415 NonLoc WideSize =
417 SizeTy)
418 .castAs<NonLoc>();
419 SVal Offset =
420 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
421 if (Offset.isUnknown())
422 return {};
423 return Offset.castAs<NonLoc>();
424}
425
426// Basically 1 -> 1st, 12 -> 12th, etc.
427static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
428 Os << Idx << llvm::getOrdinalSuffix(Idx);
429}
430
431ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
432 ProgramStateRef State,
433 AnyArgExpr Buffer, SVal Element,
434 SVal Size) const {
435
436 // If a previous check has failed, propagate the failure.
437 if (!State)
438 return nullptr;
439
440 SVal BufVal = C.getSVal(Buffer.Expression);
441 const auto *ER = dyn_cast_or_null<ElementRegion>(BufVal.getAsRegion());
442 if (!ER)
443 return State;
444
445 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
446 if (!SuperR)
447 return State;
448
449 // FIXME: We ought to able to check objects as well. Maybe
450 // UninitializedObjectChecker could help?
451 if (!SuperR->getValueType()->isArrayType())
452 return State;
453
454 SValBuilder &SVB = C.getSValBuilder();
455 ASTContext &Ctx = SVB.getContext();
456
457 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
458
459 std::optional<Loc> FirstElementVal =
460 State->getLValue(ElemTy, SVB.makeZeroArrayIndex(), BufVal).getAs<Loc>();
461 if (!FirstElementVal)
462 return State;
463
464 // Ensure that we wouldn't read uninitialized value.
465 if (UninitializedRead.isEnabled() &&
466 State->getSVal(*FirstElementVal).isUndef()) {
467 llvm::SmallString<258> Buf;
468 llvm::raw_svector_ostream OS(Buf);
469 OS << "The first element of the ";
470 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
471 OS << " argument is undefined";
472 emitUninitializedReadBug(C, State, Buffer.Expression,
473 FirstElementVal->getAsRegion(), OS.str());
474 return nullptr;
475 }
476
477 // We won't check whether the entire region is fully initialized -- let's just
478 // check that the first and the last element is. So, onto checking the last
479 // element:
480
481 const QualType IdxTy = SVB.getArrayIndexType();
482 NonLoc ElemSize =
483 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
484 .castAs<NonLoc>();
485
486 // FIXME: Check that the size arg to the cstring function is divisible by
487 // size of the actual element type?
488
489 // The type of the argument to the cstring function is either char or wchar,
490 // but thats not the type of the original array (or memory region).
491 // Suppose the following:
492 // int t[5];
493 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
494 // When checking whether t is fully initialized, we see it as char array of
495 // size sizeof(int)*5. If we check the last element as a character, we read
496 // the last byte of an integer, which will be undefined. But just because
497 // that value is undefined, it doesn't mean that the element is uninitialized!
498 // For this reason, we need to retrieve the actual last element with the
499 // correct type.
500
501 // Divide the size argument to the cstring function by the actual element
502 // type. This value will be size of the array, or the index to the
503 // past-the-end element.
504 std::optional<NonLoc> Offset =
505 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
506 IdxTy)
507 .getAs<NonLoc>();
508
509 if (!Offset)
510 return State;
511
512 // Retrieve the index of the last element relative to the buffer pointer.
513 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
514 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
515
516 SVal LastElementVal = State->getLValue(ElemTy, LastIdx, BufVal);
517 if (!isa<Loc>(LastElementVal))
518 return State;
519
520 if (UninitializedRead.isEnabled() &&
521 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
522 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
523 // If we can't get emit a sensible last element index, just bail out --
524 // prefer to emit nothing in favour of emitting garbage quality reports.
525 if (!IdxInt) {
526 C.addSink();
527 return nullptr;
528 }
529 llvm::SmallString<258> Buf;
530 llvm::raw_svector_ostream OS(Buf);
531 OS << "The last accessed element (at index ";
532 OS << IdxInt->getExtValue();
533 OS << ") in the ";
534 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
535 OS << " argument is undefined";
536 emitUninitializedReadBug(C, State, Buffer.Expression,
537 LastElementVal.getAsRegion(), OS.str());
538 return nullptr;
539 }
540 return State;
541}
542// FIXME: The root of this logic was copied from the old checker
543// alpha.security.ArrayBound (which is removed within this commit).
544// It should be refactored to use the different, more sophisticated bounds
545// checking logic used by the new checker ``security.ArrayBound``.
546ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
547 ProgramStateRef state,
548 AnyArgExpr Buffer, SVal Element,
549 AccessKind Access,
550 CharKind CK) const {
551
552 // If a previous check has failed, propagate the failure.
553 if (!state)
554 return nullptr;
555
556 // Check for out of bound array element access.
557 const MemRegion *R = Element.getAsRegion();
558 if (!R)
559 return state;
560
561 const auto *ER = dyn_cast<ElementRegion>(R);
562 if (!ER)
563 return state;
564
565 // Get the index of the accessed element.
566 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
567 if (!Idx)
568 return state;
569
570 // Get the size of the array.
571 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
572 DefinedOrUnknownSVal Size =
573 getDynamicExtent(state, superReg, C.getSValBuilder());
574
575 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
576 if (StOutBound && !StInBound) {
577 // The analyzer determined that the access is out-of-bounds, which is
578 // a fatal error: ideally we'd return nullptr to terminate this path
579 // regardless of whether the OutOfBounds checker frontend is enabled.
580 // However, the current out-of-bounds modeling produces too many false
581 // positives, so when the frontend is disabled we return the original
582 // (unconstrained) state and let the analysis continue. This is
583 // inconsistent: returning `state` instead of `StOutBound` discards the
584 // constraint that the index is out-of-bounds, and callers cannot
585 // distinguish "we proved an error" from "we couldn't determine anything"
586 // since both return the original state.
587 // TODO: Once the OutOfBounds frontend is stable, return nullptr here
588 // unconditionally to stop the analysis on this path.
589 if (!OutOfBounds.isEnabled())
590 return state;
591
592 ErrorMessage Message =
593 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
594 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
595 return nullptr;
596 }
597
598 // Array bound check succeeded. From this point forward the array bound
599 // should always succeed.
600 return StInBound;
601}
602
604CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
605 AnyArgExpr Buffer, SizeArgExpr Size,
606 AccessKind Access, CharKind CK) const {
607 // If a previous check has failed, propagate the failure.
608 if (!State)
609 return nullptr;
610
611 SValBuilder &svalBuilder = C.getSValBuilder();
612 ASTContext &Ctx = svalBuilder.getContext();
613
614 QualType SizeTy = Size.Expression->getType();
615 QualType PtrTy = getCharPtrType(Ctx, CK);
616
617 // Check that the first buffer is non-null.
618 SVal BufVal = C.getSVal(Buffer.Expression);
619 State = checkNonNull(C, State, Buffer, BufVal);
620 if (!State)
621 return nullptr;
622
623 SVal BufStart =
624 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
625
626 // Check if the first byte of the buffer is accessible.
627 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
628
629 if (!State)
630 return nullptr;
631
632 // Get the access length and make sure it is known.
633 // FIXME: This assumes the caller has already checked that the access length
634 // is positive. And that it's unsigned.
635 SVal LengthVal = C.getSVal(Size.Expression);
636 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
637 if (!Length)
638 return State;
639
640 // Compute the offset of the last element to be accessed: size-1.
641 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
642 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
643 if (Offset.isUnknown())
644 return nullptr;
645 NonLoc LastOffset = Offset.castAs<NonLoc>();
646
647 // Check that the first buffer is sufficiently long.
648 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
649
650 SVal BufEnd =
651 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
652 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
653 if (Access == AccessKind::read)
654 State = checkInit(C, State, Buffer, BufStart, *Length);
655
656 // If the buffer isn't large enough, abort.
657 if (!State)
658 return nullptr;
659 }
660
661 // Large enough or not, return this state!
662 return State;
663}
664
665ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
666 ProgramStateRef state,
667 SizeArgExpr Size, AnyArgExpr First,
668 AnyArgExpr Second,
669 CharKind CK) const {
670 // Do a simple check for overlap: if the two arguments are from the same
671 // buffer, see if the end of the first is greater than the start of the second
672 // or vice versa.
673
674 // If a previous check has failed, propagate the failure.
675 if (!state)
676 return nullptr;
677
678 ProgramStateRef stateTrue, stateFalse;
679
680 if (!First.Expression->getType()->isAnyPointerType() ||
681 !Second.Expression->getType()->isAnyPointerType())
682 return state;
683
684 // Assume different address spaces cannot overlap.
685 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
686 Second.Expression->getType()->getPointeeType().getAddressSpace())
687 return state;
688
689 // Get the buffer values and make sure they're known locations.
690 const LocationContext *LCtx = C.getLocationContext();
691 SVal firstVal = state->getSVal(First.Expression, LCtx);
692 SVal secondVal = state->getSVal(Second.Expression, LCtx);
693
694 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
695 if (!firstLoc)
696 return state;
697
698 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
699 if (!secondLoc)
700 return state;
701
702 // Are the two values the same?
703 SValBuilder &svalBuilder = C.getSValBuilder();
704 std::tie(stateTrue, stateFalse) =
705 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
706
707 if (stateTrue && !stateFalse) {
708 if (BufferOverlap.isEnabled()) {
709 // If the values are known to be equal, that's automatically an overlap.
710 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
711 return nullptr;
712 }
713 // The analyzer proved that the two pointers are equal, which guarantees
714 // overlap. When BufferOverlap is disabled, we return the original state
715 // instead of nullptr (to avoid stopping the path) or stateTrue (which
716 // would encode the equality constraint). This creates an inconsistency:
717 // callers treat any non-null return as "no overlap found" and proceed
718 // with subsequent modeling (e.g. memcpy side effects), even though the
719 // operation has undefined behavior. Additionally, returning `state` instead
720 // of `stateTrue` discards the pointer-equality constraint, making the
721 // analysis less precise.
722 // FIXME: At minimum, return stateTrue to preserve the equality
723 // constraint. Ideally, return nullptr to stop the path unconditionally,
724 // since overlap is proven regardless of whether we report it.
725 return state;
726 }
727
728 // assume the two expressions are not equal.
729 assert(stateFalse);
730 state = stateFalse;
731
732 // Which value comes first?
733 QualType cmpTy = svalBuilder.getConditionType();
734 SVal reverse =
735 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
736 std::optional<DefinedOrUnknownSVal> reverseTest =
737 reverse.getAs<DefinedOrUnknownSVal>();
738 if (!reverseTest)
739 return state;
740
741 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
742 if (stateTrue) {
743 if (stateFalse) {
744 // If we don't know which one comes first, we can't perform this test.
745 return state;
746 } else {
747 // Switch the values so that firstVal is before secondVal.
748 std::swap(firstLoc, secondLoc);
749
750 // Switch the Exprs as well, so that they still correspond.
751 std::swap(First, Second);
752 }
753 }
754
755 // Get the length, and make sure it too is known.
756 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
757 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
758 if (!Length)
759 return state;
760
761 // Convert the first buffer's start address to char*.
762 // Bail out if the cast fails.
763 ASTContext &Ctx = svalBuilder.getContext();
764 QualType CharPtrTy = getCharPtrType(Ctx, CK);
765 SVal FirstStart =
766 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
767 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
768 if (!FirstStartLoc)
769 return state;
770
771 // Compute the end of the first buffer. Bail out if THAT fails.
772 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
773 *Length, CharPtrTy);
774 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
775 if (!FirstEndLoc)
776 return state;
777
778 // Is the end of the first buffer past the start of the second buffer?
779 SVal Overlap =
780 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
781 std::optional<DefinedOrUnknownSVal> OverlapTest =
782 Overlap.getAs<DefinedOrUnknownSVal>();
783 if (!OverlapTest)
784 return state;
785
786 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
787
788 if (stateTrue && !stateFalse) {
789 if (BufferOverlap.isEnabled()) {
790 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
791 return nullptr;
792 }
793 // The analyzer proved that the end of the first buffer is past the start
794 // of the second, which means the buffers overlap. This is the same
795 // inconsistency as the equal-pointers case above: when BufferOverlap is
796 // disabled, we return the original state, so callers cannot distinguish
797 // "proven overlap" from "couldn't determine anything" and will proceed
798 // to model side effects (e.g. memcpy) on a path with proven UB.
799 // Returning `stateTrue` would at least preserve the overlap constraint;
800 // returning nullptr would correctly terminate the path.
801 // FIXME: Return nullptr unconditionally once BufferOverlap is stable.
802 return state;
803 }
804
805 // assume the two expressions don't overlap.
806 assert(stateFalse);
807 return stateFalse;
808}
809
810void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
811 const Stmt *First,
812 const Stmt *Second) const {
813 assert(BufferOverlap.isEnabled() &&
814 "Can't emit from a checker that is not enabled!");
815 ExplodedNode *N = C.generateErrorNode(state);
816 if (!N)
817 return;
818
819 // Generate a report for this bug.
820 auto report = std::make_unique<PathSensitiveBugReport>(
821 BufferOverlap, "Arguments must not be overlapping buffers", N);
822 report->addRange(First->getSourceRange());
823 report->addRange(Second->getSourceRange());
824
825 C.emitReport(std::move(report));
826}
827
828void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
829 const Stmt *S, StringRef WarningMsg) const {
830 assert(NullArg.isEnabled() &&
831 "Can't emit from a checker that is not enabled!");
832 if (ExplodedNode *N = C.generateErrorNode(State)) {
833 auto Report =
834 std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
835 Report->addRange(S->getSourceRange());
836 if (const auto *Ex = dyn_cast<Expr>(S))
838 C.emitReport(std::move(Report));
839 }
840}
841
842void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
843 ProgramStateRef State,
844 const Expr *E, const MemRegion *R,
845 StringRef Msg) const {
846 assert(UninitializedRead.isEnabled() &&
847 "Can't emit from a checker that is not enabled!");
848 if (ExplodedNode *N = C.generateErrorNode(State)) {
849 auto Report =
850 std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
851 Report->addNote("Other elements might also be undefined",
852 Report->getLocation());
853 Report->addRange(E->getSourceRange());
855 Report->addVisitor<NoStoreFuncVisitor>(R->castAs<SubRegion>());
856 C.emitReport(std::move(Report));
857 }
858}
859
860void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
861 ProgramStateRef State, const Stmt *S,
862 StringRef WarningMsg) const {
863 assert(OutOfBounds.isEnabled() &&
864 "Can't emit from a checker that is not enabled!");
865 if (ExplodedNode *N = C.generateErrorNode(State)) {
866 // FIXME: It would be nice to eventually make this diagnostic more clear,
867 // e.g., by referencing the original declaration or by saying *why* this
868 // reference is outside the range.
869 auto Report =
870 std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
871 Report->addRange(S->getSourceRange());
872 C.emitReport(std::move(Report));
873 }
874}
875
876void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
877 const Stmt *S,
878 StringRef WarningMsg) const {
879 assert(NotNullTerm.isEnabled() &&
880 "Can't emit from a checker that is not enabled!");
881 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
882 auto Report =
883 std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
884
885 Report->addRange(S->getSourceRange());
886 C.emitReport(std::move(Report));
887 }
888}
889
890ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
891 ProgramStateRef state,
892 NonLoc left,
893 NonLoc right) const {
894 // If a previous check has failed, propagate the failure.
895 if (!state)
896 return nullptr;
897
898 SValBuilder &svalBuilder = C.getSValBuilder();
899 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
900
901 QualType sizeTy = svalBuilder.getContext().getSizeType();
902 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
903 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
904
905 SVal maxMinusRight;
906 if (isa<nonloc::ConcreteInt>(right)) {
907 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
908 sizeTy);
909 } else {
910 // Try switching the operands. (The order of these two assignments is
911 // important!)
912 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
913 sizeTy);
914 left = right;
915 }
916
917 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
918 QualType cmpTy = svalBuilder.getConditionType();
919 // If left > max - right, we have an overflow.
920 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
921 *maxMinusRightNL, cmpTy);
922
923 auto [StateOverflow, StateOkay] =
924 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
925
926 if (StateOverflow && !StateOkay) {
927 // On this path the analyzer is convinced that the addition of these two
928 // values would overflow `size_t` which must be caused by the inaccuracy
929 // of our modeling because this method is called in situations where the
930 // summands are size/length values which are much less than SIZE_MAX. To
931 // avoid false positives let's just sink this invalid path.
932 C.addSink(StateOverflow);
933 return nullptr;
934 }
935
936 // From now on, assume an overflow didn't occur.
937 assert(StateOkay);
938 state = StateOkay;
939 }
940
941 return state;
942}
943
944ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
945 const MemRegion *MR,
946 SVal strLength) {
947 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
948
949 MR = MR->StripCasts();
950
951 switch (MR->getKind()) {
952 case MemRegion::StringRegionKind:
953 // FIXME: This can happen if we strcpy() into a string region. This is
954 // undefined [C99 6.4.5p6], but we should still warn about it.
955 return state;
956
957 case MemRegion::SymbolicRegionKind:
958 case MemRegion::AllocaRegionKind:
959 case MemRegion::NonParamVarRegionKind:
960 case MemRegion::ParamVarRegionKind:
961 case MemRegion::FieldRegionKind:
962 case MemRegion::ObjCIvarRegionKind:
963 // These are the types we can currently track string lengths for.
964 break;
965
966 case MemRegion::ElementRegionKind:
967 // FIXME: Handle element regions by upper-bounding the parent region's
968 // string length.
969 return state;
970
971 default:
972 // Other regions (mostly non-data) can't have a reliable C string length.
973 // For now, just ignore the change.
974 // FIXME: These are rare but not impossible. We should output some kind of
975 // warning for things like strcpy((char[]){'a', 0}, "b");
976 return state;
977 }
978
979 if (strLength.isUnknown())
980 return state->remove<CStringLength>(MR);
981
982 return state->set<CStringLength>(MR, strLength);
983}
984
985SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
986 ProgramStateRef &state,
987 const Expr *Ex,
988 const MemRegion *MR,
989 bool hypothetical) {
990 if (!hypothetical) {
991 // If there's a recorded length, go ahead and return it.
992 const SVal *Recorded = state->get<CStringLength>(MR);
993 if (Recorded)
994 return *Recorded;
995 }
996
997 // Otherwise, get a new symbol and update the state.
998 SValBuilder &svalBuilder = C.getSValBuilder();
999 QualType sizeTy = svalBuilder.getContext().getSizeType();
1000 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
1001 MR, Ex, sizeTy,
1002 C.getLocationContext(),
1003 C.blockCount());
1004
1005 if (!hypothetical) {
1006 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1007 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1008 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1009 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
1010 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
1011 std::optional<APSIntPtr> maxLengthInt =
1012 BVF.evalAPSInt(BO_Div, maxValInt, fourInt);
1013 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
1014 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
1015 svalBuilder.getConditionType());
1016 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
1017 }
1018 state = state->set<CStringLength>(MR, strLength);
1019 }
1020
1021 return strLength;
1022}
1023
1024const StringLiteral *
1025CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
1026 switch (MR->getKind()) {
1027 case MemRegion::StringRegionKind:
1028 return cast<StringRegion>(MR)->getStringLiteral();
1029 case MemRegion::NonParamVarRegionKind:
1030 if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1031 Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
1032 return dyn_cast_or_null<StringLiteral>(Decl->getInit());
1033 return nullptr;
1034 default:
1035 return nullptr;
1036 }
1037}
1038
1039SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1040 const Expr *Ex, SVal Buf,
1041 bool hypothetical) const {
1042 const MemRegion *MR = Buf.getAsRegion();
1043 if (!MR) {
1044 // If we can't get a region, see if it's something we /know/ isn't a
1045 // C string. In the context of locations, the only time we can issue such
1046 // a warning is for labels.
1047 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1048 if (NotNullTerm.isEnabled()) {
1049 SmallString<120> buf;
1050 llvm::raw_svector_ostream os(buf);
1051 assert(CurrentFunctionDescription);
1052 os << "Argument to " << CurrentFunctionDescription
1053 << " is the address of the label '" << Label->getLabel()->getName()
1054 << "', which is not a null-terminated string";
1055
1056 emitNotCStringBug(C, state, Ex, os.str());
1057 }
1058 return UndefinedVal();
1059 }
1060
1061 // If it's not a region and not a label, give up.
1062 return UnknownVal();
1063 }
1064
1065 // If we have a region, strip casts from it and see if we can figure out
1066 // its length. For anything we can't figure out, just return UnknownVal.
1067 MR = MR->StripCasts();
1068
1069 if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
1070 // If we have a global constant with a string literal initializer,
1071 // compute the initializer's length.
1072 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1073 // so we can assume that the byte length is the correct C string length.
1074 // FIXME: Embedded null characters are not handled.
1075 SValBuilder &SVB = C.getSValBuilder();
1076 return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
1077 }
1078
1079 switch (MR->getKind()) {
1080 case MemRegion::StringRegionKind:
1081 case MemRegion::NonParamVarRegionKind:
1082 case MemRegion::SymbolicRegionKind:
1083 case MemRegion::AllocaRegionKind:
1084 case MemRegion::ParamVarRegionKind:
1085 case MemRegion::FieldRegionKind:
1086 case MemRegion::ObjCIvarRegionKind:
1087 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1088 case MemRegion::CompoundLiteralRegionKind:
1089 // FIXME: Can we track this? Is it necessary?
1090 return UnknownVal();
1091 case MemRegion::ElementRegionKind: {
1092 // If an offset into the string literal is used, use the original length
1093 // minus the offset.
1094 // FIXME: Embedded null characters are not handled.
1095 const ElementRegion *ER = cast<ElementRegion>(MR);
1096 const SubRegion *SuperReg =
1098 const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
1099 if (!StrLit)
1100 return UnknownVal();
1101 SValBuilder &SVB = C.getSValBuilder();
1102 NonLoc Idx = ER->getIndex();
1103 QualType SizeTy = SVB.getContext().getSizeType();
1104 NonLoc LengthVal =
1105 SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>();
1106 if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
1107 SVB.getConditionType())
1108 .castAs<DefinedOrUnknownSVal>(),
1109 true))
1110 return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy);
1111 return UnknownVal();
1112 }
1113 default:
1114 // Other regions (mostly non-data) can't have a reliable C string length.
1115 // In this case, an error is emitted and UndefinedVal is returned.
1116 // The caller should always be prepared to handle this case.
1117 if (NotNullTerm.isEnabled()) {
1118 SmallString<120> buf;
1119 llvm::raw_svector_ostream os(buf);
1120
1121 assert(CurrentFunctionDescription);
1122 os << "Argument to " << CurrentFunctionDescription << " is ";
1123
1124 if (SummarizeRegion(os, C.getASTContext(), MR))
1125 os << ", which is not a null-terminated string";
1126 else
1127 os << "not a null-terminated string";
1128
1129 emitNotCStringBug(C, state, Ex, os.str());
1130 }
1131 return UndefinedVal();
1132 }
1133}
1134
1135const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1136 ProgramStateRef &state, const Expr *expr, SVal val) const {
1137 // FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
1138
1139 // Get the memory region pointed to by the val.
1140 const MemRegion *bufRegion = val.getAsRegion();
1141 if (!bufRegion)
1142 return nullptr;
1143
1144 // Strip casts off the memory region.
1145 bufRegion = bufRegion->StripCasts();
1146
1147 // Cast the memory region to a string region.
1148 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1149 if (!strRegion)
1150 return nullptr;
1151
1152 // Return the actual string in the string region.
1153 return strRegion->getStringLiteral();
1154}
1155
1156bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1157 SVal BufVal, QualType BufTy,
1158 SVal LengthVal, QualType LengthTy) {
1159 // If we do not know that the buffer is long enough we return 'true'.
1160 // Otherwise the parent region of this field region would also get
1161 // invalidated, which would lead to warnings based on an unknown state.
1162
1163 if (LengthVal.isUnknown())
1164 return false;
1165
1166 // Originally copied from CheckBufferAccess and CheckLocation.
1167 SValBuilder &SB = C.getSValBuilder();
1168 ASTContext &Ctx = C.getASTContext();
1169
1170 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1171
1172 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1173 if (!Length)
1174 return true; // cf top comment.
1175
1176 // Compute the offset of the last element to be accessed: size-1.
1177 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1178 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1179 if (Offset.isUnknown())
1180 return true; // cf top comment
1181 NonLoc LastOffset = Offset.castAs<NonLoc>();
1182
1183 // Check that the first buffer is sufficiently long.
1184 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1185 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1186 if (!BufLoc)
1187 return true; // cf top comment.
1188
1189 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1190
1191 // Check for out of bound array element access.
1192 const MemRegion *R = BufEnd.getAsRegion();
1193 if (!R)
1194 return true; // cf top comment.
1195
1196 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1197 if (!ER)
1198 return true; // cf top comment.
1199
1200 // Support library functions defined with non-default address spaces
1201 assert(ER->getValueType()->getCanonicalTypeUnqualified() ==
1202 C.getASTContext().CharTy &&
1203 "isFirstBufInBound should only be called with char* ElementRegions");
1204
1205 // Get the size of the array.
1206 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1207 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1208
1209 // Get the index of the accessed element.
1210 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1211
1212 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1213
1214 return static_cast<bool>(StInBound);
1215}
1216
1217ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1218 CheckerContext &C, ProgramStateRef S, const Expr *BufE,
1219 ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) {
1220 auto InvalidationTraitOperations =
1221 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1222 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1223 // If destination buffer is a field region and access is in bound, do
1224 // not invalidate its super region.
1225 if (MemRegion::FieldRegionKind == R->getKind() &&
1226 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1227 ITraits.setTrait(
1228 R,
1230 }
1231 return false;
1232 };
1233
1234 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1235}
1236
1238CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1239 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1240 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1241 const MemRegion *R) {
1242 return isa<FieldRegion>(R);
1243 };
1244
1245 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1246}
1247
1248ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1249 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1250 auto InvalidationTraitOperations =
1251 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1252 if (MemRegion::FieldRegionKind == R->getKind())
1253 ITraits.setTrait(
1254 R,
1256 return false;
1257 };
1258
1259 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1260}
1261
1262ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1264 ConstCFGElementRef Elem,
1265 SVal BufV) {
1266 auto InvalidationTraitOperations =
1267 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1268 ITraits.setTrait(
1269 R->getBaseRegion(),
1271 ITraits.setTrait(R,
1273 return true;
1274 };
1275
1276 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1277}
1278
1279ProgramStateRef CStringChecker::invalidateBufferAux(
1280 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
1281 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1282 const MemRegion *)>
1283 InvalidationTraitOperations) {
1284 std::optional<Loc> L = V.getAs<Loc>();
1285 if (!L)
1286 return State;
1287
1288 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1289 // some assumptions about the value that CFRefCount can't. Even so, it should
1290 // probably be refactored.
1291 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1292 const MemRegion *R = MR->getRegion()->StripCasts();
1293
1294 // Are we dealing with an ElementRegion? If so, we should be invalidating
1295 // the super-region.
1296 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1297 R = ER->getSuperRegion();
1298 // FIXME: What about layers of ElementRegions?
1299 }
1300
1301 // Invalidate this region.
1302 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1303 RegionAndSymbolInvalidationTraits ITraits;
1304 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1305
1306 return State->invalidateRegions(R, Elem, C.blockCount(), LCtx,
1307 CausesPointerEscape, nullptr, nullptr,
1308 &ITraits);
1309 }
1310
1311 // If we have a non-region value by chance, just remove the binding.
1312 // FIXME: is this necessary or correct? This handles the non-Region
1313 // cases. Is it ever valid to store to these?
1314 return State->killBinding(*L);
1315}
1316
1317bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1318 const MemRegion *MR) {
1319 switch (MR->getKind()) {
1320 case MemRegion::FunctionCodeRegionKind: {
1321 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1322 os << "the address of the function '" << *FD << '\'';
1323 else
1324 os << "the address of a function";
1325 return true;
1326 }
1327 case MemRegion::BlockCodeRegionKind:
1328 os << "block text";
1329 return true;
1330 case MemRegion::BlockDataRegionKind:
1331 os << "a block";
1332 return true;
1333 case MemRegion::CXXThisRegionKind:
1334 case MemRegion::CXXTempObjectRegionKind:
1335 os << "a C++ temp object of type "
1336 << cast<TypedValueRegion>(MR)->getValueType();
1337 return true;
1338 case MemRegion::NonParamVarRegionKind:
1339 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1340 return true;
1341 case MemRegion::ParamVarRegionKind:
1342 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1343 return true;
1344 case MemRegion::FieldRegionKind:
1345 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1346 return true;
1347 case MemRegion::ObjCIvarRegionKind:
1348 os << "an instance variable of type "
1349 << cast<TypedValueRegion>(MR)->getValueType();
1350 return true;
1351 default:
1352 return false;
1353 }
1354}
1355
1356bool CStringChecker::memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
1357 SVal CharVal, const Expr *Size,
1358 CheckerContext &C, ProgramStateRef &State) {
1359 SVal MemVal = C.getSVal(DstBuffer);
1360 SVal SizeVal = C.getSVal(Size);
1361 const MemRegion *MR = MemVal.getAsRegion();
1362 if (!MR)
1363 return false;
1364
1365 // We're about to model memset by producing a "default binding" in the Store.
1366 // Our current implementation - RegionStore - doesn't support default bindings
1367 // that don't cover the whole base region. So we should first get the offset
1368 // and the base region to figure out whether the offset of buffer is 0.
1369 RegionOffset Offset = MR->getAsOffset();
1370 const MemRegion *BR = Offset.getRegion();
1371
1372 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1373 if (!SizeNL)
1374 return false;
1375
1376 SValBuilder &svalBuilder = C.getSValBuilder();
1377 ASTContext &Ctx = C.getASTContext();
1378
1379 // void *memset(void *dest, int ch, size_t count);
1380 // For now we can only handle the case of offset is 0 and concrete char value.
1381 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1382 Offset.getOffset() == 0) {
1383 // Get the base region's size.
1384 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1385
1386 ProgramStateRef StateWholeReg, StateNotWholeReg;
1387 std::tie(StateWholeReg, StateNotWholeReg) =
1388 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1389
1390 // With the semantic of 'memset()', we should convert the CharVal to
1391 // unsigned char.
1392 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1393
1394 ProgramStateRef StateNullChar, StateNonNullChar;
1395 std::tie(StateNullChar, StateNonNullChar) =
1396 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1397
1398 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1399 !StateNonNullChar) {
1400 // If the 'memset()' acts on the whole region of destination buffer and
1401 // the value of the second argument of 'memset()' is zero, bind the second
1402 // argument's value to the destination buffer with 'default binding'.
1403 // FIXME: Since there is no perfect way to bind the non-zero character, we
1404 // can only deal with zero value here. In the future, we need to deal with
1405 // the binding of non-zero value in the case of whole region.
1406 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1407 C.getLocationContext());
1408 } else {
1409 // If the destination buffer's extent is not equal to the value of
1410 // third argument, just invalidate buffer.
1411 State = invalidateDestinationBufferBySize(
1412 C, State, DstBuffer, Elem, MemVal, SizeVal, Size->getType());
1413 }
1414
1415 if (StateNullChar && !StateNonNullChar) {
1416 // If the value of the second argument of 'memset()' is zero, set the
1417 // string length of destination buffer to 0 directly.
1418 State = setCStringLength(State, MR,
1419 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1420 } else if (!StateNullChar && StateNonNullChar) {
1421 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1422 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1423 C.getLocationContext(), C.blockCount());
1424
1425 // If the value of second argument is not zero, then the string length
1426 // is at least the size argument.
1427 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1428 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1429
1430 State = setCStringLength(
1431 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1432 MR, NewStrLen);
1433 }
1434 } else {
1435 // If the offset is not zero and char value is not concrete, we can do
1436 // nothing but invalidate the buffer.
1437 State = invalidateDestinationBufferBySize(C, State, DstBuffer, Elem, MemVal,
1438 SizeVal, Size->getType());
1439 }
1440 return true;
1441}
1442
1443//===----------------------------------------------------------------------===//
1444// evaluation of individual function calls.
1445//===----------------------------------------------------------------------===//
1446
1447void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1448 ProgramStateRef state, SizeArgExpr Size,
1449 DestinationArgExpr Dest,
1450 SourceArgExpr Source, bool Restricted,
1451 bool IsMempcpy, CharKind CK) const {
1452 CurrentFunctionDescription = "memory copy function";
1453
1454 // See if the size argument is zero.
1455 const LocationContext *LCtx = C.getLocationContext();
1456 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1457 QualType sizeTy = Size.Expression->getType();
1458
1459 ProgramStateRef stateZeroSize, stateNonZeroSize;
1460 std::tie(stateZeroSize, stateNonZeroSize) =
1461 assumeZero(C, state, sizeVal, sizeTy);
1462
1463 // Get the value of the Dest.
1464 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1465
1466 // If the size is zero, there won't be any actual memory access, so
1467 // just bind the return value to the destination buffer and return.
1468 if (stateZeroSize && !stateNonZeroSize) {
1469 stateZeroSize =
1470 stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1471 C.addTransition(stateZeroSize);
1472 return;
1473 }
1474
1475 // If the size can be nonzero, we have to check the other arguments.
1476 if (stateNonZeroSize) {
1477 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1478 // to the size of the destination buffer, then emit a warning
1479 // that an attacker may provoke a buffer overflow error.
1480 state = stateNonZeroSize;
1481
1482 // Ensure the destination is not null. If it is NULL there will be a
1483 // NULL pointer dereference.
1484 state = checkNonNull(C, state, Dest, destVal);
1485 if (!state)
1486 return;
1487
1488 // Get the value of the Src.
1489 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1490
1491 // Ensure the source is not null. If it is NULL there will be a
1492 // NULL pointer dereference.
1493 state = checkNonNull(C, state, Source, srcVal);
1494 if (!state)
1495 return;
1496
1497 // Ensure the accesses are valid and that the buffers do not overlap.
1498 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1499 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1500
1501 if (Restricted)
1502 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1503
1504 if (!state)
1505 return;
1506
1507 // If this is mempcpy, get the byte after the last byte copied and
1508 // bind the expr.
1509 if (IsMempcpy) {
1510 // Get the byte after the last byte copied.
1511 SValBuilder &SvalBuilder = C.getSValBuilder();
1512 ASTContext &Ctx = SvalBuilder.getContext();
1513 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1514 SVal DestRegCharVal =
1515 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1516 SVal lastElement = C.getSValBuilder().evalBinOp(
1517 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1518 // If we don't know how much we copied, we can at least
1519 // conjure a return value for later.
1520 if (lastElement.isUnknown())
1521 lastElement = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1522
1523 // The byte after the last byte copied is the return value.
1524 state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1525 } else {
1526 // All other copies return the destination buffer.
1527 // (Well, bcopy() has a void return type, but this won't hurt.)
1528 state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1529 }
1530
1531 // Invalidate the destination (regular invalidation without pointer-escaping
1532 // the address of the top-level region).
1533 // FIXME: Even if we can't perfectly model the copy, we should see if we
1534 // can use LazyCompoundVals to copy the source values into the destination.
1535 // This would probably remove any existing bindings past the end of the
1536 // copied region, but that's still an improvement over blank invalidation.
1537 state = invalidateDestinationBufferBySize(
1538 C, state, Dest.Expression, Call.getCFGElementRef(),
1539 C.getSVal(Dest.Expression), sizeVal, Size.Expression->getType());
1540
1541 // Invalidate the source (const-invalidation without const-pointer-escaping
1542 // the address of the top-level region).
1543 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(),
1544 C.getSVal(Source.Expression));
1545
1546 C.addTransition(state);
1547 }
1548}
1549
1550void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1551 CharKind CK) const {
1552 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1553 // The return value is the address of the destination buffer.
1554 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1555 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1556 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1557
1558 ProgramStateRef State = C.getState();
1559
1560 constexpr bool IsRestricted = true;
1561 constexpr bool IsMempcpy = false;
1562 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1563}
1564
1565void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1566 CharKind CK) const {
1567 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1568 // The return value is a pointer to the byte following the last written byte.
1569 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1570 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1571 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1572
1573 constexpr bool IsRestricted = true;
1574 constexpr bool IsMempcpy = true;
1575 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1576 IsMempcpy, CK);
1577}
1578
1579void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1580 CharKind CK) const {
1581 // void *memmove(void *dst, const void *src, size_t n);
1582 // The return value is the address of the destination buffer.
1583 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1584 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1585 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1586
1587 constexpr bool IsRestricted = false;
1588 constexpr bool IsMempcpy = false;
1589 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1590 IsMempcpy, CK);
1591}
1592
1593void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1594 // void bcopy(const void *src, void *dst, size_t n);
1595 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1596 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1597 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1598
1599 constexpr bool IsRestricted = false;
1600 constexpr bool IsMempcpy = false;
1601 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1602 IsMempcpy, CharKind::Regular);
1603}
1604
1605void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1606 CharKind CK) const {
1607 // int memcmp(const void *s1, const void *s2, size_t n);
1608 CurrentFunctionDescription = "memory comparison function";
1609
1610 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1611 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1612 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1613
1614 ProgramStateRef State = C.getState();
1615 SValBuilder &Builder = C.getSValBuilder();
1616 const LocationContext *LCtx = C.getLocationContext();
1617
1618 // See if the size argument is zero.
1619 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1620 QualType sizeTy = Size.Expression->getType();
1621
1622 ProgramStateRef stateZeroSize, stateNonZeroSize;
1623 std::tie(stateZeroSize, stateNonZeroSize) =
1624 assumeZero(C, State, sizeVal, sizeTy);
1625
1626 // If the size can be zero, the result will be 0 in that case, and we don't
1627 // have to check either of the buffers.
1628 if (stateZeroSize) {
1629 State = stateZeroSize;
1630 State = State->BindExpr(Call.getOriginExpr(), LCtx,
1631 Builder.makeZeroVal(Call.getResultType()));
1632 C.addTransition(State);
1633 }
1634
1635 // If the size can be nonzero, we have to check the other arguments.
1636 if (stateNonZeroSize) {
1637 State = stateNonZeroSize;
1638 // If we know the two buffers are the same, we know the result is 0.
1639 // First, get the two buffers' addresses. Another checker will have already
1640 // made sure they're not undefined.
1641 DefinedOrUnknownSVal LV =
1642 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1643 DefinedOrUnknownSVal RV =
1644 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1645
1646 // See if they are the same.
1647 ProgramStateRef SameBuffer, NotSameBuffer;
1648 std::tie(SameBuffer, NotSameBuffer) =
1649 State->assume(Builder.evalEQ(State, LV, RV));
1650
1651 // If the two arguments are the same buffer, we know the result is 0,
1652 // and we only need to check one size.
1653 if (SameBuffer && !NotSameBuffer) {
1654 State = SameBuffer;
1655 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1656 if (State) {
1657 State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1658 Builder.makeZeroVal(Call.getResultType()));
1659 C.addTransition(State);
1660 }
1661 return;
1662 }
1663
1664 // If the two arguments might be different buffers, we have to check
1665 // the size of both of them.
1666 assert(NotSameBuffer);
1667 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1668 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1669 if (State) {
1670 // The return value is the comparison result, which we don't know.
1671 SVal CmpV = Builder.conjureSymbolVal(Call, C.blockCount());
1672 State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1673 C.addTransition(State);
1674 }
1675 }
1676}
1677
1678void CStringChecker::evalstrLength(CheckerContext &C,
1679 const CallEvent &Call) const {
1680 // size_t strlen(const char *s);
1681 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1682}
1683
1684void CStringChecker::evalstrnLength(CheckerContext &C,
1685 const CallEvent &Call) const {
1686 // size_t strnlen(const char *s, size_t maxlen);
1687 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1688}
1689
1690void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1691 const CallEvent &Call,
1692 bool IsStrnlen) const {
1693 CurrentFunctionDescription = "string length function";
1694 ProgramStateRef state = C.getState();
1695 const LocationContext *LCtx = C.getLocationContext();
1696
1697 if (IsStrnlen) {
1698 const Expr *maxlenExpr = Call.getArgExpr(1);
1699 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1700
1701 ProgramStateRef stateZeroSize, stateNonZeroSize;
1702 std::tie(stateZeroSize, stateNonZeroSize) =
1703 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1704
1705 // If the size can be zero, the result will be 0 in that case, and we don't
1706 // have to check the string itself.
1707 if (stateZeroSize) {
1708 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1709 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1710 C.addTransition(stateZeroSize);
1711 }
1712
1713 // If the size is GUARANTEED to be zero, we're done!
1714 if (!stateNonZeroSize)
1715 return;
1716
1717 // Otherwise, record the assumption that the size is nonzero.
1718 state = stateNonZeroSize;
1719 }
1720
1721 // Check that the string argument is non-null.
1722 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1723 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1724 state = checkNonNull(C, state, Arg, ArgVal);
1725
1726 if (!state)
1727 return;
1728
1729 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1730
1731 // If the argument isn't a valid C string, there's no valid state to
1732 // transition to.
1733 if (strLength.isUndef())
1734 return;
1735
1736 DefinedOrUnknownSVal result = UnknownVal();
1737
1738 // If the check is for strnlen() then bind the return value to no more than
1739 // the maxlen value.
1740 if (IsStrnlen) {
1741 QualType cmpTy = C.getSValBuilder().getConditionType();
1742
1743 // It's a little unfortunate to be getting this again,
1744 // but it's not that expensive...
1745 const Expr *maxlenExpr = Call.getArgExpr(1);
1746 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1747
1748 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1749 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1750
1751 if (strLengthNL && maxlenValNL) {
1752 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1753
1754 // Check if the strLength is greater than the maxlen.
1755 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1756 C.getSValBuilder()
1757 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1758 .castAs<DefinedOrUnknownSVal>());
1759
1760 if (stateStringTooLong && !stateStringNotTooLong) {
1761 // If the string is longer than maxlen, return maxlen.
1762 result = *maxlenValNL;
1763 } else if (stateStringNotTooLong && !stateStringTooLong) {
1764 // If the string is shorter than maxlen, return its length.
1765 result = *strLengthNL;
1766 }
1767 }
1768
1769 if (result.isUnknown()) {
1770 // If we don't have enough information for a comparison, there's
1771 // no guarantee the full string length will actually be returned.
1772 // All we know is the return value is the min of the string length
1773 // and the limit. This is better than nothing.
1774 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1775 NonLoc resultNL = result.castAs<NonLoc>();
1776
1777 if (strLengthNL) {
1778 state = state->assume(C.getSValBuilder().evalBinOpNN(
1779 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1780 .castAs<DefinedOrUnknownSVal>(), true);
1781 }
1782
1783 if (maxlenValNL) {
1784 state = state->assume(C.getSValBuilder().evalBinOpNN(
1785 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1786 .castAs<DefinedOrUnknownSVal>(), true);
1787 }
1788 }
1789
1790 } else {
1791 // This is a plain strlen(), not strnlen().
1792 result = strLength.castAs<DefinedOrUnknownSVal>();
1793
1794 // If we don't know the length of the string, conjure a return
1795 // value, so it can be used in constraints, at least.
1796 if (result.isUnknown()) {
1797 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1798 }
1799 }
1800
1801 // Bind the return value.
1802 assert(!result.isUnknown() && "Should have conjured a value by now");
1803 state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1804 C.addTransition(state);
1805}
1806
1807void CStringChecker::evalStrcpy(CheckerContext &C,
1808 const CallEvent &Call) const {
1809 // char *strcpy(char *restrict dst, const char *restrict src);
1810 evalStrcpyCommon(C, Call,
1811 /* ReturnEnd = */ false,
1812 /* IsBounded = */ false,
1813 /* appendK = */ ConcatFnKind::none);
1814}
1815
1816void CStringChecker::evalStrncpy(CheckerContext &C,
1817 const CallEvent &Call) const {
1818 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1819 evalStrcpyCommon(C, Call,
1820 /* ReturnEnd = */ false,
1821 /* IsBounded = */ true,
1822 /* appendK = */ ConcatFnKind::none);
1823}
1824
1825void CStringChecker::evalStpcpy(CheckerContext &C,
1826 const CallEvent &Call) const {
1827 // char *stpcpy(char *restrict dst, const char *restrict src);
1828 evalStrcpyCommon(C, Call,
1829 /* ReturnEnd = */ true,
1830 /* IsBounded = */ false,
1831 /* appendK = */ ConcatFnKind::none);
1832}
1833
1834void CStringChecker::evalStrlcpy(CheckerContext &C,
1835 const CallEvent &Call) const {
1836 // size_t strlcpy(char *dest, const char *src, size_t size);
1837 evalStrcpyCommon(C, Call,
1838 /* ReturnEnd = */ true,
1839 /* IsBounded = */ true,
1840 /* appendK = */ ConcatFnKind::none,
1841 /* returnPtr = */ false);
1842}
1843
1844void CStringChecker::evalStrcat(CheckerContext &C,
1845 const CallEvent &Call) const {
1846 // char *strcat(char *restrict s1, const char *restrict s2);
1847 evalStrcpyCommon(C, Call,
1848 /* ReturnEnd = */ false,
1849 /* IsBounded = */ false,
1850 /* appendK = */ ConcatFnKind::strcat);
1851}
1852
1853void CStringChecker::evalStrncat(CheckerContext &C,
1854 const CallEvent &Call) const {
1855 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1856 evalStrcpyCommon(C, Call,
1857 /* ReturnEnd = */ false,
1858 /* IsBounded = */ true,
1859 /* appendK = */ ConcatFnKind::strcat);
1860}
1861
1862void CStringChecker::evalStrlcat(CheckerContext &C,
1863 const CallEvent &Call) const {
1864 // size_t strlcat(char *dst, const char *src, size_t size);
1865 // It will append at most size - strlen(dst) - 1 bytes,
1866 // NULL-terminating the result.
1867 evalStrcpyCommon(C, Call,
1868 /* ReturnEnd = */ false,
1869 /* IsBounded = */ true,
1870 /* appendK = */ ConcatFnKind::strlcat,
1871 /* returnPtr = */ false);
1872}
1873
1874void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1875 bool ReturnEnd, bool IsBounded,
1876 ConcatFnKind appendK,
1877 bool returnPtr) const {
1878 if (appendK == ConcatFnKind::none)
1879 CurrentFunctionDescription = "string copy function";
1880 else
1881 CurrentFunctionDescription = "string concatenation function";
1882
1883 ProgramStateRef state = C.getState();
1884 const LocationContext *LCtx = C.getLocationContext();
1885
1886 // Check that the destination is non-null.
1887 DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1888 SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1889 state = checkNonNull(C, state, Dst, DstVal);
1890 if (!state)
1891 return;
1892
1893 // Check that the source is non-null.
1894 SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1895 SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1896 state = checkNonNull(C, state, srcExpr, srcVal);
1897 if (!state)
1898 return;
1899
1900 // Get the string length of the source.
1901 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1902 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1903
1904 // Get the string length of the destination buffer.
1905 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1906 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1907
1908 // If the source isn't a valid C string, give up.
1909 if (strLength.isUndef())
1910 return;
1911
1912 SValBuilder &svalBuilder = C.getSValBuilder();
1913 QualType cmpTy = svalBuilder.getConditionType();
1914 QualType sizeTy = svalBuilder.getContext().getSizeType();
1915
1916 // These two values allow checking two kinds of errors:
1917 // - actual overflows caused by a source that doesn't fit in the destination
1918 // - potential overflows caused by a bound that could exceed the destination
1919 SVal amountCopied = UnknownVal();
1920 SVal maxLastElementIndex = UnknownVal();
1921 const char *boundWarning = nullptr;
1922
1923 // FIXME: Why do we choose the srcExpr if the access has no size?
1924 // Note that the 3rd argument of the call would be the size parameter.
1925 SizeArgExpr SrcExprAsSizeDummy = {
1926 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1927 state = CheckOverlap(
1928 C, state,
1929 (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1930 Dst, srcExpr);
1931
1932 if (!state)
1933 return;
1934
1935 // If the function is strncpy, strncat, etc... it is bounded.
1936 if (IsBounded) {
1937 // Get the max number of characters to copy.
1938 SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1939 SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1940
1941 // Protect against misdeclared strncpy().
1942 lenVal =
1943 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1944
1945 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1946
1947 // If we know both values, we might be able to figure out how much
1948 // we're copying.
1949 if (strLengthNL && lenValNL) {
1950 switch (appendK) {
1951 case ConcatFnKind::none:
1952 case ConcatFnKind::strcat: {
1953 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1954 // Check if the max number to copy is less than the length of the src.
1955 // If the bound is equal to the source length, strncpy won't null-
1956 // terminate the result!
1957 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1958 svalBuilder
1959 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1960 .castAs<DefinedOrUnknownSVal>());
1961
1962 if (stateSourceTooLong && !stateSourceNotTooLong) {
1963 // Max number to copy is less than the length of the src, so the
1964 // actual strLength copied is the max number arg.
1965 state = stateSourceTooLong;
1966 amountCopied = lenVal;
1967
1968 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1969 // The source buffer entirely fits in the bound.
1970 state = stateSourceNotTooLong;
1971 amountCopied = strLength;
1972 }
1973 break;
1974 }
1975 case ConcatFnKind::strlcat:
1976 if (!dstStrLengthNL)
1977 return;
1978
1979 // amountCopied = min (size - dstLen - 1 , srcLen)
1980 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1981 *dstStrLengthNL, sizeTy);
1982 if (!isa<NonLoc>(freeSpace))
1983 return;
1984 freeSpace =
1985 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1986 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1987 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1988
1989 // While unlikely, it is possible that the subtraction is
1990 // too complex to compute, let's check whether it succeeded.
1991 if (!freeSpaceNL)
1992 return;
1993 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1994 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1995
1996 ProgramStateRef TrueState, FalseState;
1997 std::tie(TrueState, FalseState) =
1998 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1999
2000 // srcStrLength <= size - dstStrLength -1
2001 if (TrueState && !FalseState) {
2002 amountCopied = strLength;
2003 }
2004
2005 // srcStrLength > size - dstStrLength -1
2006 if (!TrueState && FalseState) {
2007 amountCopied = freeSpace;
2008 }
2009
2010 if (TrueState && FalseState)
2011 amountCopied = UnknownVal();
2012 break;
2013 }
2014 }
2015 // We still want to know if the bound is known to be too large.
2016 if (lenValNL) {
2017 switch (appendK) {
2018 case ConcatFnKind::strcat:
2019 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
2020
2021 // Get the string length of the destination. If the destination is
2022 // memory that can't have a string length, we shouldn't be copying
2023 // into it anyway.
2024 if (dstStrLength.isUndef())
2025 return;
2026
2027 if (dstStrLengthNL) {
2028 maxLastElementIndex = svalBuilder.evalBinOpNN(
2029 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
2030
2031 boundWarning = "Size argument is greater than the free space in the "
2032 "destination buffer";
2033 }
2034 break;
2035 case ConcatFnKind::none:
2036 case ConcatFnKind::strlcat:
2037 // For strncpy and strlcat, this is just checking
2038 // that lenVal <= sizeof(dst).
2039 // (Yes, strncpy and strncat differ in how they treat termination.
2040 // strncat ALWAYS terminates, but strncpy doesn't.)
2041
2042 // We need a special case for when the copy size is zero, in which
2043 // case strncpy will do no work at all. Our bounds check uses n-1
2044 // as the last element accessed, so n == 0 is problematic.
2045 ProgramStateRef StateZeroSize, StateNonZeroSize;
2046 std::tie(StateZeroSize, StateNonZeroSize) =
2047 assumeZero(C, state, *lenValNL, sizeTy);
2048
2049 // If the size is known to be zero, we're done.
2050 if (StateZeroSize && !StateNonZeroSize) {
2051 if (returnPtr) {
2052 StateZeroSize =
2053 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
2054 } else {
2055 if (appendK == ConcatFnKind::none) {
2056 // strlcpy returns strlen(src)
2057 StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
2058 LCtx, strLength);
2059 } else {
2060 // strlcat returns strlen(src) + strlen(dst)
2061 SVal retSize = svalBuilder.evalBinOp(
2062 state, BO_Add, strLength, dstStrLength, sizeTy);
2063 StateZeroSize =
2064 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2065 }
2066 }
2067 C.addTransition(StateZeroSize);
2068 return;
2069 }
2070
2071 // Otherwise, go ahead and figure out the last element we'll touch.
2072 // We don't record the non-zero assumption here because we can't
2073 // be sure. We won't warn on a possible zero.
2074 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2075 maxLastElementIndex =
2076 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2077 boundWarning = "Size argument is greater than the length of the "
2078 "destination buffer";
2079 break;
2080 }
2081 }
2082 } else {
2083 // The function isn't bounded. The amount copied should match the length
2084 // of the source buffer.
2085 amountCopied = strLength;
2086 }
2087
2088 assert(state);
2089
2090 // This represents the number of characters copied into the destination
2091 // buffer. (It may not actually be the strlen if the destination buffer
2092 // is not terminated.)
2093 SVal finalStrLength = UnknownVal();
2094 SVal strlRetVal = UnknownVal();
2095
2096 if (appendK == ConcatFnKind::none && !returnPtr) {
2097 // strlcpy returns the sizeof(src)
2098 strlRetVal = strLength;
2099 }
2100
2101 // If this is an appending function (strcat, strncat...) then set the
2102 // string length to strlen(src) + strlen(dst) since the buffer will
2103 // ultimately contain both.
2104 if (appendK != ConcatFnKind::none) {
2105 // Get the string length of the destination. If the destination is memory
2106 // that can't have a string length, we shouldn't be copying into it anyway.
2107 if (dstStrLength.isUndef())
2108 return;
2109
2110 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2111 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2112 *dstStrLengthNL, sizeTy);
2113 }
2114
2115 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2116
2117 // If we know both string lengths, we might know the final string length.
2118 if (amountCopiedNL && dstStrLengthNL) {
2119 // Make sure the two lengths together don't overflow a size_t.
2120 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2121 if (!state)
2122 return;
2123
2124 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2125 *dstStrLengthNL, sizeTy);
2126 }
2127
2128 // If we couldn't get a single value for the final string length,
2129 // we can at least bound it by the individual lengths.
2130 if (finalStrLength.isUnknown()) {
2131 // Try to get a "hypothetical" string length symbol, which we can later
2132 // set as a real value if that turns out to be the case.
2133 finalStrLength =
2134 getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2135 assert(!finalStrLength.isUndef());
2136
2137 if (std::optional<NonLoc> finalStrLengthNL =
2138 finalStrLength.getAs<NonLoc>()) {
2139 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2140 // we overwrite dst string with the src
2141 // finalStrLength >= srcStrLength
2142 SVal sourceInResult = svalBuilder.evalBinOpNN(
2143 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2144 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2145 true);
2146 if (!state)
2147 return;
2148 }
2149
2150 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2151 // we extend the dst string with the src
2152 // finalStrLength >= dstStrLength
2153 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2154 *finalStrLengthNL,
2155 *dstStrLengthNL,
2156 cmpTy);
2157 state =
2158 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2159 if (!state)
2160 return;
2161 }
2162 }
2163 }
2164
2165 } else {
2166 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2167 // the final string length will match the input string length.
2168 finalStrLength = amountCopied;
2169 }
2170
2171 SVal Result;
2172
2173 if (returnPtr) {
2174 // The final result of the function will either be a pointer past the last
2175 // copied element, or a pointer to the start of the destination buffer.
2176 Result = (ReturnEnd ? UnknownVal() : DstVal);
2177 } else {
2178 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2179 //strlcpy, strlcat
2180 Result = strlRetVal;
2181 else
2182 Result = finalStrLength;
2183 }
2184
2185 assert(state);
2186
2187 // If the destination is a MemRegion, try to check for a buffer overflow and
2188 // record the new string length.
2189 if (std::optional<loc::MemRegionVal> dstRegVal =
2190 DstVal.getAs<loc::MemRegionVal>()) {
2191 QualType ptrTy = Dst.Expression->getType();
2192
2193 // If we have an exact value on a bounded copy, use that to check for
2194 // overflows, rather than our estimate about how much is actually copied.
2195 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2196 SVal maxLastElement =
2197 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2198
2199 // Check if the first byte of the destination is writable.
2200 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2201 if (!state)
2202 return;
2203 // Check if the last byte of the destination is writable.
2204 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2205 if (!state)
2206 return;
2207 }
2208
2209 // Then, if the final length is known...
2210 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2211 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2212 *knownStrLength, ptrTy);
2213
2214 // ...and we haven't checked the bound, we'll check the actual copy.
2215 if (!boundWarning) {
2216 // Check if the first byte of the destination is writable.
2217 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2218 if (!state)
2219 return;
2220 // Check if the last byte of the destination is writable.
2221 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2222 if (!state)
2223 return;
2224 }
2225
2226 // If this is a stpcpy-style copy, the last element is the return value.
2227 if (returnPtr && ReturnEnd)
2228 Result = lastElement;
2229 }
2230
2231 // For bounded method, amountCopied take the minimum of two values,
2232 // for ConcatFnKind::strlcat:
2233 // amountCopied = min (size - dstLen - 1 , srcLen)
2234 // for others:
2235 // amountCopied = min (srcLen, size)
2236 // So even if we don't know about amountCopied, as long as one of them will
2237 // not cause an out-of-bound access, the whole function's operation will not
2238 // too, that will avoid invalidating the superRegion of data member in that
2239 // situation.
2240 bool CouldAccessOutOfBound = true;
2241 if (IsBounded && amountCopied.isUnknown()) {
2242 auto CouldAccessOutOfBoundForSVal =
2243 [&](std::optional<NonLoc> Val) -> bool {
2244 if (!Val)
2245 return true;
2246 return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2247 Dst.Expression->getType(), *Val,
2248 C.getASTContext().getSizeType());
2249 };
2250
2251 CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2252
2253 if (CouldAccessOutOfBound) {
2254 // Get the max number of characters to copy.
2255 const Expr *LenExpr = Call.getArgExpr(2);
2256 SVal LenVal = state->getSVal(LenExpr, LCtx);
2257
2258 // Protect against misdeclared strncpy().
2259 LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2260
2261 // Because analyzer doesn't handle expressions like `size -
2262 // dstLen - 1` very well, we roughly use `size` for
2263 // ConcatFnKind::strlcat here, same with other concat kinds.
2264 CouldAccessOutOfBound =
2265 CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2266 }
2267 }
2268
2269 // Invalidate the destination (regular invalidation without pointer-escaping
2270 // the address of the top-level region). This must happen before we set the
2271 // C string length because invalidation will clear the length.
2272 // FIXME: Even if we can't perfectly model the copy, we should see if we
2273 // can use LazyCompoundVals to copy the source values into the destination.
2274 // This would probably remove any existing bindings past the end of the
2275 // string, but that's still an improvement over blank invalidation.
2276 if (CouldAccessOutOfBound)
2277 state = invalidateDestinationBufferBySize(
2278 C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2279 amountCopied, C.getASTContext().getSizeType());
2280 else
2281 state = invalidateDestinationBufferNeverOverflows(
2282 C, state, Call.getCFGElementRef(), *dstRegVal);
2283
2284 // Invalidate the source (const-invalidation without const-pointer-escaping
2285 // the address of the top-level region).
2286 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(), srcVal);
2287
2288 // Set the C string length of the destination, if we know it.
2289 if (IsBounded && (appendK == ConcatFnKind::none)) {
2290 // strncpy is annoying in that it doesn't guarantee to null-terminate
2291 // the result string. If the original string didn't fit entirely inside
2292 // the bound (including the null-terminator), we don't know how long the
2293 // result is.
2294 if (amountCopied != strLength)
2295 finalStrLength = UnknownVal();
2296 }
2297 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2298 }
2299
2300 assert(state);
2301
2302 if (returnPtr) {
2303 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2304 // overflow, we still need a result. Conjure a return value.
2305 if (ReturnEnd && Result.isUnknown()) {
2306 Result = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2307 }
2308 }
2309 // Set the return value.
2310 state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2311 C.addTransition(state);
2312}
2313
2314void CStringChecker::evalStrxfrm(CheckerContext &C,
2315 const CallEvent &Call) const {
2316 // size_t strxfrm(char *dest, const char *src, size_t n);
2317 CurrentFunctionDescription = "locale transformation function";
2318
2319 ProgramStateRef State = C.getState();
2320 const LocationContext *LCtx = C.getLocationContext();
2321 SValBuilder &SVB = C.getSValBuilder();
2322
2323 // Get arguments
2324 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2325 SourceArgExpr Source = {{Call.getArgExpr(1), 1}};
2326 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2327
2328 // `src` can never be null
2329 SVal SrcVal = State->getSVal(Source.Expression, LCtx);
2330 State = checkNonNull(C, State, Source, SrcVal);
2331 if (!State)
2332 return;
2333
2334 // Buffer must not overlap
2335 State = CheckOverlap(C, State, Size, Dest, Source, CK_Regular);
2336 if (!State)
2337 return;
2338
2339 // The function returns an implementation-defined length needed for
2340 // transformation
2341 SVal RetVal = SVB.conjureSymbolVal(Call, C.blockCount());
2342
2343 auto BindReturnAndTransition = [&RetVal, &Call, LCtx,
2344 &C](ProgramStateRef State) {
2345 if (State) {
2346 State = State->BindExpr(Call.getOriginExpr(), LCtx, RetVal);
2347 C.addTransition(State);
2348 }
2349 };
2350
2351 // Check if size is zero
2352 SVal SizeVal = State->getSVal(Size.Expression, LCtx);
2353 QualType SizeTy = Size.Expression->getType();
2354
2355 auto [StateZeroSize, StateSizeNonZero] =
2356 assumeZero(C, State, SizeVal, SizeTy);
2357
2358 // We can't assume anything about size, just bind the return value and be done
2359 if (!StateZeroSize && !StateSizeNonZero)
2360 return BindReturnAndTransition(State);
2361
2362 // If `n` is 0, we just return the implementation defined length
2363 if (StateZeroSize && !StateSizeNonZero)
2364 return BindReturnAndTransition(StateZeroSize);
2365
2366 // If `n` is not 0, `dest` can not be null.
2367 SVal DestVal = StateSizeNonZero->getSVal(Dest.Expression, LCtx);
2368 StateSizeNonZero = checkNonNull(C, StateSizeNonZero, Dest, DestVal);
2369 if (!StateSizeNonZero)
2370 return;
2371
2372 // Check that we can write to the destination buffer
2373 StateSizeNonZero = CheckBufferAccess(C, StateSizeNonZero, Dest, Size,
2374 AccessKind::write, CK_Regular);
2375 if (!StateSizeNonZero)
2376 return;
2377
2378 // Success: return value < `n`
2379 // Failure: return value >= `n`
2380 auto ComparisonVal = SVB.evalBinOp(StateSizeNonZero, BO_LT, RetVal, SizeVal,
2381 SVB.getConditionType())
2382 .getAs<DefinedOrUnknownSVal>();
2383 if (!ComparisonVal) {
2384 // Fallback: invalidate the buffer.
2385 StateSizeNonZero = invalidateDestinationBufferBySize(
2386 C, StateSizeNonZero, Dest.Expression, Call.getCFGElementRef(), DestVal,
2387 SizeVal, Size.Expression->getType());
2388 return BindReturnAndTransition(StateSizeNonZero);
2389 }
2390
2391 auto [StateSuccess, StateFailure] = StateSizeNonZero->assume(*ComparisonVal);
2392
2393 if (StateSuccess) {
2394 // The transformation invalidated the buffer.
2395 StateSuccess = invalidateDestinationBufferBySize(
2396 C, StateSuccess, Dest.Expression, Call.getCFGElementRef(), DestVal,
2397 SizeVal, Size.Expression->getType());
2398 BindReturnAndTransition(StateSuccess);
2399 // Fallthrough: We also want to add a transition to the failure state below.
2400 }
2401
2402 if (StateFailure) {
2403 // `dest` buffer content is undefined
2404 if (auto DestLoc = DestVal.getAs<loc::MemRegionVal>()) {
2405 StateFailure = StateFailure->killBinding(*DestLoc);
2406 StateFailure =
2407 StateFailure->bindDefaultInitial(*DestLoc, UndefinedVal{}, LCtx);
2408 }
2409
2410 BindReturnAndTransition(StateFailure);
2411 }
2412}
2413
2414void CStringChecker::evalStrcmp(CheckerContext &C,
2415 const CallEvent &Call) const {
2416 //int strcmp(const char *s1, const char *s2);
2417 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2418}
2419
2420void CStringChecker::evalStrncmp(CheckerContext &C,
2421 const CallEvent &Call) const {
2422 //int strncmp(const char *s1, const char *s2, size_t n);
2423 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2424}
2425
2426void CStringChecker::evalStrcasecmp(CheckerContext &C,
2427 const CallEvent &Call) const {
2428 //int strcasecmp(const char *s1, const char *s2);
2429 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2430}
2431
2432void CStringChecker::evalStrncasecmp(CheckerContext &C,
2433 const CallEvent &Call) const {
2434 //int strncasecmp(const char *s1, const char *s2, size_t n);
2435 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2436}
2437
2438void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2439 bool IsBounded, bool IgnoreCase) const {
2440 CurrentFunctionDescription = "string comparison function";
2441 ProgramStateRef state = C.getState();
2442 const LocationContext *LCtx = C.getLocationContext();
2443
2444 // Check that the first string is non-null
2445 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2446 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2447 state = checkNonNull(C, state, Left, LeftVal);
2448 if (!state)
2449 return;
2450
2451 // Check that the second string is non-null.
2452 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2453 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2454 state = checkNonNull(C, state, Right, RightVal);
2455 if (!state)
2456 return;
2457
2458 // Get the string length of the first string or give up.
2459 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2460 if (LeftLength.isUndef())
2461 return;
2462
2463 // Get the string length of the second string or give up.
2464 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2465 if (RightLength.isUndef())
2466 return;
2467
2468 // If we know the two buffers are the same, we know the result is 0.
2469 // First, get the two buffers' addresses. Another checker will have already
2470 // made sure they're not undefined.
2471 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2472 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2473
2474 // See if they are the same.
2475 SValBuilder &svalBuilder = C.getSValBuilder();
2476 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2477 ProgramStateRef StSameBuf, StNotSameBuf;
2478 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2479
2480 // If the two arguments might be the same buffer, we know the result is 0,
2481 // and we only need to check one size.
2482 if (StSameBuf) {
2483 StSameBuf =
2484 StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2485 svalBuilder.makeZeroVal(Call.getResultType()));
2486 C.addTransition(StSameBuf);
2487
2488 // If the two arguments are GUARANTEED to be the same, we're done!
2489 if (!StNotSameBuf)
2490 return;
2491 }
2492
2493 assert(StNotSameBuf);
2494 state = StNotSameBuf;
2495
2496 // At this point we can go about comparing the two buffers.
2497 // For now, we only do this if they're both known string literals.
2498
2499 // Attempt to extract string literals from both expressions.
2500 const StringLiteral *LeftStrLiteral =
2501 getCStringLiteral(C, state, Left.Expression, LeftVal);
2502 const StringLiteral *RightStrLiteral =
2503 getCStringLiteral(C, state, Right.Expression, RightVal);
2504 bool canComputeResult = false;
2505 SVal resultVal = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2506
2507 if (LeftStrLiteral && RightStrLiteral) {
2508 StringRef LeftStrRef = LeftStrLiteral->getString();
2509 StringRef RightStrRef = RightStrLiteral->getString();
2510
2511 if (IsBounded) {
2512 // Get the max number of characters to compare.
2513 const Expr *lenExpr = Call.getArgExpr(2);
2514 SVal lenVal = state->getSVal(lenExpr, LCtx);
2515
2516 // If the length is known, we can get the right substrings.
2517 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2518 // Create substrings of each to compare the prefix.
2519 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2520 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2521 canComputeResult = true;
2522 }
2523 } else {
2524 // This is a normal, unbounded strcmp.
2525 canComputeResult = true;
2526 }
2527
2528 if (canComputeResult) {
2529 // Real strcmp stops at null characters.
2530 size_t s1Term = LeftStrRef.find('\0');
2531 if (s1Term != StringRef::npos)
2532 LeftStrRef = LeftStrRef.substr(0, s1Term);
2533
2534 size_t s2Term = RightStrRef.find('\0');
2535 if (s2Term != StringRef::npos)
2536 RightStrRef = RightStrRef.substr(0, s2Term);
2537
2538 // Use StringRef's comparison methods to compute the actual result.
2539 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2540 : LeftStrRef.compare(RightStrRef);
2541
2542 // The strcmp function returns an integer greater than, equal to, or less
2543 // than zero, [c11, p7.24.4.2].
2544 if (compareRes == 0) {
2545 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2546 }
2547 else {
2548 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2549 // Constrain strcmp's result range based on the result of StringRef's
2550 // comparison methods.
2551 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2552 SVal compareWithZero =
2553 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2554 svalBuilder.getConditionType());
2555 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2556 state = state->assume(compareWithZeroVal, true);
2557 }
2558 }
2559 }
2560
2561 state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2562
2563 // Record this as a possible path.
2564 C.addTransition(state);
2565}
2566
2567void CStringChecker::evalStrsep(CheckerContext &C,
2568 const CallEvent &Call) const {
2569 // char *strsep(char **stringp, const char *delim);
2570 // Verify whether the search string parameter matches the return type.
2571 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2572
2573 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2574 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2575 CharPtrTy.getUnqualifiedType())
2576 return;
2577
2578 CurrentFunctionDescription = "strsep()";
2579 ProgramStateRef State = C.getState();
2580 const LocationContext *LCtx = C.getLocationContext();
2581
2582 // Check that the search string pointer is non-null (though it may point to
2583 // a null string).
2584 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2585 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2586 if (!State)
2587 return;
2588
2589 // Check that the delimiter string is non-null.
2590 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2591 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2592 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2593 if (!State)
2594 return;
2595
2596 SValBuilder &SVB = C.getSValBuilder();
2597 SVal Result;
2598 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2599 // Get the current value of the search string pointer, as a char*.
2600 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2601
2602 // Invalidate the search string, representing the change of one delimiter
2603 // character to NUL.
2604 // As the replacement never overflows, do not invalidate its super region.
2605 State = invalidateDestinationBufferNeverOverflows(
2606 C, State, Call.getCFGElementRef(), Result);
2607
2608 // Overwrite the search string pointer. The new value is either an address
2609 // further along in the same string, or NULL if there are no more tokens.
2610 State = State->bindLoc(*SearchStrLoc,
2611 SVB.conjureSymbolVal(Call, C.blockCount(), getTag()),
2612 LCtx);
2613 } else {
2614 assert(SearchStrVal.isUnknown());
2615 // Conjure a symbolic value. It's the best we can do.
2616 Result = SVB.conjureSymbolVal(Call, C.blockCount());
2617 }
2618
2619 // Set the return value, and finish.
2620 State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2621 C.addTransition(State);
2622}
2623
2624// These should probably be moved into a C++ standard library checker.
2625void CStringChecker::evalStdCopy(CheckerContext &C,
2626 const CallEvent &Call) const {
2627 evalStdCopyCommon(C, Call);
2628}
2629
2630void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2631 const CallEvent &Call) const {
2632 evalStdCopyCommon(C, Call);
2633}
2634
2635void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2636 const CallEvent &Call) const {
2637 if (!Call.getArgExpr(2)->getType()->isPointerType())
2638 return;
2639
2640 ProgramStateRef State = C.getState();
2641
2642 const LocationContext *LCtx = C.getLocationContext();
2643
2644 // template <class _InputIterator, class _OutputIterator>
2645 // _OutputIterator
2646 // copy(_InputIterator __first, _InputIterator __last,
2647 // _OutputIterator __result)
2648
2649 // Invalidate the destination buffer
2650 const Expr *Dst = Call.getArgExpr(2);
2651 SVal DstVal = State->getSVal(Dst, LCtx);
2652 // FIXME: As we do not know how many items are copied, we also invalidate the
2653 // super region containing the target location.
2654 State = invalidateDestinationBufferAlwaysEscapeSuperRegion(
2655 C, State, Call.getCFGElementRef(), DstVal);
2656
2657 SValBuilder &SVB = C.getSValBuilder();
2658
2659 SVal ResultVal = SVB.conjureSymbolVal(Call, C.blockCount());
2660 State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2661
2662 C.addTransition(State);
2663}
2664
2665void CStringChecker::evalMemset(CheckerContext &C,
2666 const CallEvent &Call) const {
2667 // void *memset(void *s, int c, size_t n);
2668 CurrentFunctionDescription = "memory set function";
2669
2670 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2671 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2672 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2673
2674 ProgramStateRef State = C.getState();
2675
2676 // See if the size argument is zero.
2677 const LocationContext *LCtx = C.getLocationContext();
2678 SVal SizeVal = C.getSVal(Size.Expression);
2679 QualType SizeTy = Size.Expression->getType();
2680
2681 ProgramStateRef ZeroSize, NonZeroSize;
2682 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2683
2684 // Get the value of the memory area.
2685 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2686
2687 // If the size is zero, there won't be any actual memory access, so
2688 // just bind the return value to the buffer and return.
2689 if (ZeroSize && !NonZeroSize) {
2690 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2691 C.addTransition(ZeroSize);
2692 return;
2693 }
2694
2695 // Ensure the memory area is not null.
2696 // If it is NULL there will be a NULL pointer dereference.
2697 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2698 if (!State)
2699 return;
2700
2701 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2702 if (!State)
2703 return;
2704
2705 // According to the values of the arguments, bind the value of the second
2706 // argument to the destination buffer and set string length, or just
2707 // invalidate the destination buffer.
2708 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(),
2709 C.getSVal(CharE.Expression), Size.Expression, C, State))
2710 return;
2711
2712 State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2713 C.addTransition(State);
2714}
2715
2716void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2717 CurrentFunctionDescription = "memory clearance function";
2718
2719 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2720 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2721 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2722
2723 ProgramStateRef State = C.getState();
2724
2725 // See if the size argument is zero.
2726 SVal SizeVal = C.getSVal(Size.Expression);
2727 QualType SizeTy = Size.Expression->getType();
2728
2729 ProgramStateRef StateZeroSize, StateNonZeroSize;
2730 std::tie(StateZeroSize, StateNonZeroSize) =
2731 assumeZero(C, State, SizeVal, SizeTy);
2732
2733 // If the size is zero, there won't be any actual memory access,
2734 // In this case we just return.
2735 if (StateZeroSize && !StateNonZeroSize) {
2736 C.addTransition(StateZeroSize);
2737 return;
2738 }
2739
2740 // Get the value of the memory area.
2741 SVal MemVal = C.getSVal(Buffer.Expression);
2742
2743 // Ensure the memory area is not null.
2744 // If it is NULL there will be a NULL pointer dereference.
2745 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2746 if (!State)
2747 return;
2748
2749 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2750 if (!State)
2751 return;
2752
2753 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(), Zero,
2754 Size.Expression, C, State))
2755 return;
2756
2757 C.addTransition(State);
2758}
2759
2760void CStringChecker::evalSprintf(CheckerContext &C,
2761 const CallEvent &Call) const {
2762 CurrentFunctionDescription = "'sprintf'";
2763 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2764}
2765
2766void CStringChecker::evalSnprintf(CheckerContext &C,
2767 const CallEvent &Call) const {
2768 CurrentFunctionDescription = "'snprintf'";
2769 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2770}
2771
2772void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2773 bool IsBounded) const {
2774 ProgramStateRef State = C.getState();
2775 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2776 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2777
2778 const auto NumParams = Call.parameters().size();
2779 if (CE->getNumArgs() < NumParams) {
2780 // This is an invalid call, let's just ignore it.
2781 return;
2782 }
2783
2784 const auto AllArguments =
2785 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2786 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2787
2788 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2789 // We consider only string buffers
2790 if (const QualType type = ArgExpr->getType();
2791 !type->isAnyPointerType() ||
2792 !type->getPointeeType()->isAnyCharacterType())
2793 continue;
2794 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2795
2796 // Ensure the buffers do not overlap.
2797 SizeArgExpr SrcExprAsSizeDummy = {
2798 {Source.Expression, Source.ArgumentIndex}};
2799 State = CheckOverlap(
2800 C, State,
2801 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2802 Dest, Source);
2803 if (!State)
2804 return;
2805 }
2806
2807 C.addTransition(State);
2808}
2809
2810//===----------------------------------------------------------------------===//
2811// The driver method, and other Checker callbacks.
2812//===----------------------------------------------------------------------===//
2813
2814CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2815 CheckerContext &C) const {
2816 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2817 if (!CE)
2818 return nullptr;
2819
2820 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2821 if (!FD)
2822 return nullptr;
2823
2824 if (StdCopy.matches(Call))
2825 return &CStringChecker::evalStdCopy;
2826 if (StdCopyBackward.matches(Call))
2827 return &CStringChecker::evalStdCopyBackward;
2828
2829 // Pro-actively check that argument types are safe to do arithmetic upon.
2830 // We do not want to crash if someone accidentally passes a structure
2831 // into, say, a C++ overload of any of these functions. We could not check
2832 // that for std::copy because they may have arguments of other types.
2833 for (auto I : CE->arguments()) {
2834 QualType T = I->getType();
2835 if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2836 return nullptr;
2837 }
2838
2839 const FnCheck *Callback = Callbacks.lookup(Call);
2840 if (Callback)
2841 return *Callback;
2842
2843 return nullptr;
2844}
2845
2846bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2847 FnCheck Callback = identifyCall(Call, C);
2848
2849 // If the callee isn't a string function, let another checker handle it.
2850 if (!Callback)
2851 return false;
2852
2853 // Check and evaluate the call.
2854 assert(isa<CallExpr>(Call.getOriginExpr()));
2855 Callback(this, C, Call);
2856
2857 // If the evaluate call resulted in no change, chain to the next eval call
2858 // handler.
2859 // Note, the custom CString evaluation calls assume that basic safety
2860 // properties are held. However, if the user chooses to turn off some of these
2861 // checks, we ignore the issues and leave the call evaluation to a generic
2862 // handler.
2863 return C.isDifferent();
2864}
2865
2866void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2867 // Record string length for char a[] = "abc";
2868 ProgramStateRef state = C.getState();
2869
2870 for (const auto *I : DS->decls()) {
2871 const VarDecl *D = dyn_cast<VarDecl>(I);
2872 if (!D)
2873 continue;
2874
2875 // FIXME: Handle array fields of structs.
2876 if (!D->getType()->isArrayType())
2877 continue;
2878
2879 const Expr *Init = D->getInit();
2880 if (!Init)
2881 continue;
2883 continue;
2884
2885 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2886 const MemRegion *MR = VarLoc.getAsRegion();
2887 if (!MR)
2888 continue;
2889
2890 SVal StrVal = C.getSVal(Init);
2891 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2892 DefinedOrUnknownSVal strLength =
2893 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2894
2895 state = state->set<CStringLength>(MR, strLength);
2896 }
2897
2898 C.addTransition(state);
2899}
2900
2902CStringChecker::checkRegionChanges(ProgramStateRef state,
2903 const InvalidatedSymbols *,
2904 ArrayRef<const MemRegion *> ExplicitRegions,
2905 ArrayRef<const MemRegion *> Regions,
2906 const LocationContext *LCtx,
2907 const CallEvent *Call) const {
2908 CStringLengthTy Entries = state->get<CStringLength>();
2909 if (Entries.isEmpty())
2910 return state;
2911
2912 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2913 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2914
2915 // First build sets for the changed regions and their super-regions.
2916 for (const MemRegion *MR : Regions) {
2917 Invalidated.insert(MR);
2918
2919 SuperRegions.insert(MR);
2920 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2921 MR = SR->getSuperRegion();
2922 SuperRegions.insert(MR);
2923 }
2924 }
2925
2926 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2927
2928 // Then loop over the entries in the current state.
2929 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2930 // Is this entry for a super-region of a changed region?
2931 if (SuperRegions.count(MR)) {
2932 Entries = F.remove(Entries, MR);
2933 continue;
2934 }
2935
2936 // Is this entry for a sub-region of a changed region?
2937 const MemRegion *Super = MR;
2938 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2939 Super = SR->getSuperRegion();
2940 if (Invalidated.count(Super)) {
2941 Entries = F.remove(Entries, MR);
2942 break;
2943 }
2944 }
2945 }
2946
2947 return state->set<CStringLength>(Entries);
2948}
2949
2950void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2951 SymbolReaper &SR) const {
2952 // Mark all symbols in our string length map as valid.
2953 CStringLengthTy Entries = state->get<CStringLength>();
2954
2955 for (SVal Len : llvm::make_second_range(Entries)) {
2956 for (SymbolRef Sym : Len.symbols())
2957 SR.markInUse(Sym);
2958 }
2959}
2960
2961void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2962 CheckerContext &C) const {
2963 ProgramStateRef state = C.getState();
2964 CStringLengthTy Entries = state->get<CStringLength>();
2965 if (Entries.isEmpty())
2966 return;
2967
2968 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2969 for (auto [Reg, Len] : Entries) {
2970 if (SymbolRef Sym = Len.getAsSymbol()) {
2971 if (SR.isDead(Sym))
2972 Entries = F.remove(Entries, Reg);
2973 }
2974 }
2975
2976 state = state->set<CStringLength>(Entries);
2977 C.addTransition(state);
2978}
2979
2980void ento::registerCStringModeling(CheckerManager &Mgr) {
2981 // Other checker relies on the modeling implemented in this checker family,
2982 // so this "modeling checker" can register the 'CStringChecker' backend for
2983 // its callbacks without enabling any of its frontends.
2984 Mgr.getChecker<CStringChecker>();
2985}
2986
2987bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
2988 return true;
2989}
2990
2991#define REGISTER_CHECKER(NAME) \
2992 void ento::registerCString##NAME(CheckerManager &Mgr) { \
2993 Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr); \
2994 } \
2995 \
2996 bool ento::shouldRegisterCString##NAME(const CheckerManager &) { \
2997 return true; \
2998 }
2999
3000REGISTER_CHECKER(NullArg)
3001REGISTER_CHECKER(OutOfBounds)
3002REGISTER_CHECKER(BufferOverlap)
3003REGISTER_CHECKER(NotNullTerm)
3004REGISTER_CHECKER(UninitializedRead)
3005
3006#undef REGISTER_CHECKER
#define V(N, I)
static std::optional< NonLoc > getIndex(ProgramStateRef State, const ElementRegion *ER, CharKind CK)
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx)
#define REGISTER_CHECKER(name)
Result
Implement __builtin_bit_cast and related operations.
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:228
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType WideCharTy
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType CharTy
CanQualType IntTy
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType UnsignedCharTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
decl_range decls()
Definition Stmt.h:1689
QualType getType() const
Definition Expr.h:144
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8571
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition TypeBase.h:8539
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
unsigned getLength() const
Definition Expr.h:1912
StringRef getString() const
Definition Expr.h:1870
bool isArrayType() const
Definition TypeBase.h:8781
bool isPointerType() const
Definition TypeBase.h:8682
CanQualType getCanonicalTypeUnqualified() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:789
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition TypeBase.h:9170
bool isAnyPointerType() const
Definition TypeBase.h:8690
QualType getType() const
Definition Decl.h:723
const Expr * getInit() const
Definition Decl.h:1381
APSIntPtr getMaxValue(const llvm::APSInt &v)
std::optional< APSIntPtr > evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:584
CHECKER * getChecker(AT &&...Args)
If the the singleton instance of a checker class is not yet constructed, then construct it (with the ...
ElementRegion is used to represent both array elements and casts.
Definition MemRegion.h:1232
QualType getValueType() const override
Definition MemRegion.h:1254
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:98
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
Kind getKind() const
Definition MemRegion.h:203
@ TK_PreserveContents
Tells that a region's contents is not changed.
Definition MemRegion.h:1676
@ TK_SuppressEscape
Suppress pointer-escaping of a region.
Definition MemRegion.h:1679
void setTrait(SymbolRef Sym, InvalidationKinds IK)
bool hasSymbolicOffset() const
Definition MemRegion.h:83
const MemRegion * getRegion() const
It might return null.
Definition MemRegion.h:81
int64_t getOffset() const
Definition MemRegion.h:85
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
BasicValueFactory & getBasicValueFactory()
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, const LocationContext *LCtx, unsigned count)
ProgramStateManager & getStateManager()
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
loc::MemRegionVal makeLoc(SymbolRef sym)
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, ConstCFGElementRef elem, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition SVals.h:56
bool isUndef() const
Definition SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
bool isValid() const
Definition SVals.h:111
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
bool isUnknown() const
Definition SVals.h:105
LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral * getStringLiteral() const
Definition MemRegion.h:873
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition MemRegion.h:487
llvm::iterator_range< symbol_iterator > symbols() const
Definition SymExpr.h:107
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
__inline void unsigned int _2
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
llvm::DenseSet< SymbolRef > InvalidatedSymbols
Definition Store.h:51
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
Definition SymExpr.h:133
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
std::variant< struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl > Decl
All declarations that can appear in a module declaration.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
CFGBlock::ConstCFGElementRef ConstCFGElementRef
Definition CFG.h:1227
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition CharInfo.h:233
U cast(CodeGen::Address addr)
Definition Address.h:327
int const char * function
Definition c++config.h:31