clang 23.0.0git
CStringChecker.cpp
Go to the documentation of this file.
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <functional>
34#include <optional>
35
36using namespace clang;
37using namespace ento;
38using namespace std::placeholders;
39
40namespace {
41struct AnyArgExpr {
42 const Expr *Expression;
43 unsigned ArgumentIndex;
44};
45struct SourceArgExpr : AnyArgExpr {};
46struct DestinationArgExpr : AnyArgExpr {};
47struct SizeArgExpr : AnyArgExpr {};
48
49using ErrorMessage = SmallString<128>;
50enum class AccessKind { write, read };
51
52static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
53 AccessKind Access) {
54 ErrorMessage Message;
55 llvm::raw_svector_ostream Os(Message);
56
57 // Function classification like: Memory copy function
58 Os << toUppercase(FunctionDescription.front())
59 << &FunctionDescription.data()[1];
60
61 if (Access == AccessKind::write) {
62 Os << " overflows the destination buffer";
63 } else { // read access
64 Os << " accesses out-of-bound array element";
65 }
66
67 return Message;
68}
69
/// Which concatenation function (if any) a string-copy model represents.
enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };

/// Element width handled by a modeled function: `char` or `wchar_t`.
enum class CharKind { Regular = 0, Wide };

// Short aliases used when binding callbacks in the call description table.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
75
76static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
77 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
78 : Ctx.WideCharTy);
79}
80
81class CStringChecker
82 : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
83 check::LiveSymbols, check::DeadSymbols,
84 check::RegionChanges> {
85 mutable const char *CurrentFunctionDescription = nullptr;
86
87public:
88 // FIXME: The bug types emitted by this checker family have confused garbage
89 // in their Description and Category fields (e.g. `categories::UnixAPI` is
90 // passed as the description in several cases and `uninitialized` is mistyped
91 // as `unitialized`). This should be cleaned up.
92 CheckerFrontendWithBugType NullArg{categories::UnixAPI};
93 CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
94 CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
95 "Improper arguments"};
96 CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
97 CheckerFrontendWithBugType UninitializedRead{
98 "Accessing unitialized/garbage values"};
99
100 StringRef getDebugTag() const override { return "MallocChecker"; }
101
102 static void *getTag() { static int tag; return &tag; }
103
104 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
105 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
106 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
107 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
108
110 checkRegionChanges(ProgramStateRef state, const InvalidatedSymbols *,
111 ArrayRef<const MemRegion *> ExplicitRegions,
112 ArrayRef<const MemRegion *> Regions, const StackFrame *SF,
113 const CallEvent *Call) const;
114
115 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
116 const CallEvent &)>;
117
118 CallDescriptionMap<FnCheck> Callbacks = {
119 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
120 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
121 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
122 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
123 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
124 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
125 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
126 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
127 {{CDM::CLibrary, {"memcmp"}, 3},
128 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
129 {{CDM::CLibrary, {"wmemcmp"}, 3},
130 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
131 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
132 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
133 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
134 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
135 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
136 &CStringChecker::evalMemset},
137 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
138 // FIXME: C23 introduces 'memset_explicit', maybe also model that
139 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
140 &CStringChecker::evalStrcpy},
141 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
142 &CStringChecker::evalStrncpy},
143 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
144 &CStringChecker::evalStpcpy},
145 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
146 &CStringChecker::evalStrlcpy},
147 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
148 &CStringChecker::evalStrcat},
149 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
150 &CStringChecker::evalStrncat},
151 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
152 &CStringChecker::evalStrlcat},
153 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
154 &CStringChecker::evalstrLength},
155 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
156 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
157 &CStringChecker::evalstrnLength},
158 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
159 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
160 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
161 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
162 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
163 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
164 {{CDM::CLibrary, {"strxfrm"}, 3}, &CStringChecker::evalStrxfrm},
165 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
166 {{CDM::CLibrary, {"bcmp"}, 3},
167 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
168 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
169 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
170 &CStringChecker::evalBzero},
171
172 // When recognizing calls to the following variadic functions, we accept
173 // any number of arguments in the call (std::nullopt = accept any
174 // number), but check that in the declaration there are 2 and 3
175 // parameters respectively. (Note that the parameter count does not
176 // include the "...". Calls where the number of arguments is too small
177 // will be discarded by the callback.)
178 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
179 &CStringChecker::evalSprintf},
180 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
181 &CStringChecker::evalSnprintf},
182 };
183
184 // These require a bit of special handling.
185 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
186 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
187
188 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
189 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
190 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
191 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
192 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
193 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
194 ProgramStateRef state, SizeArgExpr Size,
195 DestinationArgExpr Dest, SourceArgExpr Source,
196 bool Restricted, bool IsMempcpy, CharKind CK) const;
197
198 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
199
200 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
201 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
202 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
203 bool IsStrnlen = false) const;
204
205 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
206 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
207 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
208 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
209 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
210 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
211 bool returnPtr = true) const;
212
213 void evalStrxfrm(CheckerContext &C, const CallEvent &Call) const;
214
215 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
216 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
217 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
218
219 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
220 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
221 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
222 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
223 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
224 bool IsBounded = false, bool IgnoreCase = false) const;
225
226 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
227
228 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
229 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
230 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
231 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
232 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
233
234 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
235 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
236 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
237 bool IsBounded) const;
238
239 // Utility methods
240 std::pair<ProgramStateRef , ProgramStateRef >
241 static assumeZero(CheckerContext &C,
242 ProgramStateRef state, SVal V, QualType Ty);
243
244 static ProgramStateRef setCStringLength(ProgramStateRef state,
245 const MemRegion *MR,
246 SVal strLength);
247 static SVal getCStringLengthForRegion(CheckerContext &C,
248 ProgramStateRef &state,
249 const Expr *Ex,
250 const MemRegion *MR,
251 bool hypothetical);
252 static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
253
254 SVal getCStringLength(CheckerContext &C,
255 ProgramStateRef &state,
256 const Expr *Ex,
257 SVal Buf,
258 bool hypothetical = false) const;
259
260 const StringLiteral *getCStringLiteral(CheckerContext &C,
261 ProgramStateRef &state,
262 const Expr *expr,
263 SVal val) const;
264
265 /// Invalidate the destination buffer determined by characters copied.
266 static ProgramStateRef
267 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
268 const Expr *BufE, ConstCFGElementRef Elem,
269 SVal BufV, SVal SizeV, QualType SizeTy);
270
271 /// Operation never overflows, do not invalidate the super region.
272 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
273 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
274
275 /// We do not know whether the operation can overflow (e.g. size is unknown),
276 /// invalidate the super region and escape related pointers.
277 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
278 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
279
280 /// Invalidate the source buffer for escaping pointers.
281 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
284 SVal BufV);
285
286 /// @param InvalidationTraitOperations Determine how to invlidate the
287 /// MemRegion by setting the invalidation traits. Return true to cause pointer
288 /// escape, or false otherwise.
289 static ProgramStateRef invalidateBufferAux(
290 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
291 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
292 const MemRegion *)>
293 InvalidationTraitOperations);
294
295 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
296 const MemRegion *MR);
297
298 static bool memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
299 SVal CharE, const Expr *Size, CheckerContext &C,
300 ProgramStateRef &State);
301
302 // Re-usable checks
303 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
304 AnyArgExpr Arg, SVal l) const;
305 // Check whether the origin region behind \p Element (like the actual array
306 // region \p Element is from) is initialized.
307 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
308 AnyArgExpr Buffer, SVal Element, SVal Size) const;
309 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
310 AnyArgExpr Buffer, SVal Element,
311 AccessKind Access,
312 CharKind CK = CharKind::Regular) const;
313 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
314 AnyArgExpr Buffer, SizeArgExpr Size,
315 AccessKind Access,
316 CharKind CK = CharKind::Regular) const;
317 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
318 SizeArgExpr Size, AnyArgExpr First,
319 AnyArgExpr Second,
320 CharKind CK = CharKind::Regular) const;
321 void emitOverlapBug(CheckerContext &C,
322 ProgramStateRef state,
323 const Stmt *First,
324 const Stmt *Second) const;
325
326 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
327 StringRef WarningMsg) const;
328 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
329 const Stmt *S, StringRef WarningMsg) const;
330 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
331 const Stmt *S, StringRef WarningMsg) const;
332 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
333 const Expr *E, const MemRegion *R,
334 StringRef Msg) const;
335 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
336 ProgramStateRef state,
337 NonLoc left,
338 NonLoc right) const;
339
340 // Return true if the destination buffer of the copy function may be in bound.
341 // Expects SVal of Size to be positive and unsigned.
342 // Expects SVal of FirstBuf to be a FieldRegion.
343 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
344 SVal BufVal, QualType BufTy, SVal LengthVal,
345 QualType LengthTy);
346};
347
348} //end anonymous namespace
349
350REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
351
352//===----------------------------------------------------------------------===//
353// Individual checks and utility methods.
354//===----------------------------------------------------------------------===//
355
356std::pair<ProgramStateRef, ProgramStateRef>
357CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
358 QualType Ty) {
359 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
360 if (!val)
361 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
362
363 SValBuilder &svalBuilder = C.getSValBuilder();
364 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
365 return State->assume(svalBuilder.evalEQ(State, *val, zero));
366}
367
368ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
369 ProgramStateRef State,
370 AnyArgExpr Arg, SVal l) const {
371 // If a previous check has failed, propagate the failure.
372 if (!State)
373 return nullptr;
374
375 ProgramStateRef stateNull, stateNonNull;
376 std::tie(stateNull, stateNonNull) =
377 assumeZero(C, State, l, Arg.Expression->getType());
378
379 if (stateNull && !stateNonNull) {
380 if (NullArg.isEnabled()) {
381 SmallString<80> buf;
382 llvm::raw_svector_ostream OS(buf);
383 assert(CurrentFunctionDescription);
384 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
385 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
386 << CurrentFunctionDescription;
387
388 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
389 }
390 return nullptr;
391 }
392
393 // From here on, assume that the value is non-null.
394 assert(stateNonNull);
395 return stateNonNull;
396}
397
398static std::optional<NonLoc> getIndex(ProgramStateRef State,
399 const ElementRegion *ER, CharKind CK) {
401 ASTContext &Ctx = SVB.getContext();
402
403 if (CK == CharKind::Regular) {
404 if (ER->getValueType() != Ctx.CharTy)
405 return {};
406 return ER->getIndex();
407 }
408
409 if (ER->getValueType() != Ctx.WideCharTy)
410 return {};
411
412 QualType SizeTy = Ctx.getSizeType();
413 NonLoc WideSize =
415 SizeTy)
416 .castAs<NonLoc>();
417 SVal Offset =
418 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
419 if (Offset.isUnknown())
420 return {};
421 return Offset.castAs<NonLoc>();
422}
423
424// Basically 1 -> 1st, 12 -> 12th, etc.
425static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
426 Os << Idx << llvm::getOrdinalSuffix(Idx);
427}
428
429ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
430 ProgramStateRef State,
431 AnyArgExpr Buffer, SVal Element,
432 SVal Size) const {
433
434 // If a previous check has failed, propagate the failure.
435 if (!State)
436 return nullptr;
437
438 SVal BufVal = C.getSVal(Buffer.Expression);
439 const auto *ER = dyn_cast_or_null<ElementRegion>(BufVal.getAsRegion());
440 if (!ER)
441 return State;
442
443 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
444 if (!SuperR)
445 return State;
446
447 // FIXME: We ought to able to check objects as well. Maybe
448 // UninitializedObjectChecker could help?
449 if (!SuperR->getValueType()->isArrayType())
450 return State;
451
452 SValBuilder &SVB = C.getSValBuilder();
453 ASTContext &Ctx = SVB.getContext();
454
455 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
456
457 std::optional<Loc> FirstElementVal =
458 State->getLValue(ElemTy, SVB.makeZeroArrayIndex(), BufVal).getAs<Loc>();
459 if (!FirstElementVal)
460 return State;
461
462 // Ensure that we wouldn't read uninitialized value.
463 if (UninitializedRead.isEnabled() &&
464 State->getSVal(*FirstElementVal).isUndef()) {
465 llvm::SmallString<258> Buf;
466 llvm::raw_svector_ostream OS(Buf);
467 OS << "The first element of the ";
468 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
469 OS << " argument is undefined";
470 emitUninitializedReadBug(C, State, Buffer.Expression,
471 FirstElementVal->getAsRegion(), OS.str());
472 return nullptr;
473 }
474
475 // We won't check whether the entire region is fully initialized -- let's just
476 // check that the first and the last element is. So, onto checking the last
477 // element:
478
479 const QualType IdxTy = SVB.getArrayIndexType();
480 NonLoc ElemSize =
481 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
482 .castAs<NonLoc>();
483
484 // FIXME: Check that the size arg to the cstring function is divisible by
485 // size of the actual element type?
486
487 // The type of the argument to the cstring function is either char or wchar,
488 // but thats not the type of the original array (or memory region).
489 // Suppose the following:
490 // int t[5];
491 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
492 // When checking whether t is fully initialized, we see it as char array of
493 // size sizeof(int)*5. If we check the last element as a character, we read
494 // the last byte of an integer, which will be undefined. But just because
495 // that value is undefined, it doesn't mean that the element is uninitialized!
496 // For this reason, we need to retrieve the actual last element with the
497 // correct type.
498
499 // Divide the size argument to the cstring function by the actual element
500 // type. This value will be size of the array, or the index to the
501 // past-the-end element.
502 std::optional<NonLoc> Offset =
503 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
504 IdxTy)
505 .getAs<NonLoc>();
506
507 if (!Offset)
508 return State;
509
510 // Retrieve the index of the last element relative to the buffer pointer.
511 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
512 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
513
514 SVal LastElementVal = State->getLValue(ElemTy, LastIdx, BufVal);
515 if (!isa<Loc>(LastElementVal))
516 return State;
517
518 if (UninitializedRead.isEnabled() &&
519 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
520 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
521 // If we can't get emit a sensible last element index, just bail out --
522 // prefer to emit nothing in favour of emitting garbage quality reports.
523 if (!IdxInt) {
524 C.addSink();
525 return nullptr;
526 }
527 llvm::SmallString<258> Buf;
528 llvm::raw_svector_ostream OS(Buf);
529 OS << "The last accessed element (at index ";
530 OS << IdxInt->getExtValue();
531 OS << ") in the ";
532 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
533 OS << " argument is undefined";
534 emitUninitializedReadBug(C, State, Buffer.Expression,
535 LastElementVal.getAsRegion(), OS.str());
536 return nullptr;
537 }
538 return State;
539}
540// FIXME: The root of this logic was copied from the old checker
541// alpha.security.ArrayBound (which is removed within this commit).
542// It should be refactored to use the different, more sophisticated bounds
543// checking logic used by the new checker ``security.ArrayBound``.
544ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
545 ProgramStateRef state,
546 AnyArgExpr Buffer, SVal Element,
547 AccessKind Access,
548 CharKind CK) const {
549
550 // If a previous check has failed, propagate the failure.
551 if (!state)
552 return nullptr;
553
554 // Check for out of bound array element access.
555 const MemRegion *R = Element.getAsRegion();
556 if (!R)
557 return state;
558
559 const auto *ER = dyn_cast<ElementRegion>(R);
560 if (!ER)
561 return state;
562
563 // Get the index of the accessed element.
564 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
565 if (!Idx)
566 return state;
567
568 // Get the size of the array.
569 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
570 DefinedOrUnknownSVal Size =
571 getDynamicExtent(state, superReg, C.getSValBuilder());
572
573 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
574 if (StOutBound && !StInBound) {
575 // The analyzer determined that the access is out-of-bounds, which is
576 // a fatal error: ideally we'd return nullptr to terminate this path
577 // regardless of whether the OutOfBounds checker frontend is enabled.
578 // However, the current out-of-bounds modeling produces too many false
579 // positives, so when the frontend is disabled we return the original
580 // (unconstrained) state and let the analysis continue. This is
581 // inconsistent: returning `state` instead of `StOutBound` discards the
582 // constraint that the index is out-of-bounds, and callers cannot
583 // distinguish "we proved an error" from "we couldn't determine anything"
584 // since both return the original state.
585 // TODO: Once the OutOfBounds frontend is stable, return nullptr here
586 // unconditionally to stop the analysis on this path.
587 if (!OutOfBounds.isEnabled())
588 return state;
589
590 ErrorMessage Message =
591 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
592 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
593 return nullptr;
594 }
595
596 // Array bound check succeeded. From this point forward the array bound
597 // should always succeed.
598 return StInBound;
599}
600
602CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
603 AnyArgExpr Buffer, SizeArgExpr Size,
604 AccessKind Access, CharKind CK) const {
605 // If a previous check has failed, propagate the failure.
606 if (!State)
607 return nullptr;
608
609 SValBuilder &svalBuilder = C.getSValBuilder();
610 ASTContext &Ctx = svalBuilder.getContext();
611
612 QualType SizeTy = Size.Expression->getType();
613 QualType PtrTy = getCharPtrType(Ctx, CK);
614
615 // Check that the first buffer is non-null.
616 SVal BufVal = C.getSVal(Buffer.Expression);
617 State = checkNonNull(C, State, Buffer, BufVal);
618 if (!State)
619 return nullptr;
620
621 SVal BufStart =
622 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
623
624 // Check if the first byte of the buffer is accessible.
625 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
626
627 if (!State)
628 return nullptr;
629
630 // Get the access length and make sure it is known.
631 // FIXME: This assumes the caller has already checked that the access length
632 // is positive. And that it's unsigned.
633 SVal LengthVal = C.getSVal(Size.Expression);
634 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
635 if (!Length)
636 return State;
637
638 // Compute the offset of the last element to be accessed: size-1.
639 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
640 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
641 if (Offset.isUnknown())
642 return nullptr;
643 NonLoc LastOffset = Offset.castAs<NonLoc>();
644
645 // Check that the first buffer is sufficiently long.
646 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
647
648 SVal BufEnd =
649 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
650 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
651 if (Access == AccessKind::read)
652 State = checkInit(C, State, Buffer, BufStart, *Length);
653
654 // If the buffer isn't large enough, abort.
655 if (!State)
656 return nullptr;
657 }
658
659 // Large enough or not, return this state!
660 return State;
661}
662
663ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
664 ProgramStateRef state,
665 SizeArgExpr Size, AnyArgExpr First,
666 AnyArgExpr Second,
667 CharKind CK) const {
668 // Do a simple check for overlap: if the two arguments are from the same
669 // buffer, see if the end of the first is greater than the start of the second
670 // or vice versa.
671
672 // If a previous check has failed, propagate the failure.
673 if (!state)
674 return nullptr;
675
676 ProgramStateRef stateTrue, stateFalse;
677
678 if (!First.Expression->getType()->isAnyPointerType() ||
679 !Second.Expression->getType()->isAnyPointerType())
680 return state;
681
682 // Assume different address spaces cannot overlap.
683 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
684 Second.Expression->getType()->getPointeeType().getAddressSpace())
685 return state;
686
687 // Get the buffer values and make sure they're known locations.
688 const StackFrame *SF = C.getStackFrame();
689 SVal firstVal = state->getSVal(First.Expression, SF);
690 SVal secondVal = state->getSVal(Second.Expression, SF);
691
692 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
693 if (!firstLoc)
694 return state;
695
696 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
697 if (!secondLoc)
698 return state;
699
700 // Are the two values the same?
701 SValBuilder &svalBuilder = C.getSValBuilder();
702 std::tie(stateTrue, stateFalse) =
703 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
704
705 if (stateTrue && !stateFalse) {
706 if (BufferOverlap.isEnabled()) {
707 // If the values are known to be equal, that's automatically an overlap.
708 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
709 return nullptr;
710 }
711 // The analyzer proved that the two pointers are equal, which guarantees
712 // overlap. When BufferOverlap is disabled, we return the original state
713 // instead of nullptr (to avoid stopping the path) or stateTrue (which
714 // would encode the equality constraint). This creates an inconsistency:
715 // callers treat any non-null return as "no overlap found" and proceed
716 // with subsequent modeling (e.g. memcpy side effects), even though the
717 // operation has undefined behavior. Additionally, returning `state` instead
718 // of `stateTrue` discards the pointer-equality constraint, making the
719 // analysis less precise.
720 // FIXME: At minimum, return stateTrue to preserve the equality
721 // constraint. Ideally, return nullptr to stop the path unconditionally,
722 // since overlap is proven regardless of whether we report it.
723 return state;
724 }
725
726 // assume the two expressions are not equal.
727 assert(stateFalse);
728 state = stateFalse;
729
730 // Which value comes first?
731 QualType cmpTy = svalBuilder.getConditionType();
732 SVal reverse =
733 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
734 std::optional<DefinedOrUnknownSVal> reverseTest =
735 reverse.getAs<DefinedOrUnknownSVal>();
736 if (!reverseTest)
737 return state;
738
739 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
740 if (stateTrue) {
741 if (stateFalse) {
742 // If we don't know which one comes first, we can't perform this test.
743 return state;
744 } else {
745 // Switch the values so that firstVal is before secondVal.
746 std::swap(firstLoc, secondLoc);
747
748 // Switch the Exprs as well, so that they still correspond.
749 std::swap(First, Second);
750 }
751 }
752
753 // Get the length, and make sure it too is known.
754 SVal LengthVal = state->getSVal(Size.Expression, SF);
755 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
756 if (!Length)
757 return state;
758
759 // Convert the first buffer's start address to char*.
760 // Bail out if the cast fails.
761 ASTContext &Ctx = svalBuilder.getContext();
762 QualType CharPtrTy = getCharPtrType(Ctx, CK);
763 SVal FirstStart =
764 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
765 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
766 if (!FirstStartLoc)
767 return state;
768
769 // Compute the end of the first buffer. Bail out if THAT fails.
770 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
771 *Length, CharPtrTy);
772 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
773 if (!FirstEndLoc)
774 return state;
775
776 // Is the end of the first buffer past the start of the second buffer?
777 SVal Overlap =
778 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
779 std::optional<DefinedOrUnknownSVal> OverlapTest =
780 Overlap.getAs<DefinedOrUnknownSVal>();
781 if (!OverlapTest)
782 return state;
783
784 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
785
786 if (stateTrue && !stateFalse) {
787 if (BufferOverlap.isEnabled()) {
788 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
789 return nullptr;
790 }
791 // The analyzer proved that the end of the first buffer is past the start
792 // of the second, which means the buffers overlap. This is the same
793 // inconsistency as the equal-pointers case above: when BufferOverlap is
794 // disabled, we return the original state, so callers cannot distinguish
795 // "proven overlap" from "couldn't determine anything" and will proceed
796 // to model side effects (e.g. memcpy) on a path with proven UB.
797 // Returning `stateTrue` would at least preserve the overlap constraint;
798 // returning nullptr would correctly terminate the path.
799 // FIXME: Return nullptr unconditionally once BufferOverlap is stable.
800 return state;
801 }
802
803 // assume the two expressions don't overlap.
804 assert(stateFalse);
805 return stateFalse;
806}
807
808void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
809 const Stmt *First,
810 const Stmt *Second) const {
811 assert(BufferOverlap.isEnabled() &&
812 "Can't emit from a checker that is not enabled!");
813 ExplodedNode *N = C.generateErrorNode(state);
814 if (!N)
815 return;
816
817 // Generate a report for this bug.
818 auto report = std::make_unique<PathSensitiveBugReport>(
819 BufferOverlap, "Arguments must not be overlapping buffers", N);
820 report->addRange(First->getSourceRange());
821 report->addRange(Second->getSourceRange());
822
823 C.emitReport(std::move(report));
824}
825
826void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
827 const Stmt *S, StringRef WarningMsg) const {
828 assert(NullArg.isEnabled() &&
829 "Can't emit from a checker that is not enabled!");
830 if (ExplodedNode *N = C.generateErrorNode(State)) {
831 auto Report =
832 std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
833 Report->addRange(S->getSourceRange());
834 if (const auto *Ex = dyn_cast<Expr>(S))
836 C.emitReport(std::move(Report));
837 }
838}
839
840void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
841 ProgramStateRef State,
842 const Expr *E, const MemRegion *R,
843 StringRef Msg) const {
844 assert(UninitializedRead.isEnabled() &&
845 "Can't emit from a checker that is not enabled!");
846 if (ExplodedNode *N = C.generateErrorNode(State)) {
847 auto Report =
848 std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
849 Report->addNote("Other elements might also be undefined",
850 Report->getLocation());
851 Report->addRange(E->getSourceRange());
853 Report->addVisitor<NoStoreFuncVisitor>(R->castAs<SubRegion>());
854 C.emitReport(std::move(Report));
855 }
856}
857
858void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
859 ProgramStateRef State, const Stmt *S,
860 StringRef WarningMsg) const {
861 assert(OutOfBounds.isEnabled() &&
862 "Can't emit from a checker that is not enabled!");
863 if (ExplodedNode *N = C.generateErrorNode(State)) {
864 // FIXME: It would be nice to eventually make this diagnostic more clear,
865 // e.g., by referencing the original declaration or by saying *why* this
866 // reference is outside the range.
867 auto Report =
868 std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
869 Report->addRange(S->getSourceRange());
870 C.emitReport(std::move(Report));
871 }
872}
873
874void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
875 const Stmt *S,
876 StringRef WarningMsg) const {
877 assert(NotNullTerm.isEnabled() &&
878 "Can't emit from a checker that is not enabled!");
879 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
880 auto Report =
881 std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
882
883 Report->addRange(S->getSourceRange());
884 C.emitReport(std::move(Report));
885 }
886}
887
888ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
889 ProgramStateRef state,
890 NonLoc left,
891 NonLoc right) const {
892 // If a previous check has failed, propagate the failure.
893 if (!state)
894 return nullptr;
895
896 SValBuilder &svalBuilder = C.getSValBuilder();
897 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
898
899 QualType sizeTy = svalBuilder.getContext().getSizeType();
900 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
901 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
902
903 SVal maxMinusRight;
904 if (isa<nonloc::ConcreteInt>(right)) {
905 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
906 sizeTy);
907 } else {
908 // Try switching the operands. (The order of these two assignments is
909 // important!)
910 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
911 sizeTy);
912 left = right;
913 }
914
915 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
916 QualType cmpTy = svalBuilder.getConditionType();
917 // If left > max - right, we have an overflow.
918 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
919 *maxMinusRightNL, cmpTy);
920
921 auto [StateOverflow, StateOkay] =
922 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
923
924 if (StateOverflow && !StateOkay) {
925 // On this path the analyzer is convinced that the addition of these two
926 // values would overflow `size_t` which must be caused by the inaccuracy
927 // of our modeling because this method is called in situations where the
928 // summands are size/length values which are much less than SIZE_MAX. To
929 // avoid false positives let's just sink this invalid path.
930 C.addSink(StateOverflow);
931 return nullptr;
932 }
933
934 // From now on, assume an overflow didn't occur.
935 assert(StateOkay);
936 state = StateOkay;
937 }
938
939 return state;
940}
941
942ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
943 const MemRegion *MR,
944 SVal strLength) {
945 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
946
947 MR = MR->StripCasts();
948
949 switch (MR->getKind()) {
950 case MemRegion::StringRegionKind:
951 // FIXME: This can happen if we strcpy() into a string region. This is
952 // undefined [C99 6.4.5p6], but we should still warn about it.
953 return state;
954
955 case MemRegion::SymbolicRegionKind:
956 case MemRegion::AllocaRegionKind:
957 case MemRegion::NonParamVarRegionKind:
958 case MemRegion::ParamVarRegionKind:
959 case MemRegion::FieldRegionKind:
960 case MemRegion::ObjCIvarRegionKind:
961 // These are the types we can currently track string lengths for.
962 break;
963
964 case MemRegion::ElementRegionKind:
965 // FIXME: Handle element regions by upper-bounding the parent region's
966 // string length.
967 return state;
968
969 default:
970 // Other regions (mostly non-data) can't have a reliable C string length.
971 // For now, just ignore the change.
972 // FIXME: These are rare but not impossible. We should output some kind of
973 // warning for things like strcpy((char[]){'a', 0}, "b");
974 return state;
975 }
976
977 if (strLength.isUnknown())
978 return state->remove<CStringLength>(MR);
979
980 return state->set<CStringLength>(MR, strLength);
981}
982
983SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
984 ProgramStateRef &state,
985 const Expr *Ex,
986 const MemRegion *MR,
987 bool hypothetical) {
988 if (!hypothetical) {
989 // If there's a recorded length, go ahead and return it.
990 const SVal *Recorded = state->get<CStringLength>(MR);
991 if (Recorded)
992 return *Recorded;
993 }
994
995 // Otherwise, get a new symbol and update the state.
996 SValBuilder &svalBuilder = C.getSValBuilder();
997 QualType sizeTy = svalBuilder.getContext().getSizeType();
998 SVal strLength =
999 svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), MR, Ex, sizeTy,
1000 C.getStackFrame(), C.blockCount());
1001
1002 if (!hypothetical) {
1003 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1004 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1005 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1006 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
1007 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
1008 std::optional<APSIntPtr> maxLengthInt =
1009 BVF.evalAPSInt(BO_Div, maxValInt, fourInt);
1010 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
1011 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
1012 svalBuilder.getConditionType());
1013 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
1014 }
1015 state = state->set<CStringLength>(MR, strLength);
1016 }
1017
1018 return strLength;
1019}
1020
1021const StringLiteral *
1022CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
1023 switch (MR->getKind()) {
1024 case MemRegion::StringRegionKind:
1025 return cast<StringRegion>(MR)->getStringLiteral();
1026 case MemRegion::NonParamVarRegionKind:
1027 if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1028 Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
1029 return dyn_cast_or_null<StringLiteral>(Decl->getInit());
1030 return nullptr;
1031 default:
1032 return nullptr;
1033 }
1034}
1035
1036SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1037 const Expr *Ex, SVal Buf,
1038 bool hypothetical) const {
1039 const MemRegion *MR = Buf.getAsRegion();
1040 if (!MR) {
1041 // If we can't get a region, see if it's something we /know/ isn't a
1042 // C string. In the context of locations, the only time we can issue such
1043 // a warning is for labels.
1044 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1045 if (NotNullTerm.isEnabled()) {
1046 SmallString<120> buf;
1047 llvm::raw_svector_ostream os(buf);
1048 assert(CurrentFunctionDescription);
1049 os << "Argument to " << CurrentFunctionDescription
1050 << " is the address of the label '" << Label->getLabel()->getName()
1051 << "', which is not a null-terminated string";
1052
1053 emitNotCStringBug(C, state, Ex, os.str());
1054 }
1055 return UndefinedVal();
1056 }
1057
1058 // If it's not a region and not a label, give up.
1059 return UnknownVal();
1060 }
1061
1062 // If we have a region, strip casts from it and see if we can figure out
1063 // its length. For anything we can't figure out, just return UnknownVal.
1064 MR = MR->StripCasts();
1065
1066 if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
1067 // If we have a global constant with a string literal initializer,
1068 // compute the initializer's length.
1069 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1070 // so we can assume that the byte length is the correct C string length.
1071 // FIXME: Embedded null characters are not handled.
1072 SValBuilder &SVB = C.getSValBuilder();
1073 return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
1074 }
1075
1076 switch (MR->getKind()) {
1077 case MemRegion::StringRegionKind:
1078 case MemRegion::NonParamVarRegionKind:
1079 case MemRegion::SymbolicRegionKind:
1080 case MemRegion::AllocaRegionKind:
1081 case MemRegion::ParamVarRegionKind:
1082 case MemRegion::FieldRegionKind:
1083 case MemRegion::ObjCIvarRegionKind:
1084 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1085 case MemRegion::CompoundLiteralRegionKind:
1086 // FIXME: Can we track this? Is it necessary?
1087 return UnknownVal();
1088 case MemRegion::ElementRegionKind: {
1089 // If an offset into the string literal is used, use the original length
1090 // minus the offset.
1091 // FIXME: Embedded null characters are not handled.
1092 const ElementRegion *ER = cast<ElementRegion>(MR);
1093 const SubRegion *SuperReg =
1095 const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
1096 if (!StrLit)
1097 return UnknownVal();
1098 SValBuilder &SVB = C.getSValBuilder();
1099 NonLoc Idx = ER->getIndex();
1100 QualType SizeTy = SVB.getContext().getSizeType();
1101 NonLoc LengthVal =
1102 SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>();
1103 if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
1104 SVB.getConditionType())
1105 .castAs<DefinedOrUnknownSVal>(),
1106 true))
1107 return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy);
1108 return UnknownVal();
1109 }
1110 default:
1111 // Other regions (mostly non-data) can't have a reliable C string length.
1112 // In this case, an error is emitted and UndefinedVal is returned.
1113 // The caller should always be prepared to handle this case.
1114 if (NotNullTerm.isEnabled()) {
1115 SmallString<120> buf;
1116 llvm::raw_svector_ostream os(buf);
1117
1118 assert(CurrentFunctionDescription);
1119 os << "Argument to " << CurrentFunctionDescription << " is ";
1120
1121 if (SummarizeRegion(os, C.getASTContext(), MR))
1122 os << ", which is not a null-terminated string";
1123 else
1124 os << "not a null-terminated string";
1125
1126 emitNotCStringBug(C, state, Ex, os.str());
1127 }
1128 return UndefinedVal();
1129 }
1130}
1131
1132const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1133 ProgramStateRef &state, const Expr *expr, SVal val) const {
1134 // FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
1135
1136 // Get the memory region pointed to by the val.
1137 const MemRegion *bufRegion = val.getAsRegion();
1138 if (!bufRegion)
1139 return nullptr;
1140
1141 // Strip casts off the memory region.
1142 bufRegion = bufRegion->StripCasts();
1143
1144 // Cast the memory region to a string region.
1145 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1146 if (!strRegion)
1147 return nullptr;
1148
1149 // Return the actual string in the string region.
1150 return strRegion->getStringLiteral();
1151}
1152
1153bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1154 SVal BufVal, QualType BufTy,
1155 SVal LengthVal, QualType LengthTy) {
1156 // If we do not know that the buffer is long enough we return 'true'.
1157 // Otherwise the parent region of this field region would also get
1158 // invalidated, which would lead to warnings based on an unknown state.
1159
1160 if (LengthVal.isUnknown())
1161 return false;
1162
1163 // Originally copied from CheckBufferAccess and CheckLocation.
1164 SValBuilder &SB = C.getSValBuilder();
1165 ASTContext &Ctx = C.getASTContext();
1166
1167 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1168
1169 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1170 if (!Length)
1171 return true; // cf top comment.
1172
1173 // Compute the offset of the last element to be accessed: size-1.
1174 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1175 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1176 if (Offset.isUnknown())
1177 return true; // cf top comment
1178 NonLoc LastOffset = Offset.castAs<NonLoc>();
1179
1180 // Check that the first buffer is sufficiently long.
1181 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1182 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1183 if (!BufLoc)
1184 return true; // cf top comment.
1185
1186 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1187
1188 // Check for out of bound array element access.
1189 const MemRegion *R = BufEnd.getAsRegion();
1190 if (!R)
1191 return true; // cf top comment.
1192
1193 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1194 if (!ER)
1195 return true; // cf top comment.
1196
1197 // Support library functions defined with non-default address spaces
1198 assert(ER->getValueType()->getCanonicalTypeUnqualified() ==
1199 C.getASTContext().CharTy &&
1200 "isFirstBufInBound should only be called with char* ElementRegions");
1201
1202 // Get the size of the array.
1203 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1204 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1205
1206 // Get the index of the accessed element.
1207 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1208
1209 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1210
1211 return static_cast<bool>(StInBound);
1212}
1213
1214ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1215 CheckerContext &C, ProgramStateRef S, const Expr *BufE,
1216 ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) {
1217 auto InvalidationTraitOperations =
1218 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1219 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1220 // If destination buffer is a field region and access is in bound, do
1221 // not invalidate its super region.
1222 if (MemRegion::FieldRegionKind == R->getKind() &&
1223 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1224 ITraits.setTrait(
1225 R,
1227 }
1228 return false;
1229 };
1230
1231 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1232}
1233
1235CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1236 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1237 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1238 const MemRegion *R) {
1239 return isa<FieldRegion>(R);
1240 };
1241
1242 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1243}
1244
1245ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1246 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1247 auto InvalidationTraitOperations =
1248 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1249 if (MemRegion::FieldRegionKind == R->getKind())
1250 ITraits.setTrait(
1251 R,
1253 return false;
1254 };
1255
1256 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1257}
1258
1259ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1261 ConstCFGElementRef Elem,
1262 SVal BufV) {
1263 auto InvalidationTraitOperations =
1264 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1265 ITraits.setTrait(
1266 R->getBaseRegion(),
1268 ITraits.setTrait(R,
1270 return true;
1271 };
1272
1273 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1274}
1275
1276ProgramStateRef CStringChecker::invalidateBufferAux(
1277 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
1278 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1279 const MemRegion *)>
1280 InvalidationTraitOperations) {
1281 std::optional<Loc> L = V.getAs<Loc>();
1282 if (!L)
1283 return State;
1284
1285 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1286 // some assumptions about the value that CFRefCount can't. Even so, it should
1287 // probably be refactored.
1288 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1289 const MemRegion *R = MR->getRegion()->StripCasts();
1290
1291 // Are we dealing with an ElementRegion? If so, we should be invalidating
1292 // the super-region.
1293 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1294 R = ER->getSuperRegion();
1295 // FIXME: What about layers of ElementRegions?
1296 }
1297
1298 // Invalidate this region.
1299 const StackFrame *SF = C.getPredecessor()->getStackFrame();
1300 RegionAndSymbolInvalidationTraits ITraits;
1301 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1302
1303 return State->invalidateRegions(R, Elem, C.blockCount(), SF,
1304 CausesPointerEscape, nullptr, nullptr,
1305 &ITraits);
1306 }
1307
1308 // If we have a non-region value by chance, just remove the binding.
1309 // FIXME: is this necessary or correct? This handles the non-Region
1310 // cases. Is it ever valid to store to these?
1311 return State->killBinding(*L);
1312}
1313
1314bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1315 const MemRegion *MR) {
1316 switch (MR->getKind()) {
1317 case MemRegion::FunctionCodeRegionKind: {
1318 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1319 os << "the address of the function '" << *FD << '\'';
1320 else
1321 os << "the address of a function";
1322 return true;
1323 }
1324 case MemRegion::BlockCodeRegionKind:
1325 os << "block text";
1326 return true;
1327 case MemRegion::BlockDataRegionKind:
1328 os << "a block";
1329 return true;
1330 case MemRegion::CXXThisRegionKind:
1331 case MemRegion::CXXTempObjectRegionKind:
1332 os << "a C++ temp object of type "
1333 << cast<TypedValueRegion>(MR)->getValueType();
1334 return true;
1335 case MemRegion::NonParamVarRegionKind:
1336 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1337 return true;
1338 case MemRegion::ParamVarRegionKind:
1339 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1340 return true;
1341 case MemRegion::FieldRegionKind:
1342 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1343 return true;
1344 case MemRegion::ObjCIvarRegionKind:
1345 os << "an instance variable of type "
1346 << cast<TypedValueRegion>(MR)->getValueType();
1347 return true;
1348 default:
1349 return false;
1350 }
1351}
1352
1353bool CStringChecker::memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
1354 SVal CharVal, const Expr *Size,
1355 CheckerContext &C, ProgramStateRef &State) {
1356 SVal MemVal = C.getSVal(DstBuffer);
1357 SVal SizeVal = C.getSVal(Size);
1358 const MemRegion *MR = MemVal.getAsRegion();
1359 if (!MR)
1360 return false;
1361
1362 // We're about to model memset by producing a "default binding" in the Store.
1363 // Our current implementation - RegionStore - doesn't support default bindings
1364 // that don't cover the whole base region. So we should first get the offset
1365 // and the base region to figure out whether the offset of buffer is 0.
1366 RegionOffset Offset = MR->getAsOffset();
1367 const MemRegion *BR = Offset.getRegion();
1368
1369 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1370 if (!SizeNL)
1371 return false;
1372
1373 SValBuilder &svalBuilder = C.getSValBuilder();
1374 ASTContext &Ctx = C.getASTContext();
1375
1376 // void *memset(void *dest, int ch, size_t count);
1377 // For now we can only handle the case of offset is 0 and concrete char value.
1378 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1379 Offset.getOffset() == 0) {
1380 // Get the base region's size.
1381 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1382
1383 ProgramStateRef StateWholeReg, StateNotWholeReg;
1384 std::tie(StateWholeReg, StateNotWholeReg) =
1385 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1386
1387 // With the semantic of 'memset()', we should convert the CharVal to
1388 // unsigned char.
1389 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1390
1391 ProgramStateRef StateNullChar, StateNonNullChar;
1392 std::tie(StateNullChar, StateNonNullChar) =
1393 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1394
1395 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1396 !StateNonNullChar) {
1397 // If the 'memset()' acts on the whole region of destination buffer and
1398 // the value of the second argument of 'memset()' is zero, bind the second
1399 // argument's value to the destination buffer with 'default binding'.
1400 // FIXME: Since there is no perfect way to bind the non-zero character, we
1401 // can only deal with zero value here. In the future, we need to deal with
1402 // the binding of non-zero value in the case of whole region.
1403 State =
1404 State->bindDefaultZero(svalBuilder.makeLoc(BR), C.getStackFrame());
1405 } else {
1406 // If the destination buffer's extent is not equal to the value of
1407 // third argument, just invalidate buffer.
1408 State = invalidateDestinationBufferBySize(
1409 C, State, DstBuffer, Elem, MemVal, SizeVal, Size->getType());
1410 }
1411
1412 if (StateNullChar && !StateNonNullChar) {
1413 // If the value of the second argument of 'memset()' is zero, set the
1414 // string length of destination buffer to 0 directly.
1415 State = setCStringLength(State, MR,
1416 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1417 } else if (!StateNullChar && StateNonNullChar) {
1418 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1419 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1420 C.getStackFrame(), C.blockCount());
1421
1422 // If the value of second argument is not zero, then the string length
1423 // is at least the size argument.
1424 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1425 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1426
1427 State = setCStringLength(
1428 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1429 MR, NewStrLen);
1430 }
1431 } else {
1432 // If the offset is not zero and char value is not concrete, we can do
1433 // nothing but invalidate the buffer.
1434 State = invalidateDestinationBufferBySize(C, State, DstBuffer, Elem, MemVal,
1435 SizeVal, Size->getType());
1436 }
1437 return true;
1438}
1439
1440//===----------------------------------------------------------------------===//
1441// evaluation of individual function calls.
1442//===----------------------------------------------------------------------===//
1443
1444void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1445 ProgramStateRef state, SizeArgExpr Size,
1446 DestinationArgExpr Dest,
1447 SourceArgExpr Source, bool Restricted,
1448 bool IsMempcpy, CharKind CK) const {
1449 CurrentFunctionDescription = "memory copy function";
1450
1451 // See if the size argument is zero.
1452 const StackFrame *SF = C.getStackFrame();
1453 SVal sizeVal = state->getSVal(Size.Expression, SF);
1454 QualType sizeTy = Size.Expression->getType();
1455
1456 ProgramStateRef stateZeroSize, stateNonZeroSize;
1457 std::tie(stateZeroSize, stateNonZeroSize) =
1458 assumeZero(C, state, sizeVal, sizeTy);
1459
1460 // Get the value of the Dest.
1461 SVal destVal = state->getSVal(Dest.Expression, SF);
1462
1463 // If the size is zero, there won't be any actual memory access, so
1464 // just bind the return value to the destination buffer and return.
1465 if (stateZeroSize && !stateNonZeroSize) {
1466 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), SF, destVal);
1467 C.addTransition(stateZeroSize);
1468 return;
1469 }
1470
1471 // If the size can be nonzero, we have to check the other arguments.
1472 if (stateNonZeroSize) {
1473 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1474 // to the size of the destination buffer, then emit a warning
1475 // that an attacker may provoke a buffer overflow error.
1476 state = stateNonZeroSize;
1477
1478 // Ensure the destination is not null. If it is NULL there will be a
1479 // NULL pointer dereference.
1480 state = checkNonNull(C, state, Dest, destVal);
1481 if (!state)
1482 return;
1483
1484 // Get the value of the Src.
1485 SVal srcVal = state->getSVal(Source.Expression, SF);
1486
1487 // Ensure the source is not null. If it is NULL there will be a
1488 // NULL pointer dereference.
1489 state = checkNonNull(C, state, Source, srcVal);
1490 if (!state)
1491 return;
1492
1493 // Ensure the accesses are valid and that the buffers do not overlap.
1494 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1495 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1496
1497 if (Restricted)
1498 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1499
1500 if (!state)
1501 return;
1502
1503 // If this is mempcpy, get the byte after the last byte copied and
1504 // bind the expr.
1505 if (IsMempcpy) {
1506 // Get the byte after the last byte copied.
1507 SValBuilder &SvalBuilder = C.getSValBuilder();
1508 ASTContext &Ctx = SvalBuilder.getContext();
1509 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1510 SVal DestRegCharVal =
1511 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1512 SVal lastElement = C.getSValBuilder().evalBinOp(
1513 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1514 // If we don't know how much we copied, we can at least
1515 // conjure a return value for later.
1516 if (lastElement.isUnknown())
1517 lastElement = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1518
1519 // The byte after the last byte copied is the return value.
1520 state = state->BindExpr(Call.getOriginExpr(), SF, lastElement);
1521 } else {
1522 // All other copies return the destination buffer.
1523 // (Well, bcopy() has a void return type, but this won't hurt.)
1524 state = state->BindExpr(Call.getOriginExpr(), SF, destVal);
1525 }
1526
1527 // Invalidate the destination (regular invalidation without pointer-escaping
1528 // the address of the top-level region).
1529 // FIXME: Even if we can't perfectly model the copy, we should see if we
1530 // can use LazyCompoundVals to copy the source values into the destination.
1531 // This would probably remove any existing bindings past the end of the
1532 // copied region, but that's still an improvement over blank invalidation.
1533 state = invalidateDestinationBufferBySize(
1534 C, state, Dest.Expression, Call.getCFGElementRef(),
1535 C.getSVal(Dest.Expression), sizeVal, Size.Expression->getType());
1536
1537 // Invalidate the source (const-invalidation without const-pointer-escaping
1538 // the address of the top-level region).
1539 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(),
1540 C.getSVal(Source.Expression));
1541
1542 C.addTransition(state);
1543 }
1544}
1545
1546void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1547 CharKind CK) const {
1548 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1549 // The return value is the address of the destination buffer.
1550 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1551 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1552 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1553
1554 ProgramStateRef State = C.getState();
1555
1556 constexpr bool IsRestricted = true;
1557 constexpr bool IsMempcpy = false;
1558 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1559}
1560
1561void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1562 CharKind CK) const {
1563 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1564 // The return value is a pointer to the byte following the last written byte.
1565 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1566 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1567 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1568
1569 constexpr bool IsRestricted = true;
1570 constexpr bool IsMempcpy = true;
1571 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1572 IsMempcpy, CK);
1573}
1574
1575void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1576 CharKind CK) const {
1577 // void *memmove(void *dst, const void *src, size_t n);
1578 // The return value is the address of the destination buffer.
1579 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1580 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1581 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1582
1583 constexpr bool IsRestricted = false;
1584 constexpr bool IsMempcpy = false;
1585 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1586 IsMempcpy, CK);
1587}
1588
1589void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1590 // void bcopy(const void *src, void *dst, size_t n);
1591 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1592 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1593 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1594
1595 constexpr bool IsRestricted = false;
1596 constexpr bool IsMempcpy = false;
1597 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1598 IsMempcpy, CharKind::Regular);
1599}
1600
1601void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1602 CharKind CK) const {
1603 // int memcmp(const void *s1, const void *s2, size_t n);
1604 CurrentFunctionDescription = "memory comparison function";
1605
1606 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1607 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1608 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1609
1610 ProgramStateRef State = C.getState();
1611 SValBuilder &Builder = C.getSValBuilder();
1612 const StackFrame *SF = C.getStackFrame();
1613
1614 // See if the size argument is zero.
1615 SVal sizeVal = State->getSVal(Size.Expression, SF);
1616 QualType sizeTy = Size.Expression->getType();
1617
1618 ProgramStateRef stateZeroSize, stateNonZeroSize;
1619 std::tie(stateZeroSize, stateNonZeroSize) =
1620 assumeZero(C, State, sizeVal, sizeTy);
1621
1622 // If the size can be zero, the result will be 0 in that case, and we don't
1623 // have to check either of the buffers.
1624 if (stateZeroSize) {
1625 State = stateZeroSize;
1626 State = State->BindExpr(Call.getOriginExpr(), SF,
1627 Builder.makeZeroVal(Call.getResultType()));
1628 C.addTransition(State);
1629 }
1630
1631 // If the size can be nonzero, we have to check the other arguments.
1632 if (stateNonZeroSize) {
1633 State = stateNonZeroSize;
1634 // If we know the two buffers are the same, we know the result is 0.
1635 // First, get the two buffers' addresses. Another checker will have already
1636 // made sure they're not undefined.
1637 DefinedOrUnknownSVal LV =
1638 State->getSVal(Left.Expression, SF).castAs<DefinedOrUnknownSVal>();
1639 DefinedOrUnknownSVal RV =
1640 State->getSVal(Right.Expression, SF).castAs<DefinedOrUnknownSVal>();
1641
1642 // See if they are the same.
1643 ProgramStateRef SameBuffer, NotSameBuffer;
1644 std::tie(SameBuffer, NotSameBuffer) =
1645 State->assume(Builder.evalEQ(State, LV, RV));
1646
1647 // If the two arguments are the same buffer, we know the result is 0,
1648 // and we only need to check one size.
1649 if (SameBuffer && !NotSameBuffer) {
1650 State = SameBuffer;
1651 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1652 if (State) {
1653 State = SameBuffer->BindExpr(Call.getOriginExpr(), SF,
1654 Builder.makeZeroVal(Call.getResultType()));
1655 C.addTransition(State);
1656 }
1657 return;
1658 }
1659
1660 // If the two arguments might be different buffers, we have to check
1661 // the size of both of them.
1662 assert(NotSameBuffer);
1663 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1664 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1665 if (State) {
1666 // The return value is the comparison result, which we don't know.
1667 SVal CmpV = Builder.conjureSymbolVal(Call, C.blockCount());
1668 State = State->BindExpr(Call.getOriginExpr(), SF, CmpV);
1669 C.addTransition(State);
1670 }
1671 }
1672}
1673
1674void CStringChecker::evalstrLength(CheckerContext &C,
1675 const CallEvent &Call) const {
1676 // size_t strlen(const char *s);
1677 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1678}
1679
1680void CStringChecker::evalstrnLength(CheckerContext &C,
1681 const CallEvent &Call) const {
1682 // size_t strnlen(const char *s, size_t maxlen);
1683 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1684}
1685
1686void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1687 const CallEvent &Call,
1688 bool IsStrnlen) const {
1689 CurrentFunctionDescription = "string length function";
1690 ProgramStateRef state = C.getState();
1691 const StackFrame *SF = C.getStackFrame();
1692
1693 if (IsStrnlen) {
1694 const Expr *maxlenExpr = Call.getArgExpr(1);
1695 SVal maxlenVal = state->getSVal(maxlenExpr, SF);
1696
1697 ProgramStateRef stateZeroSize, stateNonZeroSize;
1698 std::tie(stateZeroSize, stateNonZeroSize) =
1699 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1700
1701 // If the size can be zero, the result will be 0 in that case, and we don't
1702 // have to check the string itself.
1703 if (stateZeroSize) {
1704 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1705 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), SF, zero);
1706 C.addTransition(stateZeroSize);
1707 }
1708
1709 // If the size is GUARANTEED to be zero, we're done!
1710 if (!stateNonZeroSize)
1711 return;
1712
1713 // Otherwise, record the assumption that the size is nonzero.
1714 state = stateNonZeroSize;
1715 }
1716
1717 // Check that the string argument is non-null.
1718 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1719 SVal ArgVal = state->getSVal(Arg.Expression, SF);
1720 state = checkNonNull(C, state, Arg, ArgVal);
1721
1722 if (!state)
1723 return;
1724
1725 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1726
1727 // If the argument isn't a valid C string, there's no valid state to
1728 // transition to.
1729 if (strLength.isUndef())
1730 return;
1731
1732 DefinedOrUnknownSVal result = UnknownVal();
1733
1734 // If the check is for strnlen() then bind the return value to no more than
1735 // the maxlen value.
1736 if (IsStrnlen) {
1737 QualType cmpTy = C.getSValBuilder().getConditionType();
1738
1739 // It's a little unfortunate to be getting this again,
1740 // but it's not that expensive...
1741 const Expr *maxlenExpr = Call.getArgExpr(1);
1742 SVal maxlenVal = state->getSVal(maxlenExpr, SF);
1743
1744 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1745 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1746
1747 if (strLengthNL && maxlenValNL) {
1748 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1749
1750 // Check if the strLength is greater than the maxlen.
1751 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1752 C.getSValBuilder()
1753 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1754 .castAs<DefinedOrUnknownSVal>());
1755
1756 if (stateStringTooLong && !stateStringNotTooLong) {
1757 // If the string is longer than maxlen, return maxlen.
1758 result = *maxlenValNL;
1759 } else if (stateStringNotTooLong && !stateStringTooLong) {
1760 // If the string is shorter than maxlen, return its length.
1761 result = *strLengthNL;
1762 }
1763 }
1764
1765 if (result.isUnknown()) {
1766 // If we don't have enough information for a comparison, there's
1767 // no guarantee the full string length will actually be returned.
1768 // All we know is the return value is the min of the string length
1769 // and the limit. This is better than nothing.
1770 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1771 NonLoc resultNL = result.castAs<NonLoc>();
1772
1773 if (strLengthNL) {
1774 state = state->assume(C.getSValBuilder().evalBinOpNN(
1775 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1776 .castAs<DefinedOrUnknownSVal>(), true);
1777 }
1778
1779 if (maxlenValNL) {
1780 state = state->assume(C.getSValBuilder().evalBinOpNN(
1781 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1782 .castAs<DefinedOrUnknownSVal>(), true);
1783 }
1784 }
1785
1786 } else {
1787 // This is a plain strlen(), not strnlen().
1788 result = strLength.castAs<DefinedOrUnknownSVal>();
1789
1790 // If we don't know the length of the string, conjure a return
1791 // value, so it can be used in constraints, at least.
1792 if (result.isUnknown()) {
1793 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1794 }
1795 }
1796
1797 // Bind the return value.
1798 assert(!result.isUnknown() && "Should have conjured a value by now");
1799 state = state->BindExpr(Call.getOriginExpr(), SF, result);
1800 C.addTransition(state);
1801}
1802
1803void CStringChecker::evalStrcpy(CheckerContext &C,
1804 const CallEvent &Call) const {
1805 // char *strcpy(char *restrict dst, const char *restrict src);
1806 evalStrcpyCommon(C, Call,
1807 /* ReturnEnd = */ false,
1808 /* IsBounded = */ false,
1809 /* appendK = */ ConcatFnKind::none);
1810}
1811
1812void CStringChecker::evalStrncpy(CheckerContext &C,
1813 const CallEvent &Call) const {
1814 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1815 evalStrcpyCommon(C, Call,
1816 /* ReturnEnd = */ false,
1817 /* IsBounded = */ true,
1818 /* appendK = */ ConcatFnKind::none);
1819}
1820
1821void CStringChecker::evalStpcpy(CheckerContext &C,
1822 const CallEvent &Call) const {
1823 // char *stpcpy(char *restrict dst, const char *restrict src);
1824 evalStrcpyCommon(C, Call,
1825 /* ReturnEnd = */ true,
1826 /* IsBounded = */ false,
1827 /* appendK = */ ConcatFnKind::none);
1828}
1829
1830void CStringChecker::evalStrlcpy(CheckerContext &C,
1831 const CallEvent &Call) const {
1832 // size_t strlcpy(char *dest, const char *src, size_t size);
1833 evalStrcpyCommon(C, Call,
1834 /* ReturnEnd = */ true,
1835 /* IsBounded = */ true,
1836 /* appendK = */ ConcatFnKind::none,
1837 /* returnPtr = */ false);
1838}
1839
1840void CStringChecker::evalStrcat(CheckerContext &C,
1841 const CallEvent &Call) const {
1842 // char *strcat(char *restrict s1, const char *restrict s2);
1843 evalStrcpyCommon(C, Call,
1844 /* ReturnEnd = */ false,
1845 /* IsBounded = */ false,
1846 /* appendK = */ ConcatFnKind::strcat);
1847}
1848
1849void CStringChecker::evalStrncat(CheckerContext &C,
1850 const CallEvent &Call) const {
1851 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1852 evalStrcpyCommon(C, Call,
1853 /* ReturnEnd = */ false,
1854 /* IsBounded = */ true,
1855 /* appendK = */ ConcatFnKind::strcat);
1856}
1857
1858void CStringChecker::evalStrlcat(CheckerContext &C,
1859 const CallEvent &Call) const {
1860 // size_t strlcat(char *dst, const char *src, size_t size);
1861 // It will append at most size - strlen(dst) - 1 bytes,
1862 // NULL-terminating the result.
1863 evalStrcpyCommon(C, Call,
1864 /* ReturnEnd = */ false,
1865 /* IsBounded = */ true,
1866 /* appendK = */ ConcatFnKind::strlcat,
1867 /* returnPtr = */ false);
1868}
1869
// Shared model for the string copy / concatenation family (the visible
// callers are evalStrcpy, evalStrncpy, evalStpcpy, evalStrlcpy, evalStrcat,
// evalStrncat and evalStrlcat).
//
//  - ReturnEnd: with returnPtr set, the result is the pointer "destination +
//    final length" instead of the destination pointer itself.
//  - IsBounded: argument 2 of the call is a size bound.
//  - appendK:   ConcatFnKind::none for a copy; strcat / strlcat select the
//    two concatenation flavours.
//  - returnPtr: when false, the call's result is a size value (strlRetVal)
//    rather than a pointer; the visible callers passing false are
//    evalStrlcpy and evalStrlcat.
//
// Argument 0 is the destination and argument 1 the source. Every early
// `return` below leaves without adding a transition.
1870void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1871 bool ReturnEnd, bool IsBounded,
1872 ConcatFnKind appendK,
1873 bool returnPtr) const {
1874 if (appendK == ConcatFnKind::none)
1875 CurrentFunctionDescription = "string copy function";
1876 else
1877 CurrentFunctionDescription = "string concatenation function";
1878
1879 ProgramStateRef state = C.getState();
1880 const StackFrame *SF = C.getStackFrame();
1881
1882 // Check that the destination is non-null.
1883 DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1884 SVal DstVal = state->getSVal(Dst.Expression, SF);
1885 state = checkNonNull(C, state, Dst, DstVal);
1886 if (!state)
1887 return;
1888
1889 // Check that the source is non-null.
1890 SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1891 SVal srcVal = state->getSVal(srcExpr.Expression, SF);
1892 state = checkNonNull(C, state, srcExpr, srcVal);
1893 if (!state)
1894 return;
1895
1896 // Get the string length of the source.
1897 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1898 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1899
1900 // Get the string length of the destination buffer.
// (The destination length is fetched for every kind of call, but it is only
// read further down on the concatenation paths.)
1901 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1902 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1903
1904 // If the source isn't a valid C string, give up.
1905 if (strLength.isUndef())
1906 return;
1907
1908 SValBuilder &svalBuilder = C.getSValBuilder();
1909 QualType cmpTy = svalBuilder.getConditionType();
1910 QualType sizeTy = svalBuilder.getContext().getSizeType();
1911
1912 // These two values allow checking two kinds of errors:
1913 // - actual overflows caused by a source that doesn't fit in the destination
1914 // - potential overflows caused by a bound that could exceed the destination
1915 SVal amountCopied = UnknownVal();
1916 SVal maxLastElementIndex = UnknownVal();
// boundWarning doubles as a flag: it is only tested for null further down, to
// decide whether the bound-based check replaces the check of the actual copy.
1917 const char *boundWarning = nullptr;
1918
1919 // FIXME: Why do we choose the srcExpr if the access has no size?
1920 // Note that the 3rd argument of the call would be the size parameter.
1921 SizeArgExpr SrcExprAsSizeDummy = {
1922 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1923 state = CheckOverlap(
1924 C, state,
1925 (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1926 Dst, srcExpr);
1927
1928 if (!state)
1929 return;
1930
1931 // If the function is strncpy, strncat, etc... it is bounded.
1932 if (IsBounded) {
1933 // Get the max number of characters to copy.
1934 SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1935 SVal lenVal = state->getSVal(lenExpr.Expression, SF);
1936
1937 // Protect against misdeclared strncpy().
1938 lenVal =
1939 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1940
1941 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1942
1943 // If we know both values, we might be able to figure out how much
1944 // we're copying.
1945 if (strLengthNL && lenValNL) {
1946 switch (appendK) {
1947 case ConcatFnKind::none:
1948 case ConcatFnKind::strcat: {
1949 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1950 // Check if the max number to copy is less than the length of the src.
1951 // If the bound is equal to the source length, strncpy won't null-
1952 // terminate the result!
1953 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1954 svalBuilder
1955 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1956 .castAs<DefinedOrUnknownSVal>());
1957
// When both outcomes are feasible neither branch below is taken, so state is
// left un-split and amountCopied stays unknown.
1958 if (stateSourceTooLong && !stateSourceNotTooLong) {
1959 // Max number to copy is less than the length of the src, so the
1960 // actual strLength copied is the max number arg.
1961 state = stateSourceTooLong;
1962 amountCopied = lenVal;
1963
1964 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1965 // The source buffer entirely fits in the bound.
1966 state = stateSourceNotTooLong;
1967 amountCopied = strLength;
1968 }
1969 break;
1970 }
1971 case ConcatFnKind::strlcat:
1972 if (!dstStrLengthNL)
1973 return;
1974
1975 // amountCopied = min (size - dstLen - 1 , srcLen)
1976 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1977 *dstStrLengthNL, sizeTy);
1978 if (!isa<NonLoc>(freeSpace))
1979 return;
1980 freeSpace =
1981 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1982 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1983 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1984
1985 // While unlikely, it is possible that the subtraction is
1986 // too complex to compute, let's check whether it succeeded.
1987 if (!freeSpaceNL)
1988 return;
1989 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1990 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1991
// Unlike the strncpy/strncat case above, the outcome is only used to pick
// amountCopied here; state itself is not narrowed.
1992 ProgramStateRef TrueState, FalseState;
1993 std::tie(TrueState, FalseState) =
1994 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1995
1996 // srcStrLength <= size - dstStrLength -1
1997 if (TrueState && !FalseState) {
1998 amountCopied = strLength;
1999 }
2000
2001 // srcStrLength > size - dstStrLength -1
2002 if (!TrueState && FalseState) {
2003 amountCopied = freeSpace;
2004 }
2005
2006 if (TrueState && FalseState)
2007 amountCopied = UnknownVal();
2008 break;
2009 }
2010 }
2011 // We still want to know if the bound is known to be too large.
2012 if (lenValNL) {
2013 switch (appendK) {
2014 case ConcatFnKind::strcat:
2015 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
2016
2017 // Get the string length of the destination. If the destination is
2018 // memory that can't have a string length, we shouldn't be copying
2019 // into it anyway.
2020 if (dstStrLength.isUndef())
2021 return;
2022
2023 if (dstStrLengthNL) {
2024 maxLastElementIndex = svalBuilder.evalBinOpNN(
2025 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
2026
2027 boundWarning = "Size argument is greater than the free space in the "
2028 "destination buffer";
2029 }
2030 break;
2031 case ConcatFnKind::none:
2032 case ConcatFnKind::strlcat:
2033 // For strncpy and strlcat, this is just checking
2034 // that lenVal <= sizeof(dst).
2035 // (Yes, strncpy and strncat differ in how they treat termination.
2036 // strncat ALWAYS terminates, but strncpy doesn't.)
2037
2038 // We need a special case for when the copy size is zero, in which
2039 // case strncpy will do no work at all. Our bounds check uses n-1
2040 // as the last element accessed, so n == 0 is problematic.
2041 ProgramStateRef StateZeroSize, StateNonZeroSize;
2042 std::tie(StateZeroSize, StateNonZeroSize) =
2043 assumeZero(C, state, *lenValNL, sizeTy);
2044
2045 // If the size is known to be zero, we're done.
2046 if (StateZeroSize && !StateNonZeroSize) {
2047 if (returnPtr) {
2048 StateZeroSize =
2049 StateZeroSize->BindExpr(Call.getOriginExpr(), SF, DstVal);
2050 } else {
2051 if (appendK == ConcatFnKind::none) {
2052 // strlcpy returns strlen(src)
2053 StateZeroSize =
2054 StateZeroSize->BindExpr(Call.getOriginExpr(), SF, strLength);
2055 } else {
2056 // strlcat returns strlen(src) + strlen(dst)
2057 SVal retSize = svalBuilder.evalBinOp(
2058 state, BO_Add, strLength, dstStrLength, sizeTy);
2059 StateZeroSize =
2060 StateZeroSize->BindExpr(Call.getOriginExpr(), SF, retSize);
2061 }
2062 }
2063 C.addTransition(StateZeroSize);
2064 return;
2065 }
2066
2067 // Otherwise, go ahead and figure out the last element we'll touch.
2068 // We don't record the non-zero assumption here because we can't
2069 // be sure. We won't warn on a possible zero.
2070 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2071 maxLastElementIndex =
2072 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2073 boundWarning = "Size argument is greater than the length of the "
2074 "destination buffer";
2075 break;
2076 }
2077 }
2078 } else {
2079 // The function isn't bounded. The amount copied should match the length
2080 // of the source buffer.
2081 amountCopied = strLength;
2082 }
2083
2084 assert(state);
2085
2086 // This represents the number of characters copied into the destination
2087 // buffer. (It may not actually be the strlen if the destination buffer
2088 // is not terminated.)
2089 SVal finalStrLength = UnknownVal();
2090 SVal strlRetVal = UnknownVal();
2091
2092 if (appendK == ConcatFnKind::none && !returnPtr) {
2093 // strlcpy returns strlen(src)
2094 strlRetVal = strLength;
2095 }
2096
2097 // If this is an appending function (strcat, strncat...) then set the
2098 // string length to strlen(src) + strlen(dst) since the buffer will
2099 // ultimately contain both.
2100 if (appendK != ConcatFnKind::none) {
2101 // Get the string length of the destination. If the destination is memory
2102 // that can't have a string length, we shouldn't be copying into it anyway.
2103 if (dstStrLength.isUndef())
2104 return;
2105
2106 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2107 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2108 *dstStrLengthNL, sizeTy);
2109 }
2110
2111 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2112
2113 // If we know both string lengths, we might know the final string length.
2114 if (amountCopiedNL && dstStrLengthNL) {
2115 // Make sure the two lengths together don't overflow a size_t.
2116 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2117 if (!state)
2118 return;
2119
2120 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2121 *dstStrLengthNL, sizeTy);
2122 }
2123
2124 // If we couldn't get a single value for the final string length,
2125 // we can at least bound it by the individual lengths.
2126 if (finalStrLength.isUnknown()) {
2127 // Try to get a "hypothetical" string length symbol, which we can later
2128 // set as a real value if that turns out to be the case.
2129 finalStrLength =
2130 getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2131 assert(!finalStrLength.isUndef());
2132
2133 if (std::optional<NonLoc> finalStrLengthNL =
2134 finalStrLength.getAs<NonLoc>()) {
// NOTE(review): this condition tests appendK == none, but the enclosing block
// only runs when appendK != none, so this branch looks unreachable - confirm.
2135 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2136 // we overwrite dst string with the src
2137 // finalStrLength >= srcStrLength
2138 SVal sourceInResult = svalBuilder.evalBinOpNN(
2139 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2140 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2141 true);
2142 if (!state)
2143 return;
2144 }
2145
2146 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2147 // we extend the dst string with the src
2148 // finalStrLength >= dstStrLength
2149 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2150 *finalStrLengthNL,
2151 *dstStrLengthNL,
2152 cmpTy);
2153 state =
2154 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2155 if (!state)
2156 return;
2157 }
2158 }
2159 }
2160
2161 } else {
2162 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2163 // the final string length will match the input string length.
2164 finalStrLength = amountCopied;
2165 }
2166
2167 SVal Result;
2168
2169 if (returnPtr) {
2170 // The final result of the function will either be a pointer past the last
2171 // copied element, or a pointer to the start of the destination buffer.
// For ReturnEnd the value starts out unknown; it is replaced below either by
// the computed end pointer or, failing that, by a conjured symbol.
2172 Result = (ReturnEnd ? UnknownVal() : DstVal);
2173 } else {
2174 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2175 //strlcpy, strlcat
2176 Result = strlRetVal;
2177 else
2178 Result = finalStrLength;
2179 }
2180
2181 assert(state);
2182
2183 // If the destination is a MemRegion, try to check for a buffer overflow and
2184 // record the new string length.
2185 if (std::optional<loc::MemRegionVal> dstRegVal =
2186 DstVal.getAs<loc::MemRegionVal>()) {
2187 QualType ptrTy = Dst.Expression->getType();
2188
2189 // If we have an exact value on a bounded copy, use that to check for
2190 // overflows, rather than our estimate about how much is actually copied.
2191 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2192 SVal maxLastElement =
2193 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2194
2195 // Check if the first byte of the destination is writable.
2196 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2197 if (!state)
2198 return;
2199 // Check if the last byte of the destination is writable.
2200 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2201 if (!state)
2202 return;
2203 }
2204
2205 // Then, if the final length is known...
2206 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2207 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2208 *knownStrLength, ptrTy);
2209
2210 // ...and we haven't checked the bound, we'll check the actual copy.
2211 if (!boundWarning) {
2212 // Check if the first byte of the destination is writable.
2213 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2214 if (!state)
2215 return;
2216 // Check if the last byte of the destination is writable.
2217 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2218 if (!state)
2219 return;
2220 }
2221
2222 // If this is a stpcpy-style copy, the last element is the return value.
2223 if (returnPtr && ReturnEnd)
2224 Result = lastElement;
2225 }
2226
2227 // For bounded method, amountCopied take the minimum of two values,
2228 // for ConcatFnKind::strlcat:
2229 // amountCopied = min (size - dstLen - 1 , srcLen)
2230 // for others:
2231 // amountCopied = min (srcLen, size)
2232 // So even if we don't know about amountCopied, as long as one of them will
2233 // not cause an out-of-bound access, the whole function's operation will not
2234 // too, that will avoid invalidating the superRegion of data member in that
2235 // situation.
2236 bool CouldAccessOutOfBound = true;
2237 if (IsBounded && amountCopied.isUnknown()) {
// Helper: a missing value is treated conservatively as "could be out of bound".
2238 auto CouldAccessOutOfBoundForSVal =
2239 [&](std::optional<NonLoc> Val) -> bool {
2240 if (!Val)
2241 return true;
2242 return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2243 Dst.Expression->getType(), *Val,
2244 C.getASTContext().getSizeType());
2245 };
2246
2247 CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2248
2249 if (CouldAccessOutOfBound) {
2250 // Get the max number of characters to copy.
2251 const Expr *LenExpr = Call.getArgExpr(2);
2252 SVal LenVal = state->getSVal(LenExpr, SF);
2253
2254 // Protect against misdeclared strncpy().
2255 LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2256
2257 // Because analyzer doesn't handle expressions like `size -
2258 // dstLen - 1` very well, we roughly use `size` for
2259 // ConcatFnKind::strlcat here, same with other concat kinds.
2260 CouldAccessOutOfBound =
2261 CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2262 }
2263 }
2264
2265 // Invalidate the destination (regular invalidation without pointer-escaping
2266 // the address of the top-level region). This must happen before we set the
2267 // C string length because invalidation will clear the length.
2268 // FIXME: Even if we can't perfectly model the copy, we should see if we
2269 // can use LazyCompoundVals to copy the source values into the destination.
2270 // This would probably remove any existing bindings past the end of the
2271 // string, but that's still an improvement over blank invalidation.
2272 if (CouldAccessOutOfBound)
2273 state = invalidateDestinationBufferBySize(
2274 C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2275 amountCopied, C.getASTContext().getSizeType());
2276 else
2277 state = invalidateDestinationBufferNeverOverflows(
2278 C, state, Call.getCFGElementRef(), *dstRegVal);
2279
2280 // Invalidate the source (const-invalidation without const-pointer-escaping
2281 // the address of the top-level region).
2282 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(), srcVal);
2283
2284 // Set the C string length of the destination, if we know it.
2285 if (IsBounded && (appendK == ConcatFnKind::none)) {
2286 // strncpy is annoying in that it doesn't guarantee to null-terminate
2287 // the result string. If the original string didn't fit entirely inside
2288 // the bound (including the null-terminator), we don't know how long the
2289 // result is.
// (On the bounded path amountCopied was assigned strLength only when the
// source was shown to fit within the bound.)
2290 if (amountCopied != strLength)
2291 finalStrLength = UnknownVal();
2292 }
2293 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2294 }
2295
2296 assert(state);
2297
2298 if (returnPtr) {
2299 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2300 // overflow, we still need a result. Conjure a return value.
2301 if (ReturnEnd && Result.isUnknown()) {
2302 Result = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2303 }
2304 }
2305 // Set the return value.
2306 state = state->BindExpr(Call.getOriginExpr(), SF, Result);
2307 C.addTransition(state);
2308}
2309
2310void CStringChecker::evalStrxfrm(CheckerContext &C,
2311 const CallEvent &Call) const {
2312 // size_t strxfrm(char *dest, const char *src, size_t n);
2313 CurrentFunctionDescription = "locale transformation function";
2314
2315 ProgramStateRef State = C.getState();
2316 const StackFrame *SF = C.getStackFrame();
2317 SValBuilder &SVB = C.getSValBuilder();
2318
2319 // Get arguments
2320 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2321 SourceArgExpr Source = {{Call.getArgExpr(1), 1}};
2322 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2323
2324 // `src` can never be null
2325 SVal SrcVal = State->getSVal(Source.Expression, SF);
2326 State = checkNonNull(C, State, Source, SrcVal);
2327 if (!State)
2328 return;
2329
2330 // Buffer must not overlap
2331 State = CheckOverlap(C, State, Size, Dest, Source, CK_Regular);
2332 if (!State)
2333 return;
2334
2335 // The function returns an implementation-defined length needed for
2336 // transformation
2337 SVal RetVal = SVB.conjureSymbolVal(Call, C.blockCount());
2338
2339 auto BindReturnAndTransition = [&RetVal, &Call, SF,
2340 &C](ProgramStateRef State) {
2341 if (State) {
2342 State = State->BindExpr(Call.getOriginExpr(), SF, RetVal);
2343 C.addTransition(State);
2344 }
2345 };
2346
2347 // Check if size is zero
2348 SVal SizeVal = State->getSVal(Size.Expression, SF);
2349 QualType SizeTy = Size.Expression->getType();
2350
2351 auto [StateZeroSize, StateSizeNonZero] =
2352 assumeZero(C, State, SizeVal, SizeTy);
2353
2354 // We can't assume anything about size, just bind the return value and be done
2355 if (!StateZeroSize && !StateSizeNonZero)
2356 return BindReturnAndTransition(State);
2357
2358 // If `n` is 0, we just return the implementation defined length
2359 if (StateZeroSize && !StateSizeNonZero)
2360 return BindReturnAndTransition(StateZeroSize);
2361
2362 // If `n` is not 0, `dest` can not be null.
2363 SVal DestVal = StateSizeNonZero->getSVal(Dest.Expression, SF);
2364 StateSizeNonZero = checkNonNull(C, StateSizeNonZero, Dest, DestVal);
2365 if (!StateSizeNonZero)
2366 return;
2367
2368 // Check that we can write to the destination buffer
2369 StateSizeNonZero = CheckBufferAccess(C, StateSizeNonZero, Dest, Size,
2370 AccessKind::write, CK_Regular);
2371 if (!StateSizeNonZero)
2372 return;
2373
2374 // Success: return value < `n`
2375 // Failure: return value >= `n`
2376 auto ComparisonVal = SVB.evalBinOp(StateSizeNonZero, BO_LT, RetVal, SizeVal,
2377 SVB.getConditionType())
2378 .getAs<DefinedOrUnknownSVal>();
2379 if (!ComparisonVal) {
2380 // Fallback: invalidate the buffer.
2381 StateSizeNonZero = invalidateDestinationBufferBySize(
2382 C, StateSizeNonZero, Dest.Expression, Call.getCFGElementRef(), DestVal,
2383 SizeVal, Size.Expression->getType());
2384 return BindReturnAndTransition(StateSizeNonZero);
2385 }
2386
2387 auto [StateSuccess, StateFailure] = StateSizeNonZero->assume(*ComparisonVal);
2388
2389 if (StateSuccess) {
2390 // The transformation invalidated the buffer.
2391 StateSuccess = invalidateDestinationBufferBySize(
2392 C, StateSuccess, Dest.Expression, Call.getCFGElementRef(), DestVal,
2393 SizeVal, Size.Expression->getType());
2394 BindReturnAndTransition(StateSuccess);
2395 // Fallthrough: We also want to add a transition to the failure state below.
2396 }
2397
2398 if (StateFailure) {
2399 // `dest` buffer content is undefined
2400 if (auto DestLoc = DestVal.getAs<loc::MemRegionVal>()) {
2401 StateFailure = StateFailure->killBinding(*DestLoc);
2402 StateFailure =
2403 StateFailure->bindDefaultInitial(*DestLoc, UndefinedVal{}, SF);
2404 }
2405
2406 BindReturnAndTransition(StateFailure);
2407 }
2408}
2409
2410void CStringChecker::evalStrcmp(CheckerContext &C,
2411 const CallEvent &Call) const {
2412 //int strcmp(const char *s1, const char *s2);
2413 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2414}
2415
2416void CStringChecker::evalStrncmp(CheckerContext &C,
2417 const CallEvent &Call) const {
2418 //int strncmp(const char *s1, const char *s2, size_t n);
2419 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2420}
2421
2422void CStringChecker::evalStrcasecmp(CheckerContext &C,
2423 const CallEvent &Call) const {
2424 //int strcasecmp(const char *s1, const char *s2);
2425 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2426}
2427
2428void CStringChecker::evalStrncasecmp(CheckerContext &C,
2429 const CallEvent &Call) const {
2430 //int strncasecmp(const char *s1, const char *s2, size_t n);
2431 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2432}
2433
2434void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2435 bool IsBounded, bool IgnoreCase) const {
2436 CurrentFunctionDescription = "string comparison function";
2437 ProgramStateRef state = C.getState();
2438 const StackFrame *SF = C.getStackFrame();
2439
2440 // Check that the first string is non-null
2441 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2442 SVal LeftVal = state->getSVal(Left.Expression, SF);
2443 state = checkNonNull(C, state, Left, LeftVal);
2444 if (!state)
2445 return;
2446
2447 // Check that the second string is non-null.
2448 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2449 SVal RightVal = state->getSVal(Right.Expression, SF);
2450 state = checkNonNull(C, state, Right, RightVal);
2451 if (!state)
2452 return;
2453
2454 // Get the string length of the first string or give up.
2455 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2456 if (LeftLength.isUndef())
2457 return;
2458
2459 // Get the string length of the second string or give up.
2460 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2461 if (RightLength.isUndef())
2462 return;
2463
2464 // If we know the two buffers are the same, we know the result is 0.
2465 // First, get the two buffers' addresses. Another checker will have already
2466 // made sure they're not undefined.
2467 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2468 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2469
2470 // See if they are the same.
2471 SValBuilder &svalBuilder = C.getSValBuilder();
2472 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2473 ProgramStateRef StSameBuf, StNotSameBuf;
2474 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2475
2476 // If the two arguments might be the same buffer, we know the result is 0,
2477 // and we only need to check one size.
2478 if (StSameBuf) {
2479 StSameBuf =
2480 StSameBuf->BindExpr(Call.getOriginExpr(), SF,
2481 svalBuilder.makeZeroVal(Call.getResultType()));
2482 C.addTransition(StSameBuf);
2483
2484 // If the two arguments are GUARANTEED to be the same, we're done!
2485 if (!StNotSameBuf)
2486 return;
2487 }
2488
2489 assert(StNotSameBuf);
2490 state = StNotSameBuf;
2491
2492 // At this point we can go about comparing the two buffers.
2493 // For now, we only do this if they're both known string literals.
2494
2495 // Attempt to extract string literals from both expressions.
2496 const StringLiteral *LeftStrLiteral =
2497 getCStringLiteral(C, state, Left.Expression, LeftVal);
2498 const StringLiteral *RightStrLiteral =
2499 getCStringLiteral(C, state, Right.Expression, RightVal);
2500 bool canComputeResult = false;
2501 SVal resultVal = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2502
2503 if (LeftStrLiteral && RightStrLiteral) {
2504 StringRef LeftStrRef = LeftStrLiteral->getString();
2505 StringRef RightStrRef = RightStrLiteral->getString();
2506
2507 if (IsBounded) {
2508 // Get the max number of characters to compare.
2509 const Expr *lenExpr = Call.getArgExpr(2);
2510 SVal lenVal = state->getSVal(lenExpr, SF);
2511
2512 // If the length is known, we can get the right substrings.
2513 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2514 // Create substrings of each to compare the prefix.
2515 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2516 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2517 canComputeResult = true;
2518 }
2519 } else {
2520 // This is a normal, unbounded strcmp.
2521 canComputeResult = true;
2522 }
2523
2524 if (canComputeResult) {
2525 // Real strcmp stops at null characters.
2526 size_t s1Term = LeftStrRef.find('\0');
2527 if (s1Term != StringRef::npos)
2528 LeftStrRef = LeftStrRef.substr(0, s1Term);
2529
2530 size_t s2Term = RightStrRef.find('\0');
2531 if (s2Term != StringRef::npos)
2532 RightStrRef = RightStrRef.substr(0, s2Term);
2533
2534 // Use StringRef's comparison methods to compute the actual result.
2535 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2536 : LeftStrRef.compare(RightStrRef);
2537
2538 // The strcmp function returns an integer greater than, equal to, or less
2539 // than zero, [c11, p7.24.4.2].
2540 if (compareRes == 0) {
2541 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2542 }
2543 else {
2544 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2545 // Constrain strcmp's result range based on the result of StringRef's
2546 // comparison methods.
2547 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2548 SVal compareWithZero =
2549 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2550 svalBuilder.getConditionType());
2551 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2552 state = state->assume(compareWithZeroVal, true);
2553 }
2554 }
2555 }
2556
2557 state = state->BindExpr(Call.getOriginExpr(), SF, resultVal);
2558
2559 // Record this as a possible path.
2560 C.addTransition(state);
2561}
2562
2563void CStringChecker::evalStrsep(CheckerContext &C,
2564 const CallEvent &Call) const {
2565 // char *strsep(char **stringp, const char *delim);
2566 // Verify whether the search string parameter matches the return type.
2567 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2568
2569 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2570 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2571 CharPtrTy.getUnqualifiedType())
2572 return;
2573
2574 CurrentFunctionDescription = "strsep()";
2575 ProgramStateRef State = C.getState();
2576 const StackFrame *SF = C.getStackFrame();
2577
2578 // Check that the search string pointer is non-null (though it may point to
2579 // a null string).
2580 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, SF);
2581 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2582 if (!State)
2583 return;
2584
2585 // Check that the delimiter string is non-null.
2586 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2587 SVal DelimStrVal = State->getSVal(DelimStr.Expression, SF);
2588 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2589 if (!State)
2590 return;
2591
2592 SValBuilder &SVB = C.getSValBuilder();
2593 SVal Result;
2594 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2595 // Get the current value of the search string pointer, as a char*.
2596 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2597
2598 // Invalidate the search string, representing the change of one delimiter
2599 // character to NUL.
2600 // As the replacement never overflows, do not invalidate its super region.
2601 State = invalidateDestinationBufferNeverOverflows(
2602 C, State, Call.getCFGElementRef(), Result);
2603
2604 // Overwrite the search string pointer. The new value is either an address
2605 // further along in the same string, or NULL if there are no more tokens.
2606 State = State->bindLoc(*SearchStrLoc,
2607 SVB.conjureSymbolVal(Call, C.blockCount(), getTag()),
2608 SF);
2609 } else {
2610 assert(SearchStrVal.isUnknown());
2611 // Conjure a symbolic value. It's the best we can do.
2612 Result = SVB.conjureSymbolVal(Call, C.blockCount());
2613 }
2614
2615 // Set the return value, and finish.
2616 State = State->BindExpr(Call.getOriginExpr(), SF, Result);
2617 C.addTransition(State);
2618}
2619
2620// These should probably be moved into a C++ standard library checker.
2621void CStringChecker::evalStdCopy(CheckerContext &C,
2622 const CallEvent &Call) const {
2623 evalStdCopyCommon(C, Call);
2624}
2625
2626void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2627 const CallEvent &Call) const {
2628 evalStdCopyCommon(C, Call);
2629}
2630
2631void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2632 const CallEvent &Call) const {
2633 if (!Call.getArgExpr(2)->getType()->isPointerType())
2634 return;
2635
2636 ProgramStateRef State = C.getState();
2637
2638 const StackFrame *SF = C.getStackFrame();
2639
2640 // template <class _InputIterator, class _OutputIterator>
2641 // _OutputIterator
2642 // copy(_InputIterator __first, _InputIterator __last,
2643 // _OutputIterator __result)
2644
2645 // Invalidate the destination buffer
2646 const Expr *Dst = Call.getArgExpr(2);
2647 SVal DstVal = State->getSVal(Dst, SF);
2648 // FIXME: As we do not know how many items are copied, we also invalidate the
2649 // super region containing the target location.
2650 State = invalidateDestinationBufferAlwaysEscapeSuperRegion(
2651 C, State, Call.getCFGElementRef(), DstVal);
2652
2653 SValBuilder &SVB = C.getSValBuilder();
2654
2655 SVal ResultVal = SVB.conjureSymbolVal(Call, C.blockCount());
2656 State = State->BindExpr(Call.getOriginExpr(), SF, ResultVal);
2657
2658 C.addTransition(State);
2659}
2660
2661void CStringChecker::evalMemset(CheckerContext &C,
2662 const CallEvent &Call) const {
2663 // void *memset(void *s, int c, size_t n);
2664 CurrentFunctionDescription = "memory set function";
2665
2666 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2667 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2668 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2669
2670 ProgramStateRef State = C.getState();
2671
2672 // See if the size argument is zero.
2673 const StackFrame *SF = C.getStackFrame();
2674 SVal SizeVal = C.getSVal(Size.Expression);
2675 QualType SizeTy = Size.Expression->getType();
2676
2677 ProgramStateRef ZeroSize, NonZeroSize;
2678 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2679
2680 // Get the value of the memory area.
2681 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2682
2683 // If the size is zero, there won't be any actual memory access, so
2684 // just bind the return value to the buffer and return.
2685 if (ZeroSize && !NonZeroSize) {
2686 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), SF, BufferPtrVal);
2687 C.addTransition(ZeroSize);
2688 return;
2689 }
2690
2691 // Ensure the memory area is not null.
2692 // If it is NULL there will be a NULL pointer dereference.
2693 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2694 if (!State)
2695 return;
2696
2697 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2698 if (!State)
2699 return;
2700
2701 // According to the values of the arguments, bind the value of the second
2702 // argument to the destination buffer and set string length, or just
2703 // invalidate the destination buffer.
2704 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(),
2705 C.getSVal(CharE.Expression), Size.Expression, C, State))
2706 return;
2707
2708 State = State->BindExpr(Call.getOriginExpr(), SF, BufferPtrVal);
2709 C.addTransition(State);
2710}
2711
2712void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2713 CurrentFunctionDescription = "memory clearance function";
2714
2715 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2716 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2717 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2718
2719 ProgramStateRef State = C.getState();
2720
2721 // See if the size argument is zero.
2722 SVal SizeVal = C.getSVal(Size.Expression);
2723 QualType SizeTy = Size.Expression->getType();
2724
2725 ProgramStateRef StateZeroSize, StateNonZeroSize;
2726 std::tie(StateZeroSize, StateNonZeroSize) =
2727 assumeZero(C, State, SizeVal, SizeTy);
2728
2729 // If the size is zero, there won't be any actual memory access,
2730 // In this case we just return.
2731 if (StateZeroSize && !StateNonZeroSize) {
2732 C.addTransition(StateZeroSize);
2733 return;
2734 }
2735
2736 // Get the value of the memory area.
2737 SVal MemVal = C.getSVal(Buffer.Expression);
2738
2739 // Ensure the memory area is not null.
2740 // If it is NULL there will be a NULL pointer dereference.
2741 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2742 if (!State)
2743 return;
2744
2745 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2746 if (!State)
2747 return;
2748
2749 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(), Zero,
2750 Size.Expression, C, State))
2751 return;
2752
2753 C.addTransition(State);
2754}
2755
2756void CStringChecker::evalSprintf(CheckerContext &C,
2757 const CallEvent &Call) const {
2758 CurrentFunctionDescription = "'sprintf'";
2759 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2760}
2761
2762void CStringChecker::evalSnprintf(CheckerContext &C,
2763 const CallEvent &Call) const {
2764 CurrentFunctionDescription = "'snprintf'";
2765 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2766}
2767
2768void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2769 bool IsBounded) const {
2770 ProgramStateRef State = C.getState();
2771 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2772 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2773
2774 const auto NumParams = Call.parameters().size();
2775 if (CE->getNumArgs() < NumParams) {
2776 // This is an invalid call, let's just ignore it.
2777 return;
2778 }
2779
2780 const auto AllArguments =
2781 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2782 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2783
2784 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2785 // We consider only string buffers
2786 if (const QualType type = ArgExpr->getType();
2787 !type->isAnyPointerType() ||
2788 !type->getPointeeType()->isAnyCharacterType())
2789 continue;
2790 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2791
2792 // Ensure the buffers do not overlap.
2793 SizeArgExpr SrcExprAsSizeDummy = {
2794 {Source.Expression, Source.ArgumentIndex}};
2795 State = CheckOverlap(
2796 C, State,
2797 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2798 Dest, Source);
2799 if (!State)
2800 return;
2801 }
2802
2803 C.addTransition(State);
2804}
2805
2806//===----------------------------------------------------------------------===//
2807// The driver method, and other Checker callbacks.
2808//===----------------------------------------------------------------------===//
2809
2810CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2811 CheckerContext &C) const {
2812 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2813 if (!CE)
2814 return nullptr;
2815
2816 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2817 if (!FD)
2818 return nullptr;
2819
2820 if (StdCopy.matches(Call))
2821 return &CStringChecker::evalStdCopy;
2822 if (StdCopyBackward.matches(Call))
2823 return &CStringChecker::evalStdCopyBackward;
2824
2825 // Pro-actively check that argument types are safe to do arithmetic upon.
2826 // We do not want to crash if someone accidentally passes a structure
2827 // into, say, a C++ overload of any of these functions. We could not check
2828 // that for std::copy because they may have arguments of other types.
2829 for (auto I : CE->arguments()) {
2830 QualType T = I->getType();
2831 if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2832 return nullptr;
2833 }
2834
2835 const FnCheck *Callback = Callbacks.lookup(Call);
2836 if (Callback)
2837 return *Callback;
2838
2839 return nullptr;
2840}
2841
2842bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2843 FnCheck Callback = identifyCall(Call, C);
2844
2845 // If the callee isn't a string function, let another checker handle it.
2846 if (!Callback)
2847 return false;
2848
2849 // Check and evaluate the call.
2850 assert(isa<CallExpr>(Call.getOriginExpr()));
2851 Callback(this, C, Call);
2852
2853 // If the evaluate call resulted in no change, chain to the next eval call
2854 // handler.
2855 // Note, the custom CString evaluation calls assume that basic safety
2856 // properties are held. However, if the user chooses to turn off some of these
2857 // checks, we ignore the issues and leave the call evaluation to a generic
2858 // handler.
2859 return C.isDifferent();
2860}
2861
2862void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2863 // Record string length for char a[] = "abc";
2864 ProgramStateRef state = C.getState();
2865
2866 for (const auto *I : DS->decls()) {
2867 const VarDecl *D = dyn_cast<VarDecl>(I);
2868 if (!D)
2869 continue;
2870
2871 // FIXME: Handle array fields of structs.
2872 if (!D->getType()->isArrayType())
2873 continue;
2874
2875 const Expr *Init = D->getInit();
2876 if (!Init)
2877 continue;
2879 continue;
2880
2881 Loc VarLoc = state->getLValue(D, C.getStackFrame());
2882 const MemRegion *MR = VarLoc.getAsRegion();
2883 if (!MR)
2884 continue;
2885
2886 SVal StrVal = C.getSVal(Init);
2887 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2888 DefinedOrUnknownSVal strLength =
2889 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2890
2891 state = state->set<CStringLength>(MR, strLength);
2892 }
2893
2894 C.addTransition(state);
2895}
2896
2897ProgramStateRef CStringChecker::checkRegionChanges(
2898 ProgramStateRef state, const InvalidatedSymbols *,
2899 ArrayRef<const MemRegion *> ExplicitRegions,
2900 ArrayRef<const MemRegion *> Regions, const StackFrame *SF,
2901 const CallEvent *Call) const {
2902 CStringLengthTy Entries = state->get<CStringLength>();
2903 if (Entries.isEmpty())
2904 return state;
2905
2906 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2907 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2908
2909 // First build sets for the changed regions and their super-regions.
2910 for (const MemRegion *MR : Regions) {
2911 Invalidated.insert(MR);
2912
2913 SuperRegions.insert(MR);
2914 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2915 MR = SR->getSuperRegion();
2916 SuperRegions.insert(MR);
2917 }
2918 }
2919
2920 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2921
2922 // Then loop over the entries in the current state.
2923 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2924 // Is this entry for a super-region of a changed region?
2925 if (SuperRegions.count(MR)) {
2926 Entries = F.remove(Entries, MR);
2927 continue;
2928 }
2929
2930 // Is this entry for a sub-region of a changed region?
2931 const MemRegion *Super = MR;
2932 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2933 Super = SR->getSuperRegion();
2934 if (Invalidated.count(Super)) {
2935 Entries = F.remove(Entries, MR);
2936 break;
2937 }
2938 }
2939 }
2940
2941 return state->set<CStringLength>(Entries);
2942}
2943
2944void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2945 SymbolReaper &SR) const {
2946 // Mark all symbols in our string length map as valid.
2947 CStringLengthTy Entries = state->get<CStringLength>();
2948
2949 for (SVal Len : llvm::make_second_range(Entries)) {
2950 for (SymbolRef Sym : Len.symbols())
2951 SR.markInUse(Sym);
2952 }
2953}
2954
2955void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2956 CheckerContext &C) const {
2957 ProgramStateRef state = C.getState();
2958 CStringLengthTy Entries = state->get<CStringLength>();
2959 if (Entries.isEmpty())
2960 return;
2961
2962 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2963 for (auto [Reg, Len] : Entries) {
2964 if (SymbolRef Sym = Len.getAsSymbol()) {
2965 if (SR.isDead(Sym))
2966 Entries = F.remove(Entries, Reg);
2967 }
2968 }
2969
2970 state = state->set<CStringLength>(Entries);
2971 C.addTransition(state);
2972}
2973
2974void ento::registerCStringModeling(CheckerManager &Mgr) {
2975 // Other checker relies on the modeling implemented in this checker family,
2976 // so this "modeling checker" can register the 'CStringChecker' backend for
2977 // its callbacks without enabling any of its frontends.
2978 Mgr.getChecker<CStringChecker>();
2979}
2980
2981bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
2982 return true;
2983}
2984
2985#define REGISTER_CHECKER(NAME) \
2986 void ento::registerCString##NAME(CheckerManager &Mgr) { \
2987 Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr); \
2988 } \
2989 \
2990 bool ento::shouldRegisterCString##NAME(const CheckerManager &) { \
2991 return true; \
2992 }
2993
2994REGISTER_CHECKER(NullArg)
2995REGISTER_CHECKER(OutOfBounds)
2996REGISTER_CHECKER(BufferOverlap)
2997REGISTER_CHECKER(NotNullTerm)
2998REGISTER_CHECKER(UninitializedRead)
2999
3000#undef REGISTER_CHECKER
#define V(N, I)
static std::optional< NonLoc > getIndex(ProgramStateRef State, const ElementRegion *ER, CharKind CK)
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx)
#define REGISTER_CHECKER(name)
Result
Implement __builtin_bit_cast and related operations.
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType WideCharTy
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType CharTy
CanQualType IntTy
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType UnsignedCharTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
decl_range decls()
Definition Stmt.h:1689
QualType getType() const
Definition Expr.h:144
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8573
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition TypeBase.h:8541
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
unsigned getLength() const
Definition Expr.h:1912
StringRef getString() const
Definition Expr.h:1870
bool isArrayType() const
Definition TypeBase.h:8783
bool isPointerType() const
Definition TypeBase.h:8684
CanQualType getCanonicalTypeUnqualified() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:790
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition TypeBase.h:9172
bool isAnyPointerType() const
Definition TypeBase.h:8692
QualType getType() const
Definition Decl.h:723
const Expr * getInit() const
Definition Decl.h:1381
APSIntPtr getMaxValue(const llvm::APSInt &v)
std::optional< APSIntPtr > evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:596
CHECKER * getChecker(AT &&...Args)
If the the singleton instance of a checker class is not yet constructed, then construct it (with the ...
ElementRegion is used to represent both array elements and casts.
Definition MemRegion.h:1230
QualType getValueType() const override
Definition MemRegion.h:1252
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:97
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
Kind getKind() const
Definition MemRegion.h:202
@ TK_PreserveContents
Tells that a region's contents is not changed.
Definition MemRegion.h:1671
@ TK_SuppressEscape
Suppress pointer-escaping of a region.
Definition MemRegion.h:1674
void setTrait(SymbolRef Sym, InvalidationKinds IK)
bool hasSymbolicOffset() const
Definition MemRegion.h:82
const MemRegion * getRegion() const
It might return null.
Definition MemRegion.h:80
int64_t getOffset() const
Definition MemRegion.h:84
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
BasicValueFactory & getBasicValueFactory()
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
ProgramStateManager & getStateManager()
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
loc::MemRegionVal makeLoc(SymbolRef sym)
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, const StackFrame *SF, unsigned count)
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, ConstCFGElementRef elem, const StackFrame *SF, unsigned count)
Create a new symbol with a unique 'name'.
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition SVals.h:56
bool isUndef() const
Definition SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
bool isValid() const
Definition SVals.h:111
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
bool isUnknown() const
Definition SVals.h:105
LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral * getStringLiteral() const
Definition MemRegion.h:871
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition MemRegion.h:486
llvm::iterator_range< symbol_iterator > symbols() const
Definition SymExpr.h:107
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
__inline void unsigned int _2
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
llvm::DenseSet< SymbolRef > InvalidatedSymbols
Definition Store.h:50
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
Definition SymExpr.h:133
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
std::variant< struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl > Decl
All declarations that can appear in a module declaration.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
CFGBlock::ConstCFGElementRef ConstCFGElementRef
Definition CFG.h:1248
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition CharInfo.h:233
U cast(CodeGen::Address addr)
Definition Address.h:327
int const char * function
Definition c++config.h:31