clang 23.0.0git
CStringChecker.cpp
Go to the documentation of this file.
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <functional>
34#include <optional>
35
36using namespace clang;
37using namespace ento;
38using namespace std::placeholders;
39
40namespace {
41struct AnyArgExpr {
42 const Expr *Expression;
43 unsigned ArgumentIndex;
44};
45struct SourceArgExpr : AnyArgExpr {};
46struct DestinationArgExpr : AnyArgExpr {};
47struct SizeArgExpr : AnyArgExpr {};
48
49using ErrorMessage = SmallString<128>;
50enum class AccessKind { write, read };
51
52static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
53 AccessKind Access) {
54 ErrorMessage Message;
55 llvm::raw_svector_ostream Os(Message);
56
57 // Function classification like: Memory copy function
58 Os << toUppercase(FunctionDescription.front())
59 << &FunctionDescription.data()[1];
60
61 if (Access == AccessKind::write) {
62 Os << " overflows the destination buffer";
63 } else { // read access
64 Os << " accesses out-of-bound array element";
65 }
66
67 return Message;
68}
69
/// Which concatenation flavour (if any) a string-copy model should emulate;
/// passed to evalStrcpyCommon as its `appendK` argument.
enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };

/// Character width handled by a modeled function: the regular character type
/// or the wide character type (see getCharPtrType).
enum class CharKind { Regular = 0, Wide };

/// Short aliases used when binding callbacks in the call-description table.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
75
76static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
77 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
78 : Ctx.WideCharTy);
79}
80
81class CStringChecker
82 : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
83 check::LiveSymbols, check::DeadSymbols,
84 check::RegionChanges> {
85 mutable const char *CurrentFunctionDescription = nullptr;
86
87public:
88 // FIXME: The bug types emitted by this checker family have confused garbage
89 // in their Description and Category fields (e.g. `categories::UnixAPI` is
90 // passed as the description in several cases and `uninitialized` is mistyped
91 // as `unitialized`). This should be cleaned up.
92 CheckerFrontendWithBugType NullArg{categories::UnixAPI};
93 CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
94 CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
95 "Improper arguments"};
96 CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
97 CheckerFrontendWithBugType UninitializedRead{
98 "Accessing unitialized/garbage values"};
99
100 StringRef getDebugTag() const override { return "MallocChecker"; }
101
102 static void *getTag() { static int tag; return &tag; }
103
104 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
105 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
106 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
107 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
108
110 checkRegionChanges(ProgramStateRef state,
111 const InvalidatedSymbols *,
112 ArrayRef<const MemRegion *> ExplicitRegions,
113 ArrayRef<const MemRegion *> Regions,
114 const LocationContext *LCtx,
115 const CallEvent *Call) const;
116
117 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
118 const CallEvent &)>;
119
120 CallDescriptionMap<FnCheck> Callbacks = {
121 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
122 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
123 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
124 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
125 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
126 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
127 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
128 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
129 {{CDM::CLibrary, {"memcmp"}, 3},
130 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
131 {{CDM::CLibrary, {"wmemcmp"}, 3},
132 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
133 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
134 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
135 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
136 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
137 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
138 &CStringChecker::evalMemset},
139 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
140 // FIXME: C23 introduces 'memset_explicit', maybe also model that
141 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
142 &CStringChecker::evalStrcpy},
143 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
144 &CStringChecker::evalStrncpy},
145 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
146 &CStringChecker::evalStpcpy},
147 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
148 &CStringChecker::evalStrlcpy},
149 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
150 &CStringChecker::evalStrcat},
151 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
152 &CStringChecker::evalStrncat},
153 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
154 &CStringChecker::evalStrlcat},
155 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
156 &CStringChecker::evalstrLength},
157 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
158 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
159 &CStringChecker::evalstrnLength},
160 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
161 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
162 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
163 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
164 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
165 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
166 {{CDM::CLibrary, {"strxfrm"}, 3}, &CStringChecker::evalStrxfrm},
167 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
168 {{CDM::CLibrary, {"bcmp"}, 3},
169 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
170 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
171 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
172 &CStringChecker::evalBzero},
173
174 // When recognizing calls to the following variadic functions, we accept
175 // any number of arguments in the call (std::nullopt = accept any
176 // number), but check that in the declaration there are 2 and 3
177 // parameters respectively. (Note that the parameter count does not
178 // include the "...". Calls where the number of arguments is too small
179 // will be discarded by the callback.)
180 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
181 &CStringChecker::evalSprintf},
182 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
183 &CStringChecker::evalSnprintf},
184 };
185
186 // These require a bit of special handling.
187 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
188 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
189
190 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
191 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
192 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
193 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
194 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
195 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
196 ProgramStateRef state, SizeArgExpr Size,
197 DestinationArgExpr Dest, SourceArgExpr Source,
198 bool Restricted, bool IsMempcpy, CharKind CK) const;
199
200 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201
202 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
203 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
204 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
205 bool IsStrnlen = false) const;
206
207 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
208 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
209 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
210 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
211 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
212 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
213 bool returnPtr = true) const;
214
215 void evalStrxfrm(CheckerContext &C, const CallEvent &Call) const;
216
217 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
218 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
220
221 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
222 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
223 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
224 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
226 bool IsBounded = false, bool IgnoreCase = false) const;
227
228 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
229
230 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
231 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
232 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
233 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
234 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
235
236 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
237 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
238 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
239 bool IsBounded) const;
240
241 // Utility methods
242 std::pair<ProgramStateRef , ProgramStateRef >
243 static assumeZero(CheckerContext &C,
244 ProgramStateRef state, SVal V, QualType Ty);
245
246 static ProgramStateRef setCStringLength(ProgramStateRef state,
247 const MemRegion *MR,
248 SVal strLength);
249 static SVal getCStringLengthForRegion(CheckerContext &C,
250 ProgramStateRef &state,
251 const Expr *Ex,
252 const MemRegion *MR,
253 bool hypothetical);
254 static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
255
256 SVal getCStringLength(CheckerContext &C,
257 ProgramStateRef &state,
258 const Expr *Ex,
259 SVal Buf,
260 bool hypothetical = false) const;
261
262 const StringLiteral *getCStringLiteral(CheckerContext &C,
263 ProgramStateRef &state,
264 const Expr *expr,
265 SVal val) const;
266
267 /// Invalidate the destination buffer determined by characters copied.
268 static ProgramStateRef
269 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
270 const Expr *BufE, ConstCFGElementRef Elem,
271 SVal BufV, SVal SizeV, QualType SizeTy);
272
273 /// Operation never overflows, do not invalidate the super region.
274 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
275 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
276
277 /// We do not know whether the operation can overflow (e.g. size is unknown),
278 /// invalidate the super region and escape related pointers.
279 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
280 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
281
282 /// Invalidate the source buffer for escaping pointers.
283 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
286 SVal BufV);
287
288 /// @param InvalidationTraitOperations Determine how to invlidate the
289 /// MemRegion by setting the invalidation traits. Return true to cause pointer
290 /// escape, or false otherwise.
291 static ProgramStateRef invalidateBufferAux(
292 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
293 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
294 const MemRegion *)>
295 InvalidationTraitOperations);
296
297 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
298 const MemRegion *MR);
299
300 static bool memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
301 SVal CharE, const Expr *Size, CheckerContext &C,
302 ProgramStateRef &State);
303
304 // Re-usable checks
305 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
306 AnyArgExpr Arg, SVal l) const;
307 // Check whether the origin region behind \p Element (like the actual array
308 // region \p Element is from) is initialized.
309 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
310 AnyArgExpr Buffer, SVal Element, SVal Size) const;
311 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
312 AnyArgExpr Buffer, SVal Element,
313 AccessKind Access,
314 CharKind CK = CharKind::Regular) const;
315 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
316 AnyArgExpr Buffer, SizeArgExpr Size,
317 AccessKind Access,
318 CharKind CK = CharKind::Regular) const;
319 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
320 SizeArgExpr Size, AnyArgExpr First,
321 AnyArgExpr Second,
322 CharKind CK = CharKind::Regular) const;
323 void emitOverlapBug(CheckerContext &C,
324 ProgramStateRef state,
325 const Stmt *First,
326 const Stmt *Second) const;
327
328 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
329 StringRef WarningMsg) const;
330 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
331 const Stmt *S, StringRef WarningMsg) const;
332 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
333 const Stmt *S, StringRef WarningMsg) const;
334 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
335 const Expr *E, const MemRegion *R,
336 StringRef Msg) const;
337 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
338 ProgramStateRef state,
339 NonLoc left,
340 NonLoc right) const;
341
342 // Return true if the destination buffer of the copy function may be in bound.
343 // Expects SVal of Size to be positive and unsigned.
344 // Expects SVal of FirstBuf to be a FieldRegion.
345 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
346 SVal BufVal, QualType BufTy, SVal LengthVal,
347 QualType LengthTy);
348};
349
350} //end anonymous namespace
351
352REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
353
354//===----------------------------------------------------------------------===//
355// Individual checks and utility methods.
356//===----------------------------------------------------------------------===//
357
358std::pair<ProgramStateRef, ProgramStateRef>
359CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
360 QualType Ty) {
361 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
362 if (!val)
363 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
364
365 SValBuilder &svalBuilder = C.getSValBuilder();
366 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
367 return State->assume(svalBuilder.evalEQ(State, *val, zero));
368}
369
370ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
371 ProgramStateRef State,
372 AnyArgExpr Arg, SVal l) const {
373 // If a previous check has failed, propagate the failure.
374 if (!State)
375 return nullptr;
376
377 ProgramStateRef stateNull, stateNonNull;
378 std::tie(stateNull, stateNonNull) =
379 assumeZero(C, State, l, Arg.Expression->getType());
380
381 if (stateNull && !stateNonNull) {
382 if (NullArg.isEnabled()) {
383 SmallString<80> buf;
384 llvm::raw_svector_ostream OS(buf);
385 assert(CurrentFunctionDescription);
386 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
387 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
388 << CurrentFunctionDescription;
389
390 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
391 }
392 return nullptr;
393 }
394
395 // From here on, assume that the value is non-null.
396 assert(stateNonNull);
397 return stateNonNull;
398}
399
400static std::optional<NonLoc> getIndex(ProgramStateRef State,
401 const ElementRegion *ER, CharKind CK) {
403 ASTContext &Ctx = SVB.getContext();
404
405 if (CK == CharKind::Regular) {
406 if (ER->getValueType() != Ctx.CharTy)
407 return {};
408 return ER->getIndex();
409 }
410
411 if (ER->getValueType() != Ctx.WideCharTy)
412 return {};
413
414 QualType SizeTy = Ctx.getSizeType();
415 NonLoc WideSize =
417 SizeTy)
418 .castAs<NonLoc>();
419 SVal Offset =
420 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
421 if (Offset.isUnknown())
422 return {};
423 return Offset.castAs<NonLoc>();
424}
425
426// Basically 1 -> 1st, 12 -> 12th, etc.
427static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
428 Os << Idx << llvm::getOrdinalSuffix(Idx);
429}
430
431ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
432 ProgramStateRef State,
433 AnyArgExpr Buffer, SVal Element,
434 SVal Size) const {
435
436 // If a previous check has failed, propagate the failure.
437 if (!State)
438 return nullptr;
439
440 const MemRegion *R = Element.getAsRegion();
441 const auto *ER = dyn_cast_or_null<ElementRegion>(R);
442 if (!ER)
443 return State;
444
445 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
446 if (!SuperR)
447 return State;
448
449 // FIXME: We ought to able to check objects as well. Maybe
450 // UninitializedObjectChecker could help?
451 if (!SuperR->getValueType()->isArrayType())
452 return State;
453
454 SValBuilder &SVB = C.getSValBuilder();
455 ASTContext &Ctx = SVB.getContext();
456
457 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
458 const NonLoc Zero = SVB.makeZeroArrayIndex();
459
460 std::optional<Loc> FirstElementVal =
461 State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
462 if (!FirstElementVal)
463 return State;
464
465 // Ensure that we wouldn't read uninitialized value.
466 if (UninitializedRead.isEnabled() &&
467 State->getSVal(*FirstElementVal).isUndef()) {
468 llvm::SmallString<258> Buf;
469 llvm::raw_svector_ostream OS(Buf);
470 OS << "The first element of the ";
471 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
472 OS << " argument is undefined";
473 emitUninitializedReadBug(C, State, Buffer.Expression,
474 FirstElementVal->getAsRegion(), OS.str());
475 return nullptr;
476 }
477
478 // We won't check whether the entire region is fully initialized -- let's just
479 // check that the first and the last element is. So, onto checking the last
480 // element:
481 const QualType IdxTy = SVB.getArrayIndexType();
482
483 NonLoc ElemSize =
484 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
485 .castAs<NonLoc>();
486
487 // FIXME: Check that the size arg to the cstring function is divisible by
488 // size of the actual element type?
489
490 // The type of the argument to the cstring function is either char or wchar,
491 // but thats not the type of the original array (or memory region).
492 // Suppose the following:
493 // int t[5];
494 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
495 // When checking whether t is fully initialized, we see it as char array of
496 // size sizeof(int)*5. If we check the last element as a character, we read
497 // the last byte of an integer, which will be undefined. But just because
498 // that value is undefined, it doesn't mean that the element is uninitialized!
499 // For this reason, we need to retrieve the actual last element with the
500 // correct type.
501
502 // Divide the size argument to the cstring function by the actual element
503 // type. This value will be size of the array, or the index to the
504 // past-the-end element.
505 std::optional<NonLoc> Offset =
506 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
507 IdxTy)
508 .getAs<NonLoc>();
509
510 if (!Offset)
511 return State;
512
513 // Retrieve the index of the last element.
514 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
515 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
516
517 SVal LastElementVal =
518 State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
519 if (!isa<Loc>(LastElementVal))
520 return State;
521
522 if (UninitializedRead.isEnabled() &&
523 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
524 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
525 // If we can't get emit a sensible last element index, just bail out --
526 // prefer to emit nothing in favour of emitting garbage quality reports.
527 if (!IdxInt) {
528 C.addSink();
529 return nullptr;
530 }
531 llvm::SmallString<258> Buf;
532 llvm::raw_svector_ostream OS(Buf);
533 OS << "The last accessed element (at index ";
534 OS << IdxInt->getExtValue();
535 OS << ") in the ";
536 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
537 OS << " argument is undefined";
538 emitUninitializedReadBug(C, State, Buffer.Expression,
539 LastElementVal.getAsRegion(), OS.str());
540 return nullptr;
541 }
542 return State;
543}
544// FIXME: The root of this logic was copied from the old checker
545// alpha.security.ArrayBound (which is removed within this commit).
546// It should be refactored to use the different, more sophisticated bounds
547// checking logic used by the new checker ``security.ArrayBound``.
548ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
549 ProgramStateRef state,
550 AnyArgExpr Buffer, SVal Element,
551 AccessKind Access,
552 CharKind CK) const {
553
554 // If a previous check has failed, propagate the failure.
555 if (!state)
556 return nullptr;
557
558 // Check for out of bound array element access.
559 const MemRegion *R = Element.getAsRegion();
560 if (!R)
561 return state;
562
563 const auto *ER = dyn_cast<ElementRegion>(R);
564 if (!ER)
565 return state;
566
567 // Get the index of the accessed element.
568 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
569 if (!Idx)
570 return state;
571
572 // Get the size of the array.
573 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
574 DefinedOrUnknownSVal Size =
575 getDynamicExtent(state, superReg, C.getSValBuilder());
576
577 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
578 if (StOutBound && !StInBound) {
579 // The analyzer determined that the access is out-of-bounds, which is
580 // a fatal error: ideally we'd return nullptr to terminate this path
581 // regardless of whether the OutOfBounds checker frontend is enabled.
582 // However, the current out-of-bounds modeling produces too many false
583 // positives, so when the frontend is disabled we return the original
584 // (unconstrained) state and let the analysis continue. This is
585 // inconsistent: returning `state` instead of `StOutBound` discards the
586 // constraint that the index is out-of-bounds, and callers cannot
587 // distinguish "we proved an error" from "we couldn't determine anything"
588 // since both return the original state.
589 // TODO: Once the OutOfBounds frontend is stable, return nullptr here
590 // unconditionally to stop the analysis on this path.
591 if (!OutOfBounds.isEnabled())
592 return state;
593
594 ErrorMessage Message =
595 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
596 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
597 return nullptr;
598 }
599
600 // Array bound check succeeded. From this point forward the array bound
601 // should always succeed.
602 return StInBound;
603}
604
606CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
607 AnyArgExpr Buffer, SizeArgExpr Size,
608 AccessKind Access, CharKind CK) const {
609 // If a previous check has failed, propagate the failure.
610 if (!State)
611 return nullptr;
612
613 SValBuilder &svalBuilder = C.getSValBuilder();
614 ASTContext &Ctx = svalBuilder.getContext();
615
616 QualType SizeTy = Size.Expression->getType();
617 QualType PtrTy = getCharPtrType(Ctx, CK);
618
619 // Check that the first buffer is non-null.
620 SVal BufVal = C.getSVal(Buffer.Expression);
621 State = checkNonNull(C, State, Buffer, BufVal);
622 if (!State)
623 return nullptr;
624
625 SVal BufStart =
626 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
627
628 // Check if the first byte of the buffer is accessible.
629 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
630
631 if (!State)
632 return nullptr;
633
634 // Get the access length and make sure it is known.
635 // FIXME: This assumes the caller has already checked that the access length
636 // is positive. And that it's unsigned.
637 SVal LengthVal = C.getSVal(Size.Expression);
638 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
639 if (!Length)
640 return State;
641
642 // Compute the offset of the last element to be accessed: size-1.
643 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
644 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
645 if (Offset.isUnknown())
646 return nullptr;
647 NonLoc LastOffset = Offset.castAs<NonLoc>();
648
649 // Check that the first buffer is sufficiently long.
650 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
651
652 SVal BufEnd =
653 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
654 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
655 if (Access == AccessKind::read)
656 State = checkInit(C, State, Buffer, BufEnd, *Length);
657
658 // If the buffer isn't large enough, abort.
659 if (!State)
660 return nullptr;
661 }
662
663 // Large enough or not, return this state!
664 return State;
665}
666
667ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
668 ProgramStateRef state,
669 SizeArgExpr Size, AnyArgExpr First,
670 AnyArgExpr Second,
671 CharKind CK) const {
672 // Do a simple check for overlap: if the two arguments are from the same
673 // buffer, see if the end of the first is greater than the start of the second
674 // or vice versa.
675
676 // If a previous check has failed, propagate the failure.
677 if (!state)
678 return nullptr;
679
680 ProgramStateRef stateTrue, stateFalse;
681
682 if (!First.Expression->getType()->isAnyPointerType() ||
683 !Second.Expression->getType()->isAnyPointerType())
684 return state;
685
686 // Assume different address spaces cannot overlap.
687 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
688 Second.Expression->getType()->getPointeeType().getAddressSpace())
689 return state;
690
691 // Get the buffer values and make sure they're known locations.
692 const LocationContext *LCtx = C.getLocationContext();
693 SVal firstVal = state->getSVal(First.Expression, LCtx);
694 SVal secondVal = state->getSVal(Second.Expression, LCtx);
695
696 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
697 if (!firstLoc)
698 return state;
699
700 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
701 if (!secondLoc)
702 return state;
703
704 // Are the two values the same?
705 SValBuilder &svalBuilder = C.getSValBuilder();
706 std::tie(stateTrue, stateFalse) =
707 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
708
709 if (stateTrue && !stateFalse) {
710 if (BufferOverlap.isEnabled()) {
711 // If the values are known to be equal, that's automatically an overlap.
712 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
713 return nullptr;
714 }
715 // The analyzer proved that the two pointers are equal, which guarantees
716 // overlap. When BufferOverlap is disabled, we return the original state
717 // instead of nullptr (to avoid stopping the path) or stateTrue (which
718 // would encode the equality constraint). This creates an inconsistency:
719 // callers treat any non-null return as "no overlap found" and proceed
720 // with subsequent modeling (e.g. memcpy side effects), even though the
721 // operation has undefined behavior. Additionally, returning `state` instead
722 // of `stateTrue` discards the pointer-equality constraint, making the
723 // analysis less precise.
724 // FIXME: At minimum, return stateTrue to preserve the equality
725 // constraint. Ideally, return nullptr to stop the path unconditionally,
726 // since overlap is proven regardless of whether we report it.
727 return state;
728 }
729
730 // assume the two expressions are not equal.
731 assert(stateFalse);
732 state = stateFalse;
733
734 // Which value comes first?
735 QualType cmpTy = svalBuilder.getConditionType();
736 SVal reverse =
737 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
738 std::optional<DefinedOrUnknownSVal> reverseTest =
739 reverse.getAs<DefinedOrUnknownSVal>();
740 if (!reverseTest)
741 return state;
742
743 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
744 if (stateTrue) {
745 if (stateFalse) {
746 // If we don't know which one comes first, we can't perform this test.
747 return state;
748 } else {
749 // Switch the values so that firstVal is before secondVal.
750 std::swap(firstLoc, secondLoc);
751
752 // Switch the Exprs as well, so that they still correspond.
753 std::swap(First, Second);
754 }
755 }
756
757 // Get the length, and make sure it too is known.
758 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
759 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
760 if (!Length)
761 return state;
762
763 // Convert the first buffer's start address to char*.
764 // Bail out if the cast fails.
765 ASTContext &Ctx = svalBuilder.getContext();
766 QualType CharPtrTy = getCharPtrType(Ctx, CK);
767 SVal FirstStart =
768 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
769 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
770 if (!FirstStartLoc)
771 return state;
772
773 // Compute the end of the first buffer. Bail out if THAT fails.
774 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
775 *Length, CharPtrTy);
776 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
777 if (!FirstEndLoc)
778 return state;
779
780 // Is the end of the first buffer past the start of the second buffer?
781 SVal Overlap =
782 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
783 std::optional<DefinedOrUnknownSVal> OverlapTest =
784 Overlap.getAs<DefinedOrUnknownSVal>();
785 if (!OverlapTest)
786 return state;
787
788 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
789
790 if (stateTrue && !stateFalse) {
791 if (BufferOverlap.isEnabled()) {
792 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
793 return nullptr;
794 }
795 // The analyzer proved that the end of the first buffer is past the start
796 // of the second, which means the buffers overlap. This is the same
797 // inconsistency as the equal-pointers case above: when BufferOverlap is
798 // disabled, we return the original state, so callers cannot distinguish
799 // "proven overlap" from "couldn't determine anything" and will proceed
800 // to model side effects (e.g. memcpy) on a path with proven UB.
801 // Returning `stateTrue` would at least preserve the overlap constraint;
802 // returning nullptr would correctly terminate the path.
803 // FIXME: Return nullptr unconditionally once BufferOverlap is stable.
804 return state;
805 }
806
807 // assume the two expressions don't overlap.
808 assert(stateFalse);
809 return stateFalse;
810}
811
812void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
813 const Stmt *First,
814 const Stmt *Second) const {
815 assert(BufferOverlap.isEnabled() &&
816 "Can't emit from a checker that is not enabled!");
817 ExplodedNode *N = C.generateErrorNode(state);
818 if (!N)
819 return;
820
821 // Generate a report for this bug.
822 auto report = std::make_unique<PathSensitiveBugReport>(
823 BufferOverlap, "Arguments must not be overlapping buffers", N);
824 report->addRange(First->getSourceRange());
825 report->addRange(Second->getSourceRange());
826
827 C.emitReport(std::move(report));
828}
829
830void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
831 const Stmt *S, StringRef WarningMsg) const {
832 assert(NullArg.isEnabled() &&
833 "Can't emit from a checker that is not enabled!");
834 if (ExplodedNode *N = C.generateErrorNode(State)) {
835 auto Report =
836 std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
837 Report->addRange(S->getSourceRange());
838 if (const auto *Ex = dyn_cast<Expr>(S))
840 C.emitReport(std::move(Report));
841 }
842}
843
844void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
845 ProgramStateRef State,
846 const Expr *E, const MemRegion *R,
847 StringRef Msg) const {
848 assert(UninitializedRead.isEnabled() &&
849 "Can't emit from a checker that is not enabled!");
850 if (ExplodedNode *N = C.generateErrorNode(State)) {
851 auto Report =
852 std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
853 Report->addNote("Other elements might also be undefined",
854 Report->getLocation());
855 Report->addRange(E->getSourceRange());
857 Report->addVisitor<NoStoreFuncVisitor>(R->castAs<SubRegion>());
858 C.emitReport(std::move(Report));
859 }
860}
861
862void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
863 ProgramStateRef State, const Stmt *S,
864 StringRef WarningMsg) const {
865 assert(OutOfBounds.isEnabled() &&
866 "Can't emit from a checker that is not enabled!");
867 if (ExplodedNode *N = C.generateErrorNode(State)) {
868 // FIXME: It would be nice to eventually make this diagnostic more clear,
869 // e.g., by referencing the original declaration or by saying *why* this
870 // reference is outside the range.
871 auto Report =
872 std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
873 Report->addRange(S->getSourceRange());
874 C.emitReport(std::move(Report));
875 }
876}
877
878void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
879 const Stmt *S,
880 StringRef WarningMsg) const {
881 assert(NotNullTerm.isEnabled() &&
882 "Can't emit from a checker that is not enabled!");
883 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
884 auto Report =
885 std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
886
887 Report->addRange(S->getSourceRange());
888 C.emitReport(std::move(Report));
889 }
890}
891
892ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
893 ProgramStateRef state,
894 NonLoc left,
895 NonLoc right) const {
896 // If a previous check has failed, propagate the failure.
897 if (!state)
898 return nullptr;
899
900 SValBuilder &svalBuilder = C.getSValBuilder();
901 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
902
903 QualType sizeTy = svalBuilder.getContext().getSizeType();
904 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
905 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
906
907 SVal maxMinusRight;
908 if (isa<nonloc::ConcreteInt>(right)) {
909 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
910 sizeTy);
911 } else {
912 // Try switching the operands. (The order of these two assignments is
913 // important!)
914 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
915 sizeTy);
916 left = right;
917 }
918
919 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
920 QualType cmpTy = svalBuilder.getConditionType();
921 // If left > max - right, we have an overflow.
922 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
923 *maxMinusRightNL, cmpTy);
924
925 auto [StateOverflow, StateOkay] =
926 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
927
928 if (StateOverflow && !StateOkay) {
929 // On this path the analyzer is convinced that the addition of these two
930 // values would overflow `size_t` which must be caused by the inaccuracy
931 // of our modeling because this method is called in situations where the
932 // summands are size/length values which are much less than SIZE_MAX. To
933 // avoid false positives let's just sink this invalid path.
934 C.addSink(StateOverflow);
935 return nullptr;
936 }
937
938 // From now on, assume an overflow didn't occur.
939 assert(StateOkay);
940 state = StateOkay;
941 }
942
943 return state;
944}
945
946ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
947 const MemRegion *MR,
948 SVal strLength) {
949 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
950
951 MR = MR->StripCasts();
952
953 switch (MR->getKind()) {
954 case MemRegion::StringRegionKind:
955 // FIXME: This can happen if we strcpy() into a string region. This is
956 // undefined [C99 6.4.5p6], but we should still warn about it.
957 return state;
958
959 case MemRegion::SymbolicRegionKind:
960 case MemRegion::AllocaRegionKind:
961 case MemRegion::NonParamVarRegionKind:
962 case MemRegion::ParamVarRegionKind:
963 case MemRegion::FieldRegionKind:
964 case MemRegion::ObjCIvarRegionKind:
965 // These are the types we can currently track string lengths for.
966 break;
967
968 case MemRegion::ElementRegionKind:
969 // FIXME: Handle element regions by upper-bounding the parent region's
970 // string length.
971 return state;
972
973 default:
974 // Other regions (mostly non-data) can't have a reliable C string length.
975 // For now, just ignore the change.
976 // FIXME: These are rare but not impossible. We should output some kind of
977 // warning for things like strcpy((char[]){'a', 0}, "b");
978 return state;
979 }
980
981 if (strLength.isUnknown())
982 return state->remove<CStringLength>(MR);
983
984 return state->set<CStringLength>(MR, strLength);
985}
986
987SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
988 ProgramStateRef &state,
989 const Expr *Ex,
990 const MemRegion *MR,
991 bool hypothetical) {
992 if (!hypothetical) {
993 // If there's a recorded length, go ahead and return it.
994 const SVal *Recorded = state->get<CStringLength>(MR);
995 if (Recorded)
996 return *Recorded;
997 }
998
999 // Otherwise, get a new symbol and update the state.
1000 SValBuilder &svalBuilder = C.getSValBuilder();
1001 QualType sizeTy = svalBuilder.getContext().getSizeType();
1002 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
1003 MR, Ex, sizeTy,
1004 C.getLocationContext(),
1005 C.blockCount());
1006
1007 if (!hypothetical) {
1008 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1009 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1010 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1011 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
1012 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
1013 std::optional<APSIntPtr> maxLengthInt =
1014 BVF.evalAPSInt(BO_Div, maxValInt, fourInt);
1015 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
1016 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
1017 svalBuilder.getConditionType());
1018 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
1019 }
1020 state = state->set<CStringLength>(MR, strLength);
1021 }
1022
1023 return strLength;
1024}
1025
1026const StringLiteral *
1027CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
1028 switch (MR->getKind()) {
1029 case MemRegion::StringRegionKind:
1030 return cast<StringRegion>(MR)->getStringLiteral();
1031 case MemRegion::NonParamVarRegionKind:
1032 if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1033 Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
1034 return dyn_cast_or_null<StringLiteral>(Decl->getInit());
1035 return nullptr;
1036 default:
1037 return nullptr;
1038 }
1039}
1040
1041SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1042 const Expr *Ex, SVal Buf,
1043 bool hypothetical) const {
1044 const MemRegion *MR = Buf.getAsRegion();
1045 if (!MR) {
1046 // If we can't get a region, see if it's something we /know/ isn't a
1047 // C string. In the context of locations, the only time we can issue such
1048 // a warning is for labels.
1049 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1050 if (NotNullTerm.isEnabled()) {
1051 SmallString<120> buf;
1052 llvm::raw_svector_ostream os(buf);
1053 assert(CurrentFunctionDescription);
1054 os << "Argument to " << CurrentFunctionDescription
1055 << " is the address of the label '" << Label->getLabel()->getName()
1056 << "', which is not a null-terminated string";
1057
1058 emitNotCStringBug(C, state, Ex, os.str());
1059 }
1060 return UndefinedVal();
1061 }
1062
1063 // If it's not a region and not a label, give up.
1064 return UnknownVal();
1065 }
1066
1067 // If we have a region, strip casts from it and see if we can figure out
1068 // its length. For anything we can't figure out, just return UnknownVal.
1069 MR = MR->StripCasts();
1070
1071 if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
1072 // If we have a global constant with a string literal initializer,
1073 // compute the initializer's length.
1074 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1075 // so we can assume that the byte length is the correct C string length.
1076 // FIXME: Embedded null characters are not handled.
1077 SValBuilder &SVB = C.getSValBuilder();
1078 return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
1079 }
1080
1081 switch (MR->getKind()) {
1082 case MemRegion::StringRegionKind:
1083 case MemRegion::NonParamVarRegionKind:
1084 case MemRegion::SymbolicRegionKind:
1085 case MemRegion::AllocaRegionKind:
1086 case MemRegion::ParamVarRegionKind:
1087 case MemRegion::FieldRegionKind:
1088 case MemRegion::ObjCIvarRegionKind:
1089 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1090 case MemRegion::CompoundLiteralRegionKind:
1091 // FIXME: Can we track this? Is it necessary?
1092 return UnknownVal();
1093 case MemRegion::ElementRegionKind: {
1094 // If an offset into the string literal is used, use the original length
1095 // minus the offset.
1096 // FIXME: Embedded null characters are not handled.
1097 const ElementRegion *ER = cast<ElementRegion>(MR);
1098 const SubRegion *SuperReg =
1100 const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
1101 if (!StrLit)
1102 return UnknownVal();
1103 SValBuilder &SVB = C.getSValBuilder();
1104 NonLoc Idx = ER->getIndex();
1105 QualType SizeTy = SVB.getContext().getSizeType();
1106 NonLoc LengthVal =
1107 SVB.makeIntVal(StrLit->getLength(), SizeTy).castAs<NonLoc>();
1108 if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
1109 SVB.getConditionType())
1110 .castAs<DefinedOrUnknownSVal>(),
1111 true))
1112 return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx, SizeTy);
1113 return UnknownVal();
1114 }
1115 default:
1116 // Other regions (mostly non-data) can't have a reliable C string length.
1117 // In this case, an error is emitted and UndefinedVal is returned.
1118 // The caller should always be prepared to handle this case.
1119 if (NotNullTerm.isEnabled()) {
1120 SmallString<120> buf;
1121 llvm::raw_svector_ostream os(buf);
1122
1123 assert(CurrentFunctionDescription);
1124 os << "Argument to " << CurrentFunctionDescription << " is ";
1125
1126 if (SummarizeRegion(os, C.getASTContext(), MR))
1127 os << ", which is not a null-terminated string";
1128 else
1129 os << "not a null-terminated string";
1130
1131 emitNotCStringBug(C, state, Ex, os.str());
1132 }
1133 return UndefinedVal();
1134 }
1135}
1136
1137const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1138 ProgramStateRef &state, const Expr *expr, SVal val) const {
1139 // FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
1140
1141 // Get the memory region pointed to by the val.
1142 const MemRegion *bufRegion = val.getAsRegion();
1143 if (!bufRegion)
1144 return nullptr;
1145
1146 // Strip casts off the memory region.
1147 bufRegion = bufRegion->StripCasts();
1148
1149 // Cast the memory region to a string region.
1150 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1151 if (!strRegion)
1152 return nullptr;
1153
1154 // Return the actual string in the string region.
1155 return strRegion->getStringLiteral();
1156}
1157
1158bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1159 SVal BufVal, QualType BufTy,
1160 SVal LengthVal, QualType LengthTy) {
1161 // If we do not know that the buffer is long enough we return 'true'.
1162 // Otherwise the parent region of this field region would also get
1163 // invalidated, which would lead to warnings based on an unknown state.
1164
1165 if (LengthVal.isUnknown())
1166 return false;
1167
1168 // Originally copied from CheckBufferAccess and CheckLocation.
1169 SValBuilder &SB = C.getSValBuilder();
1170 ASTContext &Ctx = C.getASTContext();
1171
1172 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1173
1174 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1175 if (!Length)
1176 return true; // cf top comment.
1177
1178 // Compute the offset of the last element to be accessed: size-1.
1179 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1180 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1181 if (Offset.isUnknown())
1182 return true; // cf top comment
1183 NonLoc LastOffset = Offset.castAs<NonLoc>();
1184
1185 // Check that the first buffer is sufficiently long.
1186 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1187 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1188 if (!BufLoc)
1189 return true; // cf top comment.
1190
1191 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1192
1193 // Check for out of bound array element access.
1194 const MemRegion *R = BufEnd.getAsRegion();
1195 if (!R)
1196 return true; // cf top comment.
1197
1198 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1199 if (!ER)
1200 return true; // cf top comment.
1201
1202 // Support library functions defined with non-default address spaces
1203 assert(ER->getValueType()->getCanonicalTypeUnqualified() ==
1204 C.getASTContext().CharTy &&
1205 "isFirstBufInBound should only be called with char* ElementRegions");
1206
1207 // Get the size of the array.
1208 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1209 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1210
1211 // Get the index of the accessed element.
1212 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1213
1214 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1215
1216 return static_cast<bool>(StInBound);
1217}
1218
1219ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1220 CheckerContext &C, ProgramStateRef S, const Expr *BufE,
1221 ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) {
1222 auto InvalidationTraitOperations =
1223 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1224 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1225 // If destination buffer is a field region and access is in bound, do
1226 // not invalidate its super region.
1227 if (MemRegion::FieldRegionKind == R->getKind() &&
1228 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1229 ITraits.setTrait(
1230 R,
1232 }
1233 return false;
1234 };
1235
1236 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1237}
1238
1240CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1241 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1242 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1243 const MemRegion *R) {
1244 return isa<FieldRegion>(R);
1245 };
1246
1247 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1248}
1249
1250ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1251 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1252 auto InvalidationTraitOperations =
1253 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1254 if (MemRegion::FieldRegionKind == R->getKind())
1255 ITraits.setTrait(
1256 R,
1258 return false;
1259 };
1260
1261 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1262}
1263
1264ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1266 ConstCFGElementRef Elem,
1267 SVal BufV) {
1268 auto InvalidationTraitOperations =
1269 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1270 ITraits.setTrait(
1271 R->getBaseRegion(),
1273 ITraits.setTrait(R,
1275 return true;
1276 };
1277
1278 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1279}
1280
1281ProgramStateRef CStringChecker::invalidateBufferAux(
1282 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
1283 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1284 const MemRegion *)>
1285 InvalidationTraitOperations) {
1286 std::optional<Loc> L = V.getAs<Loc>();
1287 if (!L)
1288 return State;
1289
1290 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1291 // some assumptions about the value that CFRefCount can't. Even so, it should
1292 // probably be refactored.
1293 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1294 const MemRegion *R = MR->getRegion()->StripCasts();
1295
1296 // Are we dealing with an ElementRegion? If so, we should be invalidating
1297 // the super-region.
1298 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1299 R = ER->getSuperRegion();
1300 // FIXME: What about layers of ElementRegions?
1301 }
1302
1303 // Invalidate this region.
1304 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1305 RegionAndSymbolInvalidationTraits ITraits;
1306 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1307
1308 return State->invalidateRegions(R, Elem, C.blockCount(), LCtx,
1309 CausesPointerEscape, nullptr, nullptr,
1310 &ITraits);
1311 }
1312
1313 // If we have a non-region value by chance, just remove the binding.
1314 // FIXME: is this necessary or correct? This handles the non-Region
1315 // cases. Is it ever valid to store to these?
1316 return State->killBinding(*L);
1317}
1318
1319bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1320 const MemRegion *MR) {
1321 switch (MR->getKind()) {
1322 case MemRegion::FunctionCodeRegionKind: {
1323 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1324 os << "the address of the function '" << *FD << '\'';
1325 else
1326 os << "the address of a function";
1327 return true;
1328 }
1329 case MemRegion::BlockCodeRegionKind:
1330 os << "block text";
1331 return true;
1332 case MemRegion::BlockDataRegionKind:
1333 os << "a block";
1334 return true;
1335 case MemRegion::CXXThisRegionKind:
1336 case MemRegion::CXXTempObjectRegionKind:
1337 os << "a C++ temp object of type "
1338 << cast<TypedValueRegion>(MR)->getValueType();
1339 return true;
1340 case MemRegion::NonParamVarRegionKind:
1341 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1342 return true;
1343 case MemRegion::ParamVarRegionKind:
1344 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1345 return true;
1346 case MemRegion::FieldRegionKind:
1347 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1348 return true;
1349 case MemRegion::ObjCIvarRegionKind:
1350 os << "an instance variable of type "
1351 << cast<TypedValueRegion>(MR)->getValueType();
1352 return true;
1353 default:
1354 return false;
1355 }
1356}
1357
1358bool CStringChecker::memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
1359 SVal CharVal, const Expr *Size,
1360 CheckerContext &C, ProgramStateRef &State) {
1361 SVal MemVal = C.getSVal(DstBuffer);
1362 SVal SizeVal = C.getSVal(Size);
1363 const MemRegion *MR = MemVal.getAsRegion();
1364 if (!MR)
1365 return false;
1366
1367 // We're about to model memset by producing a "default binding" in the Store.
1368 // Our current implementation - RegionStore - doesn't support default bindings
1369 // that don't cover the whole base region. So we should first get the offset
1370 // and the base region to figure out whether the offset of buffer is 0.
1371 RegionOffset Offset = MR->getAsOffset();
1372 const MemRegion *BR = Offset.getRegion();
1373
1374 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1375 if (!SizeNL)
1376 return false;
1377
1378 SValBuilder &svalBuilder = C.getSValBuilder();
1379 ASTContext &Ctx = C.getASTContext();
1380
1381 // void *memset(void *dest, int ch, size_t count);
1382 // For now we can only handle the case of offset is 0 and concrete char value.
1383 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1384 Offset.getOffset() == 0) {
1385 // Get the base region's size.
1386 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1387
1388 ProgramStateRef StateWholeReg, StateNotWholeReg;
1389 std::tie(StateWholeReg, StateNotWholeReg) =
1390 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1391
1392 // With the semantic of 'memset()', we should convert the CharVal to
1393 // unsigned char.
1394 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1395
1396 ProgramStateRef StateNullChar, StateNonNullChar;
1397 std::tie(StateNullChar, StateNonNullChar) =
1398 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1399
1400 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1401 !StateNonNullChar) {
1402 // If the 'memset()' acts on the whole region of destination buffer and
1403 // the value of the second argument of 'memset()' is zero, bind the second
1404 // argument's value to the destination buffer with 'default binding'.
1405 // FIXME: Since there is no perfect way to bind the non-zero character, we
1406 // can only deal with zero value here. In the future, we need to deal with
1407 // the binding of non-zero value in the case of whole region.
1408 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1409 C.getLocationContext());
1410 } else {
1411 // If the destination buffer's extent is not equal to the value of
1412 // third argument, just invalidate buffer.
1413 State = invalidateDestinationBufferBySize(
1414 C, State, DstBuffer, Elem, MemVal, SizeVal, Size->getType());
1415 }
1416
1417 if (StateNullChar && !StateNonNullChar) {
1418 // If the value of the second argument of 'memset()' is zero, set the
1419 // string length of destination buffer to 0 directly.
1420 State = setCStringLength(State, MR,
1421 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1422 } else if (!StateNullChar && StateNonNullChar) {
1423 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1424 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1425 C.getLocationContext(), C.blockCount());
1426
1427 // If the value of second argument is not zero, then the string length
1428 // is at least the size argument.
1429 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1430 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1431
1432 State = setCStringLength(
1433 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1434 MR, NewStrLen);
1435 }
1436 } else {
1437 // If the offset is not zero and char value is not concrete, we can do
1438 // nothing but invalidate the buffer.
1439 State = invalidateDestinationBufferBySize(C, State, DstBuffer, Elem, MemVal,
1440 SizeVal, Size->getType());
1441 }
1442 return true;
1443}
1444
1445//===----------------------------------------------------------------------===//
1446// evaluation of individual function calls.
1447//===----------------------------------------------------------------------===//
1448
1449void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1450 ProgramStateRef state, SizeArgExpr Size,
1451 DestinationArgExpr Dest,
1452 SourceArgExpr Source, bool Restricted,
1453 bool IsMempcpy, CharKind CK) const {
1454 CurrentFunctionDescription = "memory copy function";
1455
1456 // See if the size argument is zero.
1457 const LocationContext *LCtx = C.getLocationContext();
1458 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1459 QualType sizeTy = Size.Expression->getType();
1460
1461 ProgramStateRef stateZeroSize, stateNonZeroSize;
1462 std::tie(stateZeroSize, stateNonZeroSize) =
1463 assumeZero(C, state, sizeVal, sizeTy);
1464
1465 // Get the value of the Dest.
1466 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1467
1468 // If the size is zero, there won't be any actual memory access, so
1469 // just bind the return value to the destination buffer and return.
1470 if (stateZeroSize && !stateNonZeroSize) {
1471 stateZeroSize =
1472 stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1473 C.addTransition(stateZeroSize);
1474 return;
1475 }
1476
1477 // If the size can be nonzero, we have to check the other arguments.
1478 if (stateNonZeroSize) {
1479 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1480 // to the size of the destination buffer, then emit a warning
1481 // that an attacker may provoke a buffer overflow error.
1482 state = stateNonZeroSize;
1483
1484 // Ensure the destination is not null. If it is NULL there will be a
1485 // NULL pointer dereference.
1486 state = checkNonNull(C, state, Dest, destVal);
1487 if (!state)
1488 return;
1489
1490 // Get the value of the Src.
1491 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1492
1493 // Ensure the source is not null. If it is NULL there will be a
1494 // NULL pointer dereference.
1495 state = checkNonNull(C, state, Source, srcVal);
1496 if (!state)
1497 return;
1498
1499 // Ensure the accesses are valid and that the buffers do not overlap.
1500 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1501 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1502
1503 if (Restricted)
1504 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1505
1506 if (!state)
1507 return;
1508
1509 // If this is mempcpy, get the byte after the last byte copied and
1510 // bind the expr.
1511 if (IsMempcpy) {
1512 // Get the byte after the last byte copied.
1513 SValBuilder &SvalBuilder = C.getSValBuilder();
1514 ASTContext &Ctx = SvalBuilder.getContext();
1515 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1516 SVal DestRegCharVal =
1517 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1518 SVal lastElement = C.getSValBuilder().evalBinOp(
1519 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1520 // If we don't know how much we copied, we can at least
1521 // conjure a return value for later.
1522 if (lastElement.isUnknown())
1523 lastElement = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1524
1525 // The byte after the last byte copied is the return value.
1526 state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1527 } else {
1528 // All other copies return the destination buffer.
1529 // (Well, bcopy() has a void return type, but this won't hurt.)
1530 state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1531 }
1532
1533 // Invalidate the destination (regular invalidation without pointer-escaping
1534 // the address of the top-level region).
1535 // FIXME: Even if we can't perfectly model the copy, we should see if we
1536 // can use LazyCompoundVals to copy the source values into the destination.
1537 // This would probably remove any existing bindings past the end of the
1538 // copied region, but that's still an improvement over blank invalidation.
1539 state = invalidateDestinationBufferBySize(
1540 C, state, Dest.Expression, Call.getCFGElementRef(),
1541 C.getSVal(Dest.Expression), sizeVal, Size.Expression->getType());
1542
1543 // Invalidate the source (const-invalidation without const-pointer-escaping
1544 // the address of the top-level region).
1545 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(),
1546 C.getSVal(Source.Expression));
1547
1548 C.addTransition(state);
1549 }
1550}
1551
1552void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1553 CharKind CK) const {
1554 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1555 // The return value is the address of the destination buffer.
1556 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1557 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1558 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1559
1560 ProgramStateRef State = C.getState();
1561
1562 constexpr bool IsRestricted = true;
1563 constexpr bool IsMempcpy = false;
1564 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1565}
1566
1567void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1568 CharKind CK) const {
1569 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1570 // The return value is a pointer to the byte following the last written byte.
1571 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1572 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1573 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1574
1575 constexpr bool IsRestricted = true;
1576 constexpr bool IsMempcpy = true;
1577 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1578 IsMempcpy, CK);
1579}
1580
1581void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1582 CharKind CK) const {
1583 // void *memmove(void *dst, const void *src, size_t n);
1584 // The return value is the address of the destination buffer.
1585 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1586 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1587 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1588
1589 constexpr bool IsRestricted = false;
1590 constexpr bool IsMempcpy = false;
1591 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1592 IsMempcpy, CK);
1593}
1594
1595void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1596 // void bcopy(const void *src, void *dst, size_t n);
1597 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1598 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1599 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1600
1601 constexpr bool IsRestricted = false;
1602 constexpr bool IsMempcpy = false;
1603 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1604 IsMempcpy, CharKind::Regular);
1605}
1606
1607void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1608 CharKind CK) const {
1609 // int memcmp(const void *s1, const void *s2, size_t n);
1610 CurrentFunctionDescription = "memory comparison function";
1611
1612 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1613 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1614 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1615
1616 ProgramStateRef State = C.getState();
1617 SValBuilder &Builder = C.getSValBuilder();
1618 const LocationContext *LCtx = C.getLocationContext();
1619
1620 // See if the size argument is zero.
1621 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1622 QualType sizeTy = Size.Expression->getType();
1623
1624 ProgramStateRef stateZeroSize, stateNonZeroSize;
1625 std::tie(stateZeroSize, stateNonZeroSize) =
1626 assumeZero(C, State, sizeVal, sizeTy);
1627
1628 // If the size can be zero, the result will be 0 in that case, and we don't
1629 // have to check either of the buffers.
1630 if (stateZeroSize) {
1631 State = stateZeroSize;
1632 State = State->BindExpr(Call.getOriginExpr(), LCtx,
1633 Builder.makeZeroVal(Call.getResultType()));
1634 C.addTransition(State);
1635 }
1636
1637 // If the size can be nonzero, we have to check the other arguments.
1638 if (stateNonZeroSize) {
1639 State = stateNonZeroSize;
1640 // If we know the two buffers are the same, we know the result is 0.
1641 // First, get the two buffers' addresses. Another checker will have already
1642 // made sure they're not undefined.
1643 DefinedOrUnknownSVal LV =
1644 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1645 DefinedOrUnknownSVal RV =
1646 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1647
1648 // See if they are the same.
1649 ProgramStateRef SameBuffer, NotSameBuffer;
1650 std::tie(SameBuffer, NotSameBuffer) =
1651 State->assume(Builder.evalEQ(State, LV, RV));
1652
1653 // If the two arguments are the same buffer, we know the result is 0,
1654 // and we only need to check one size.
1655 if (SameBuffer && !NotSameBuffer) {
1656 State = SameBuffer;
1657 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1658 if (State) {
1659 State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1660 Builder.makeZeroVal(Call.getResultType()));
1661 C.addTransition(State);
1662 }
1663 return;
1664 }
1665
1666 // If the two arguments might be different buffers, we have to check
1667 // the size of both of them.
1668 assert(NotSameBuffer);
1669 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1670 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1671 if (State) {
1672 // The return value is the comparison result, which we don't know.
1673 SVal CmpV = Builder.conjureSymbolVal(Call, C.blockCount());
1674 State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1675 C.addTransition(State);
1676 }
1677 }
1678}
1679
1680void CStringChecker::evalstrLength(CheckerContext &C,
1681 const CallEvent &Call) const {
1682 // size_t strlen(const char *s);
1683 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1684}
1685
1686void CStringChecker::evalstrnLength(CheckerContext &C,
1687 const CallEvent &Call) const {
1688 // size_t strnlen(const char *s, size_t maxlen);
1689 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1690}
1691
1692void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1693 const CallEvent &Call,
1694 bool IsStrnlen) const {
1695 CurrentFunctionDescription = "string length function";
1696 ProgramStateRef state = C.getState();
1697 const LocationContext *LCtx = C.getLocationContext();
1698
1699 if (IsStrnlen) {
1700 const Expr *maxlenExpr = Call.getArgExpr(1);
1701 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1702
1703 ProgramStateRef stateZeroSize, stateNonZeroSize;
1704 std::tie(stateZeroSize, stateNonZeroSize) =
1705 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1706
1707 // If the size can be zero, the result will be 0 in that case, and we don't
1708 // have to check the string itself.
1709 if (stateZeroSize) {
1710 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1711 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1712 C.addTransition(stateZeroSize);
1713 }
1714
1715 // If the size is GUARANTEED to be zero, we're done!
1716 if (!stateNonZeroSize)
1717 return;
1718
1719 // Otherwise, record the assumption that the size is nonzero.
1720 state = stateNonZeroSize;
1721 }
1722
1723 // Check that the string argument is non-null.
1724 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1725 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1726 state = checkNonNull(C, state, Arg, ArgVal);
1727
1728 if (!state)
1729 return;
1730
1731 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1732
1733 // If the argument isn't a valid C string, there's no valid state to
1734 // transition to.
1735 if (strLength.isUndef())
1736 return;
1737
1738 DefinedOrUnknownSVal result = UnknownVal();
1739
1740 // If the check is for strnlen() then bind the return value to no more than
1741 // the maxlen value.
1742 if (IsStrnlen) {
1743 QualType cmpTy = C.getSValBuilder().getConditionType();
1744
1745 // It's a little unfortunate to be getting this again,
1746 // but it's not that expensive...
1747 const Expr *maxlenExpr = Call.getArgExpr(1);
1748 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1749
1750 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1751 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1752
1753 if (strLengthNL && maxlenValNL) {
1754 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1755
1756 // Check if the strLength is greater than the maxlen.
1757 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1758 C.getSValBuilder()
1759 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1760 .castAs<DefinedOrUnknownSVal>());
1761
1762 if (stateStringTooLong && !stateStringNotTooLong) {
1763 // If the string is longer than maxlen, return maxlen.
1764 result = *maxlenValNL;
1765 } else if (stateStringNotTooLong && !stateStringTooLong) {
1766 // If the string is shorter than maxlen, return its length.
1767 result = *strLengthNL;
1768 }
1769 }
1770
1771 if (result.isUnknown()) {
1772 // If we don't have enough information for a comparison, there's
1773 // no guarantee the full string length will actually be returned.
1774 // All we know is the return value is the min of the string length
1775 // and the limit. This is better than nothing.
1776 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1777 NonLoc resultNL = result.castAs<NonLoc>();
1778
1779 if (strLengthNL) {
1780 state = state->assume(C.getSValBuilder().evalBinOpNN(
1781 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1782 .castAs<DefinedOrUnknownSVal>(), true);
1783 }
1784
1785 if (maxlenValNL) {
1786 state = state->assume(C.getSValBuilder().evalBinOpNN(
1787 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1788 .castAs<DefinedOrUnknownSVal>(), true);
1789 }
1790 }
1791
1792 } else {
1793 // This is a plain strlen(), not strnlen().
1794 result = strLength.castAs<DefinedOrUnknownSVal>();
1795
1796 // If we don't know the length of the string, conjure a return
1797 // value, so it can be used in constraints, at least.
1798 if (result.isUnknown()) {
1799 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1800 }
1801 }
1802
1803 // Bind the return value.
1804 assert(!result.isUnknown() && "Should have conjured a value by now");
1805 state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1806 C.addTransition(state);
1807}
1808
1809void CStringChecker::evalStrcpy(CheckerContext &C,
1810 const CallEvent &Call) const {
1811 // char *strcpy(char *restrict dst, const char *restrict src);
1812 evalStrcpyCommon(C, Call,
1813 /* ReturnEnd = */ false,
1814 /* IsBounded = */ false,
1815 /* appendK = */ ConcatFnKind::none);
1816}
1817
1818void CStringChecker::evalStrncpy(CheckerContext &C,
1819 const CallEvent &Call) const {
1820 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1821 evalStrcpyCommon(C, Call,
1822 /* ReturnEnd = */ false,
1823 /* IsBounded = */ true,
1824 /* appendK = */ ConcatFnKind::none);
1825}
1826
1827void CStringChecker::evalStpcpy(CheckerContext &C,
1828 const CallEvent &Call) const {
1829 // char *stpcpy(char *restrict dst, const char *restrict src);
1830 evalStrcpyCommon(C, Call,
1831 /* ReturnEnd = */ true,
1832 /* IsBounded = */ false,
1833 /* appendK = */ ConcatFnKind::none);
1834}
1835
1836void CStringChecker::evalStrlcpy(CheckerContext &C,
1837 const CallEvent &Call) const {
1838 // size_t strlcpy(char *dest, const char *src, size_t size);
1839 evalStrcpyCommon(C, Call,
1840 /* ReturnEnd = */ true,
1841 /* IsBounded = */ true,
1842 /* appendK = */ ConcatFnKind::none,
1843 /* returnPtr = */ false);
1844}
1845
1846void CStringChecker::evalStrcat(CheckerContext &C,
1847 const CallEvent &Call) const {
1848 // char *strcat(char *restrict s1, const char *restrict s2);
1849 evalStrcpyCommon(C, Call,
1850 /* ReturnEnd = */ false,
1851 /* IsBounded = */ false,
1852 /* appendK = */ ConcatFnKind::strcat);
1853}
1854
1855void CStringChecker::evalStrncat(CheckerContext &C,
1856 const CallEvent &Call) const {
1857 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1858 evalStrcpyCommon(C, Call,
1859 /* ReturnEnd = */ false,
1860 /* IsBounded = */ true,
1861 /* appendK = */ ConcatFnKind::strcat);
1862}
1863
1864void CStringChecker::evalStrlcat(CheckerContext &C,
1865 const CallEvent &Call) const {
1866 // size_t strlcat(char *dst, const char *src, size_t size);
1867 // It will append at most size - strlen(dst) - 1 bytes,
1868 // NULL-terminating the result.
1869 evalStrcpyCommon(C, Call,
1870 /* ReturnEnd = */ false,
1871 /* IsBounded = */ true,
1872 /* appendK = */ ConcatFnKind::strlcat,
1873 /* returnPtr = */ false);
1874}
1875
/// Shared model behind evalStrcpy, evalStrncpy, evalStpcpy, evalStrlcpy,
/// evalStrcat, evalStrncat and evalStrlcat.
///
/// \param C         Checker context used to read state and add transitions.
/// \param Call      The call being modeled. Argument 0 is the destination,
///                  argument 1 the source and, when \p IsBounded is set,
///                  argument 2 the size/bound.
/// \param ReturnEnd Only consulted when \p returnPtr is true: the result is
///                  then a pointer derived from the end of the copied data
///                  (or a conjured value) instead of the destination pointer.
/// \param IsBounded True when the call carries a size argument that limits
///                  the copy.
/// \param appendK   ConcatFnKind::none for copy functions; strcat / strlcat
///                  select the two concatenation flavours.
/// \param returnPtr True when the modeled function returns a pointer, false
///                  when it returns a size. The callers that omit it rely on
///                  a default declared outside this chunk (presumably true —
///                  confirm against the declaration).
///
/// Rough order of work: null checks on both pointers, string lengths of
/// source and destination, overlap check, bound handling (how much is copied
/// and the furthest index the bound allows), final string length, return
/// value, destination overflow checks, invalidation of the buffers, recording
/// of the new C string length, and finally the transition. Every early
/// `return` below leaves without adding a transition, except the known-zero
/// size path which adds its own.
1876void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1877 bool ReturnEnd, bool IsBounded,
1878 ConcatFnKind appendK,
1879 bool returnPtr) const {
1880 if (appendK == ConcatFnKind::none)
1881 CurrentFunctionDescription = "string copy function";
1882 else
1883 CurrentFunctionDescription = "string concatenation function";
1884
1885 ProgramStateRef state = C.getState();
1886 const LocationContext *LCtx = C.getLocationContext();
1887
1888 // Check that the destination is non-null.
1889 DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1890 SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1891 state = checkNonNull(C, state, Dst, DstVal);
1892 if (!state)
1893 return;
1894
1895 // Check that the source is non-null.
1896 SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1897 SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1898 state = checkNonNull(C, state, srcExpr, srcVal);
1899 if (!state)
1900 return;
1901
1902 // Get the string length of the source.
1903 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1904 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1905
1906 // Get the string length of the destination buffer.
 // This is requested for every function kind, but it is only consumed on the
 // concatenation paths below.
1907 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1908 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1909
1910 // If the source isn't a valid C string, give up.
1911 if (strLength.isUndef())
1912 return;
1913
1914 SValBuilder &svalBuilder = C.getSValBuilder();
1915 QualType cmpTy = svalBuilder.getConditionType();
1916 QualType sizeTy = svalBuilder.getContext().getSizeType();
1917
1918 // These two values allow checking two kinds of errors:
1919 // - actual overflows caused by a source that doesn't fit in the destination
1920 // - potential overflows caused by a bound that could exceed the destination
 // boundWarning doubles as a flag: once it is set, the "actual copy" overflow
 // check further down is skipped in favour of the bound-based check.
1921 SVal amountCopied = UnknownVal();
1922 SVal maxLastElementIndex = UnknownVal();
1923 const char *boundWarning = nullptr;
1924
1925 // FIXME: Why do we choose the srcExpr if the access has no size?
1926 // Note that the 3rd argument of the call would be the size parameter.
1927 SizeArgExpr SrcExprAsSizeDummy = {
1928 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1929 state = CheckOverlap(
1930 C, state,
1931 (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1932 Dst, srcExpr);
1933
1934 if (!state)
1935 return;
1936
1937 // If the function is strncpy, strncat, etc... it is bounded.
1938 if (IsBounded) {
1939 // Get the max number of characters to copy.
1940 SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1941 SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1942
1943 // Protect against misdeclared strncpy().
1944 lenVal =
1945 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1946
1947 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1948
1949 // If we know both values, we might be able to figure out how much
1950 // we're copying.
1951 if (strLengthNL && lenValNL) {
1952 switch (appendK) {
1953 case ConcatFnKind::none:
1954 case ConcatFnKind::strcat: {
1955 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1956 // Check if the max number to copy is less than the length of the src.
1957 // If the bound is equal to the source length, strncpy won't null-
1958 // terminate the result!
1959 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1960 svalBuilder
1961 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1962 .castAs<DefinedOrUnknownSVal>());
1963
 // Only a definite answer is used; if both outcomes are feasible,
 // `state` and `amountCopied` are left untouched.
1964 if (stateSourceTooLong && !stateSourceNotTooLong) {
1965 // Max number to copy is less than the length of the src, so the
1966 // actual strLength copied is the max number arg.
1967 state = stateSourceTooLong;
1968 amountCopied = lenVal;
1969
1970 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1971 // The source buffer entirely fits in the bound.
1972 state = stateSourceNotTooLong;
1973 amountCopied = strLength;
1974 }
1975 break;
1976 }
1977 case ConcatFnKind::strlcat:
 // Without a usable destination length the free space cannot be
 // computed; leave without adding a transition.
1978 if (!dstStrLengthNL)
1979 return;
1980
1981 // amountCopied = min (size - dstLen - 1 , srcLen)
1982 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1983 *dstStrLengthNL, sizeTy);
1984 if (!isa<NonLoc>(freeSpace))
1985 return;
1986 freeSpace =
1987 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1988 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1989 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1990
1991 // While unlikely, it is possible that the subtraction is
1992 // too complex to compute, let's check whether it succeeded.
1993 if (!freeSpaceNL)
1994 return;
1995 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1996 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1997
 // Unlike the case above, `state` itself is not narrowed here; the
 // split is only used to pick a value for amountCopied.
1998 ProgramStateRef TrueState, FalseState;
1999 std::tie(TrueState, FalseState) =
2000 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
2001
2002 // srcStrLength <= size - dstStrLength -1
2003 if (TrueState && !FalseState) {
2004 amountCopied = strLength;
2005 }
2006
2007 // srcStrLength > size - dstStrLength -1
2008 if (!TrueState && FalseState) {
2009 amountCopied = freeSpace;
2010 }
2011
 // Both outcomes feasible: amountCopied still holds its initial
 // UnknownVal() on this path, so this assignment only restates it.
2012 if (TrueState && FalseState)
2013 amountCopied = UnknownVal();
2014 break;
2015 }
2016 }
2017 // We still want to know if the bound is known to be too large.
2018 if (lenValNL) {
2019 switch (appendK) {
2020 case ConcatFnKind::strcat:
2021 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
2022
2023 // Get the string length of the destination. If the destination is
2024 // memory that can't have a string length, we shouldn't be copying
2025 // into it anyway.
2026 if (dstStrLength.isUndef())
2027 return;
2028
2029 if (dstStrLengthNL) {
2030 maxLastElementIndex = svalBuilder.evalBinOpNN(
2031 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
2032
2033 boundWarning = "Size argument is greater than the free space in the "
2034 "destination buffer";
2035 }
2036 break;
2037 case ConcatFnKind::none:
2038 case ConcatFnKind::strlcat:
2039 // For strncpy and strlcat, this is just checking
2040 // that lenVal <= sizeof(dst).
2041 // (Yes, strncpy and strncat differ in how they treat termination.
2042 // strncat ALWAYS terminates, but strncpy doesn't.)
2043
2044 // We need a special case for when the copy size is zero, in which
2045 // case strncpy will do no work at all. Our bounds check uses n-1
2046 // as the last element accessed, so n == 0 is problematic.
2047 ProgramStateRef StateZeroSize, StateNonZeroSize;
2048 std::tie(StateZeroSize, StateNonZeroSize) =
2049 assumeZero(C, state, *lenValNL, sizeTy);
2050
2051 // If the size is known to be zero, we're done.
 // This is the only early exit that adds a transition: the return value
 // is the destination pointer, strlen(src), or strlen(src) + strlen(dst)
 // depending on returnPtr and appendK.
2052 if (StateZeroSize && !StateNonZeroSize) {
2053 if (returnPtr) {
2054 StateZeroSize =
2055 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
2056 } else {
2057 if (appendK == ConcatFnKind::none) {
2058 // strlcpy returns strlen(src)
2059 StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
2060 LCtx, strLength);
2061 } else {
2062 // strlcat returns strlen(src) + strlen(dst)
 // NOTE(review): dstStrLength has not been tested with isUndef() on
 // this path, and the sum is evaluated against `state` rather than
 // StateZeroSize — confirm both are intended.
2063 SVal retSize = svalBuilder.evalBinOp(
2064 state, BO_Add, strLength, dstStrLength, sizeTy);
2065 StateZeroSize =
2066 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2067 }
2068 }
2069 C.addTransition(StateZeroSize);
2070 return;
2071 }
2072
2073 // Otherwise, go ahead and figure out the last element we'll touch.
2074 // We don't record the non-zero assumption here because we can't
2075 // be sure. We won't warn on a possible zero.
2076 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2077 maxLastElementIndex =
2078 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2079 boundWarning = "Size argument is greater than the length of the "
2080 "destination buffer";
2081 break;
2082 }
2083 }
2084 } else {
2085 // The function isn't bounded. The amount copied should match the length
2086 // of the source buffer.
2087 amountCopied = strLength;
2088 }
2089
2090 assert(state);
2091
2092 // This represents the number of characters copied into the destination
2093 // buffer. (It may not actually be the strlen if the destination buffer
2094 // is not terminated.)
2095 SVal finalStrLength = UnknownVal();
 // Return value used for the size-returning functions (returnPtr == false).
2096 SVal strlRetVal = UnknownVal();
2097
2098 if (appendK == ConcatFnKind::none && !returnPtr) {
2099 // strlcpy returns strlen(src)
2100 strlRetVal = strLength;
2101 }
2102
2103 // If this is an appending function (strcat, strncat...) then set the
2104 // string length to strlen(src) + strlen(dst) since the buffer will
2105 // ultimately contain both.
2106 if (appendK != ConcatFnKind::none) {
2107 // Get the string length of the destination. If the destination is memory
2108 // that can't have a string length, we shouldn't be copying into it anyway.
2109 if (dstStrLength.isUndef())
2110 return;
2111
 // strlcat's return value is strlen(src) + strlen(dst) when both are known;
 // otherwise strlRetVal stays UnknownVal().
2112 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2113 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2114 *dstStrLengthNL, sizeTy);
2115 }
2116
2117 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2118
2119 // If we know both string lengths, we might know the final string length.
2120 if (amountCopiedNL && dstStrLengthNL) {
2121 // Make sure the two lengths together don't overflow a size_t.
2122 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2123 if (!state)
2124 return;
2125
2126 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2127 *dstStrLengthNL, sizeTy);
2128 }
2129
2130 // If we couldn't get a single value for the final string length,
2131 // we can at least bound it by the individual lengths.
2132 if (finalStrLength.isUnknown()) {
2133 // Try to get a "hypothetical" string length symbol, which we can later
2134 // set as a real value if that turns out to be the case.
2135 finalStrLength =
2136 getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2137 assert(!finalStrLength.isUndef());
2138
2139 if (std::optional<NonLoc> finalStrLengthNL =
2140 finalStrLength.getAs<NonLoc>()) {
 // This whole block is only reached when appendK != none, so the
 // condition below can never hold here.
2141 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2142 // we overwrite dst string with the src
2143 // finalStrLength >= srcStrLength
2144 SVal sourceInResult = svalBuilder.evalBinOpNN(
2145 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2146 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2147 true);
2148 if (!state)
2149 return;
2150 }
2151
2152 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2153 // we extend the dst string with the src
2154 // finalStrLength >= dstStrLength
2155 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2156 *finalStrLengthNL,
2157 *dstStrLengthNL,
2158 cmpTy);
2159 state =
2160 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2161 if (!state)
2162 return;
2163 }
2164 }
2165 }
2166
2167 } else {
2168 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2169 // the final string length will match the input string length.
2170 finalStrLength = amountCopied;
2171 }
2172
2173 SVal Result;
2174
2175 if (returnPtr) {
2176 // The final result of the function will either be a pointer past the last
2177 // copied element, or a pointer to the start of the destination buffer.
 // For ReturnEnd the value starts out unknown and is filled in below, either
 // from the computed last element or by conjuring a symbol.
2178 Result = (ReturnEnd ? UnknownVal() : DstVal);
2179 } else {
2180 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2181 //strlcpy, strlcat
2182 Result = strlRetVal;
2183 else
2184 Result = finalStrLength;
2185 }
2186
2187 assert(state);
2188
2189 // If the destination is a MemRegion, try to check for a buffer overflow and
2190 // record the new string length.
2191 if (std::optional<loc::MemRegionVal> dstRegVal =
2192 DstVal.getAs<loc::MemRegionVal>()) {
2193 QualType ptrTy = Dst.Expression->getType();
2194
2195 // If we have an exact value on a bounded copy, use that to check for
2196 // overflows, rather than our estimate about how much is actually copied.
2197 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2198 SVal maxLastElement =
2199 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2200
2201 // Check if the first byte of the destination is writable.
2202 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2203 if (!state)
2204 return;
2205 // Check if the last byte of the destination is writable.
2206 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2207 if (!state)
2208 return;
2209 }
2210
2211 // Then, if the final length is known...
2212 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2213 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2214 *knownStrLength, ptrTy);
2215
2216 // ...and we haven't checked the bound, we'll check the actual copy.
2217 if (!boundWarning) {
2218 // Check if the first byte of the destination is writable.
2219 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2220 if (!state)
2221 return;
2222 // Check if the last byte of the destination is writable.
2223 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2224 if (!state)
2225 return;
2226 }
2227
2228 // If this is a stpcpy-style copy, the last element is the return value.
2229 if (returnPtr && ReturnEnd)
2230 Result = lastElement;
2231 }
2232
2233 // For bounded method, amountCopied take the minimum of two values,
2234 // for ConcatFnKind::strlcat:
2235 // amountCopied = min (size - dstLen - 1 , srcLen)
2236 // for others:
2237 // amountCopied = min (srcLen, size)
2238 // So even if we don't know about amountCopied, as long as one of them will
2239 // not cause an out-of-bound access, the whole function's operation will not
2240 // too, that will avoid invalidating the superRegion of data member in that
2241 // situation.
 // The default is the conservative answer; it is only relaxed for bounded
 // calls whose copied amount is unknown.
2242 bool CouldAccessOutOfBound = true;
2243 if (IsBounded && amountCopied.isUnknown()) {
 // Returns true (i.e. "could overflow") when there is no value to test,
 // otherwise the negation of isFirstBufInBound() for that value.
2244 auto CouldAccessOutOfBoundForSVal =
2245 [&](std::optional<NonLoc> Val) -> bool {
2246 if (!Val)
2247 return true;
2248 return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2249 Dst.Expression->getType(), *Val,
2250 C.getASTContext().getSizeType());
2251 };
2252
 // First candidate: the source length. Only if that could overflow is the
 // size argument tried as the second candidate.
2253 CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2254
2255 if (CouldAccessOutOfBound) {
2256 // Get the max number of characters to copy.
2257 const Expr *LenExpr = Call.getArgExpr(2);
2258 SVal LenVal = state->getSVal(LenExpr, LCtx);
2259
2260 // Protect against misdeclared strncpy().
2261 LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2262
2263 // Because analyzer doesn't handle expressions like `size -
2264 // dstLen - 1` very well, we roughly use `size` for
2265 // ConcatFnKind::strlcat here, same with other concat kinds.
2266 CouldAccessOutOfBound =
2267 CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2268 }
2269 }
2270
2271 // Invalidate the destination (regular invalidation without pointer-escaping
2272 // the address of the top-level region). This must happen before we set the
2273 // C string length because invalidation will clear the length.
2274 // FIXME: Even if we can't perfectly model the copy, we should see if we
2275 // can use LazyCompoundVals to copy the source values into the destination.
2276 // This would probably remove any existing bindings past the end of the
2277 // string, but that's still an improvement over blank invalidation.
2278 if (CouldAccessOutOfBound)
2279 state = invalidateDestinationBufferBySize(
2280 C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2281 amountCopied, C.getASTContext().getSizeType());
2282 else
2283 state = invalidateDestinationBufferNeverOverflows(
2284 C, state, Call.getCFGElementRef(), *dstRegVal);
2285
2286 // Invalidate the source (const-invalidation without const-pointer-escaping
2287 // the address of the top-level region).
2288 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(), srcVal);
2289
2290 // Set the C string length of the destination, if we know it.
2291 if (IsBounded && (appendK == ConcatFnKind::none)) {
2292 // strncpy is annoying in that it doesn't guarantee to null-terminate
2293 // the result string. If the original string didn't fit entirely inside
2294 // the bound (including the null-terminator), we don't know how long the
2295 // result is.
2296 if (amountCopied != strLength)
2297 finalStrLength = UnknownVal();
2298 }
2299 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2300 }
2301
2302 assert(state);
2303
2304 if (returnPtr) {
2305 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2306 // overflow, we still need a result. Conjure a return value.
2307 if (ReturnEnd && Result.isUnknown()) {
2308 Result = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2309 }
2310 }
2311 // Set the return value.
2312 state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2313 C.addTransition(state);
2314}
2315
2316void CStringChecker::evalStrxfrm(CheckerContext &C,
2317 const CallEvent &Call) const {
2318 // size_t strxfrm(char *dest, const char *src, size_t n);
2319 CurrentFunctionDescription = "locale transformation function";
2320
2321 ProgramStateRef State = C.getState();
2322 const LocationContext *LCtx = C.getLocationContext();
2323 SValBuilder &SVB = C.getSValBuilder();
2324
2325 // Get arguments
2326 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2327 SourceArgExpr Source = {{Call.getArgExpr(1), 1}};
2328 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2329
2330 // `src` can never be null
2331 SVal SrcVal = State->getSVal(Source.Expression, LCtx);
2332 State = checkNonNull(C, State, Source, SrcVal);
2333 if (!State)
2334 return;
2335
2336 // Buffer must not overlap
2337 State = CheckOverlap(C, State, Size, Dest, Source, CK_Regular);
2338 if (!State)
2339 return;
2340
2341 // The function returns an implementation-defined length needed for
2342 // transformation
2343 SVal RetVal = SVB.conjureSymbolVal(Call, C.blockCount());
2344
2345 auto BindReturnAndTransition = [&RetVal, &Call, LCtx,
2346 &C](ProgramStateRef State) {
2347 if (State) {
2348 State = State->BindExpr(Call.getOriginExpr(), LCtx, RetVal);
2349 C.addTransition(State);
2350 }
2351 };
2352
2353 // Check if size is zero
2354 SVal SizeVal = State->getSVal(Size.Expression, LCtx);
2355 QualType SizeTy = Size.Expression->getType();
2356
2357 auto [StateZeroSize, StateSizeNonZero] =
2358 assumeZero(C, State, SizeVal, SizeTy);
2359
2360 // We can't assume anything about size, just bind the return value and be done
2361 if (!StateZeroSize && !StateSizeNonZero)
2362 return BindReturnAndTransition(State);
2363
2364 // If `n` is 0, we just return the implementation defined length
2365 if (StateZeroSize && !StateSizeNonZero)
2366 return BindReturnAndTransition(StateZeroSize);
2367
2368 // If `n` is not 0, `dest` can not be null.
2369 SVal DestVal = StateSizeNonZero->getSVal(Dest.Expression, LCtx);
2370 StateSizeNonZero = checkNonNull(C, StateSizeNonZero, Dest, DestVal);
2371 if (!StateSizeNonZero)
2372 return;
2373
2374 // Check that we can write to the destination buffer
2375 StateSizeNonZero = CheckBufferAccess(C, StateSizeNonZero, Dest, Size,
2376 AccessKind::write, CK_Regular);
2377 if (!StateSizeNonZero)
2378 return;
2379
2380 // Success: return value < `n`
2381 // Failure: return value >= `n`
2382 auto ComparisonVal = SVB.evalBinOp(StateSizeNonZero, BO_LT, RetVal, SizeVal,
2383 SVB.getConditionType())
2384 .getAs<DefinedOrUnknownSVal>();
2385 if (!ComparisonVal) {
2386 // Fallback: invalidate the buffer.
2387 StateSizeNonZero = invalidateDestinationBufferBySize(
2388 C, StateSizeNonZero, Dest.Expression, Call.getCFGElementRef(), DestVal,
2389 SizeVal, Size.Expression->getType());
2390 return BindReturnAndTransition(StateSizeNonZero);
2391 }
2392
2393 auto [StateSuccess, StateFailure] = StateSizeNonZero->assume(*ComparisonVal);
2394
2395 if (StateSuccess) {
2396 // The transformation invalidated the buffer.
2397 StateSuccess = invalidateDestinationBufferBySize(
2398 C, StateSuccess, Dest.Expression, Call.getCFGElementRef(), DestVal,
2399 SizeVal, Size.Expression->getType());
2400 BindReturnAndTransition(StateSuccess);
2401 // Fallthrough: We also want to add a transition to the failure state below.
2402 }
2403
2404 if (StateFailure) {
2405 // `dest` buffer content is undefined
2406 if (auto DestLoc = DestVal.getAs<loc::MemRegionVal>()) {
2407 StateFailure = StateFailure->killBinding(*DestLoc);
2408 StateFailure =
2409 StateFailure->bindDefaultInitial(*DestLoc, UndefinedVal{}, LCtx);
2410 }
2411
2412 BindReturnAndTransition(StateFailure);
2413 }
2414}
2415
2416void CStringChecker::evalStrcmp(CheckerContext &C,
2417 const CallEvent &Call) const {
2418 //int strcmp(const char *s1, const char *s2);
2419 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2420}
2421
2422void CStringChecker::evalStrncmp(CheckerContext &C,
2423 const CallEvent &Call) const {
2424 //int strncmp(const char *s1, const char *s2, size_t n);
2425 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2426}
2427
2428void CStringChecker::evalStrcasecmp(CheckerContext &C,
2429 const CallEvent &Call) const {
2430 //int strcasecmp(const char *s1, const char *s2);
2431 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2432}
2433
2434void CStringChecker::evalStrncasecmp(CheckerContext &C,
2435 const CallEvent &Call) const {
2436 //int strncasecmp(const char *s1, const char *s2, size_t n);
2437 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2438}
2439
2440void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2441 bool IsBounded, bool IgnoreCase) const {
2442 CurrentFunctionDescription = "string comparison function";
2443 ProgramStateRef state = C.getState();
2444 const LocationContext *LCtx = C.getLocationContext();
2445
2446 // Check that the first string is non-null
2447 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2448 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2449 state = checkNonNull(C, state, Left, LeftVal);
2450 if (!state)
2451 return;
2452
2453 // Check that the second string is non-null.
2454 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2455 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2456 state = checkNonNull(C, state, Right, RightVal);
2457 if (!state)
2458 return;
2459
2460 // Get the string length of the first string or give up.
2461 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2462 if (LeftLength.isUndef())
2463 return;
2464
2465 // Get the string length of the second string or give up.
2466 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2467 if (RightLength.isUndef())
2468 return;
2469
2470 // If we know the two buffers are the same, we know the result is 0.
2471 // First, get the two buffers' addresses. Another checker will have already
2472 // made sure they're not undefined.
2473 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2474 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2475
2476 // See if they are the same.
2477 SValBuilder &svalBuilder = C.getSValBuilder();
2478 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2479 ProgramStateRef StSameBuf, StNotSameBuf;
2480 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2481
2482 // If the two arguments might be the same buffer, we know the result is 0,
2483 // and we only need to check one size.
2484 if (StSameBuf) {
2485 StSameBuf =
2486 StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2487 svalBuilder.makeZeroVal(Call.getResultType()));
2488 C.addTransition(StSameBuf);
2489
2490 // If the two arguments are GUARANTEED to be the same, we're done!
2491 if (!StNotSameBuf)
2492 return;
2493 }
2494
2495 assert(StNotSameBuf);
2496 state = StNotSameBuf;
2497
2498 // At this point we can go about comparing the two buffers.
2499 // For now, we only do this if they're both known string literals.
2500
2501 // Attempt to extract string literals from both expressions.
2502 const StringLiteral *LeftStrLiteral =
2503 getCStringLiteral(C, state, Left.Expression, LeftVal);
2504 const StringLiteral *RightStrLiteral =
2505 getCStringLiteral(C, state, Right.Expression, RightVal);
2506 bool canComputeResult = false;
2507 SVal resultVal = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2508
2509 if (LeftStrLiteral && RightStrLiteral) {
2510 StringRef LeftStrRef = LeftStrLiteral->getString();
2511 StringRef RightStrRef = RightStrLiteral->getString();
2512
2513 if (IsBounded) {
2514 // Get the max number of characters to compare.
2515 const Expr *lenExpr = Call.getArgExpr(2);
2516 SVal lenVal = state->getSVal(lenExpr, LCtx);
2517
2518 // If the length is known, we can get the right substrings.
2519 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2520 // Create substrings of each to compare the prefix.
2521 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2522 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2523 canComputeResult = true;
2524 }
2525 } else {
2526 // This is a normal, unbounded strcmp.
2527 canComputeResult = true;
2528 }
2529
2530 if (canComputeResult) {
2531 // Real strcmp stops at null characters.
2532 size_t s1Term = LeftStrRef.find('\0');
2533 if (s1Term != StringRef::npos)
2534 LeftStrRef = LeftStrRef.substr(0, s1Term);
2535
2536 size_t s2Term = RightStrRef.find('\0');
2537 if (s2Term != StringRef::npos)
2538 RightStrRef = RightStrRef.substr(0, s2Term);
2539
2540 // Use StringRef's comparison methods to compute the actual result.
2541 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2542 : LeftStrRef.compare(RightStrRef);
2543
2544 // The strcmp function returns an integer greater than, equal to, or less
2545 // than zero, [c11, p7.24.4.2].
2546 if (compareRes == 0) {
2547 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2548 }
2549 else {
2550 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2551 // Constrain strcmp's result range based on the result of StringRef's
2552 // comparison methods.
2553 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2554 SVal compareWithZero =
2555 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2556 svalBuilder.getConditionType());
2557 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2558 state = state->assume(compareWithZeroVal, true);
2559 }
2560 }
2561 }
2562
2563 state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2564
2565 // Record this as a possible path.
2566 C.addTransition(state);
2567}
2568
2569void CStringChecker::evalStrsep(CheckerContext &C,
2570 const CallEvent &Call) const {
2571 // char *strsep(char **stringp, const char *delim);
2572 // Verify whether the search string parameter matches the return type.
2573 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2574
2575 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2576 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2577 CharPtrTy.getUnqualifiedType())
2578 return;
2579
2580 CurrentFunctionDescription = "strsep()";
2581 ProgramStateRef State = C.getState();
2582 const LocationContext *LCtx = C.getLocationContext();
2583
2584 // Check that the search string pointer is non-null (though it may point to
2585 // a null string).
2586 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2587 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2588 if (!State)
2589 return;
2590
2591 // Check that the delimiter string is non-null.
2592 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2593 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2594 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2595 if (!State)
2596 return;
2597
2598 SValBuilder &SVB = C.getSValBuilder();
2599 SVal Result;
2600 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2601 // Get the current value of the search string pointer, as a char*.
2602 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2603
2604 // Invalidate the search string, representing the change of one delimiter
2605 // character to NUL.
2606 // As the replacement never overflows, do not invalidate its super region.
2607 State = invalidateDestinationBufferNeverOverflows(
2608 C, State, Call.getCFGElementRef(), Result);
2609
2610 // Overwrite the search string pointer. The new value is either an address
2611 // further along in the same string, or NULL if there are no more tokens.
2612 State = State->bindLoc(*SearchStrLoc,
2613 SVB.conjureSymbolVal(Call, C.blockCount(), getTag()),
2614 LCtx);
2615 } else {
2616 assert(SearchStrVal.isUnknown());
2617 // Conjure a symbolic value. It's the best we can do.
2618 Result = SVB.conjureSymbolVal(Call, C.blockCount());
2619 }
2620
2621 // Set the return value, and finish.
2622 State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2623 C.addTransition(State);
2624}
2625
2626// These should probably be moved into a C++ standard library checker.
2627void CStringChecker::evalStdCopy(CheckerContext &C,
2628 const CallEvent &Call) const {
2629 evalStdCopyCommon(C, Call);
2630}
2631
2632void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2633 const CallEvent &Call) const {
2634 evalStdCopyCommon(C, Call);
2635}
2636
2637void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2638 const CallEvent &Call) const {
2639 if (!Call.getArgExpr(2)->getType()->isPointerType())
2640 return;
2641
2642 ProgramStateRef State = C.getState();
2643
2644 const LocationContext *LCtx = C.getLocationContext();
2645
2646 // template <class _InputIterator, class _OutputIterator>
2647 // _OutputIterator
2648 // copy(_InputIterator __first, _InputIterator __last,
2649 // _OutputIterator __result)
2650
2651 // Invalidate the destination buffer
2652 const Expr *Dst = Call.getArgExpr(2);
2653 SVal DstVal = State->getSVal(Dst, LCtx);
2654 // FIXME: As we do not know how many items are copied, we also invalidate the
2655 // super region containing the target location.
2656 State = invalidateDestinationBufferAlwaysEscapeSuperRegion(
2657 C, State, Call.getCFGElementRef(), DstVal);
2658
2659 SValBuilder &SVB = C.getSValBuilder();
2660
2661 SVal ResultVal = SVB.conjureSymbolVal(Call, C.blockCount());
2662 State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2663
2664 C.addTransition(State);
2665}
2666
2667void CStringChecker::evalMemset(CheckerContext &C,
2668 const CallEvent &Call) const {
2669 // void *memset(void *s, int c, size_t n);
2670 CurrentFunctionDescription = "memory set function";
2671
2672 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2673 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2674 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2675
2676 ProgramStateRef State = C.getState();
2677
2678 // See if the size argument is zero.
2679 const LocationContext *LCtx = C.getLocationContext();
2680 SVal SizeVal = C.getSVal(Size.Expression);
2681 QualType SizeTy = Size.Expression->getType();
2682
2683 ProgramStateRef ZeroSize, NonZeroSize;
2684 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2685
2686 // Get the value of the memory area.
2687 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2688
2689 // If the size is zero, there won't be any actual memory access, so
2690 // just bind the return value to the buffer and return.
2691 if (ZeroSize && !NonZeroSize) {
2692 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2693 C.addTransition(ZeroSize);
2694 return;
2695 }
2696
2697 // Ensure the memory area is not null.
2698 // If it is NULL there will be a NULL pointer dereference.
2699 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2700 if (!State)
2701 return;
2702
2703 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2704 if (!State)
2705 return;
2706
2707 // According to the values of the arguments, bind the value of the second
2708 // argument to the destination buffer and set string length, or just
2709 // invalidate the destination buffer.
2710 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(),
2711 C.getSVal(CharE.Expression), Size.Expression, C, State))
2712 return;
2713
2714 State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2715 C.addTransition(State);
2716}
2717
2718void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2719 CurrentFunctionDescription = "memory clearance function";
2720
2721 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2722 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2723 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2724
2725 ProgramStateRef State = C.getState();
2726
2727 // See if the size argument is zero.
2728 SVal SizeVal = C.getSVal(Size.Expression);
2729 QualType SizeTy = Size.Expression->getType();
2730
2731 ProgramStateRef StateZeroSize, StateNonZeroSize;
2732 std::tie(StateZeroSize, StateNonZeroSize) =
2733 assumeZero(C, State, SizeVal, SizeTy);
2734
2735 // If the size is zero, there won't be any actual memory access,
2736 // In this case we just return.
2737 if (StateZeroSize && !StateNonZeroSize) {
2738 C.addTransition(StateZeroSize);
2739 return;
2740 }
2741
2742 // Get the value of the memory area.
2743 SVal MemVal = C.getSVal(Buffer.Expression);
2744
2745 // Ensure the memory area is not null.
2746 // If it is NULL there will be a NULL pointer dereference.
2747 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2748 if (!State)
2749 return;
2750
2751 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2752 if (!State)
2753 return;
2754
2755 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(), Zero,
2756 Size.Expression, C, State))
2757 return;
2758
2759 C.addTransition(State);
2760}
2761
2762void CStringChecker::evalSprintf(CheckerContext &C,
2763 const CallEvent &Call) const {
2764 CurrentFunctionDescription = "'sprintf'";
2765 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2766}
2767
2768void CStringChecker::evalSnprintf(CheckerContext &C,
2769 const CallEvent &Call) const {
2770 CurrentFunctionDescription = "'snprintf'";
2771 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2772}
2773
2774void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2775 bool IsBounded) const {
2776 ProgramStateRef State = C.getState();
2777 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2778 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2779
2780 const auto NumParams = Call.parameters().size();
2781 if (CE->getNumArgs() < NumParams) {
2782 // This is an invalid call, let's just ignore it.
2783 return;
2784 }
2785
2786 const auto AllArguments =
2787 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2788 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2789
2790 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2791 // We consider only string buffers
2792 if (const QualType type = ArgExpr->getType();
2793 !type->isAnyPointerType() ||
2794 !type->getPointeeType()->isAnyCharacterType())
2795 continue;
2796 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2797
2798 // Ensure the buffers do not overlap.
2799 SizeArgExpr SrcExprAsSizeDummy = {
2800 {Source.Expression, Source.ArgumentIndex}};
2801 State = CheckOverlap(
2802 C, State,
2803 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2804 Dest, Source);
2805 if (!State)
2806 return;
2807 }
2808
2809 C.addTransition(State);
2810}
2811
2812//===----------------------------------------------------------------------===//
2813// The driver method, and other Checker callbacks.
2814//===----------------------------------------------------------------------===//
2815
2816CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2817 CheckerContext &C) const {
2818 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2819 if (!CE)
2820 return nullptr;
2821
2822 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2823 if (!FD)
2824 return nullptr;
2825
2826 if (StdCopy.matches(Call))
2827 return &CStringChecker::evalStdCopy;
2828 if (StdCopyBackward.matches(Call))
2829 return &CStringChecker::evalStdCopyBackward;
2830
2831 // Pro-actively check that argument types are safe to do arithmetic upon.
2832 // We do not want to crash if someone accidentally passes a structure
2833 // into, say, a C++ overload of any of these functions. We could not check
2834 // that for std::copy because they may have arguments of other types.
2835 for (auto I : CE->arguments()) {
2836 QualType T = I->getType();
2837 if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2838 return nullptr;
2839 }
2840
2841 const FnCheck *Callback = Callbacks.lookup(Call);
2842 if (Callback)
2843 return *Callback;
2844
2845 return nullptr;
2846}
2847
2848bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2849 FnCheck Callback = identifyCall(Call, C);
2850
2851 // If the callee isn't a string function, let another checker handle it.
2852 if (!Callback)
2853 return false;
2854
2855 // Check and evaluate the call.
2856 assert(isa<CallExpr>(Call.getOriginExpr()));
2857 Callback(this, C, Call);
2858
2859 // If the evaluate call resulted in no change, chain to the next eval call
2860 // handler.
2861 // Note, the custom CString evaluation calls assume that basic safety
2862 // properties are held. However, if the user chooses to turn off some of these
2863 // checks, we ignore the issues and leave the call evaluation to a generic
2864 // handler.
2865 return C.isDifferent();
2866}
2867
2868void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2869 // Record string length for char a[] = "abc";
2870 ProgramStateRef state = C.getState();
2871
2872 for (const auto *I : DS->decls()) {
2873 const VarDecl *D = dyn_cast<VarDecl>(I);
2874 if (!D)
2875 continue;
2876
2877 // FIXME: Handle array fields of structs.
2878 if (!D->getType()->isArrayType())
2879 continue;
2880
2881 const Expr *Init = D->getInit();
2882 if (!Init)
2883 continue;
2885 continue;
2886
2887 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2888 const MemRegion *MR = VarLoc.getAsRegion();
2889 if (!MR)
2890 continue;
2891
2892 SVal StrVal = C.getSVal(Init);
2893 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2894 DefinedOrUnknownSVal strLength =
2895 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2896
2897 state = state->set<CStringLength>(MR, strLength);
2898 }
2899
2900 C.addTransition(state);
2901}
2902
2904CStringChecker::checkRegionChanges(ProgramStateRef state,
2905 const InvalidatedSymbols *,
2906 ArrayRef<const MemRegion *> ExplicitRegions,
2907 ArrayRef<const MemRegion *> Regions,
2908 const LocationContext *LCtx,
2909 const CallEvent *Call) const {
2910 CStringLengthTy Entries = state->get<CStringLength>();
2911 if (Entries.isEmpty())
2912 return state;
2913
2914 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2915 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2916
2917 // First build sets for the changed regions and their super-regions.
2918 for (const MemRegion *MR : Regions) {
2919 Invalidated.insert(MR);
2920
2921 SuperRegions.insert(MR);
2922 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2923 MR = SR->getSuperRegion();
2924 SuperRegions.insert(MR);
2925 }
2926 }
2927
2928 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2929
2930 // Then loop over the entries in the current state.
2931 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2932 // Is this entry for a super-region of a changed region?
2933 if (SuperRegions.count(MR)) {
2934 Entries = F.remove(Entries, MR);
2935 continue;
2936 }
2937
2938 // Is this entry for a sub-region of a changed region?
2939 const MemRegion *Super = MR;
2940 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2941 Super = SR->getSuperRegion();
2942 if (Invalidated.count(Super)) {
2943 Entries = F.remove(Entries, MR);
2944 break;
2945 }
2946 }
2947 }
2948
2949 return state->set<CStringLength>(Entries);
2950}
2951
2952void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2953 SymbolReaper &SR) const {
2954 // Mark all symbols in our string length map as valid.
2955 CStringLengthTy Entries = state->get<CStringLength>();
2956
2957 for (SVal Len : llvm::make_second_range(Entries)) {
2958 for (SymbolRef Sym : Len.symbols())
2959 SR.markInUse(Sym);
2960 }
2961}
2962
2963void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2964 CheckerContext &C) const {
2965 ProgramStateRef state = C.getState();
2966 CStringLengthTy Entries = state->get<CStringLength>();
2967 if (Entries.isEmpty())
2968 return;
2969
2970 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2971 for (auto [Reg, Len] : Entries) {
2972 if (SymbolRef Sym = Len.getAsSymbol()) {
2973 if (SR.isDead(Sym))
2974 Entries = F.remove(Entries, Reg);
2975 }
2976 }
2977
2978 state = state->set<CStringLength>(Entries);
2979 C.addTransition(state);
2980}
2981
2982void ento::registerCStringModeling(CheckerManager &Mgr) {
2983 // Other checker relies on the modeling implemented in this checker family,
2984 // so this "modeling checker" can register the 'CStringChecker' backend for
2985 // its callbacks without enabling any of its frontends.
2986 Mgr.getChecker<CStringChecker>();
2987}
2988
2989bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
2990 return true;
2991}
2992
2993#define REGISTER_CHECKER(NAME) \
2994 void ento::registerCString##NAME(CheckerManager &Mgr) { \
2995 Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr); \
2996 } \
2997 \
2998 bool ento::shouldRegisterCString##NAME(const CheckerManager &) { \
2999 return true; \
3000 }
3001
3002REGISTER_CHECKER(NullArg)
3003REGISTER_CHECKER(OutOfBounds)
3004REGISTER_CHECKER(BufferOverlap)
3005REGISTER_CHECKER(NotNullTerm)
3006REGISTER_CHECKER(UninitializedRead)
3007
3008#undef REGISTER_CHECKER
#define V(N, I)
static std::optional< NonLoc > getIndex(ProgramStateRef State, const ElementRegion *ER, CharKind CK)
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx)
#define REGISTER_CHECKER(name)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:226
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType WideCharTy
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType CharTy
CanQualType IntTy
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType UnsignedCharTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
decl_range decls()
Definition Stmt.h:1680
QualType getType() const
Definition Expr.h:144
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8557
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition TypeBase.h:8525
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
unsigned getLength() const
Definition Expr.h:1912
StringRef getString() const
Definition Expr.h:1870
bool isArrayType() const
Definition TypeBase.h:8767
bool isPointerType() const
Definition TypeBase.h:8668
CanQualType getCanonicalTypeUnqualified() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:754
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition TypeBase.h:9156
bool isAnyPointerType() const
Definition TypeBase.h:8676
QualType getType() const
Definition Decl.h:723
const Expr * getInit() const
Definition Decl.h:1383
APSIntPtr getMaxValue(const llvm::APSInt &v)
std::optional< APSIntPtr > evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:584
CHECKER * getChecker(AT &&...Args)
If the the singleton instance of a checker class is not yet constructed, then construct it (with the ...
ElementRegion is used to represent both array elements and casts.
Definition MemRegion.h:1232
QualType getValueType() const override
Definition MemRegion.h:1254
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:98
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
Kind getKind() const
Definition MemRegion.h:203
@ TK_PreserveContents
Tells that a region's contents is not changed.
Definition MemRegion.h:1677
@ TK_SuppressEscape
Suppress pointer-escaping of a region.
Definition MemRegion.h:1680
void setTrait(SymbolRef Sym, InvalidationKinds IK)
bool hasSymbolicOffset() const
Definition MemRegion.h:83
const MemRegion * getRegion() const
It might return null.
Definition MemRegion.h:81
int64_t getOffset() const
Definition MemRegion.h:85
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
BasicValueFactory & getBasicValueFactory()
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, const LocationContext *LCtx, unsigned count)
ProgramStateManager & getStateManager()
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
loc::MemRegionVal makeLoc(SymbolRef sym)
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, ConstCFGElementRef elem, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition SVals.h:56
bool isUndef() const
Definition SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
bool isValid() const
Definition SVals.h:111
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
bool isUnknown() const
Definition SVals.h:105
LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral * getStringLiteral() const
Definition MemRegion.h:873
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition MemRegion.h:487
llvm::iterator_range< symbol_iterator > symbols() const
Definition SymExpr.h:107
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
__inline void unsigned int _2
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
llvm::DenseSet< SymbolRef > InvalidatedSymbols
Definition Store.h:51
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
Definition SymExpr.h:133
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
std::variant< struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl > Decl
All declarations that can appear in a module declaration.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
CFGBlock::ConstCFGElementRef ConstCFGElementRef
Definition CFG.h:1227
@ Result
The result type of a method or function.
Definition TypeBase.h:905
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition CharInfo.h:233
U cast(CodeGen::Address addr)
Definition Address.h:327
int const char * function
Definition c++config.h:31