clang 20.0.0git
SimpleSValBuilder.cpp
Go to the documentation of this file.
1// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10//
11//===----------------------------------------------------------------------===//
12
19#include <optional>
20
21using namespace clang;
22using namespace ento;
23
24namespace {
25class SimpleSValBuilder : public SValBuilder {
26
27 // Query the constraint manager whether the SVal has only one possible
28 // (integer) value. If that is the case, the value is returned. Otherwise,
29 // returns NULL.
30 // This is an implementation detail. Checkers should use `getKnownValue()`
31 // instead.
32 static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
33
34 // Helper function that returns the value stored in a nonloc::ConcreteInt or
35 // loc::ConcreteInt.
36 static const llvm::APSInt *getConcreteValue(SVal V);
37
38 // With one `simplifySValOnce` call, a compound symbols might collapse to
39 // simpler symbol tree that is still possible to further simplify. Thus, we
40 // do the simplification on a new symbol tree until we reach the simplest
41 // form, i.e. the fixpoint.
42 // Consider the following symbol `(b * b) * b * b` which has this tree:
43 // *
44 // / \
45 // * b
46 // / \
47 // / b
48 // (b * b)
49 // Now, if the `b * b == 1` new constraint is added then during the first
50 // iteration we have the following transformations:
51 // * *
52 // / \ / \
53 // * b --> b b
54 // / \
55 // / b
56 // 1
57 // We need another iteration to reach the final result `1`.
58 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
59
60 // Recursively descends into symbolic expressions and replaces symbols
61 // with their known values (in the sense of the getConstValue() method).
62 // We traverse the symbol tree and query the constraint values for the
63 // sub-trees and if a value is a constant we do the constant folding.
64 SVal simplifySValOnce(ProgramStateRef State, SVal V);
65
66public:
67 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
68 ProgramStateManager &stateMgr)
69 : SValBuilder(alloc, context, stateMgr) {}
70 ~SimpleSValBuilder() override {}
71
73 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
75 Loc lhs, Loc rhs, QualType resultTy) override;
77 Loc lhs, NonLoc rhs, QualType resultTy) override;
78
79 /// Evaluates a given SVal by recursively evaluating and
80 /// simplifying the children SVals. If the SVal has only one possible
81 /// (integer) value, that value is returned. Otherwise, returns NULL.
82 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
83
84 /// Evaluates a given SVal by recursively evaluating and simplifying the
85 /// children SVals, then returns its minimal possible (integer) value. If the
86 /// constraint manager cannot provide a meaningful answer, this returns NULL.
87 const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override;
88
89 /// Evaluates a given SVal by recursively evaluating and simplifying the
90 /// children SVals, then returns its maximal possible (integer) value. If the
91 /// constraint manager cannot provide a meaningful answer, this returns NULL.
92 const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override;
93
94 SVal simplifySVal(ProgramStateRef State, SVal V) override;
95
96 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
97 const llvm::APSInt &RHS, QualType resultTy);
98};
99} // end anonymous namespace
100
101SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
102 ASTContext &context,
103 ProgramStateManager &stateMgr) {
104 return new SimpleSValBuilder(alloc, context, stateMgr);
105}
106
107// Checks if the negation the value and flipping sign preserve
108// the semantics on the operation in the resultType
109static bool isNegationValuePreserving(const llvm::APSInt &Value,
110 APSIntType ResultType) {
111 const unsigned ValueBits = Value.getSignificantBits();
112 if (ValueBits == ResultType.getBitWidth()) {
113 // The value is the lowest negative value that is representable
114 // in signed integer with bitWith of result type. The
115 // negation is representable if resultType is unsigned.
116 return ResultType.isUnsigned();
117 }
118
119 // If resultType bitWith is higher that number of bits required
120 // to represent RHS, the sign flip produce same value.
121 return ValueBits < ResultType.getBitWidth();
122}
123
124//===----------------------------------------------------------------------===//
125// Transfer function for binary operators.
126//===----------------------------------------------------------------------===//
127
128SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
130 const llvm::APSInt &RHS,
131 QualType resultTy) {
132 bool isIdempotent = false;
133
134 // Check for a few special cases with known reductions first.
135 switch (op) {
136 default:
137 // We can't reduce this case; just treat it normally.
138 break;
139 case BO_Mul:
140 // a*0 and a*1
141 if (RHS == 0)
142 return makeIntVal(0, resultTy);
143 else if (RHS == 1)
144 isIdempotent = true;
145 break;
146 case BO_Div:
147 // a/0 and a/1
148 if (RHS == 0)
149 // This is also handled elsewhere.
150 return UndefinedVal();
151 else if (RHS == 1)
152 isIdempotent = true;
153 break;
154 case BO_Rem:
155 // a%0 and a%1
156 if (RHS == 0)
157 // This is also handled elsewhere.
158 return UndefinedVal();
159 else if (RHS == 1)
160 return makeIntVal(0, resultTy);
161 break;
162 case BO_Add:
163 case BO_Sub:
164 case BO_Shl:
165 case BO_Shr:
166 case BO_Xor:
167 // a+0, a-0, a<<0, a>>0, a^0
168 if (RHS == 0)
169 isIdempotent = true;
170 break;
171 case BO_And:
172 // a&0 and a&(~0)
173 if (RHS == 0)
174 return makeIntVal(0, resultTy);
175 else if (RHS.isAllOnes())
176 isIdempotent = true;
177 break;
178 case BO_Or:
179 // a|0 and a|(~0)
180 if (RHS == 0)
181 isIdempotent = true;
182 else if (RHS.isAllOnes()) {
183 return nonloc::ConcreteInt(BasicVals.Convert(resultTy, RHS));
184 }
185 break;
186 }
187
188 // Idempotent ops (like a*1) can still change the type of an expression.
189 // Wrap the LHS up in a NonLoc again and let evalCast do the
190 // dirty work.
191 if (isIdempotent)
192 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
193
194 // If we reach this point, the expression cannot be simplified.
195 // Make a SymbolVal for the entire expression, after converting the RHS.
196 std::optional<APSIntPtr> ConvertedRHS = BasicVals.getValue(RHS);
198 // We're looking for a type big enough to compare the symbolic value
199 // with the given constant.
200 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
201 ASTContext &Ctx = getContext();
202 QualType SymbolType = LHS->getType();
203 uint64_t ValWidth = RHS.getBitWidth();
204 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
205
206 if (ValWidth < TypeWidth) {
207 // If the value is too small, extend it.
208 ConvertedRHS = BasicVals.Convert(SymbolType, RHS);
209 } else if (ValWidth == TypeWidth) {
210 // If the value is signed but the symbol is unsigned, do the comparison
211 // in unsigned space. [C99 6.3.1.8]
212 // (For the opposite case, the value is already unsigned.)
213 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
214 ConvertedRHS = BasicVals.Convert(SymbolType, RHS);
215 }
216 } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) {
217 // Change a+(-N) into a-N, and a-(-N) into a+N
218 // Adjust addition/subtraction of negative value, to
219 // subtraction/addition of the negated value.
220 APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy);
221 if (isNegationValuePreserving(RHS, resultIntTy)) {
222 ConvertedRHS = BasicVals.getValue(-resultIntTy.convert(RHS));
223 op = (op == BO_Add) ? BO_Sub : BO_Add;
224 } else {
225 ConvertedRHS = BasicVals.Convert(resultTy, RHS);
226 }
227 } else
228 ConvertedRHS = BasicVals.Convert(resultTy, RHS);
229
230 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
231}
232
233// See if Sym is known to be a relation Rel with Bound.
235 llvm::APSInt Bound, ProgramStateRef State) {
236 SValBuilder &SVB = State->getStateManager().getSValBuilder();
238 SVal Result = SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
239 nonloc::ConcreteInt(BV.getValue(Bound)),
240 SVB.getConditionType());
241 if (auto DV = Result.getAs<DefinedSVal>()) {
242 return !State->assume(*DV, false);
243 }
244 return false;
245}
246
247// See if Sym is known to be within [min/4, max/4], where min and max
248// are the bounds of the symbol's integral type. With such symbols,
249// some manipulations can be performed without the risk of overflow.
250// assume() doesn't cause infinite recursion because we should be dealing
251// with simpler symbols on every recursive call.
253 ProgramStateRef State) {
254 SValBuilder &SVB = State->getStateManager().getSValBuilder();
256
257 QualType T = Sym->getType();
259 "This only works with signed integers!");
260 APSIntType AT = BV.getAPSIntType(T);
261
262 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
263 return isInRelation(BO_LE, Sym, Max, State) &&
264 isInRelation(BO_GE, Sym, Min, State);
265}
266
267// Same for the concrete integers: see if I is within [min/4, max/4].
268static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
269 APSIntType AT(I);
270 assert(!AT.isUnsigned() &&
271 "This only works with signed integers!");
272
273 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
274 return (I <= Max) && (I >= -Max);
275}
276
277static std::pair<SymbolRef, APSIntPtr> decomposeSymbol(SymbolRef Sym,
278 BasicValueFactory &BV) {
279 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
280 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
281 return std::make_pair(SymInt->getLHS(),
282 (SymInt->getOpcode() == BO_Add)
283 ? BV.getValue(SymInt->getRHS())
284 : BV.getValue(-SymInt->getRHS()));
285
286 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
287 return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
288}
289
290// Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
291// same signed integral type and no overflows occur (which should be checked
292// by the caller).
295 SymbolRef LSym, llvm::APSInt LInt,
296 SymbolRef RSym, llvm::APSInt RInt) {
297 SValBuilder &SVB = State->getStateManager().getSValBuilder();
299 SymbolManager &SymMgr = SVB.getSymbolManager();
300
301 QualType SymTy = LSym->getType();
302 assert(SymTy == RSym->getType() &&
303 "Symbols are not of the same type!");
304 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
305 "Integers are not of the same type as symbols!");
306 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
307 "Integers are not of the same type as symbols!");
308
309 QualType ResultTy;
311 ResultTy = SVB.getConditionType();
312 else if (BinaryOperator::isAdditiveOp(Op))
313 ResultTy = SymTy;
314 else
315 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
316
317 if (LSym == RSym)
318 return SVB
319 .evalBinOpNN(State, Op, nonloc::ConcreteInt(BV.getValue(LInt)),
320 nonloc::ConcreteInt(BV.getValue(RInt)), ResultTy)
321 .castAs<NonLoc>();
322
323 SymbolRef ResultSym = nullptr;
324 BinaryOperator::Opcode ResultOp;
325 llvm::APSInt ResultInt;
327 // Prefer comparing to a non-negative number.
328 // FIXME: Maybe it'd be better to have consistency in
329 // "$x - $y" vs. "$y - $x" because those are solver's keys.
330 if (LInt > RInt) {
331 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
333 ResultInt = LInt - RInt; // Opposite order!
334 } else {
335 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
336 ResultOp = Op;
337 ResultInt = RInt - LInt; // Opposite order!
338 }
339 } else {
340 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
341 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
342 ResultOp = BO_Add;
343 // Bring back the cosmetic difference.
344 if (ResultInt < 0) {
345 ResultInt = -ResultInt;
346 ResultOp = BO_Sub;
347 } else if (ResultInt == 0) {
348 // Shortcut: Simplify "$x + 0" to "$x".
349 return nonloc::SymbolVal(ResultSym);
350 }
351 }
352 APSIntPtr PersistentResultInt = BV.getValue(ResultInt);
353 return nonloc::SymbolVal(
354 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
355}
356
357// Rearrange if symbol type matches the result type and if the operator is a
358// comparison operator, both symbol and constant must be within constant
359// overflow bounds.
361 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
362 return Sym->getType() == Ty &&
364 (isWithinConstantOverflowBounds(Sym, State) &&
366}
367
368static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
370 NonLoc Rhs, QualType ResultTy) {
371 ProgramStateManager &StateMgr = State->getStateManager();
372 SValBuilder &SVB = StateMgr.getSValBuilder();
373
374 // We expect everything to be of the same type - this type.
375 QualType SingleTy;
376
377 // FIXME: After putting complexity threshold to the symbols we can always
378 // rearrange additive operations but rearrange comparisons only if
379 // option is set.
380 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
381 return std::nullopt;
382
383 SymbolRef LSym = Lhs.getAsSymbol();
384 if (!LSym)
385 return std::nullopt;
386
388 SingleTy = LSym->getType();
389 if (ResultTy != SVB.getConditionType())
390 return std::nullopt;
391 // Initialize SingleTy later with a symbol's type.
392 } else if (BinaryOperator::isAdditiveOp(Op)) {
393 SingleTy = ResultTy;
394 if (LSym->getType() != SingleTy)
395 return std::nullopt;
396 } else {
397 // Don't rearrange other operations.
398 return std::nullopt;
399 }
400
401 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
402
403 // Rearrange signed symbolic expressions only
404 if (!SingleTy->isSignedIntegerOrEnumerationType())
405 return std::nullopt;
406
407 SymbolRef RSym = Rhs.getAsSymbol();
408 if (!RSym || RSym->getType() != SingleTy)
409 return std::nullopt;
410
411 BasicValueFactory &BV = State->getBasicVals();
412 llvm::APSInt LInt, RInt;
413 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
414 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
415 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
416 !shouldRearrange(State, Op, RSym, RInt, SingleTy))
417 return std::nullopt;
418
419 // We know that no overflows can occur anymore.
420 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
421}
422
423SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
425 NonLoc lhs, NonLoc rhs,
426 QualType resultTy) {
427 NonLoc InputLHS = lhs;
428 NonLoc InputRHS = rhs;
429
430 // Constraints may have changed since the creation of a bound SVal. Check if
431 // the values can be simplified based on those new constraints.
432 SVal simplifiedLhs = simplifySVal(state, lhs);
433 SVal simplifiedRhs = simplifySVal(state, rhs);
434 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
435 lhs = *simplifiedLhsAsNonLoc;
436 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
437 rhs = *simplifiedRhsAsNonLoc;
438
439 // Handle trivial case where left-side and right-side are the same.
440 if (lhs == rhs)
441 switch (op) {
442 default:
443 break;
444 case BO_EQ:
445 case BO_LE:
446 case BO_GE:
447 return makeTruthVal(true, resultTy);
448 case BO_LT:
449 case BO_GT:
450 case BO_NE:
451 return makeTruthVal(false, resultTy);
452 case BO_Xor:
453 case BO_Sub:
454 if (resultTy->isIntegralOrEnumerationType())
455 return makeIntVal(0, resultTy);
456 return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
457 QualType{});
458 case BO_Or:
459 case BO_And:
460 return evalCast(lhs, resultTy, QualType{});
461 }
462
463 while (true) {
464 switch (lhs.getKind()) {
465 default:
466 return makeSymExprValNN(op, lhs, rhs, resultTy);
467 case nonloc::PointerToMemberKind: {
468 assert(rhs.getKind() == nonloc::PointerToMemberKind &&
469 "Both SVals should have pointer-to-member-type");
470 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
471 RPTM = rhs.castAs<nonloc::PointerToMember>();
472 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
473 switch (op) {
474 case BO_EQ:
475 return makeTruthVal(LPTMD == RPTMD, resultTy);
476 case BO_NE:
477 return makeTruthVal(LPTMD != RPTMD, resultTy);
478 default:
479 return UnknownVal();
480 }
481 }
482 case nonloc::LocAsIntegerKind: {
483 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
484 switch (rhs.getKind()) {
485 case nonloc::LocAsIntegerKind:
486 // FIXME: at the moment the implementation
487 // of modeling "pointers as integers" is not complete.
489 return UnknownVal();
490 return evalBinOpLL(state, op, lhsL,
492 resultTy);
493 case nonloc::ConcreteIntKind: {
494 // FIXME: at the moment the implementation
495 // of modeling "pointers as integers" is not complete.
497 return UnknownVal();
498 // Transform the integer into a location and compare.
499 // FIXME: This only makes sense for comparisons. If we want to, say,
500 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
501 // then pack it back into a LocAsInteger.
502 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
503 // If the region has a symbolic base, pay attention to the type; it
504 // might be coming from a non-default address space. For non-symbolic
505 // regions it doesn't matter that much because such comparisons would
506 // most likely evaluate to concrete false anyway. FIXME: We might
507 // still need to handle the non-comparison case.
508 if (SymbolRef lSym = lhs.getAsLocSymbol(true))
509 BasicVals.getAPSIntType(lSym->getType()).apply(i);
510 else
511 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
512 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
513 }
514 default:
515 switch (op) {
516 case BO_EQ:
517 return makeTruthVal(false, resultTy);
518 case BO_NE:
519 return makeTruthVal(true, resultTy);
520 default:
521 // This case also handles pointer arithmetic.
522 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
523 }
524 }
525 }
526 case nonloc::ConcreteIntKind: {
527 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
528
529 // If we're dealing with two known constants, just perform the operation.
530 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) {
531 llvm::APSInt RHSValue = *KnownRHSValue;
533 // We're looking for a type big enough to compare the two values.
534 // FIXME: This is not correct. char + short will result in a promotion
535 // to int. Unfortunately we have lost types by this point.
536 APSIntType CompareType = std::max(APSIntType(LHSValue),
537 APSIntType(RHSValue));
538 CompareType.apply(LHSValue);
539 CompareType.apply(RHSValue);
540 } else if (!BinaryOperator::isShiftOp(op)) {
541 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
542 IntType.apply(LHSValue);
543 IntType.apply(RHSValue);
544 }
545
546 std::optional<APSIntPtr> Result =
547 BasicVals.evalAPSInt(op, LHSValue, RHSValue);
548 if (!Result) {
549 if (op == BO_Shl || op == BO_Shr) {
550 // FIXME: At this point the constant folding claims that the result
551 // of a bitwise shift is undefined. However, constant folding
552 // relies on the inaccurate type information that is stored in the
553 // bit size of APSInt objects, and if we reached this point, then
554 // the checker core.BitwiseShift already determined that the shift
555 // is valid (in a PreStmt callback, by querying the real type from
556 // the AST node).
557 // To avoid embarrassing false positives, let's just say that we
558 // don't know anything about the result of the shift.
559 return UnknownVal();
560 }
561 return UndefinedVal();
562 }
563
564 return nonloc::ConcreteInt(*Result);
565 }
566
567 // Swap the left and right sides and flip the operator if doing so
568 // allows us to better reason about the expression (this is a form
569 // of expression canonicalization).
570 // While we're at it, catch some special cases for non-commutative ops.
571 switch (op) {
572 case BO_LT:
573 case BO_GT:
574 case BO_LE:
575 case BO_GE:
577 [[fallthrough]];
578 case BO_EQ:
579 case BO_NE:
580 case BO_Add:
581 case BO_Mul:
582 case BO_And:
583 case BO_Xor:
584 case BO_Or:
585 std::swap(lhs, rhs);
586 continue;
587 case BO_Shr:
588 // (~0)>>a
589 if (LHSValue.isAllOnes() && LHSValue.isSigned())
590 return evalCast(lhs, resultTy, QualType{});
591 [[fallthrough]];
592 case BO_Shl:
593 // 0<<a and 0>>a
594 if (LHSValue == 0)
595 return evalCast(lhs, resultTy, QualType{});
596 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
597 case BO_Div:
598 // 0 / x == 0
599 case BO_Rem:
600 // 0 % x == 0
601 if (LHSValue == 0)
602 return makeZeroVal(resultTy);
603 [[fallthrough]];
604 default:
605 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
606 }
607 }
608 case nonloc::SymbolValKind: {
609 // We only handle LHS as simple symbols or SymIntExprs.
610 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
611
612 // LHS is a symbolic expression.
613 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
614
615 // Is this a logical not? (!x is represented as x == 0.)
616 if (op == BO_EQ && rhs.isZeroConstant()) {
617 // We know how to negate certain expressions. Simplify them here.
618
619 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
620 switch (opc) {
621 default:
622 // We don't know how to negate this operation.
623 // Just handle it as if it were a normal comparison to 0.
624 break;
625 case BO_LAnd:
626 case BO_LOr:
627 llvm_unreachable("Logical operators handled by branching logic.");
628 case BO_Assign:
629 case BO_MulAssign:
630 case BO_DivAssign:
631 case BO_RemAssign:
632 case BO_AddAssign:
633 case BO_SubAssign:
634 case BO_ShlAssign:
635 case BO_ShrAssign:
636 case BO_AndAssign:
637 case BO_XorAssign:
638 case BO_OrAssign:
639 case BO_Comma:
640 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
641 case BO_PtrMemD:
642 case BO_PtrMemI:
643 llvm_unreachable("Pointer arithmetic not handled here.");
644 case BO_LT:
645 case BO_GT:
646 case BO_LE:
647 case BO_GE:
648 case BO_EQ:
649 case BO_NE:
650 assert(resultTy->isBooleanType() ||
651 resultTy == getConditionType());
652 assert(symIntExpr->getType()->isBooleanType() ||
653 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
654 getConditionType()));
655 // Negate the comparison and make a value.
657 return makeNonLoc(symIntExpr->getLHS(), opc,
658 symIntExpr->getRHS(), resultTy);
659 }
660 }
661
662 // For now, only handle expressions whose RHS is a constant.
663 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) {
664 // If both the LHS and the current expression are additive,
665 // fold their constants and try again.
667 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
669 // Convert the two constants to a common type, then combine them.
670
671 // resultTy may not be the best type to convert to, but it's
672 // probably the best choice in expressions with mixed type
673 // (such as x+1U+2LL). The rules for implicit conversions should
674 // choose a reasonable type to preserve the expression, and will
675 // at least match how the value is going to be used.
676 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
677 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
678 const llvm::APSInt &second = IntType.convert(*RHSValue);
679
680 // If the op and lop agrees, then we just need to
681 // sum the constants. Otherwise, we change to operation
682 // type if substraction would produce negative value
683 // (and cause overflow for unsigned integers),
684 // as consequence x+1U-10 produces x-9U, instead
685 // of x+4294967287U, that would be produced without this
686 // additional check.
687 std::optional<APSIntPtr> newRHS;
688 if (lop == op) {
689 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
690 } else if (first >= second) {
691 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
692 op = lop;
693 } else {
694 newRHS = BasicVals.evalAPSInt(BO_Sub, second, first);
695 }
696
697 assert(newRHS && "Invalid operation despite common type!");
698 rhs = nonloc::ConcreteInt(*newRHS);
699 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
700 continue;
701 }
702 }
703
704 // Otherwise, make a SymIntExpr out of the expression.
705 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
706 }
707 }
708
709 // Is the RHS a constant?
710 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs))
711 return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
712
713 if (std::optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
714 return *V;
715
716 // Give up -- this is not a symbolic expression we can handle.
717 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
718 }
719 }
720 }
721}
722
724 const FieldRegion *RightFR,
726 QualType resultTy,
727 SimpleSValBuilder &SVB) {
728 // Only comparisons are meaningful here!
730 return UnknownVal();
731
732 // Next, see if the two FRs have the same super-region.
733 // FIXME: This doesn't handle casts yet, and simply stripping the casts
734 // doesn't help.
735 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
736 return UnknownVal();
737
738 const FieldDecl *LeftFD = LeftFR->getDecl();
739 const FieldDecl *RightFD = RightFR->getDecl();
740 const RecordDecl *RD = LeftFD->getParent();
741
742 // Make sure the two FRs are from the same kind of record. Just in case!
743 // FIXME: This is probably where inheritance would be a problem.
744 if (RD != RightFD->getParent())
745 return UnknownVal();
746
747 // We know for sure that the two fields are not the same, since that
748 // would have given us the same SVal.
749 if (op == BO_EQ)
750 return SVB.makeTruthVal(false, resultTy);
751 if (op == BO_NE)
752 return SVB.makeTruthVal(true, resultTy);
753
754 // Iterate through the fields and see which one comes first.
755 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
756 // members and the units in which bit-fields reside have addresses that
757 // increase in the order in which they are declared."
758 bool leftFirst = (op == BO_LT || op == BO_LE);
759 for (const auto *I : RD->fields()) {
760 if (I == LeftFD)
761 return SVB.makeTruthVal(leftFirst, resultTy);
762 if (I == RightFD)
763 return SVB.makeTruthVal(!leftFirst, resultTy);
764 }
765
766 llvm_unreachable("Fields not found in parent record's definition");
767}
768
769// This is used in debug builds only for now because some downstream users
770// may hit this assert in their subsequent merges.
771// There are still places in the analyzer where equal bitwidth Locs
772// are compared, and need to be found and corrected. Recent previous fixes have
773// addressed the known problems of making NULLs with specific bitwidths
774// for Loc comparisons along with deprecation of APIs for the same purpose.
775//
776static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
777 Loc LhsLoc) {
778 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
779 ASTContext &Ctx = State->getStateManager().getContext();
780 uint64_t RhsBitwidth =
781 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
782 uint64_t LhsBitwidth =
783 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
784 if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) {
785 assert(RhsBitwidth == LhsBitwidth &&
786 "RhsLoc and LhsLoc bitwidth must be same!");
787 }
788}
789
790// FIXME: all this logic will change if/when we have MemRegion::getLocation().
791SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
793 Loc lhs, Loc rhs,
794 QualType resultTy) {
795
796 // Assert that bitwidth of lhs and rhs are the same.
797 // This can happen if two different address spaces are used,
798 // and the bitwidths of the address spaces are different.
799 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
800 // FIXME: See comment above in the function assertEqualBitWidths
801 assertEqualBitWidths(state, rhs, lhs);
802
803 // Only comparisons and subtractions are valid operations on two pointers.
804 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
805 // However, if a pointer is casted to an integer, evalBinOpNN may end up
806 // calling this function with another operation (PR7527). We don't attempt to
807 // model this for now, but it could be useful, particularly when the
808 // "location" is actually an integer value that's been passed through a void*.
809 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
810 return UnknownVal();
811
812 // Special cases for when both sides are identical.
813 if (lhs == rhs) {
814 switch (op) {
815 default:
816 llvm_unreachable("Unimplemented operation for two identical values");
817 case BO_Sub:
818 return makeZeroVal(resultTy);
819 case BO_EQ:
820 case BO_LE:
821 case BO_GE:
822 return makeTruthVal(true, resultTy);
823 case BO_NE:
824 case BO_LT:
825 case BO_GT:
826 return makeTruthVal(false, resultTy);
827 }
828 }
829
830 switch (lhs.getKind()) {
831 default:
832 llvm_unreachable("Ordering not implemented for this Loc.");
833
834 case loc::GotoLabelKind:
835 // The only thing we know about labels is that they're non-null.
836 if (rhs.isZeroConstant()) {
837 switch (op) {
838 default:
839 break;
840 case BO_Sub:
841 return evalCast(lhs, resultTy, QualType{});
842 case BO_EQ:
843 case BO_LE:
844 case BO_LT:
845 return makeTruthVal(false, resultTy);
846 case BO_NE:
847 case BO_GT:
848 case BO_GE:
849 return makeTruthVal(true, resultTy);
850 }
851 }
852 // There may be two labels for the same location, and a function region may
853 // have the same address as a label at the start of the function (depending
854 // on the ABI).
855 // FIXME: we can probably do a comparison against other MemRegions, though.
856 // FIXME: is there a way to tell if two labels refer to the same location?
857 return UnknownVal();
858
859 case loc::ConcreteIntKind: {
860 auto L = lhs.castAs<loc::ConcreteInt>();
861
862 // If one of the operands is a symbol and the other is a constant,
863 // build an expression for use by the constraint manager.
864 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
865 if (op == BO_Cmp)
866 return UnknownVal();
867
869 return makeNonLoc(L.getValue(), op, rSym, resultTy);
870
872 return makeNonLoc(rSym, op, L.getValue(), resultTy);
873 }
874
875 // If both operands are constants, just perform the operation.
876 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
877 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
878
879 if (std::optional<APSIntPtr> ResultInt =
880 BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue()))
881 return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
882 return UnknownVal();
883 }
884
885 // Special case comparisons against NULL.
886 // This must come after the test if the RHS is a symbol, which is used to
887 // build constraints. The address of any non-symbolic region is guaranteed
888 // to be non-NULL, as is any label.
889 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
890 if (lhs.isZeroConstant()) {
891 switch (op) {
892 default:
893 break;
894 case BO_EQ:
895 case BO_GT:
896 case BO_GE:
897 return makeTruthVal(false, resultTy);
898 case BO_NE:
899 case BO_LT:
900 case BO_LE:
901 return makeTruthVal(true, resultTy);
902 }
903 }
904
905 // Comparing an arbitrary integer to a region or label address is
906 // completely unknowable.
907 return UnknownVal();
908 }
909 case loc::MemRegionValKind: {
910 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
911 // If one of the operands is a symbol and the other is a constant,
912 // build an expression for use by the constraint manager.
913 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
915 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
916 return UnknownVal();
917 }
918 // Special case comparisons to NULL.
919 // This must come after the test if the LHS is a symbol, which is used to
920 // build constraints. The address of any non-symbolic region is guaranteed
921 // to be non-NULL.
922 if (rInt->isZeroConstant()) {
923 if (op == BO_Sub)
924 return evalCast(lhs, resultTy, QualType{});
925
927 QualType boolType = getContext().BoolTy;
928 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
929 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
930 return evalBinOpNN(state, op, l, r, resultTy);
931 }
932 }
933
934 // Comparing a region to an arbitrary integer is completely unknowable.
935 return UnknownVal();
936 }
937
938 // Get both values as regions, if possible.
939 const MemRegion *LeftMR = lhs.getAsRegion();
940 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
941
942 const MemRegion *RightMR = rhs.getAsRegion();
943 if (!RightMR)
944 // The RHS is probably a label, which in theory could address a region.
945 // FIXME: we can probably make a more useful statement about non-code
946 // regions, though.
947 return UnknownVal();
948
949 const MemRegion *LeftBase = LeftMR->getBaseRegion();
950 const MemRegion *RightBase = RightMR->getBaseRegion();
951 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
952 const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
953 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
954
955 // If the two regions are from different known memory spaces they cannot be
956 // equal. Also, assume that no symbolic region (whose memory space is
957 // unknown) is on the stack.
958 if (LeftMS != RightMS &&
959 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
960 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
961 switch (op) {
962 default:
963 return UnknownVal();
964 case BO_EQ:
965 return makeTruthVal(false, resultTy);
966 case BO_NE:
967 return makeTruthVal(true, resultTy);
968 }
969 }
970
971 // If both values wrap regions, see if they're from different base regions.
972 // Note, heap base symbolic regions are assumed to not alias with
973 // each other; for example, we assume that malloc returns different address
974 // on each invocation.
975 // FIXME: ObjC object pointers always reside on the heap, but currently
976 // we treat their memory space as unknown, because symbolic pointers
977 // to ObjC objects may alias. There should be a way to construct
978 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
979 // guesses memory space for ObjC object pointers manually instead of
980 // relying on us.
981 if (LeftBase != RightBase &&
982 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
983 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
984 switch (op) {
985 default:
986 return UnknownVal();
987 case BO_EQ:
988 return makeTruthVal(false, resultTy);
989 case BO_NE:
990 return makeTruthVal(true, resultTy);
991 }
992 }
993
994 // Handle special cases for when both regions are element regions.
995 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
996 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
997 if (RightER && LeftER) {
998 // Next, see if the two ERs have the same super-region and matching types.
999 // FIXME: This should do something useful even if the types don't match,
1000 // though if both indexes are constant the RegionRawOffset path will
1001 // give the correct answer.
1002 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1003 LeftER->getElementType() == RightER->getElementType()) {
1004 // Get the left index and cast it to the correct type.
1005 // If the index is unknown or undefined, bail out here.
1006 SVal LeftIndexVal = LeftER->getIndex();
1007 std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1008 if (!LeftIndex)
1009 return UnknownVal();
1010 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
1011 LeftIndex = LeftIndexVal.getAs<NonLoc>();
1012 if (!LeftIndex)
1013 return UnknownVal();
1014
1015 // Do the same for the right index.
1016 SVal RightIndexVal = RightER->getIndex();
1017 std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1018 if (!RightIndex)
1019 return UnknownVal();
1020 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
1021 RightIndex = RightIndexVal.getAs<NonLoc>();
1022 if (!RightIndex)
1023 return UnknownVal();
1024
1025 // Actually perform the operation.
1026 // evalBinOpNN expects the two indexes to already be the right type.
1027 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
1028 }
1029 }
1030
1031 // Special handling of the FieldRegions, even with symbolic offsets.
1032 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1033 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1034 if (RightFR && LeftFR) {
1035 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1036 *this);
1037 if (!R.isUnknown())
1038 return R;
1039 }
1040
1041 // Compare the regions using the raw offsets.
1042 RegionOffset LeftOffset = LeftMR->getAsOffset();
1043 RegionOffset RightOffset = RightMR->getAsOffset();
1044
1045 if (LeftOffset.getRegion() != nullptr &&
1046 LeftOffset.getRegion() == RightOffset.getRegion() &&
1047 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1048 int64_t left = LeftOffset.getOffset();
1049 int64_t right = RightOffset.getOffset();
1050
1051 switch (op) {
1052 default:
1053 return UnknownVal();
1054 case BO_LT:
1055 return makeTruthVal(left < right, resultTy);
1056 case BO_GT:
1057 return makeTruthVal(left > right, resultTy);
1058 case BO_LE:
1059 return makeTruthVal(left <= right, resultTy);
1060 case BO_GE:
1061 return makeTruthVal(left >= right, resultTy);
1062 case BO_EQ:
1063 return makeTruthVal(left == right, resultTy);
1064 case BO_NE:
1065 return makeTruthVal(left != right, resultTy);
1066 }
1067 }
1068
1069 // At this point we're not going to get a good answer, but we can try
1070 // conjuring an expression instead.
1071 SymbolRef LHSSym = lhs.getAsLocSymbol();
1072 SymbolRef RHSSym = rhs.getAsLocSymbol();
1073 if (LHSSym && RHSSym)
1074 return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1075
1076 // If we get here, we have no way of comparing the regions.
1077 return UnknownVal();
1078 }
1079 }
1080}
1081
1082SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1084 NonLoc rhs, QualType resultTy) {
1085 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1086 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1087 if (PTMSV->isNullMemberPointer())
1088 return UndefinedVal();
1089
1090 auto getFieldLValue = [&](const auto *FD) -> SVal {
1091 SVal Result = lhs;
1092
1093 for (const auto &I : *PTMSV)
1094 Result = StateMgr.getStoreManager().evalDerivedToBase(
1095 Result, I->getType(), I->isVirtual());
1096
1097 return state->getLValue(FD, Result);
1098 };
1099
1100 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1101 return getFieldLValue(FD);
1102 }
1103 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1104 return getFieldLValue(FD);
1105 }
1106 }
1107
1108 return rhs;
1109 }
1110
1111 assert(!BinaryOperator::isComparisonOp(op) &&
1112 "arguments to comparison ops must be of the same type");
1113
1114 // Special case: rhs is a zero constant.
1115 if (rhs.isZeroConstant())
1116 return lhs;
1117
1118 // Perserve the null pointer so that it can be found by the DerefChecker.
1119 if (lhs.isZeroConstant())
1120 return lhs;
1121
1122 // We are dealing with pointer arithmetic.
1123
1124 // Handle pointer arithmetic on constant values.
1125 if (std::optional<nonloc::ConcreteInt> rhsInt =
1126 rhs.getAs<nonloc::ConcreteInt>()) {
1127 if (std::optional<loc::ConcreteInt> lhsInt =
1128 lhs.getAs<loc::ConcreteInt>()) {
1129 const llvm::APSInt &leftI = lhsInt->getValue();
1130 assert(leftI.isUnsigned());
1131 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1132
1133 // Convert the bitwidth of rightI. This should deal with overflow
1134 // since we are dealing with concrete values.
1135 rightI = rightI.extOrTrunc(leftI.getBitWidth());
1136
1137 // Offset the increment by the pointer size.
1138 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1139 QualType pointeeType = resultTy->getPointeeType();
1140 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1141 rightI *= Multiplicand;
1142
1143 // Compute the adjusted pointer.
1144 switch (op) {
1145 case BO_Add:
1146 rightI = leftI + rightI;
1147 break;
1148 case BO_Sub:
1149 rightI = leftI - rightI;
1150 break;
1151 default:
1152 llvm_unreachable("Invalid pointer arithmetic operation");
1153 }
1154 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1155 }
1156 }
1157
1158 // Handle cases where 'lhs' is a region.
1159 if (const MemRegion *region = lhs.getAsRegion()) {
1160 rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1161 SVal index = UnknownVal();
1162 const SubRegion *superR = nullptr;
1163 // We need to know the type of the pointer in order to add an integer to it.
1164 // Depending on the type, different amount of bytes is added.
1165 QualType elementType;
1166
1167 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1168 assert(op == BO_Add || op == BO_Sub);
1169 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1170 getArrayIndexType());
1171 superR = cast<SubRegion>(elemReg->getSuperRegion());
1172 elementType = elemReg->getElementType();
1173 }
1174 else if (isa<SubRegion>(region)) {
1175 assert(op == BO_Add || op == BO_Sub);
1176 index = (op == BO_Add) ? rhs : evalMinus(rhs);
1177 superR = cast<SubRegion>(region);
1178 // TODO: Is this actually reliable? Maybe improving our MemRegion
1179 // hierarchy to provide typed regions for all non-void pointers would be
1180 // better. For instance, we cannot extend this towards LocAsInteger
1181 // operations, where result type of the expression is integer.
1182 if (resultTy->isAnyPointerType())
1183 elementType = resultTy->getPointeeType();
1184 }
1185
1186 // Represent arithmetic on void pointers as arithmetic on char pointers.
1187 // It is fine when a TypedValueRegion of char value type represents
1188 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1189 if (elementType->isVoidType())
1190 elementType = getContext().CharTy;
1191
1192 if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1193 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1194 superR, getContext()));
1195 }
1196 }
1197 return UnknownVal();
1198}
1199
1200const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1201 SVal V) {
1202 if (const llvm::APSInt *Res = getConcreteValue(V))
1203 return Res;
1204
1205 if (SymbolRef Sym = V.getAsSymbol())
1206 return state->getConstraintManager().getSymVal(state, Sym);
1207
1208 return nullptr;
1209}
1210
1211const llvm::APSInt *SimpleSValBuilder::getConcreteValue(SVal V) {
1212 if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1213 return X->getValue().get();
1214
1215 if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1216 return X->getValue().get();
1217
1218 return nullptr;
1219}
1220
1221const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1222 SVal V) {
1223 return getConstValue(state, simplifySVal(state, V));
1224}
1225
1226const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state,
1227 SVal V) {
1228 V = simplifySVal(state, V);
1229
1230 if (const llvm::APSInt *Res = getConcreteValue(V))
1231 return Res;
1232
1233 if (SymbolRef Sym = V.getAsSymbol())
1234 return state->getConstraintManager().getSymMinVal(state, Sym);
1235
1236 return nullptr;
1237}
1238
1239const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state,
1240 SVal V) {
1241 V = simplifySVal(state, V);
1242
1243 if (const llvm::APSInt *Res = getConcreteValue(V))
1244 return Res;
1245
1246 if (SymbolRef Sym = V.getAsSymbol())
1247 return state->getConstraintManager().getSymMaxVal(state, Sym);
1248
1249 return nullptr;
1250}
1251
1252SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1253 SVal SimplifiedVal = simplifySValOnce(State, Val);
1254 while (SimplifiedVal != Val) {
1255 Val = SimplifiedVal;
1256 SimplifiedVal = simplifySValOnce(State, Val);
1257 }
1258 return SimplifiedVal;
1259}
1260
1261SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1262 return simplifyUntilFixpoint(State, V);
1263}
1264
1265SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1266 // For now, this function tries to constant-fold symbols inside a
1267 // nonloc::SymbolVal, and does nothing else. More simplifications should
1268 // be possible, such as constant-folding an index in an ElementRegion.
1269
1270 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1271 ProgramStateRef State;
1272 SValBuilder &SVB;
1273
1274 // Cache results for the lifetime of the Simplifier. Results change every
1275 // time new constraints are added to the program state, which is the whole
1276 // point of simplifying, and for that very reason it's pointless to maintain
1277 // the same cache for the duration of the whole analysis.
1278 llvm::DenseMap<SymbolRef, SVal> Cached;
1279
1280 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1281 return Sym == Val.getAsSymbol();
1282 }
1283
1284 SVal cache(SymbolRef Sym, SVal V) {
1285 Cached[Sym] = V;
1286 return V;
1287 }
1288
1289 SVal skip(SymbolRef Sym) {
1290 return cache(Sym, SVB.makeSymbolVal(Sym));
1291 }
1292
1293 // Return the known const value for the Sym if available, or return Undef
1294 // otherwise.
1295 SVal getConst(SymbolRef Sym) {
1296 const llvm::APSInt *Const =
1297 State->getConstraintManager().getSymVal(State, Sym);
1298 if (Const)
1299 return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1300 : (SVal)SVB.makeIntVal(*Const);
1301 return UndefinedVal();
1302 }
1303
1304 SVal getConstOrVisit(SymbolRef Sym) {
1305 const SVal Ret = getConst(Sym);
1306 if (Ret.isUndef())
1307 return Visit(Sym);
1308 return Ret;
1309 }
1310
1311 public:
1312 Simplifier(ProgramStateRef State)
1313 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1314
1315 SVal VisitSymbolData(const SymbolData *S) {
1316 // No cache here.
1317 if (const llvm::APSInt *I =
1318 State->getConstraintManager().getSymVal(State, S))
1319 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1320 : (SVal)SVB.makeIntVal(*I);
1321 return SVB.makeSymbolVal(S);
1322 }
1323
1324 SVal VisitSymIntExpr(const SymIntExpr *S) {
1325 auto I = Cached.find(S);
1326 if (I != Cached.end())
1327 return I->second;
1328
1329 SVal LHS = getConstOrVisit(S->getLHS());
1330 if (isUnchanged(S->getLHS(), LHS))
1331 return skip(S);
1332
1333 SVal RHS;
1334 // By looking at the APSInt in the right-hand side of S, we cannot
1335 // figure out if it should be treated as a Loc or as a NonLoc.
1336 // So make our guess by recalling that we cannot multiply pointers
1337 // or compare a pointer to an integer.
1338 if (Loc::isLocType(S->getLHS()->getType()) &&
1339 BinaryOperator::isComparisonOp(S->getOpcode())) {
1340 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1341 // the same meaning for Loc-type symbols, but the latter form
1342 // is preferred in SVal computations for being Loc itself.
1343 if (SymbolRef Sym = LHS.getAsSymbol()) {
1344 assert(Loc::isLocType(Sym->getType()));
1345 LHS = SVB.makeLoc(Sym);
1346 }
1347 RHS = SVB.makeIntLocVal(S->getRHS());
1348 } else {
1349 RHS = SVB.makeIntVal(S->getRHS());
1350 }
1351
1352 return cache(
1353 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1354 }
1355
1356 SVal VisitIntSymExpr(const IntSymExpr *S) {
1357 auto I = Cached.find(S);
1358 if (I != Cached.end())
1359 return I->second;
1360
1361 SVal RHS = getConstOrVisit(S->getRHS());
1362 if (isUnchanged(S->getRHS(), RHS))
1363 return skip(S);
1364
1365 SVal LHS = SVB.makeIntVal(S->getLHS());
1366 return cache(
1367 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1368 }
1369
1370 SVal VisitSymSymExpr(const SymSymExpr *S) {
1371 auto I = Cached.find(S);
1372 if (I != Cached.end())
1373 return I->second;
1374
1375 // For now don't try to simplify mixed Loc/NonLoc expressions
1376 // because they often appear from LocAsInteger operations
1377 // and we don't know how to combine a LocAsInteger
1378 // with a concrete value.
1379 if (Loc::isLocType(S->getLHS()->getType()) !=
1380 Loc::isLocType(S->getRHS()->getType()))
1381 return skip(S);
1382
1383 SVal LHS = getConstOrVisit(S->getLHS());
1384 SVal RHS = getConstOrVisit(S->getRHS());
1385
1386 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1387 return skip(S);
1388
1389 return cache(
1390 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1391 }
1392
1393 SVal VisitSymbolCast(const SymbolCast *S) {
1394 auto I = Cached.find(S);
1395 if (I != Cached.end())
1396 return I->second;
1397 const SymExpr *OpSym = S->getOperand();
1398 SVal OpVal = getConstOrVisit(OpSym);
1399 if (isUnchanged(OpSym, OpVal))
1400 return skip(S);
1401
1402 return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType()));
1403 }
1404
1405 SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1406 auto I = Cached.find(S);
1407 if (I != Cached.end())
1408 return I->second;
1409 SVal Op = getConstOrVisit(S->getOperand());
1410 if (isUnchanged(S->getOperand(), Op))
1411 return skip(S);
1412
1413 return cache(
1414 S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType()));
1415 }
1416
1418
1419 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1420
1421 SVal VisitSymbolVal(nonloc::SymbolVal V) {
1422 // Simplification is much more costly than computing complexity.
1423 // For high complexity, it may be not worth it.
1424 return Visit(V.getSymbol());
1425 }
1426
1427 SVal VisitSVal(SVal V) { return V; }
1428 };
1429
1430 SVal SimplifiedV = Simplifier(State).Visit(V);
1431 return SimplifiedV;
1432}
#define V(N, I)
Definition: ASTContext.h:3443
static std::optional< int64_t > getConcreteValue(NonLoc SV)
#define X(type, name)
Definition: Value.h:144
static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, llvm::APSInt Bound, ProgramStateRef State)
static NonLoc doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef LSym, llvm::APSInt LInt, SymbolRef RSym, llvm::APSInt RInt)
static bool isNegationValuePreserving(const llvm::APSInt &Value, APSIntType ResultType)
static std::optional< NonLoc > tryRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, NonLoc Lhs, NonLoc Rhs, QualType ResultTy)
static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef Sym, llvm::APSInt Int, QualType Ty)
static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, const FieldRegion *RightFR, BinaryOperator::Opcode op, QualType resultTy, SimpleSValBuilder &SVB)
static bool isWithinConstantOverflowBounds(SymbolRef Sym, ProgramStateRef State)
static std::pair< SymbolRef, APSIntPtr > decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV)
static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, Loc LhsLoc)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2482
bool isComparisonOp() const
Definition: Expr.h:4010
static Opcode negateComparisonOp(Opcode Opc)
Definition: Expr.h:4015
static Opcode reverseComparisonOp(Opcode Opc)
Definition: Expr.h:4028
bool isShiftOp() const
Definition: Expr.h:3998
bool isAdditiveOp() const
Definition: Expr.h:3996
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition: Decl.h:3250
Represents a field injected from an anonymous union/struct into the parent scope.
Definition: Decl.h:3321
A (possibly-)qualified type.
Definition: Type.h:929
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:996
Represents a struct/union/class.
Definition: Decl.h:4148
field_range fields() const
Definition: Decl.h:4354
bool isVoidType() const
Definition: Type.h:8510
bool isBooleanType() const
Definition: Type.h:8638
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2201
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8625
bool isAnyPointerType() const
Definition: Type.h:8194
A safe wrapper around APSInt objects allocated and owned by BasicValueFactory.
Definition: APSIntPtr.h:19
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:19
bool isUnsigned() const
Definition: APSIntType.h:31
uint32_t getBitWidth() const
Definition: APSIntType.h:30
llvm::APSInt getMaxValue() const LLVM_READONLY
Returns the maximum value for this type.
Definition: APSIntType.h:65
void apply(llvm::APSInt &Value) const
Convert a given APSInt, in place, to match this type.
Definition: APSIntType.h:37
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Definition: APSIntType.h:48
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:69
APSIntType getAPSIntType(QualType T) const
Returns the type of the APSInt used to store values of the given QualType.
Template implementation for all binary symbolic expressions.
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1199
QualType getElementType() const
Definition: MemRegion.h:1223
NonLoc getIndex() const
Definition: MemRegion.h:1219
LLVM_ATTRIBUTE_RETURNS_NONNULL const FieldDecl * getDecl() const override
Definition: MemRegion.h:1125
FullSValVisitor - a convenient mixed visitor for all three: SVal, SymExpr and MemRegion subclasses.
Definition: SValVisitor.h:130
static bool isLocType(QualType T)
Definition: SVals.h:262
RetTy VisitMemRegion(const MemRegion *R)
Definition: SValVisitor.h:120
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
Definition: MemRegion.cpp:1351
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
Definition: MemRegion.cpp:1683
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1377
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
Definition: MemRegion.h:208
Represent a region's offset within the top level base region.
Definition: MemRegion.h:64
bool hasSymbolicOffset() const
Definition: MemRegion.h:82
const MemRegion * getRegion() const
It might return null.
Definition: MemRegion.h:80
int64_t getOffset() const
Definition: MemRegion.h:84
virtual const llvm::APSInt * getKnownValue(ProgramStateRef state, SVal val)=0
Evaluates a given SVal.
BasicValueFactory & getBasicValueFactory()
Definition: SValBuilder.h:161
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:288
loc::MemRegionVal makeLoc(SymbolRef sym)
Definition: SValBuilder.h:374
virtual const llvm::APSInt * getMinValue(ProgramStateRef state, SVal val)=0
Tries to get the minimal possible (integer) value of a given SVal.
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
DefinedSVal makeSymbolVal(SymbolRef Sym)
Make an SVal that represents the given symbol.
Definition: SValBuilder.h:397
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
const AnalyzerOptions & getAnalyzerOptions() const
Definition: SValBuilder.h:170
virtual SVal simplifySVal(ProgramStateRef State, SVal Val)=0
Simplify symbolic expressions within a given SVal.
QualType getConditionType() const
Definition: SValBuilder.h:153
SVal evalUnaryOp(ProgramStateRef state, UnaryOperator::Opcode opc, SVal operand, QualType type)
virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0
Tries to get the maximal possible (integer) value of a given SVal.
SymbolManager & getSymbolManager()
Definition: SValBuilder.h:164
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
loc::ConcreteInt makeIntLocVal(const llvm::APSInt &integer)
Definition: SValBuilder.h:304
RetTy VisitSVal(SVal V)
Definition: SValVisitor.h:61
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:56
bool isZeroConstant() const
Definition: SVals.cpp:258
SValKind getKind() const
Definition: SVals.h:91
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:104
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:87
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
Definition: SVals.cpp:181
SymbolRef getAsLocSymbol(bool IncludeBaseRegions=false) const
If this SVal is a location and wraps a symbol, return that SymbolRef.
Definition: SVals.cpp:68
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:83
bool isUnknown() const
Definition: SVals.h:105
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:446
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition: MemRegion.h:459
RetTy VisitSymExpr(SymbolRef S)
Definition: SValVisitor.h:89
Symbolic value.
Definition: SymExpr.h:30
virtual QualType getType() const =0
Represents a cast expression.
A symbol representing data which can be stored in a memory location (region).
Definition: SymExpr.h:119
const SymIntExpr * getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, APSIntPtr rhs, QualType t)
const SymSymExpr * getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t)
Represents a symbolic expression involving a unary operator.
Value representing integer constant.
Definition: SVals.h:300
Value representing pointer-to-member.
Definition: SVals.h:434
const PTMDataType getPTMData() const
Definition: SVals.h:441
Represents symbolic expression that isn't a location.
Definition: SVals.h:279
SValBuilder * createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr)
bool Const(InterpState &S, CodePtr OpPC, const T &Arg)
Definition: Interp.h:1243
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind
const FunctionProtoType * T
unsigned long uint64_t
long int64_t