clang 17.0.0git
SimpleSValBuilder.cpp
Go to the documentation of this file.
1// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10//
11//===----------------------------------------------------------------------===//
12
18#include <optional>
19
20using namespace clang;
21using namespace ento;
22
23namespace {
24class SimpleSValBuilder : public SValBuilder {
25
26 // Query the constraint manager whether the SVal has only one possible
27 // (integer) value. If that is the case, the value is returned. Otherwise,
28 // returns NULL.
29 // This is an implementation detail. Checkers should use `getKnownValue()`
30 // instead.
31 const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
32
33 // With one `simplifySValOnce` call, a compound symbols might collapse to
34 // simpler symbol tree that is still possible to further simplify. Thus, we
35 // do the simplification on a new symbol tree until we reach the simplest
36 // form, i.e. the fixpoint.
37 // Consider the following symbol `(b * b) * b * b` which has this tree:
38 // *
39 // / \
40 // * b
41 // / \
42 // / b
43 // (b * b)
44 // Now, if the `b * b == 1` new constraint is added then during the first
45 // iteration we have the following transformations:
46 // * *
47 // / \ / \
48 // * b --> b b
49 // / \
50 // / b
51 // 1
52 // We need another iteration to reach the final result `1`.
53 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
54
55 // Recursively descends into symbolic expressions and replaces symbols
56 // with their known values (in the sense of the getConstValue() method).
57 // We traverse the symbol tree and query the constraint values for the
58 // sub-trees and if a value is a constant we do the constant folding.
59 SVal simplifySValOnce(ProgramStateRef State, SVal V);
60
61public:
62 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
63 ProgramStateManager &stateMgr)
64 : SValBuilder(alloc, context, stateMgr) {}
65 ~SimpleSValBuilder() override {}
66
68 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
70 Loc lhs, Loc rhs, QualType resultTy) override;
72 Loc lhs, NonLoc rhs, QualType resultTy) override;
73
74 /// Evaluates a given SVal by recursively evaluating and
75 /// simplifying the children SVals. If the SVal has only one possible
76 /// (integer) value, that value is returned. Otherwise, returns NULL.
77 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
78
79 SVal simplifySVal(ProgramStateRef State, SVal V) override;
80
81 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
82 const llvm::APSInt &RHS, QualType resultTy);
83};
84} // end anonymous namespace
85
86SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
87 ASTContext &context,
88 ProgramStateManager &stateMgr) {
89 return new SimpleSValBuilder(alloc, context, stateMgr);
90}
91
92// Checks if the negation the value and flipping sign preserve
93// the semantics on the operation in the resultType
94static bool isNegationValuePreserving(const llvm::APSInt &Value,
95 APSIntType ResultType) {
96 const unsigned ValueBits = Value.getSignificantBits();
97 if (ValueBits == ResultType.getBitWidth()) {
98 // The value is the lowest negative value that is representable
99 // in signed integer with bitWith of result type. The
100 // negation is representable if resultType is unsigned.
101 return ResultType.isUnsigned();
102 }
103
104 // If resultType bitWith is higher that number of bits required
105 // to represent RHS, the sign flip produce same value.
106 return ValueBits < ResultType.getBitWidth();
107}
108
109//===----------------------------------------------------------------------===//
110// Transfer function for binary operators.
111//===----------------------------------------------------------------------===//
112
113SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
115 const llvm::APSInt &RHS,
116 QualType resultTy) {
117 bool isIdempotent = false;
118
119 // Check for a few special cases with known reductions first.
120 switch (op) {
121 default:
122 // We can't reduce this case; just treat it normally.
123 break;
124 case BO_Mul:
125 // a*0 and a*1
126 if (RHS == 0)
127 return makeIntVal(0, resultTy);
128 else if (RHS == 1)
129 isIdempotent = true;
130 break;
131 case BO_Div:
132 // a/0 and a/1
133 if (RHS == 0)
134 // This is also handled elsewhere.
135 return UndefinedVal();
136 else if (RHS == 1)
137 isIdempotent = true;
138 break;
139 case BO_Rem:
140 // a%0 and a%1
141 if (RHS == 0)
142 // This is also handled elsewhere.
143 return UndefinedVal();
144 else if (RHS == 1)
145 return makeIntVal(0, resultTy);
146 break;
147 case BO_Add:
148 case BO_Sub:
149 case BO_Shl:
150 case BO_Shr:
151 case BO_Xor:
152 // a+0, a-0, a<<0, a>>0, a^0
153 if (RHS == 0)
154 isIdempotent = true;
155 break;
156 case BO_And:
157 // a&0 and a&(~0)
158 if (RHS == 0)
159 return makeIntVal(0, resultTy);
160 else if (RHS.isAllOnes())
161 isIdempotent = true;
162 break;
163 case BO_Or:
164 // a|0 and a|(~0)
165 if (RHS == 0)
166 isIdempotent = true;
167 else if (RHS.isAllOnes()) {
168 const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
169 return nonloc::ConcreteInt(Result);
170 }
171 break;
172 }
173
174 // Idempotent ops (like a*1) can still change the type of an expression.
175 // Wrap the LHS up in a NonLoc again and let evalCast do the
176 // dirty work.
177 if (isIdempotent)
178 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
179
180 // If we reach this point, the expression cannot be simplified.
181 // Make a SymbolVal for the entire expression, after converting the RHS.
182 const llvm::APSInt *ConvertedRHS = &RHS;
184 // We're looking for a type big enough to compare the symbolic value
185 // with the given constant.
186 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
187 ASTContext &Ctx = getContext();
188 QualType SymbolType = LHS->getType();
189 uint64_t ValWidth = RHS.getBitWidth();
190 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
191
192 if (ValWidth < TypeWidth) {
193 // If the value is too small, extend it.
194 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
195 } else if (ValWidth == TypeWidth) {
196 // If the value is signed but the symbol is unsigned, do the comparison
197 // in unsigned space. [C99 6.3.1.8]
198 // (For the opposite case, the value is already unsigned.)
199 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
200 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
201 }
202 } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) {
203 // Change a+(-N) into a-N, and a-(-N) into a+N
204 // Adjust addition/subtraction of negative value, to
205 // subtraction/addition of the negated value.
206 APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy);
207 if (isNegationValuePreserving(RHS, resultIntTy)) {
208 ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS));
209 op = (op == BO_Add) ? BO_Sub : BO_Add;
210 } else {
211 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
212 }
213 } else
214 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
215
216 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
217}
218
219// See if Sym is known to be a relation Rel with Bound.
221 llvm::APSInt Bound, ProgramStateRef State) {
222 SValBuilder &SVB = State->getStateManager().getSValBuilder();
223 SVal Result =
224 SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
226 if (auto DV = Result.getAs<DefinedSVal>()) {
227 return !State->assume(*DV, false);
228 }
229 return false;
230}
231
232// See if Sym is known to be within [min/4, max/4], where min and max
233// are the bounds of the symbol's integral type. With such symbols,
234// some manipulations can be performed without the risk of overflow.
235// assume() doesn't cause infinite recursion because we should be dealing
236// with simpler symbols on every recursive call.
238 ProgramStateRef State) {
239 SValBuilder &SVB = State->getStateManager().getSValBuilder();
241
242 QualType T = Sym->getType();
244 "This only works with signed integers!");
245 APSIntType AT = BV.getAPSIntType(T);
246
247 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
248 return isInRelation(BO_LE, Sym, Max, State) &&
249 isInRelation(BO_GE, Sym, Min, State);
250}
251
252// Same for the concrete integers: see if I is within [min/4, max/4].
253static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
254 APSIntType AT(I);
255 assert(!AT.isUnsigned() &&
256 "This only works with signed integers!");
257
258 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
259 return (I <= Max) && (I >= -Max);
260}
261
262static std::pair<SymbolRef, llvm::APSInt>
264 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
265 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
266 return std::make_pair(SymInt->getLHS(),
267 (SymInt->getOpcode() == BO_Add) ?
268 (SymInt->getRHS()) :
269 (-SymInt->getRHS()));
270
271 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
272 return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
273}
274
275// Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
276// same signed integral type and no overflows occur (which should be checked
277// by the caller).
280 SymbolRef LSym, llvm::APSInt LInt,
281 SymbolRef RSym, llvm::APSInt RInt) {
282 SValBuilder &SVB = State->getStateManager().getSValBuilder();
284 SymbolManager &SymMgr = SVB.getSymbolManager();
285
286 QualType SymTy = LSym->getType();
287 assert(SymTy == RSym->getType() &&
288 "Symbols are not of the same type!");
289 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
290 "Integers are not of the same type as symbols!");
291 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
292 "Integers are not of the same type as symbols!");
293
294 QualType ResultTy;
296 ResultTy = SVB.getConditionType();
297 else if (BinaryOperator::isAdditiveOp(Op))
298 ResultTy = SymTy;
299 else
300 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
301
302 if (LSym == RSym)
303 return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
304 nonloc::ConcreteInt(RInt), ResultTy)
305 .castAs<NonLoc>();
306
307 SymbolRef ResultSym = nullptr;
308 BinaryOperator::Opcode ResultOp;
309 llvm::APSInt ResultInt;
311 // Prefer comparing to a non-negative number.
312 // FIXME: Maybe it'd be better to have consistency in
313 // "$x - $y" vs. "$y - $x" because those are solver's keys.
314 if (LInt > RInt) {
315 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
317 ResultInt = LInt - RInt; // Opposite order!
318 } else {
319 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
320 ResultOp = Op;
321 ResultInt = RInt - LInt; // Opposite order!
322 }
323 } else {
324 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
325 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
326 ResultOp = BO_Add;
327 // Bring back the cosmetic difference.
328 if (ResultInt < 0) {
329 ResultInt = -ResultInt;
330 ResultOp = BO_Sub;
331 } else if (ResultInt == 0) {
332 // Shortcut: Simplify "$x + 0" to "$x".
333 return nonloc::SymbolVal(ResultSym);
334 }
335 }
336 const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
337 return nonloc::SymbolVal(
338 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
339}
340
341// Rearrange if symbol type matches the result type and if the operator is a
342// comparison operator, both symbol and constant must be within constant
343// overflow bounds.
345 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
346 return Sym->getType() == Ty &&
348 (isWithinConstantOverflowBounds(Sym, State) &&
350}
351
352static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
354 NonLoc Rhs, QualType ResultTy) {
355 ProgramStateManager &StateMgr = State->getStateManager();
356 SValBuilder &SVB = StateMgr.getSValBuilder();
357
358 // We expect everything to be of the same type - this type.
359 QualType SingleTy;
360
361 // FIXME: After putting complexity threshold to the symbols we can always
362 // rearrange additive operations but rearrange comparisons only if
363 // option is set.
364 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
365 return std::nullopt;
366
367 SymbolRef LSym = Lhs.getAsSymbol();
368 if (!LSym)
369 return std::nullopt;
370
372 SingleTy = LSym->getType();
373 if (ResultTy != SVB.getConditionType())
374 return std::nullopt;
375 // Initialize SingleTy later with a symbol's type.
376 } else if (BinaryOperator::isAdditiveOp(Op)) {
377 SingleTy = ResultTy;
378 if (LSym->getType() != SingleTy)
379 return std::nullopt;
380 } else {
381 // Don't rearrange other operations.
382 return std::nullopt;
383 }
384
385 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
386
387 // Rearrange signed symbolic expressions only
388 if (!SingleTy->isSignedIntegerOrEnumerationType())
389 return std::nullopt;
390
391 SymbolRef RSym = Rhs.getAsSymbol();
392 if (!RSym || RSym->getType() != SingleTy)
393 return std::nullopt;
394
395 BasicValueFactory &BV = State->getBasicVals();
396 llvm::APSInt LInt, RInt;
397 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
398 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
399 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
400 !shouldRearrange(State, Op, RSym, RInt, SingleTy))
401 return std::nullopt;
402
403 // We know that no overflows can occur anymore.
404 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
405}
406
407SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
409 NonLoc lhs, NonLoc rhs,
410 QualType resultTy) {
411 NonLoc InputLHS = lhs;
412 NonLoc InputRHS = rhs;
413
414 // Constraints may have changed since the creation of a bound SVal. Check if
415 // the values can be simplified based on those new constraints.
416 SVal simplifiedLhs = simplifySVal(state, lhs);
417 SVal simplifiedRhs = simplifySVal(state, rhs);
418 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
419 lhs = *simplifiedLhsAsNonLoc;
420 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
421 rhs = *simplifiedRhsAsNonLoc;
422
423 // Handle trivial case where left-side and right-side are the same.
424 if (lhs == rhs)
425 switch (op) {
426 default:
427 break;
428 case BO_EQ:
429 case BO_LE:
430 case BO_GE:
431 return makeTruthVal(true, resultTy);
432 case BO_LT:
433 case BO_GT:
434 case BO_NE:
435 return makeTruthVal(false, resultTy);
436 case BO_Xor:
437 case BO_Sub:
438 if (resultTy->isIntegralOrEnumerationType())
439 return makeIntVal(0, resultTy);
440 return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
441 QualType{});
442 case BO_Or:
443 case BO_And:
444 return evalCast(lhs, resultTy, QualType{});
445 }
446
447 while (true) {
448 switch (lhs.getSubKind()) {
449 default:
450 return makeSymExprValNN(op, lhs, rhs, resultTy);
451 case nonloc::PointerToMemberKind: {
452 assert(rhs.getSubKind() == nonloc::PointerToMemberKind &&
453 "Both SVals should have pointer-to-member-type");
454 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
455 RPTM = rhs.castAs<nonloc::PointerToMember>();
456 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
457 switch (op) {
458 case BO_EQ:
459 return makeTruthVal(LPTMD == RPTMD, resultTy);
460 case BO_NE:
461 return makeTruthVal(LPTMD != RPTMD, resultTy);
462 default:
463 return UnknownVal();
464 }
465 }
466 case nonloc::LocAsIntegerKind: {
467 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
468 switch (rhs.getSubKind()) {
469 case nonloc::LocAsIntegerKind:
470 // FIXME: at the moment the implementation
471 // of modeling "pointers as integers" is not complete.
473 return UnknownVal();
474 return evalBinOpLL(state, op, lhsL,
476 resultTy);
477 case nonloc::ConcreteIntKind: {
478 // FIXME: at the moment the implementation
479 // of modeling "pointers as integers" is not complete.
481 return UnknownVal();
482 // Transform the integer into a location and compare.
483 // FIXME: This only makes sense for comparisons. If we want to, say,
484 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
485 // then pack it back into a LocAsInteger.
486 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
487 // If the region has a symbolic base, pay attention to the type; it
488 // might be coming from a non-default address space. For non-symbolic
489 // regions it doesn't matter that much because such comparisons would
490 // most likely evaluate to concrete false anyway. FIXME: We might
491 // still need to handle the non-comparison case.
492 if (SymbolRef lSym = lhs.getAsLocSymbol(true))
493 BasicVals.getAPSIntType(lSym->getType()).apply(i);
494 else
495 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
496 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
497 }
498 default:
499 switch (op) {
500 case BO_EQ:
501 return makeTruthVal(false, resultTy);
502 case BO_NE:
503 return makeTruthVal(true, resultTy);
504 default:
505 // This case also handles pointer arithmetic.
506 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
507 }
508 }
509 }
510 case nonloc::ConcreteIntKind: {
511 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
512
513 // If we're dealing with two known constants, just perform the operation.
514 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) {
515 llvm::APSInt RHSValue = *KnownRHSValue;
517 // We're looking for a type big enough to compare the two values.
518 // FIXME: This is not correct. char + short will result in a promotion
519 // to int. Unfortunately we have lost types by this point.
520 APSIntType CompareType = std::max(APSIntType(LHSValue),
521 APSIntType(RHSValue));
522 CompareType.apply(LHSValue);
523 CompareType.apply(RHSValue);
524 } else if (!BinaryOperator::isShiftOp(op)) {
525 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
526 IntType.apply(LHSValue);
527 IntType.apply(RHSValue);
528 }
529
530 const llvm::APSInt *Result =
531 BasicVals.evalAPSInt(op, LHSValue, RHSValue);
532 if (!Result)
533 return UndefinedVal();
534
535 return nonloc::ConcreteInt(*Result);
536 }
537
538 // Swap the left and right sides and flip the operator if doing so
539 // allows us to better reason about the expression (this is a form
540 // of expression canonicalization).
541 // While we're at it, catch some special cases for non-commutative ops.
542 switch (op) {
543 case BO_LT:
544 case BO_GT:
545 case BO_LE:
546 case BO_GE:
548 [[fallthrough]];
549 case BO_EQ:
550 case BO_NE:
551 case BO_Add:
552 case BO_Mul:
553 case BO_And:
554 case BO_Xor:
555 case BO_Or:
556 std::swap(lhs, rhs);
557 continue;
558 case BO_Shr:
559 // (~0)>>a
560 if (LHSValue.isAllOnes() && LHSValue.isSigned())
561 return evalCast(lhs, resultTy, QualType{});
562 [[fallthrough]];
563 case BO_Shl:
564 // 0<<a and 0>>a
565 if (LHSValue == 0)
566 return evalCast(lhs, resultTy, QualType{});
567 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
568 case BO_Div:
569 // 0 / x == 0
570 case BO_Rem:
571 // 0 % x == 0
572 if (LHSValue == 0)
573 return makeZeroVal(resultTy);
574 [[fallthrough]];
575 default:
576 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
577 }
578 }
579 case nonloc::SymbolValKind: {
580 // We only handle LHS as simple symbols or SymIntExprs.
581 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
582
583 // LHS is a symbolic expression.
584 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
585
586 // Is this a logical not? (!x is represented as x == 0.)
587 if (op == BO_EQ && rhs.isZeroConstant()) {
588 // We know how to negate certain expressions. Simplify them here.
589
590 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
591 switch (opc) {
592 default:
593 // We don't know how to negate this operation.
594 // Just handle it as if it were a normal comparison to 0.
595 break;
596 case BO_LAnd:
597 case BO_LOr:
598 llvm_unreachable("Logical operators handled by branching logic.");
599 case BO_Assign:
600 case BO_MulAssign:
601 case BO_DivAssign:
602 case BO_RemAssign:
603 case BO_AddAssign:
604 case BO_SubAssign:
605 case BO_ShlAssign:
606 case BO_ShrAssign:
607 case BO_AndAssign:
608 case BO_XorAssign:
609 case BO_OrAssign:
610 case BO_Comma:
611 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
612 case BO_PtrMemD:
613 case BO_PtrMemI:
614 llvm_unreachable("Pointer arithmetic not handled here.");
615 case BO_LT:
616 case BO_GT:
617 case BO_LE:
618 case BO_GE:
619 case BO_EQ:
620 case BO_NE:
621 assert(resultTy->isBooleanType() ||
622 resultTy == getConditionType());
623 assert(symIntExpr->getType()->isBooleanType() ||
624 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
625 getConditionType()));
626 // Negate the comparison and make a value.
628 return makeNonLoc(symIntExpr->getLHS(), opc,
629 symIntExpr->getRHS(), resultTy);
630 }
631 }
632
633 // For now, only handle expressions whose RHS is a constant.
634 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) {
635 // If both the LHS and the current expression are additive,
636 // fold their constants and try again.
638 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
640 // Convert the two constants to a common type, then combine them.
641
642 // resultTy may not be the best type to convert to, but it's
643 // probably the best choice in expressions with mixed type
644 // (such as x+1U+2LL). The rules for implicit conversions should
645 // choose a reasonable type to preserve the expression, and will
646 // at least match how the value is going to be used.
647 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
648 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
649 const llvm::APSInt &second = IntType.convert(*RHSValue);
650
651 // If the op and lop agrees, then we just need to
652 // sum the constants. Otherwise, we change to operation
653 // type if substraction would produce negative value
654 // (and cause overflow for unsigned integers),
655 // as consequence x+1U-10 produces x-9U, instead
656 // of x+4294967287U, that would be produced without this
657 // additional check.
658 const llvm::APSInt *newRHS;
659 if (lop == op) {
660 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
661 } else if (first >= second) {
662 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
663 op = lop;
664 } else {
665 newRHS = BasicVals.evalAPSInt(BO_Sub, second, first);
666 }
667
668 assert(newRHS && "Invalid operation despite common type!");
669 rhs = nonloc::ConcreteInt(*newRHS);
670 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
671 continue;
672 }
673 }
674
675 // Otherwise, make a SymIntExpr out of the expression.
676 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
677 }
678 }
679
680 // Is the RHS a constant?
681 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs))
682 return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
683
684 if (std::optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
685 return *V;
686
687 // Give up -- this is not a symbolic expression we can handle.
688 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
689 }
690 }
691 }
692}
693
695 const FieldRegion *RightFR,
697 QualType resultTy,
698 SimpleSValBuilder &SVB) {
699 // Only comparisons are meaningful here!
701 return UnknownVal();
702
703 // Next, see if the two FRs have the same super-region.
704 // FIXME: This doesn't handle casts yet, and simply stripping the casts
705 // doesn't help.
706 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
707 return UnknownVal();
708
709 const FieldDecl *LeftFD = LeftFR->getDecl();
710 const FieldDecl *RightFD = RightFR->getDecl();
711 const RecordDecl *RD = LeftFD->getParent();
712
713 // Make sure the two FRs are from the same kind of record. Just in case!
714 // FIXME: This is probably where inheritance would be a problem.
715 if (RD != RightFD->getParent())
716 return UnknownVal();
717
718 // We know for sure that the two fields are not the same, since that
719 // would have given us the same SVal.
720 if (op == BO_EQ)
721 return SVB.makeTruthVal(false, resultTy);
722 if (op == BO_NE)
723 return SVB.makeTruthVal(true, resultTy);
724
725 // Iterate through the fields and see which one comes first.
726 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
727 // members and the units in which bit-fields reside have addresses that
728 // increase in the order in which they are declared."
729 bool leftFirst = (op == BO_LT || op == BO_LE);
730 for (const auto *I : RD->fields()) {
731 if (I == LeftFD)
732 return SVB.makeTruthVal(leftFirst, resultTy);
733 if (I == RightFD)
734 return SVB.makeTruthVal(!leftFirst, resultTy);
735 }
736
737 llvm_unreachable("Fields not found in parent record's definition");
738}
739
740// This is used in debug builds only for now because some downstream users
741// may hit this assert in their subsequent merges.
742// There are still places in the analyzer where equal bitwidth Locs
743// are compared, and need to be found and corrected. Recent previous fixes have
744// addressed the known problems of making NULLs with specific bitwidths
745// for Loc comparisons along with deprecation of APIs for the same purpose.
746//
747static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
748 Loc LhsLoc) {
749 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
750 ASTContext &Ctx = State->getStateManager().getContext();
751 uint64_t RhsBitwidth =
752 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
753 uint64_t LhsBitwidth =
754 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
755 if (RhsBitwidth && LhsBitwidth &&
756 (LhsLoc.getSubKind() == RhsLoc.getSubKind())) {
757 assert(RhsBitwidth == LhsBitwidth &&
758 "RhsLoc and LhsLoc bitwidth must be same!");
759 }
760}
761
762// FIXME: all this logic will change if/when we have MemRegion::getLocation().
763SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
765 Loc lhs, Loc rhs,
766 QualType resultTy) {
767
768 // Assert that bitwidth of lhs and rhs are the same.
769 // This can happen if two different address spaces are used,
770 // and the bitwidths of the address spaces are different.
771 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
772 // FIXME: See comment above in the function assertEqualBitWidths
773 assertEqualBitWidths(state, rhs, lhs);
774
775 // Only comparisons and subtractions are valid operations on two pointers.
776 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
777 // However, if a pointer is casted to an integer, evalBinOpNN may end up
778 // calling this function with another operation (PR7527). We don't attempt to
779 // model this for now, but it could be useful, particularly when the
780 // "location" is actually an integer value that's been passed through a void*.
781 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
782 return UnknownVal();
783
784 // Special cases for when both sides are identical.
785 if (lhs == rhs) {
786 switch (op) {
787 default:
788 llvm_unreachable("Unimplemented operation for two identical values");
789 case BO_Sub:
790 return makeZeroVal(resultTy);
791 case BO_EQ:
792 case BO_LE:
793 case BO_GE:
794 return makeTruthVal(true, resultTy);
795 case BO_NE:
796 case BO_LT:
797 case BO_GT:
798 return makeTruthVal(false, resultTy);
799 }
800 }
801
802 switch (lhs.getSubKind()) {
803 default:
804 llvm_unreachable("Ordering not implemented for this Loc.");
805
806 case loc::GotoLabelKind:
807 // The only thing we know about labels is that they're non-null.
808 if (rhs.isZeroConstant()) {
809 switch (op) {
810 default:
811 break;
812 case BO_Sub:
813 return evalCast(lhs, resultTy, QualType{});
814 case BO_EQ:
815 case BO_LE:
816 case BO_LT:
817 return makeTruthVal(false, resultTy);
818 case BO_NE:
819 case BO_GT:
820 case BO_GE:
821 return makeTruthVal(true, resultTy);
822 }
823 }
824 // There may be two labels for the same location, and a function region may
825 // have the same address as a label at the start of the function (depending
826 // on the ABI).
827 // FIXME: we can probably do a comparison against other MemRegions, though.
828 // FIXME: is there a way to tell if two labels refer to the same location?
829 return UnknownVal();
830
831 case loc::ConcreteIntKind: {
832 auto L = lhs.castAs<loc::ConcreteInt>();
833
834 // If one of the operands is a symbol and the other is a constant,
835 // build an expression for use by the constraint manager.
836 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
837 // We can only build expressions with symbols on the left,
838 // so we need a reversible operator.
839 if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
840 return UnknownVal();
841
843 return makeNonLoc(rSym, op, L.getValue(), resultTy);
844 }
845
846 // If both operands are constants, just perform the operation.
847 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
848 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
849
850 if (const auto *ResultInt =
851 BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue()))
852 return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
853 return UnknownVal();
854 }
855
856 // Special case comparisons against NULL.
857 // This must come after the test if the RHS is a symbol, which is used to
858 // build constraints. The address of any non-symbolic region is guaranteed
859 // to be non-NULL, as is any label.
860 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
861 if (lhs.isZeroConstant()) {
862 switch (op) {
863 default:
864 break;
865 case BO_EQ:
866 case BO_GT:
867 case BO_GE:
868 return makeTruthVal(false, resultTy);
869 case BO_NE:
870 case BO_LT:
871 case BO_LE:
872 return makeTruthVal(true, resultTy);
873 }
874 }
875
876 // Comparing an arbitrary integer to a region or label address is
877 // completely unknowable.
878 return UnknownVal();
879 }
880 case loc::MemRegionValKind: {
881 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
882 // If one of the operands is a symbol and the other is a constant,
883 // build an expression for use by the constraint manager.
884 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
886 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
887 return UnknownVal();
888 }
889 // Special case comparisons to NULL.
890 // This must come after the test if the LHS is a symbol, which is used to
891 // build constraints. The address of any non-symbolic region is guaranteed
892 // to be non-NULL.
893 if (rInt->isZeroConstant()) {
894 if (op == BO_Sub)
895 return evalCast(lhs, resultTy, QualType{});
896
898 QualType boolType = getContext().BoolTy;
899 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
900 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
901 return evalBinOpNN(state, op, l, r, resultTy);
902 }
903 }
904
905 // Comparing a region to an arbitrary integer is completely unknowable.
906 return UnknownVal();
907 }
908
909 // Get both values as regions, if possible.
910 const MemRegion *LeftMR = lhs.getAsRegion();
911 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
912
913 const MemRegion *RightMR = rhs.getAsRegion();
914 if (!RightMR)
915 // The RHS is probably a label, which in theory could address a region.
916 // FIXME: we can probably make a more useful statement about non-code
917 // regions, though.
918 return UnknownVal();
919
920 const MemRegion *LeftBase = LeftMR->getBaseRegion();
921 const MemRegion *RightBase = RightMR->getBaseRegion();
922 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
923 const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
924 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
925
926 // If the two regions are from different known memory spaces they cannot be
927 // equal. Also, assume that no symbolic region (whose memory space is
928 // unknown) is on the stack.
929 if (LeftMS != RightMS &&
930 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
931 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
932 switch (op) {
933 default:
934 return UnknownVal();
935 case BO_EQ:
936 return makeTruthVal(false, resultTy);
937 case BO_NE:
938 return makeTruthVal(true, resultTy);
939 }
940 }
941
942 // If both values wrap regions, see if they're from different base regions.
943 // Note, heap base symbolic regions are assumed to not alias with
944 // each other; for example, we assume that malloc returns different address
945 // on each invocation.
946 // FIXME: ObjC object pointers always reside on the heap, but currently
947 // we treat their memory space as unknown, because symbolic pointers
948 // to ObjC objects may alias. There should be a way to construct
949 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
950 // guesses memory space for ObjC object pointers manually instead of
951 // relying on us.
952 if (LeftBase != RightBase &&
953 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
954 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
955 switch (op) {
956 default:
957 return UnknownVal();
958 case BO_EQ:
959 return makeTruthVal(false, resultTy);
960 case BO_NE:
961 return makeTruthVal(true, resultTy);
962 }
963 }
964
965 // Handle special cases for when both regions are element regions.
966 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
967 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
968 if (RightER && LeftER) {
969 // Next, see if the two ERs have the same super-region and matching types.
970 // FIXME: This should do something useful even if the types don't match,
971 // though if both indexes are constant the RegionRawOffset path will
972 // give the correct answer.
973 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
974 LeftER->getElementType() == RightER->getElementType()) {
975 // Get the left index and cast it to the correct type.
976 // If the index is unknown or undefined, bail out here.
977 SVal LeftIndexVal = LeftER->getIndex();
978 std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
979 if (!LeftIndex)
980 return UnknownVal();
981 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
982 LeftIndex = LeftIndexVal.getAs<NonLoc>();
983 if (!LeftIndex)
984 return UnknownVal();
985
986 // Do the same for the right index.
987 SVal RightIndexVal = RightER->getIndex();
988 std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
989 if (!RightIndex)
990 return UnknownVal();
991 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
992 RightIndex = RightIndexVal.getAs<NonLoc>();
993 if (!RightIndex)
994 return UnknownVal();
995
996 // Actually perform the operation.
997 // evalBinOpNN expects the two indexes to already be the right type.
998 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
999 }
1000 }
1001
1002 // Special handling of the FieldRegions, even with symbolic offsets.
1003 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1004 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1005 if (RightFR && LeftFR) {
1006 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1007 *this);
1008 if (!R.isUnknown())
1009 return R;
1010 }
1011
1012 // Compare the regions using the raw offsets.
1013 RegionOffset LeftOffset = LeftMR->getAsOffset();
1014 RegionOffset RightOffset = RightMR->getAsOffset();
1015
1016 if (LeftOffset.getRegion() != nullptr &&
1017 LeftOffset.getRegion() == RightOffset.getRegion() &&
1018 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1019 int64_t left = LeftOffset.getOffset();
1020 int64_t right = RightOffset.getOffset();
1021
1022 switch (op) {
1023 default:
1024 return UnknownVal();
1025 case BO_LT:
1026 return makeTruthVal(left < right, resultTy);
1027 case BO_GT:
1028 return makeTruthVal(left > right, resultTy);
1029 case BO_LE:
1030 return makeTruthVal(left <= right, resultTy);
1031 case BO_GE:
1032 return makeTruthVal(left >= right, resultTy);
1033 case BO_EQ:
1034 return makeTruthVal(left == right, resultTy);
1035 case BO_NE:
1036 return makeTruthVal(left != right, resultTy);
1037 }
1038 }
1039
1040 // At this point we're not going to get a good answer, but we can try
1041 // conjuring an expression instead.
1042 SymbolRef LHSSym = lhs.getAsLocSymbol();
1043 SymbolRef RHSSym = rhs.getAsLocSymbol();
1044 if (LHSSym && RHSSym)
1045 return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1046
1047 // If we get here, we have no way of comparing the regions.
1048 return UnknownVal();
1049 }
1050 }
1051}
1052
1053SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1055 NonLoc rhs, QualType resultTy) {
1056 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1057 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1058 if (PTMSV->isNullMemberPointer())
1059 return UndefinedVal();
1060
1061 auto getFieldLValue = [&](const auto *FD) -> SVal {
1062 SVal Result = lhs;
1063
1064 for (const auto &I : *PTMSV)
1065 Result = StateMgr.getStoreManager().evalDerivedToBase(
1066 Result, I->getType(), I->isVirtual());
1067
1068 return state->getLValue(FD, Result);
1069 };
1070
1071 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1072 return getFieldLValue(FD);
1073 }
1074 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1075 return getFieldLValue(FD);
1076 }
1077 }
1078
1079 return rhs;
1080 }
1081
1082 assert(!BinaryOperator::isComparisonOp(op) &&
1083 "arguments to comparison ops must be of the same type");
1084
1085 // Special case: rhs is a zero constant.
1086 if (rhs.isZeroConstant())
1087 return lhs;
1088
1089 // Perserve the null pointer so that it can be found by the DerefChecker.
1090 if (lhs.isZeroConstant())
1091 return lhs;
1092
1093 // We are dealing with pointer arithmetic.
1094
1095 // Handle pointer arithmetic on constant values.
1096 if (std::optional<nonloc::ConcreteInt> rhsInt =
1097 rhs.getAs<nonloc::ConcreteInt>()) {
1098 if (std::optional<loc::ConcreteInt> lhsInt =
1099 lhs.getAs<loc::ConcreteInt>()) {
1100 const llvm::APSInt &leftI = lhsInt->getValue();
1101 assert(leftI.isUnsigned());
1102 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1103
1104 // Convert the bitwidth of rightI. This should deal with overflow
1105 // since we are dealing with concrete values.
1106 rightI = rightI.extOrTrunc(leftI.getBitWidth());
1107
1108 // Offset the increment by the pointer size.
1109 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1110 QualType pointeeType = resultTy->getPointeeType();
1111 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1112 rightI *= Multiplicand;
1113
1114 // Compute the adjusted pointer.
1115 switch (op) {
1116 case BO_Add:
1117 rightI = leftI + rightI;
1118 break;
1119 case BO_Sub:
1120 rightI = leftI - rightI;
1121 break;
1122 default:
1123 llvm_unreachable("Invalid pointer arithmetic operation");
1124 }
1125 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1126 }
1127 }
1128
1129 // Handle cases where 'lhs' is a region.
1130 if (const MemRegion *region = lhs.getAsRegion()) {
1131 rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1132 SVal index = UnknownVal();
1133 const SubRegion *superR = nullptr;
1134 // We need to know the type of the pointer in order to add an integer to it.
1135 // Depending on the type, different amount of bytes is added.
1136 QualType elementType;
1137
1138 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1139 assert(op == BO_Add || op == BO_Sub);
1140 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1141 getArrayIndexType());
1142 superR = cast<SubRegion>(elemReg->getSuperRegion());
1143 elementType = elemReg->getElementType();
1144 }
1145 else if (isa<SubRegion>(region)) {
1146 assert(op == BO_Add || op == BO_Sub);
1147 index = (op == BO_Add) ? rhs : evalMinus(rhs);
1148 superR = cast<SubRegion>(region);
1149 // TODO: Is this actually reliable? Maybe improving our MemRegion
1150 // hierarchy to provide typed regions for all non-void pointers would be
1151 // better. For instance, we cannot extend this towards LocAsInteger
1152 // operations, where result type of the expression is integer.
1153 if (resultTy->isAnyPointerType())
1154 elementType = resultTy->getPointeeType();
1155 }
1156
1157 // Represent arithmetic on void pointers as arithmetic on char pointers.
1158 // It is fine when a TypedValueRegion of char value type represents
1159 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1160 if (elementType->isVoidType())
1161 elementType = getContext().CharTy;
1162
1163 if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1164 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1165 superR, getContext()));
1166 }
1167 }
1168 return UnknownVal();
1169}
1170
1171const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1172 SVal V) {
1173 if (V.isUnknownOrUndef())
1174 return nullptr;
1175
1176 if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1177 return &X->getValue();
1178
1179 if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1180 return &X->getValue();
1181
1182 if (SymbolRef Sym = V.getAsSymbol())
1183 return state->getConstraintManager().getSymVal(state, Sym);
1184
1185 return nullptr;
1186}
1187
1188const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1189 SVal V) {
1190 return getConstValue(state, simplifySVal(state, V));
1191}
1192
1193SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1194 SVal SimplifiedVal = simplifySValOnce(State, Val);
1195 while (SimplifiedVal != Val) {
1196 Val = SimplifiedVal;
1197 SimplifiedVal = simplifySValOnce(State, Val);
1198 }
1199 return SimplifiedVal;
1200}
1201
1202SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1203 return simplifyUntilFixpoint(State, V);
1204}
1205
1206SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1207 // For now, this function tries to constant-fold symbols inside a
1208 // nonloc::SymbolVal, and does nothing else. More simplifications should
1209 // be possible, such as constant-folding an index in an ElementRegion.
1210
1211 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1212 ProgramStateRef State;
1213 SValBuilder &SVB;
1214
1215 // Cache results for the lifetime of the Simplifier. Results change every
1216 // time new constraints are added to the program state, which is the whole
1217 // point of simplifying, and for that very reason it's pointless to maintain
1218 // the same cache for the duration of the whole analysis.
1219 llvm::DenseMap<SymbolRef, SVal> Cached;
1220
1221 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1222 return Sym == Val.getAsSymbol();
1223 }
1224
1225 SVal cache(SymbolRef Sym, SVal V) {
1226 Cached[Sym] = V;
1227 return V;
1228 }
1229
1230 SVal skip(SymbolRef Sym) {
1231 return cache(Sym, SVB.makeSymbolVal(Sym));
1232 }
1233
1234 // Return the known const value for the Sym if available, or return Undef
1235 // otherwise.
1236 SVal getConst(SymbolRef Sym) {
1237 const llvm::APSInt *Const =
1238 State->getConstraintManager().getSymVal(State, Sym);
1239 if (Const)
1240 return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1241 : (SVal)SVB.makeIntVal(*Const);
1242 return UndefinedVal();
1243 }
1244
1245 SVal getConstOrVisit(SymbolRef Sym) {
1246 const SVal Ret = getConst(Sym);
1247 if (Ret.isUndef())
1248 return Visit(Sym);
1249 return Ret;
1250 }
1251
1252 public:
1253 Simplifier(ProgramStateRef State)
1254 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1255
1256 SVal VisitSymbolData(const SymbolData *S) {
1257 // No cache here.
1258 if (const llvm::APSInt *I =
1259 State->getConstraintManager().getSymVal(State, S))
1260 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1261 : (SVal)SVB.makeIntVal(*I);
1262 return SVB.makeSymbolVal(S);
1263 }
1264
1265 SVal VisitSymIntExpr(const SymIntExpr *S) {
1266 auto I = Cached.find(S);
1267 if (I != Cached.end())
1268 return I->second;
1269
1270 SVal LHS = getConstOrVisit(S->getLHS());
1271 if (isUnchanged(S->getLHS(), LHS))
1272 return skip(S);
1273
1274 SVal RHS;
1275 // By looking at the APSInt in the right-hand side of S, we cannot
1276 // figure out if it should be treated as a Loc or as a NonLoc.
1277 // So make our guess by recalling that we cannot multiply pointers
1278 // or compare a pointer to an integer.
1279 if (Loc::isLocType(S->getLHS()->getType()) &&
1280 BinaryOperator::isComparisonOp(S->getOpcode())) {
1281 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1282 // the same meaning for Loc-type symbols, but the latter form
1283 // is preferred in SVal computations for being Loc itself.
1284 if (SymbolRef Sym = LHS.getAsSymbol()) {
1285 assert(Loc::isLocType(Sym->getType()));
1286 LHS = SVB.makeLoc(Sym);
1287 }
1288 RHS = SVB.makeIntLocVal(S->getRHS());
1289 } else {
1290 RHS = SVB.makeIntVal(S->getRHS());
1291 }
1292
1293 return cache(
1294 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1295 }
1296
1297 SVal VisitIntSymExpr(const IntSymExpr *S) {
1298 auto I = Cached.find(S);
1299 if (I != Cached.end())
1300 return I->second;
1301
1302 SVal RHS = getConstOrVisit(S->getRHS());
1303 if (isUnchanged(S->getRHS(), RHS))
1304 return skip(S);
1305
1306 SVal LHS = SVB.makeIntVal(S->getLHS());
1307 return cache(
1308 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1309 }
1310
1311 SVal VisitSymSymExpr(const SymSymExpr *S) {
1312 auto I = Cached.find(S);
1313 if (I != Cached.end())
1314 return I->second;
1315
1316 // For now don't try to simplify mixed Loc/NonLoc expressions
1317 // because they often appear from LocAsInteger operations
1318 // and we don't know how to combine a LocAsInteger
1319 // with a concrete value.
1320 if (Loc::isLocType(S->getLHS()->getType()) !=
1321 Loc::isLocType(S->getRHS()->getType()))
1322 return skip(S);
1323
1324 SVal LHS = getConstOrVisit(S->getLHS());
1325 SVal RHS = getConstOrVisit(S->getRHS());
1326
1327 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1328 return skip(S);
1329
1330 return cache(
1331 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1332 }
1333
1334 SVal VisitSymbolCast(const SymbolCast *S) {
1335 auto I = Cached.find(S);
1336 if (I != Cached.end())
1337 return I->second;
1338 const SymExpr *OpSym = S->getOperand();
1339 SVal OpVal = getConstOrVisit(OpSym);
1340 if (isUnchanged(OpSym, OpVal))
1341 return skip(S);
1342
1343 return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType()));
1344 }
1345
1346 SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1347 auto I = Cached.find(S);
1348 if (I != Cached.end())
1349 return I->second;
1350 SVal Op = getConstOrVisit(S->getOperand());
1351 if (isUnchanged(S->getOperand(), Op))
1352 return skip(S);
1353
1354 return cache(
1355 S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType()));
1356 }
1357
1359
1360 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1361
1362 SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) {
1363 // Simplification is much more costly than computing complexity.
1364 // For high complexity, it may be not worth it.
1365 return Visit(V.getSymbol());
1366 }
1367
1368 SVal VisitSVal(SVal V) { return V; }
1369 };
1370
1371 SVal SimplifiedV = Simplifier(State).Visit(V);
1372 return SimplifiedV;
1373}
#define V(N, I)
Definition: ASTContext.h:3217
static SVal getValue(SVal val, SValBuilder &svalBuilder)
static CompilationDatabasePluginRegistry::Add< FixedCompilationDatabasePlugin > X("fixed-compilation-database", "Reads plain-text flags file")
static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, llvm::APSInt Bound, ProgramStateRef State)
static NonLoc doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef LSym, llvm::APSInt LInt, SymbolRef RSym, llvm::APSInt RInt)
static std::pair< SymbolRef, llvm::APSInt > decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV)
static bool isNegationValuePreserving(const llvm::APSInt &Value, APSIntType ResultType)
static std::optional< NonLoc > tryRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, NonLoc Lhs, NonLoc Rhs, QualType ResultTy)
static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef Sym, llvm::APSInt Int, QualType Ty)
static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, const FieldRegion *RightFR, BinaryOperator::Opcode op, QualType resultTy, SimpleSValBuilder &SVB)
static bool isWithinConstantOverflowBounds(SymbolRef Sym, ProgramStateRef State)
static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, Loc LhsLoc)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
CanQualType VoidPtrTy
Definition: ASTContext.h:1105
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2279
bool isComparisonOp() const
Definition: Expr.h:3914
static Opcode negateComparisonOp(Opcode Opc)
Definition: Expr.h:3919
static Opcode reverseComparisonOp(Opcode Opc)
Definition: Expr.h:3932
bool isShiftOp() const
Definition: Expr.h:3902
bool isAdditiveOp() const
Definition: Expr.h:3900
Represents a member of a struct/union/class.
Definition: Decl.h:2941
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition: Decl.h:3137
Represents a field injected from an anonymous union/struct into the parent scope.
Definition: Decl.h:3200
A (possibly-)qualified type.
Definition: Type.h:736
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:803
Represents a struct/union/class.
Definition: Decl.h:3998
field_range fields() const
Definition: Decl.h:4225
bool isVoidType() const
Definition: Type.h:7218
bool isBooleanType() const
Definition: Type.h:7334
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2063
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:629
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:7321
bool isAnyPointerType() const
Definition: Type.h:6914
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:19
bool isUnsigned() const
Definition: APSIntType.h:31
uint32_t getBitWidth() const
Definition: APSIntType.h:30
llvm::APSInt getMaxValue() const LLVM_READONLY
Returns the maximum value for this type.
Definition: APSIntType.h:65
void apply(llvm::APSInt &Value) const
Convert a given APSInt, in place, to match this type.
Definition: APSIntType.h:37
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Definition: APSIntType.h:48
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:69
APSIntType getAPSIntType(QualType T) const
Returns the type of the APSInt used to store values of the given QualType.
Template implementation for all binary symbolic expressions.
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1189
QualType getElementType() const
Definition: MemRegion.h:1213
NonLoc getIndex() const
Definition: MemRegion.h:1209
LLVM_ATTRIBUTE_RETURNS_NONNULL const FieldDecl * getDecl() const override
Definition: MemRegion.h:1115
FullSValVisitor - a convenient mixed visitor for all three: SVal, SymExpr and MemRegion subclasses.
Definition: SValVisitor.h:139
static bool isLocType(QualType T)
Definition: SVals.h:289
RetTy VisitMemRegion(const MemRegion *R)
Definition: SValVisitor.h:129
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:95
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
Definition: MemRegion.cpp:1277
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
Definition: MemRegion.cpp:1614
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1307
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
Definition: MemRegion.h:204
Represent a region's offset within the top level base region.
Definition: MemRegion.h:62
bool hasSymbolicOffset() const
Definition: MemRegion.h:80
const MemRegion * getRegion() const
It might return null.
Definition: MemRegion.h:78
int64_t getOffset() const
Definition: MemRegion.h:82
virtual const llvm::APSInt * getKnownValue(ProgramStateRef state, SVal val)=0
Evaluates a given SVal.
BasicValueFactory & getBasicValueFactory()
Definition: SValBuilder.h:149
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:269
loc::MemRegionVal makeLoc(SymbolRef sym)
Definition: SValBuilder.h:356
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
DefinedSVal makeSymbolVal(SymbolRef Sym)
Make an SVal that represents the given symbol.
Definition: SValBuilder.h:379
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
const AnalyzerOptions & getAnalyzerOptions() const
Definition: SValBuilder.h:158
virtual SVal simplifySVal(ProgramStateRef State, SVal Val)=0
Simplify symbolic expressions within a given SVal.
QualType getConditionType() const
Definition: SValBuilder.h:141
SVal evalUnaryOp(ProgramStateRef state, UnaryOperator::Opcode opc, SVal operand, QualType type)
SymbolManager & getSymbolManager()
Definition: SValBuilder.h:152
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
loc::ConcreteInt makeIntLocVal(const llvm::APSInt &integer)
Definition: SValBuilder.h:285
RetTy VisitSVal(SVal V)
Definition: SValVisitor.h:68
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:72
bool isZeroConstant() const
Definition: SVals.cpp:261
unsigned getSubKind() const
Definition: SVals.h:109
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:104
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:103
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
Definition: SVals.cpp:184
SymbolRef getAsLocSymbol(bool IncludeBaseRegions=false) const
If this SVal is a location and wraps a symbol, return that SymbolRef.
Definition: SVals.cpp:68
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:99
bool isUnknown() const
Definition: SVals.h:124
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:442
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition: MemRegion.h:455
RetTy VisitSymExpr(SymbolRef S)
Definition: SValVisitor.h:98
Symbolic value.
Definition: SymExpr.h:29
virtual QualType getType() const =0
Represents a cast expression.
A symbol representing data which can be stored in a memory location (region).
Definition: SymExpr.h:117
const SymIntExpr * getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt &rhs, QualType t)
const SymSymExpr * getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t)
Represents a symbolic expression involving a unary operator.
Value representing integer constant.
Definition: SVals.h:329
Value representing pointer-to-member.
Definition: SVals.h:441
const PTMDataType getPTMData() const
Definition: SVals.h:448
Represents symbolic expression that isn't a location.
Definition: SVals.h:304
SValBuilder * createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr)
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:159
bool Const(InterpState &S, CodePtr OpPC, const T &Arg)
Definition: Interp.h:685
BinaryOperatorKind
unsigned long uint64_t
long int64_t