clang 18.0.0git
RangeConstraintManager.cpp
Go to the documentation of this file.
1//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines RangeConstraintManager, a class that tracks simple
10// equality and inequality constraints on symbolic values of ProgramState.
11//
12//===----------------------------------------------------------------------===//
13
20#include "llvm/ADT/FoldingSet.h"
21#include "llvm/ADT/ImmutableSet.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/StringExtras.h"
25#include "llvm/Support/Compiler.h"
26#include "llvm/Support/raw_ostream.h"
27#include <algorithm>
28#include <iterator>
29#include <optional>
30
31using namespace clang;
32using namespace ento;
33
34// This class can be extended with other tables which will help to reason
35// about ranges more precisely.
37 static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
38 BO_GE < BO_EQ && BO_EQ < BO_NE,
39 "This class relies on operators order. Rework it otherwise.");
40
41public:
43 False = 0,
46 };
47
48private:
49 // CmpOpTable holds states which represent the corresponding range for
50 // branching an exploded graph. We can reason about the branch if there is
51 // a previously known fact of the existence of a comparison expression with
52 // operands used in the current expression.
53 // E.g. assuming (x < y) is true that means (x != y) is surely true.
54 // if (x previous_operation y) // < | != | >
55 // if (x operation y) // != | > | <
56 // tristate // True | Unknown | False
57 //
58 // CmpOpTable represents next:
59 // __|< |> |<=|>=|==|!=|UnknownX2|
60 // < |1 |0 |* |0 |0 |* |1 |
61 // > |0 |1 |0 |* |0 |* |1 |
62 // <=|1 |0 |1 |* |1 |* |0 |
63 // >=|0 |1 |* |1 |1 |* |0 |
64 // ==|0 |0 |* |* |1 |0 |1 |
65 // !=|1 |1 |* |* |0 |1 |0 |
66 //
67 // Columns stands for a previous operator.
68 // Rows stands for a current operator.
69 // Each row has exactly two `Unknown` cases.
70 // UnknownX2 means that both `Unknown` previous operators are met in code,
71 // and there is a special column for that, for example:
72 // if (x >= y)
73 // if (x != y)
74 // if (x <= y)
75 // False only
76 static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
77 const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
78 // < > <= >= == != UnknownX2
81 {True, False, True, Unknown, True, Unknown, False}, // <=
82 {False, True, Unknown, True, True, Unknown, False}, // >=
83 {False, False, Unknown, Unknown, True, False, True}, // ==
84 {True, True, Unknown, Unknown, False, True, False}, // !=
85 };
86
87 static size_t getIndexFromOp(BinaryOperatorKind OP) {
88 return static_cast<size_t>(OP - BO_LT);
89 }
90
91public:
92 constexpr size_t getCmpOpCount() const { return CmpOpCount; }
93
94 static BinaryOperatorKind getOpFromIndex(size_t Index) {
95 return static_cast<BinaryOperatorKind>(Index + BO_LT);
96 }
97
99 BinaryOperatorKind QueriedOP) const {
100 return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
101 }
102
104 return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
105 }
106};
107
108//===----------------------------------------------------------------------===//
109// RangeSet implementation
110//===----------------------------------------------------------------------===//
111
112RangeSet::ContainerType RangeSet::Factory::EmptySet{};
113
115 ContainerType Result;
116 Result.reserve(LHS.size() + RHS.size());
117 std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
118 std::back_inserter(Result));
119 return makePersistent(std::move(Result));
120}
121
123 ContainerType Result;
124 Result.reserve(Original.size() + 1);
125
126 const_iterator Lower = llvm::lower_bound(Original, Element);
127 Result.insert(Result.end(), Original.begin(), Lower);
128 Result.push_back(Element);
129 Result.insert(Result.end(), Lower, Original.end());
130
131 return makePersistent(std::move(Result));
132}
133
134RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
135 return add(Original, Range(Point));
136}
137
139 ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
140 return makePersistent(std::move(Result));
141}
142
144 ContainerType Result;
145 Result.push_back(R);
146 Result = unite(*Original.Impl, Result);
147 return makePersistent(std::move(Result));
148}
149
150RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
151 return unite(Original, Range(ValueFactory.getValue(Point)));
152}
153
154RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
155 llvm::APSInt To) {
156 return unite(Original,
157 Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
158}
159
160template <typename T>
161void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
162 std::swap(First, Second);
163 std::swap(FirstEnd, SecondEnd);
164}
165
166RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
167 const ContainerType &RHS) {
168 if (LHS.empty())
169 return RHS;
170 if (RHS.empty())
171 return LHS;
172
173 using llvm::APSInt;
174 using iterator = ContainerType::const_iterator;
175
176 iterator First = LHS.begin();
177 iterator FirstEnd = LHS.end();
178 iterator Second = RHS.begin();
179 iterator SecondEnd = RHS.end();
180 APSIntType Ty = APSIntType(First->From());
181 const APSInt Min = Ty.getMinValue();
182
183 // Handle a corner case first when both range sets start from MIN.
184 // This helps to avoid complicated conditions below. Specifically, this
185 // particular check for `MIN` is not needed in the loop below every time
186 // when we do `Second->From() - One` operation.
187 if (Min == First->From() && Min == Second->From()) {
188 if (First->To() > Second->To()) {
189 // [ First ]--->
190 // [ Second ]----->
191 // MIN^
192 // The Second range is entirely inside the First one.
193
194 // Check if Second is the last in its RangeSet.
195 if (++Second == SecondEnd)
196 // [ First ]--[ First + 1 ]--->
197 // [ Second ]--------------------->
198 // MIN^
199 // The Union is equal to First's RangeSet.
200 return LHS;
201 } else {
202 // case 1: [ First ]----->
203 // case 2: [ First ]--->
204 // [ Second ]--->
205 // MIN^
206 // The First range is entirely inside or equal to the Second one.
207
208 // Check if First is the last in its RangeSet.
209 if (++First == FirstEnd)
210 // [ First ]----------------------->
211 // [ Second ]--[ Second + 1 ]---->
212 // MIN^
213 // The Union is equal to Second's RangeSet.
214 return RHS;
215 }
216 }
217
218 const APSInt One = Ty.getValue(1);
219 ContainerType Result;
220
221 // This is called when there are no ranges left in one of the ranges.
222 // Append the rest of the ranges from another range set to the Result
223 // and return with that.
224 const auto AppendTheRest = [&Result](iterator I, iterator E) {
225 Result.append(I, E);
226 return Result;
227 };
228
229 while (true) {
230 // We want to keep the following invariant at all times:
231 // ---[ First ------>
232 // -----[ Second --->
233 if (First->From() > Second->From())
234 swapIterators(First, FirstEnd, Second, SecondEnd);
235
236 // The Union definitely starts with First->From().
237 // ----------[ First ------>
238 // ------------[ Second --->
239 // ----------[ Union ------>
240 // UnionStart^
241 const llvm::APSInt &UnionStart = First->From();
242
243 // Loop where the invariant holds.
244 while (true) {
245 // Skip all enclosed ranges.
246 // ---[ First ]--->
247 // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
248 while (First->To() >= Second->To()) {
249 // Check if Second is the last in its RangeSet.
250 if (++Second == SecondEnd) {
251 // Append the Union.
252 // ---[ Union ]--->
253 // -----[ Second ]----->
254 // --------[ First ]--->
255 // UnionEnd^
256 Result.emplace_back(UnionStart, First->To());
257 // ---[ Union ]----------------->
258 // --------------[ First + 1]--->
259 // Append all remaining ranges from the First's RangeSet.
260 return AppendTheRest(++First, FirstEnd);
261 }
262 }
263
264 // Check if First and Second are disjoint. It means that we find
265 // the end of the Union. Exit the loop and append the Union.
266 // ---[ First ]=------------->
267 // ------------=[ Second ]--->
268 // ----MinusOne^
269 if (First->To() < Second->From() - One)
270 break;
271
272 // First is entirely inside the Union. Go next.
273 // ---[ Union ----------->
274 // ---- [ First ]-------->
275 // -------[ Second ]----->
276 // Check if First is the last in its RangeSet.
277 if (++First == FirstEnd) {
278 // Append the Union.
279 // ---[ Union ]--->
280 // -----[ First ]------->
281 // --------[ Second ]--->
282 // UnionEnd^
283 Result.emplace_back(UnionStart, Second->To());
284 // ---[ Union ]------------------>
285 // --------------[ Second + 1]--->
286 // Append all remaining ranges from the Second's RangeSet.
287 return AppendTheRest(++Second, SecondEnd);
288 }
289
290 // We know that we are at one of the two cases:
291 // case 1: --[ First ]--------->
292 // case 2: ----[ First ]------->
293 // --------[ Second ]---------->
294 // In both cases First starts after Second->From().
295 // Make sure that the loop invariant holds.
296 swapIterators(First, FirstEnd, Second, SecondEnd);
297 }
298
299 // Here First and Second are disjoint.
300 // Append the Union.
301 // ---[ Union ]--------------->
302 // -----------------[ Second ]--->
303 // ------[ First ]--------------->
304 // UnionEnd^
305 Result.emplace_back(UnionStart, First->To());
306
307 // Check if First is the last in its RangeSet.
308 if (++First == FirstEnd)
309 // ---[ Union ]--------------->
310 // --------------[ Second ]--->
311 // Append all remaining ranges from the Second's RangeSet.
312 return AppendTheRest(Second, SecondEnd);
313 }
314
315 llvm_unreachable("Normally, we should not reach here");
316}
317
319 ContainerType Result;
320 Result.push_back(From);
321 return makePersistent(std::move(Result));
322}
323
324RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
325 llvm::FoldingSetNodeID ID;
326 void *InsertPos;
327
328 From.Profile(ID);
329 ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
330
331 if (!Result) {
332 // It is cheaper to fully construct the resulting range on stack
333 // and move it to the freshly allocated buffer if we don't have
334 // a set like this already.
335 Result = construct(std::move(From));
336 Cache.InsertNode(Result, InsertPos);
337 }
338
339 return Result;
340}
341
342RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
343 void *Buffer = Arena.Allocate();
344 return new (Buffer) ContainerType(std::move(From));
345}
346
347const llvm::APSInt &RangeSet::getMinValue() const {
348 assert(!isEmpty());
349 return begin()->From();
350}
351
352const llvm::APSInt &RangeSet::getMaxValue() const {
353 assert(!isEmpty());
354 return std::prev(end())->To();
355}
356
358 assert(!isEmpty());
359 return begin()->From().isUnsigned();
360}
361
363 assert(!isEmpty());
364 return begin()->From().getBitWidth();
365}
366
368 assert(!isEmpty());
369 return APSIntType(begin()->From());
370}
371
372bool RangeSet::containsImpl(llvm::APSInt &Point) const {
373 if (isEmpty() || !pin(Point))
374 return false;
375
376 Range Dummy(Point);
377 const_iterator It = llvm::upper_bound(*this, Dummy);
378 if (It == begin())
379 return false;
380
381 return std::prev(It)->Includes(Point);
382}
383
384bool RangeSet::pin(llvm::APSInt &Point) const {
386 if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
387 return false;
388
389 Type.apply(Point);
390 return true;
391}
392
393bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
394 // This function has nine cases, the cartesian product of range-testing
395 // both the upper and lower bounds against the symbol's type.
396 // Each case requires a different pinning operation.
397 // The function returns false if the described range is entirely outside
398 // the range of values for the associated symbol.
400 APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
401 APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
402
403 switch (LowerTest) {
405 switch (UpperTest) {
407 // The entire range is outside the symbol's set of possible values.
408 // If this is a conventionally-ordered range, the state is infeasible.
409 if (Lower <= Upper)
410 return false;
411
412 // However, if the range wraps around, it spans all possible values.
413 Lower = Type.getMinValue();
414 Upper = Type.getMaxValue();
415 break;
417 // The range starts below what's possible but ends within it. Pin.
418 Lower = Type.getMinValue();
419 Type.apply(Upper);
420 break;
422 // The range spans all possible values for the symbol. Pin.
423 Lower = Type.getMinValue();
424 Upper = Type.getMaxValue();
425 break;
426 }
427 break;
429 switch (UpperTest) {
431 // The range wraps around, but all lower values are not possible.
432 Type.apply(Lower);
433 Upper = Type.getMaxValue();
434 break;
436 // The range may or may not wrap around, but both limits are valid.
437 Type.apply(Lower);
438 Type.apply(Upper);
439 break;
441 // The range starts within what's possible but ends above it. Pin.
442 Type.apply(Lower);
443 Upper = Type.getMaxValue();
444 break;
445 }
446 break;
448 switch (UpperTest) {
450 // The range wraps but is outside the symbol's set of possible values.
451 return false;
453 // The range starts above what's possible but ends within it (wrap).
454 Lower = Type.getMinValue();
455 Type.apply(Upper);
456 break;
458 // The entire range is outside the symbol's set of possible values.
459 // If this is a conventionally-ordered range, the state is infeasible.
460 if (Lower <= Upper)
461 return false;
462
463 // However, if the range wraps around, it spans all possible values.
464 Lower = Type.getMinValue();
465 Upper = Type.getMaxValue();
466 break;
467 }
468 break;
469 }
470
471 return true;
472}
473
475 llvm::APSInt Upper) {
476 if (What.isEmpty() || !What.pin(Lower, Upper))
477 return getEmptySet();
478
479 ContainerType DummyContainer;
480
481 if (Lower <= Upper) {
482 // [Lower, Upper] is a regular range.
483 //
484 // Shortcut: check that there is even a possibility of the intersection
485 // by checking the two following situations:
486 //
487 // <---[ What ]---[------]------>
488 // Lower Upper
489 // -or-
490 // <----[------]----[ What ]---->
491 // Lower Upper
492 if (What.getMaxValue() < Lower || Upper < What.getMinValue())
493 return getEmptySet();
494
495 DummyContainer.push_back(
496 Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
497 } else {
498 // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
499 //
500 // Shortcut: check that there is even a possibility of the intersection
501 // by checking the following situation:
502 //
503 // <------]---[ What ]---[------>
504 // Upper Lower
505 if (What.getMaxValue() < Lower && Upper < What.getMinValue())
506 return getEmptySet();
507
508 DummyContainer.push_back(
509 Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
510 DummyContainer.push_back(
511 Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
512 }
513
514 return intersect(*What.Impl, DummyContainer);
515}
516
517RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
518 const RangeSet::ContainerType &RHS) {
519 ContainerType Result;
520 Result.reserve(std::max(LHS.size(), RHS.size()));
521
522 const_iterator First = LHS.begin(), Second = RHS.begin(),
523 FirstEnd = LHS.end(), SecondEnd = RHS.end();
524
525 // If we ran out of ranges in one set, but not in the other,
526 // it means that those elements are definitely not in the
527 // intersection.
528 while (First != FirstEnd && Second != SecondEnd) {
529 // We want to keep the following invariant at all times:
530 //
531 // ----[ First ---------------------->
532 // --------[ Second ----------------->
533 if (Second->From() < First->From())
534 swapIterators(First, FirstEnd, Second, SecondEnd);
535
536 // Loop where the invariant holds:
537 do {
538 // Check for the following situation:
539 //
540 // ----[ First ]--------------------->
541 // ---------------[ Second ]--------->
542 //
543 // which means that...
544 if (Second->From() > First->To()) {
545 // ...First is not in the intersection.
546 //
547 // We should move on to the next range after First and break out of the
548 // loop because the invariant might not be true.
549 ++First;
550 break;
551 }
552
553 // We have a guaranteed intersection at this point!
554 // And this is the current situation:
555 //
556 // ----[ First ]----------------->
557 // -------[ Second ------------------>
558 //
559 // Additionally, it definitely starts with Second->From().
560 const llvm::APSInt &IntersectionStart = Second->From();
561
562 // It is important to know which of the two ranges' ends
563 // is greater. That "longer" range might have some other
564 // intersections, while the "shorter" range might not.
565 if (Second->To() > First->To()) {
566 // Here we make a decision to keep First as the "longer"
567 // range.
568 swapIterators(First, FirstEnd, Second, SecondEnd);
569 }
570
571 // At this point, we have the following situation:
572 //
573 // ---- First ]-------------------->
574 // ---- Second ]--[ Second+1 ---------->
575 //
576 // We don't know the relationship between First->From and
577 // Second->From and we don't know whether Second+1 intersects
578 // with First.
579 //
580 // However, we know that [IntersectionStart, Second->To] is
581 // a part of the intersection...
582 Result.push_back(Range(IntersectionStart, Second->To()));
583 ++Second;
584 // ...and that the invariant will hold for a valid Second+1
585 // because First->From <= Second->To < (Second+1)->From.
586 } while (Second != SecondEnd);
587 }
588
589 if (Result.empty())
590 return getEmptySet();
591
592 return makePersistent(std::move(Result));
593}
594
596 // Shortcut: let's see if the intersection is even possible.
597 if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
598 RHS.getMaxValue() < LHS.getMinValue())
599 return getEmptySet();
600
601 return intersect(*LHS.Impl, *RHS.Impl);
602}
603
605 if (LHS.containsImpl(Point))
606 return getRangeSet(ValueFactory.getValue(Point));
607
608 return getEmptySet();
609}
610
612 if (What.isEmpty())
613 return getEmptySet();
614
615 const llvm::APSInt SampleValue = What.getMinValue();
616 const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
617 const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
618
619 ContainerType Result;
620 Result.reserve(What.size() + (SampleValue == MIN));
621
622 // Handle a special case for MIN value.
623 const_iterator It = What.begin();
624 const_iterator End = What.end();
625
626 const llvm::APSInt &From = It->From();
627 const llvm::APSInt &To = It->To();
628
629 if (From == MIN) {
630 // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
631 if (To == MAX) {
632 return What;
633 }
634
635 const_iterator Last = std::prev(End);
636
637 // Try to find and unite the following ranges:
638 // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
639 if (Last->To() == MAX) {
640 // It means that in the original range we have ranges
641 // [MIN, A], ... , [B, MAX]
642 // And the result should be [MIN, -B], ..., [-A, MAX]
643 Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
644 // We already negated Last, so we can skip it.
645 End = Last;
646 } else {
647 // Add a separate range for the lowest value.
648 Result.emplace_back(MIN, MIN);
649 }
650
651 // Skip adding the second range in case when [From, To] are [MIN, MIN].
652 if (To != MIN) {
653 Result.emplace_back(ValueFactory.getValue(-To), MAX);
654 }
655
656 // Skip the first range in the loop.
657 ++It;
658 }
659
660 // Negate all other ranges.
661 for (; It != End; ++It) {
662 // Negate int values.
663 const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
664 const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
665
666 // Add a negated range.
667 Result.emplace_back(NewFrom, NewTo);
668 }
669
670 llvm::sort(Result);
671 return makePersistent(std::move(Result));
672}
673
674// Convert range set to the given integral type using truncation and promotion.
675// This works similar to APSIntType::apply function but for the range set.
677 // Set is empty or NOOP (aka cast to the same type).
678 if (What.isEmpty() || What.getAPSIntType() == Ty)
679 return What;
680
681 const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
682 const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
683 const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
684
685 if (IsTruncation)
686 return makePersistent(truncateTo(What, Ty));
687
688 // Here we handle 2 cases:
689 // - IsConversion && !IsPromotion.
690 // In this case we handle changing a sign with same bitwidth: char -> uchar,
691 // uint -> int. Here we convert negatives to positives and positives which
692 // is out of range to negatives. We use convertTo function for that.
693 // - IsConversion && IsPromotion && !What.isUnsigned().
694 // In this case we handle changing a sign from signeds to unsigneds with
695 // higher bitwidth: char -> uint, int-> uint64. The point is that we also
696 // need convert negatives to positives and use convertTo function as well.
697 // For example, we don't need such a convertion when converting unsigned to
698 // signed with higher bitwidth, because all the values of unsigned is valid
699 // for the such signed.
700 if (IsConversion && (!IsPromotion || !What.isUnsigned()))
701 return makePersistent(convertTo(What, Ty));
702
703 assert(IsPromotion && "Only promotion operation from unsigneds left.");
704 return makePersistent(promoteTo(What, Ty));
705}
706
708 assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
709 return castTo(What, ValueFactory.getAPSIntType(T));
710}
711
712RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
713 APSIntType Ty) {
714 using llvm::APInt;
715 using llvm::APSInt;
716 ContainerType Result;
717 ContainerType Dummy;
718 // CastRangeSize is an amount of all possible values of cast type.
719 // Example: `char` has 256 values; `short` has 65536 values.
720 // But in fact we use `amount of values` - 1, because
721 // we can't keep `amount of values of UINT64` inside uint64_t.
722 // E.g. 256 is an amount of all possible values of `char` and we can't keep
723 // it inside `char`.
724 // And it's OK, it's enough to do correct calculations.
725 uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
726 for (const Range &R : What) {
727 // Get bounds of the given range.
728 APSInt FromInt = R.From();
729 APSInt ToInt = R.To();
730 // CurrentRangeSize is an amount of all possible values of the current
731 // range minus one.
732 uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
733 // This is an optimization for a specific case when this Range covers
734 // the whole range of the target type.
735 Dummy.clear();
736 if (CurrentRangeSize >= CastRangeSize) {
737 Dummy.emplace_back(ValueFactory.getMinValue(Ty),
738 ValueFactory.getMaxValue(Ty));
739 Result = std::move(Dummy);
740 break;
741 }
742 // Cast the bounds.
743 Ty.apply(FromInt);
744 Ty.apply(ToInt);
745 const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
746 const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
747 if (FromInt > ToInt) {
748 Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
749 Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
750 } else
751 Dummy.emplace_back(PersistentFrom, PersistentTo);
752 // Every range retrieved after truncation potentialy has garbage values.
753 // So, we have to unite every next range with the previouses.
754 Result = unite(Result, Dummy);
755 }
756
757 return Result;
758}
759
760// Divide the convertion into two phases (presented as loops here).
761// First phase(loop) works when casted values go in ascending order.
762// E.g. char{1,3,5,127} -> uint{1,3,5,127}
763// Interrupt the first phase and go to second one when casted values start
764// go in descending order. That means that we crossed over the middle of
765// the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
766// For instance:
767// 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
768// Here we put {1,3,5} to one array and {-128, -1} to another
769// 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
770// Here we put {128,129,255} to one array and {0,1,3} to another.
771// After that we unite both arrays.
772// NOTE: We don't just concatenate the arrays, because they may have
773// adjacent ranges, e.g.:
774// 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
775// unite -> uchar(0, 255)
776// 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
777// unite -> uchar(-2, 1)
778RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
779 APSIntType Ty) {
780 using llvm::APInt;
781 using llvm::APSInt;
782 using Bounds = std::pair<const APSInt &, const APSInt &>;
783 ContainerType AscendArray;
784 ContainerType DescendArray;
785 auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
786 // Get bounds of the given range.
787 APSInt FromInt = R.From();
788 APSInt ToInt = R.To();
789 // Cast the bounds.
790 Ty.apply(FromInt);
791 Ty.apply(ToInt);
792 return {VF.getValue(FromInt), VF.getValue(ToInt)};
793 };
794 // Phase 1. Fill the first array.
795 APSInt LastConvertedInt = Ty.getMinValue();
796 const auto *It = What.begin();
797 const auto *E = What.end();
798 while (It != E) {
799 Bounds NewBounds = CastRange(*(It++));
800 // If values stop going acsending order, go to the second phase(loop).
801 if (NewBounds.first < LastConvertedInt) {
802 DescendArray.emplace_back(NewBounds.first, NewBounds.second);
803 break;
804 }
805 // If the range contains a midpoint, then split the range.
806 // E.g. char(-5, 5) -> uchar(251, 5)
807 // Here we shall add a range (251, 255) to the first array and (0, 5) to the
808 // second one.
809 if (NewBounds.first > NewBounds.second) {
810 DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
811 AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
812 } else
813 // Values are going acsending order.
814 AscendArray.emplace_back(NewBounds.first, NewBounds.second);
815 LastConvertedInt = NewBounds.first;
816 }
817 // Phase 2. Fill the second array.
818 while (It != E) {
819 Bounds NewBounds = CastRange(*(It++));
820 DescendArray.emplace_back(NewBounds.first, NewBounds.second);
821 }
822 // Unite both arrays.
823 return unite(AscendArray, DescendArray);
824}
825
826/// Promotion from unsigneds to signeds/unsigneds left.
827RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
828 APSIntType Ty) {
829 ContainerType Result;
830 // We definitely know the size of the result set.
831 Result.reserve(What.size());
832
833 // Each unsigned value fits every larger type without any changes,
834 // whether the larger type is signed or unsigned. So just promote and push
835 // back each range one by one.
836 for (const Range &R : What) {
837 // Get bounds of the given range.
838 llvm::APSInt FromInt = R.From();
839 llvm::APSInt ToInt = R.To();
840 // Cast the bounds.
841 Ty.apply(FromInt);
842 Ty.apply(ToInt);
843 Result.emplace_back(ValueFactory.getValue(FromInt),
844 ValueFactory.getValue(ToInt));
845 }
846 return Result;
847}
848
850 const llvm::APSInt &Point) {
851 if (!From.contains(Point))
852 return From;
853
854 llvm::APSInt Upper = Point;
855 llvm::APSInt Lower = Point;
856
857 ++Upper;
858 --Lower;
859
860 // Notice that the lower bound is greater than the upper bound.
861 return intersect(From, Upper, Lower);
862}
863
864LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
865 OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
866}
867LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
868
869LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
870 OS << "{ ";
871 llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
872 OS << " }";
873}
874LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
875
877
878namespace {
879class EquivalenceClass;
880} // end anonymous namespace
881
882REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
883REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
884REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)
885
886REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
887REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
888
889namespace {
890/// This class encapsulates a set of symbols equal to each other.
891///
892/// The main idea of the approach requiring such classes is in narrowing
893/// and sharing constraints between symbols within the class. Also we can
894/// conclude that there is no practical need in storing constraints for
895/// every member of the class separately.
896///
897/// Main terminology:
898///
899/// * "Equivalence class" is an object of this class, which can be efficiently
900/// compared to other classes. It represents the whole class without
901/// storing the actual in it. The members of the class however can be
902/// retrieved from the state.
903///
904/// * "Class members" are the symbols corresponding to the class. This means
905/// that A == B for every member symbols A and B from the class. Members of
906/// each class are stored in the state.
907///
908/// * "Trivial class" is a class that has and ever had only one same symbol.
909///
910/// * "Merge operation" merges two classes into one. It is the main operation
911/// to produce non-trivial classes.
912/// If, at some point, we can assume that two symbols from two distinct
913/// classes are equal, we can merge these classes.
914class EquivalenceClass : public llvm::FoldingSetNode {
915public:
916 /// Find equivalence class for the given symbol in the given state.
917 [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
918 SymbolRef Sym);
919
920 /// Merge classes for the given symbols and return a new state.
921 [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
922 ProgramStateRef State,
924 SymbolRef Second);
925 // Merge this class with the given class and return a new state.
926 [[nodiscard]] inline ProgramStateRef
927 merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
928
929 /// Return a set of class members for the given state.
930 [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;
931
932 /// Return true if the current class is trivial in the given state.
933 /// A class is trivial if and only if there is not any member relations stored
934 /// to it in State/ClassMembers.
935 /// An equivalence class with one member might seem as it does not hold any
936 /// meaningful information, i.e. that is a tautology. However, during the
937 /// removal of dead symbols we do not remove classes with one member for
938 /// resource and performance reasons. Consequently, a class with one member is
939 /// not necessarily trivial. It could happen that we have a class with two
940 /// members and then during the removal of dead symbols we remove one of its
941 /// members. In this case, the class is still non-trivial (it still has the
942 /// mappings in ClassMembers), even though it has only one member.
943 [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;
944
945 /// Return true if the current class is trivial and its only member is dead.
946 [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
947 SymbolReaper &Reaper) const;
948
949 [[nodiscard]] static inline ProgramStateRef
950 markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
951 SymbolRef Second);
952 [[nodiscard]] static inline ProgramStateRef
953 markDisequal(RangeSet::Factory &F, ProgramStateRef State,
954 EquivalenceClass First, EquivalenceClass Second);
955 [[nodiscard]] inline ProgramStateRef
956 markDisequal(RangeSet::Factory &F, ProgramStateRef State,
957 EquivalenceClass Other) const;
958 [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
959 SymbolRef Sym);
960 [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
961 [[nodiscard]] inline ClassSet
962 getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
963
964 [[nodiscard]] static inline std::optional<bool>
965 areEqual(ProgramStateRef State, EquivalenceClass First,
966 EquivalenceClass Second);
967 [[nodiscard]] static inline std::optional<bool>
968 areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
969
970 /// Remove one member from the class.
971 [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
972 const SymbolRef Old);
973
974 /// Iterate over all symbols and try to simplify them.
975 [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
977 ProgramStateRef State,
978 EquivalenceClass Class);
979
980 void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
981 LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
982 dumpToStream(State, llvm::errs());
983 }
984
985 /// Check equivalence data for consistency.
986 [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
987 isClassDataConsistent(ProgramStateRef State);
988
989 [[nodiscard]] QualType getType() const {
990 return getRepresentativeSymbol()->getType();
991 }
992
993 EquivalenceClass() = delete;
994 EquivalenceClass(const EquivalenceClass &) = default;
995 EquivalenceClass &operator=(const EquivalenceClass &) = delete;
996 EquivalenceClass(EquivalenceClass &&) = default;
997 EquivalenceClass &operator=(EquivalenceClass &&) = delete;
998
999 bool operator==(const EquivalenceClass &Other) const {
1000 return ID == Other.ID;
1001 }
1002 bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
1003 bool operator!=(const EquivalenceClass &Other) const {
1004 return !operator==(Other);
1005 }
1006
1007 static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
1008 ID.AddInteger(CID);
1009 }
1010
1011 void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
1012
1013private:
1014 /* implicit */ EquivalenceClass(SymbolRef Sym)
1015 : ID(reinterpret_cast<uintptr_t>(Sym)) {}
1016
1017 /// This function is intended to be used ONLY within the class.
1018 /// The fact that ID is a pointer to a symbol is an implementation detail
1019 /// and should stay that way.
1020 /// In the current implementation, we use it to retrieve the only member
1021 /// of the trivial class.
1022 SymbolRef getRepresentativeSymbol() const {
1023 return reinterpret_cast<SymbolRef>(ID);
1024 }
1025 static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
1026
1027 inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
1028 SymbolSet Members, EquivalenceClass Other,
1029 SymbolSet OtherMembers);
1030
1031 static inline bool
1032 addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
1034 EquivalenceClass First, EquivalenceClass Second);
1035
1036 /// This is a unique identifier of the class.
1037 uintptr_t ID;
1038};
1039
1040//===----------------------------------------------------------------------===//
1041// Constraint functions
1042//===----------------------------------------------------------------------===//
1043
1044[[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
1045areFeasible(ConstraintRangeTy Constraints) {
1046 return llvm::none_of(
1047 Constraints,
1048 [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
1049 return ClassConstraint.second.isEmpty();
1050 });
1051}
1052
1053[[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1054 EquivalenceClass Class) {
1055 return State->get<ConstraintRange>(Class);
1056}
1057
1058[[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1059 SymbolRef Sym) {
1060 return getConstraint(State, EquivalenceClass::find(State, Sym));
1061}
1062
1063[[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
1064 EquivalenceClass Class,
1065 RangeSet Constraint) {
1066 return State->set<ConstraintRange>(Class, Constraint);
1067}
1068
1069[[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
1070 ConstraintRangeTy Constraints) {
1071 return State->set<ConstraintRange>(Constraints);
1072}
1073
1074//===----------------------------------------------------------------------===//
1075// Equality/diseqiality abstraction
1076//===----------------------------------------------------------------------===//
1077
1078/// A small helper function for detecting symbolic (dis)equality.
1079///
1080/// Equality check can have different forms (like a == b or a - b) and this
1081/// class encapsulates those away if the only thing the user wants to check -
1082/// whether it's equality/diseqiality or not.
1083///
1084/// \returns true if assuming this Sym to be true means equality of operands
1085/// false if it means disequality of operands
1086/// std::nullopt otherwise
1087std::optional<bool> meansEquality(const SymSymExpr *Sym) {
1088 switch (Sym->getOpcode()) {
1089 case BO_Sub:
1090 // This case is: A - B != 0 -> disequality check.
1091 return false;
1092 case BO_EQ:
1093 // This case is: A == B != 0 -> equality check.
1094 return true;
1095 case BO_NE:
1096 // This case is: A != B != 0 -> diseqiality check.
1097 return false;
1098 default:
1099 return std::nullopt;
1100 }
1101}
1102
1103//===----------------------------------------------------------------------===//
1104// Intersection functions
1105//===----------------------------------------------------------------------===//
1106
1107template <class SecondTy, class... RestTy>
1108[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1109 SecondTy Second, RestTy... Tail);
1110
1111template <class... RangeTy> struct IntersectionTraits;
1112
1113template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
1114 // Found RangeSet, no need to check any further
1115 using Type = RangeSet;
1116};
1117
1118template <> struct IntersectionTraits<> {
1119 // We ran out of types, and we didn't find any RangeSet, so the result should
1120 // be optional.
1121 using Type = std::optional<RangeSet>;
1122};
1123
1124template <class OptionalOrPointer, class... TailTy>
1125struct IntersectionTraits<OptionalOrPointer, TailTy...> {
1126 // If current type is Optional or a raw pointer, we should keep looking.
1127 using Type = typename IntersectionTraits<TailTy...>::Type;
1128};
1129
1130template <class EndTy>
1131[[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
1132 // If the list contains only RangeSet or std::optional<RangeSet>, simply
1133 // return that range set.
1134 return End;
1135}
1136
1137[[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet>
1138intersect(RangeSet::Factory &F, const RangeSet *End) {
1139 // This is an extraneous conversion from a raw pointer into
1140 // std::optional<RangeSet>
1141 if (End) {
1142 return *End;
1143 }
1144 return std::nullopt;
1145}
1146
1147template <class... RestTy>
1148[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1149 RangeSet Second, RestTy... Tail) {
1150 // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1151 // of the function and can be sure that the result is RangeSet.
1152 return intersect(F, F.intersect(Head, Second), Tail...);
1153}
1154
1155template <class SecondTy, class... RestTy>
1156[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1157 SecondTy Second, RestTy... Tail) {
1158 if (Second) {
1159 // Here we call the <RangeSet,RangeSet,...> version of the function...
1160 return intersect(F, Head, *Second, Tail...);
1161 }
1162 // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1163 // means that the result is definitely RangeSet.
1164 return intersect(F, Head, Tail...);
1165}
1166
1167/// Main generic intersect function.
1168/// It intersects all of the given range sets. If some of the given arguments
1169/// don't hold a range set (nullptr or std::nullopt), the function will skip
1170/// them.
1171///
1172/// Available representations for the arguments are:
1173/// * RangeSet
1174/// * std::optional<RangeSet>
1175/// * RangeSet *
1176/// Pointer to a RangeSet is automatically assumed to be nullable and will get
1177/// checked as well as the optional version. If this behaviour is undesired,
1178/// please dereference the pointer in the call.
1179///
1180/// Return type depends on the arguments' types. If we can be sure in compile
1181/// time that there will be a range set as a result, the returning type is
1182/// simply RangeSet, in other cases we have to back off to
1183/// std::optional<RangeSet>.
1184///
1185/// Please, prefer optional range sets to raw pointers. If the last argument is
1186/// a raw pointer and all previous arguments are std::nullopt, it will cost one
1187/// additional check to convert RangeSet * into std::optional<RangeSet>.
1188template <class HeadTy, class SecondTy, class... RestTy>
1189[[nodiscard]] inline
1190 typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
1191 intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
1192 RestTy... Tail) {
1193 if (Head) {
1194 return intersect(F, *Head, Second, Tail...);
1195 }
1196 return intersect(F, Second, Tail...);
1197}
1198
1199//===----------------------------------------------------------------------===//
1200// Symbolic reasoning logic
1201//===----------------------------------------------------------------------===//
1202
1203/// A little component aggregating all of the reasoning we have about
1204/// the ranges of symbolic expressions.
1205///
1206/// Even when we don't know the exact values of the operands, we still
1207/// can get a pretty good estimate of the result's range.
1208class SymbolicRangeInferrer
1209 : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
1210public:
1211 template <class SourceType>
1212 static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
1213 SourceType Origin) {
1214 SymbolicRangeInferrer Inferrer(F, State);
1215 return Inferrer.infer(Origin);
1216 }
1217
1218 RangeSet VisitSymExpr(SymbolRef Sym) {
1219 if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym))
1220 return *RS;
1221 // If we've reached this line, the actual type of the symbolic
1222 // expression is not supported for advanced inference.
1223 // In this case, we simply backoff to the default "let's simply
1224 // infer the range from the expression's type".
1225 return infer(Sym->getType());
1226 }
1227
1228 RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
1229 if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
1230 return *RS;
1231 return infer(USE->getType());
1232 }
1233
1234 RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
1235 return VisitBinaryOperator(Sym);
1236 }
1237
1238 RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
1239 return VisitBinaryOperator(Sym);
1240 }
1241
1242 RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
1243 return intersect(
1244 RangeFactory,
1245 // If Sym is a difference of symbols A - B, then maybe we have range
1246 // set stored for B - A.
1247 //
1248 // If we have range set stored for both A - B and B - A then
1249 // calculate the effective range set by intersecting the range set
1250 // for A - B and the negated range set of B - A.
1251 getRangeForNegatedSymSym(SSE),
1252 // If Sym is a comparison expression (except <=>),
1253 // find any other comparisons with the same operands.
1254 // See function description.
1255 getRangeForComparisonSymbol(SSE),
1256 // If Sym is (dis)equality, we might have some information
1257 // on that in our equality classes data structure.
1258 getRangeForEqualities(SSE),
1259 // And we should always check what we can get from the operands.
1260 VisitBinaryOperator(SSE));
1261 }
1262
1263private:
1264 SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
1265 : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
1266
1267 /// Infer range information from the given integer constant.
1268 ///
1269 /// It's not a real "inference", but is here for operating with
1270 /// sub-expressions in a more polymorphic manner.
1271 RangeSet inferAs(const llvm::APSInt &Val, QualType) {
1272 return {RangeFactory, Val};
1273 }
1274
1275 /// Infer range information from symbol in the context of the given type.
1276 RangeSet inferAs(SymbolRef Sym, QualType DestType) {
1277 QualType ActualType = Sym->getType();
1278 // Check that we can reason about the symbol at all.
1279 if (ActualType->isIntegralOrEnumerationType() ||
1280 Loc::isLocType(ActualType)) {
1281 return infer(Sym);
1282 }
1283 // Otherwise, let's simply infer from the destination type.
1284 // We couldn't figure out nothing else about that expression.
1285 return infer(DestType);
1286 }
1287
1288 RangeSet infer(SymbolRef Sym) {
1289 return intersect(RangeFactory,
1290 // Of course, we should take the constraint directly
1291 // associated with this symbol into consideration.
1292 getConstraint(State, Sym),
1293 // Apart from the Sym itself, we can infer quite a lot if
1294 // we look into subexpressions of Sym.
1295 Visit(Sym));
1296 }
1297
1298 RangeSet infer(EquivalenceClass Class) {
1299 if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
1300 return *AssociatedConstraint;
1301
1302 return infer(Class.getType());
1303 }
1304
1305 /// Infer range information solely from the type.
1306 RangeSet infer(QualType T) {
1307 // Lazily generate a new RangeSet representing all possible values for the
1308 // given symbol type.
1309 RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
1310 ValueFactory.getMaxValue(T));
1311
1312 // References are known to be non-zero.
1313 if (T->isReferenceType())
1314 return assumeNonZero(Result, T);
1315
1316 return Result;
1317 }
1318
1319 template <class BinarySymExprTy>
1320 RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
1321 // TODO #1: VisitBinaryOperator implementation might not make a good
1322 // use of the inferred ranges. In this case, we might be calculating
1323 // everything for nothing. This being said, we should introduce some
1324 // sort of laziness mechanism here.
1325 //
1326 // TODO #2: We didn't go into the nested expressions before, so it
1327 // might cause us spending much more time doing the inference.
1328 // This can be a problem for deeply nested expressions that are
1329 // involved in conditions and get tested continuously. We definitely
1330 // need to address this issue and introduce some sort of caching
1331 // in here.
1332 QualType ResultType = Sym->getType();
1333 return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
1334 Sym->getOpcode(),
1335 inferAs(Sym->getRHS(), ResultType), ResultType);
1336 }
1337
1338 RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
1339 RangeSet RHS, QualType T);
1340
1341 //===----------------------------------------------------------------------===//
1342 // Ranges and operators
1343 //===----------------------------------------------------------------------===//
1344
1345 /// Return a rough approximation of the given range set.
1346 ///
1347 /// For the range set:
1348 /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
1349 /// it will return the range [x_0, y_N].
1350 static Range fillGaps(RangeSet Origin) {
1351 assert(!Origin.isEmpty());
1352 return {Origin.getMinValue(), Origin.getMaxValue()};
1353 }
1354
1355 /// Try to convert given range into the given type.
1356 ///
1357 /// It will return std::nullopt only when the trivial conversion is possible.
1358 std::optional<Range> convert(const Range &Origin, APSIntType To) {
1359 if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
1360 To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
1361 return std::nullopt;
1362 }
1363 return Range(ValueFactory.Convert(To, Origin.From()),
1364 ValueFactory.Convert(To, Origin.To()));
1365 }
1366
1367 template <BinaryOperator::Opcode Op>
1368 RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
1369 assert(!LHS.isEmpty() && !RHS.isEmpty());
1370
1371 Range CoarseLHS = fillGaps(LHS);
1372 Range CoarseRHS = fillGaps(RHS);
1373
1374 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1375
1376 // We need to convert ranges to the resulting type, so we can compare values
1377 // and combine them in a meaningful (in terms of the given operation) way.
1378 auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
1379 auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
1380
1381 // It is hard to reason about ranges when conversion changes
1382 // borders of the ranges.
1383 if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
1384 return infer(T);
1385 }
1386
1387 return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
1388 }
1389
1390 template <BinaryOperator::Opcode Op>
1391 RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
1392 return infer(T);
1393 }
1394
1395 /// Return a symmetrical range for the given range and type.
1396 ///
1397 /// If T is signed, return the smallest range [-x..x] that covers the original
1398 /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
1399 /// exist due to original range covering min(T)).
1400 ///
1401 /// If T is unsigned, return the smallest range [0..x] that covers the
1402 /// original range.
1403 Range getSymmetricalRange(Range Origin, QualType T) {
1404 APSIntType RangeType = ValueFactory.getAPSIntType(T);
1405
1406 if (RangeType.isUnsigned()) {
1407 return Range(ValueFactory.getMinValue(RangeType), Origin.To());
1408 }
1409
1410 if (Origin.From().isMinSignedValue()) {
1411 // If mini is a minimal signed value, absolute value of it is greater
1412 // than the maximal signed value. In order to avoid these
1413 // complications, we simply return the whole range.
1414 return {ValueFactory.getMinValue(RangeType),
1415 ValueFactory.getMaxValue(RangeType)};
1416 }
1417
1418 // At this point, we are sure that the type is signed and we can safely
1419 // use unary - operator.
1420 //
1421 // While calculating absolute maximum, we can use the following formula
1422 // because of these reasons:
1423 // * If From >= 0 then To >= From and To >= -From.
1424 // AbsMax == To == max(To, -From)
1425 // * If To <= 0 then -From >= -To and -From >= From.
1426 // AbsMax == -From == max(-From, To)
1427 // * Otherwise, From <= 0, To >= 0, and
1428 // AbsMax == max(abs(From), abs(To))
1429 llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
1430
1431 // Intersection is guaranteed to be non-empty.
1432 return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
1433 }
1434
1435 /// Return a range set subtracting zero from \p Domain.
1436 RangeSet assumeNonZero(RangeSet Domain, QualType T) {
1437 APSIntType IntType = ValueFactory.getAPSIntType(T);
1438 return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
1439 }
1440
1441 template <typename ProduceNegatedSymFunc>
1442 std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
1443 QualType T) {
1444 // Do not negate if the type cannot be meaningfully negated.
1447 return std::nullopt;
1448
1449 if (SymbolRef NegatedSym = F())
1450 if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
1451 return RangeFactory.negate(*NegatedRange);
1452
1453 return std::nullopt;
1454 }
1455
1456 std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
1457 // Just get the operand when we negate a symbol that is already negated.
1458 // -(-a) == a
1459 return getRangeForNegatedExpr(
1460 [USE]() -> SymbolRef {
1461 if (USE->getOpcode() == UO_Minus)
1462 return USE->getOperand();
1463 return nullptr;
1464 },
1465 USE->getType());
1466 }
1467
1468 std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
1469 return getRangeForNegatedExpr(
1470 [SSE, State = this->State]() -> SymbolRef {
1471 if (SSE->getOpcode() == BO_Sub)
1472 return State->getSymbolManager().getSymSymExpr(
1473 SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
1474 return nullptr;
1475 },
1476 SSE->getType());
1477 }
1478
1479 std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
1480 return getRangeForNegatedExpr(
1481 [Sym, State = this->State]() {
1482 return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
1483 Sym->getType());
1484 },
1485 Sym->getType());
1486 }
1487
1488 // Returns ranges only for binary comparison operators (except <=>)
1489 // when left and right operands are symbolic values.
1490 // Finds any other comparisons with the same operands.
1491 // Then do logical calculations and refuse impossible branches.
1492 // E.g. (x < y) and (x > y) at the same time are impossible.
1493 // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
1494 // E.g. (x == y) and (y == x) are just reversed but the same.
1495 // It covers all possible combinations (see CmpOpTable description).
1496 // Note that `x` and `y` can also stand for subexpressions,
1497 // not only for actual symbols.
1498 std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
1499 const BinaryOperatorKind CurrentOP = SSE->getOpcode();
1500
1501 // We currently do not support <=> (C++20).
1502 if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
1503 return std::nullopt;
1504
1505 static const OperatorRelationsTable CmpOpTable{};
1506
1507 const SymExpr *LHS = SSE->getLHS();
1508 const SymExpr *RHS = SSE->getRHS();
1509 QualType T = SSE->getType();
1510
1511 SymbolManager &SymMgr = State->getSymbolManager();
1512
1513 // We use this variable to store the last queried operator (`QueriedOP`)
1514 // for which the `getCmpOpState` returned with `Unknown`. If there are two
1515 // different OPs that returned `Unknown` then we have to query the special
1516 // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
1517 // never returns `Unknown`, so `CurrentOP` is a good initial value.
1518 BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
1519
1520 // Loop goes through all of the columns exept the last one ('UnknownX2').
1521 // We treat `UnknownX2` column separately at the end of the loop body.
1522 for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
1523
1524 // Let's find an expression e.g. (x < y).
1526 const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
1527 const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
1528
1529 // If ranges were not previously found,
1530 // try to find a reversed expression (y > x).
1531 if (!QueriedRangeSet) {
1532 const BinaryOperatorKind ROP =
1534 SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
1535 QueriedRangeSet = getConstraint(State, SymSym);
1536 }
1537
1538 if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
1539 continue;
1540
1541 const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
1542 const bool isInFalseBranch =
1543 ConcreteValue ? (*ConcreteValue == 0) : false;
1544
1545 // If it is a false branch, we shall be guided by opposite operator,
1546 // because the table is made assuming we are in the true branch.
1547 // E.g. when (x <= y) is false, then (x > y) is true.
1548 if (isInFalseBranch)
1549 QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
1550
1552 CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
1553
1554 if (BranchState == OperatorRelationsTable::Unknown) {
1555 if (LastQueriedOpToUnknown != CurrentOP &&
1556 LastQueriedOpToUnknown != QueriedOP) {
1557 // If we got the Unknown state for both different operators.
1558 // if (x <= y) // assume true
1559 // if (x != y) // assume true
1560 // if (x < y) // would be also true
1561 // Get a state from `UnknownX2` column.
1562 BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
1563 } else {
1564 LastQueriedOpToUnknown = QueriedOP;
1565 continue;
1566 }
1567 }
1568
1569 return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
1570 : getFalseRange(T);
1571 }
1572
1573 return std::nullopt;
1574 }
1575
1576 std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
1577 std::optional<bool> Equality = meansEquality(Sym);
1578
1579 if (!Equality)
1580 return std::nullopt;
1581
1582 if (std::optional<bool> AreEqual =
1583 EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
1584 // Here we cover two cases at once:
1585 // * if Sym is equality and its operands are known to be equal -> true
1586 // * if Sym is disequality and its operands are disequal -> true
1587 if (*AreEqual == *Equality) {
1588 return getTrueRange(Sym->getType());
1589 }
1590 // Opposite combinations result in false.
1591 return getFalseRange(Sym->getType());
1592 }
1593
1594 return std::nullopt;
1595 }
1596
1597 RangeSet getTrueRange(QualType T) {
1598 RangeSet TypeRange = infer(T);
1599 return assumeNonZero(TypeRange, T);
1600 }
1601
1602 RangeSet getFalseRange(QualType T) {
1603 const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
1604 return RangeSet(RangeFactory, Zero);
1605 }
1606
1607 BasicValueFactory &ValueFactory;
1608 RangeSet::Factory &RangeFactory;
1609 ProgramStateRef State;
1610};
1611
1612//===----------------------------------------------------------------------===//
1613// Range-based reasoning about symbolic operations
1614//===----------------------------------------------------------------------===//
1615
1616template <>
1617RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS,
1618 RangeSet RHS,
1619 QualType T) {
1620 assert(!LHS.isEmpty() && !RHS.isEmpty());
1621
1622 if (LHS.getAPSIntType() == RHS.getAPSIntType()) {
1623 if (intersect(RangeFactory, LHS, RHS).isEmpty())
1624 return getTrueRange(T);
1625
1626 } else {
1627 // We can only lose information if we are casting smaller signed type to
1628 // bigger unsigned type. For e.g.,
1629 // LHS (unsigned short): [2, USHRT_MAX]
1630 // RHS (signed short): [SHRT_MIN, 0]
1631 //
1632 // Casting RHS to LHS type will leave us with overlapping values
1633 // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
1634 //
1635 // We can avoid this by checking if signed type's maximum value is lesser
1636 // than unsigned type's minimum value.
1637
1638 // If both have different signs then only we can get more information.
1639 if (LHS.isUnsigned() != RHS.isUnsigned()) {
1640 if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) {
1641 if (RHS.getMaxValue().isNegative() ||
1642 LHS.getAPSIntType().convert(RHS.getMaxValue()) < LHS.getMinValue())
1643 return getTrueRange(T);
1644
1645 } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) {
1646 if (LHS.getMaxValue().isNegative() ||
1647 RHS.getAPSIntType().convert(LHS.getMaxValue()) < RHS.getMinValue())
1648 return getTrueRange(T);
1649 }
1650 }
1651
1652 // Both RangeSets should be casted to bigger unsigned type.
1653 APSIntType CastingType(std::max(LHS.getBitWidth(), RHS.getBitWidth()),
1654 LHS.isUnsigned() || RHS.isUnsigned());
1655
1656 RangeSet CastedLHS = RangeFactory.castTo(LHS, CastingType);
1657 RangeSet CastedRHS = RangeFactory.castTo(RHS, CastingType);
1658
1659 if (intersect(RangeFactory, CastedLHS, CastedRHS).isEmpty())
1660 return getTrueRange(T);
1661 }
1662
1663 // In all other cases, the resulting range cannot be deduced.
1664 return infer(T);
1665}
1666
1667template <>
1668RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
1669 QualType T) {
1670 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1671 llvm::APSInt Zero = ResultType.getZeroValue();
1672
1673 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1674 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1675
1676 bool IsLHSNegative = LHS.To() < Zero;
1677 bool IsRHSNegative = RHS.To() < Zero;
1678
1679 // Check if both ranges have the same sign.
1680 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1681 (IsLHSNegative && IsRHSNegative)) {
1682 // The result is definitely greater or equal than any of the operands.
1683 const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
1684
1685 // We estimate maximal value for positives as the maximal value for the
1686 // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
1687 //
1688 // TODO: We basically, limit the resulting range from below, but don't do
1689 // anything with the upper bound.
1690 //
1691 // For positive operands, it can be done as follows: for the upper
1692 // bound of LHS and RHS we calculate the most significant bit set.
1693 // Let's call it the N-th bit. Then we can estimate the maximal
1694 // number to be 2^(N+1)-1, i.e. the number with all the bits up to
1695 // the N-th bit set.
1696 const llvm::APSInt &Max = IsLHSNegative
1697 ? ValueFactory.getValue(--Zero)
1698 : ValueFactory.getMaxValue(ResultType);
1699
1700 return {RangeFactory, ValueFactory.getValue(Min), Max};
1701 }
1702
1703 // Otherwise, let's check if at least one of the operands is negative.
1704 if (IsLHSNegative || IsRHSNegative) {
1705 // This means that the result is definitely negative as well.
1706 return {RangeFactory, ValueFactory.getMinValue(ResultType),
1707 ValueFactory.getValue(--Zero)};
1708 }
1709
1710 RangeSet DefaultRange = infer(T);
1711
1712 // It is pretty hard to reason about operands with different signs
1713 // (and especially with possibly different signs). We simply check if it
1714 // can be zero. In order to conclude that the result could not be zero,
1715 // at least one of the operands should be definitely not zero itself.
1716 if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
1717 return assumeNonZero(DefaultRange, T);
1718 }
1719
1720 // Nothing much else to do here.
1721 return DefaultRange;
1722}
1723
1724template <>
1725RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
1726 Range RHS,
1727 QualType T) {
1728 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1729 llvm::APSInt Zero = ResultType.getZeroValue();
1730
1731 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1732 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1733
1734 bool IsLHSNegative = LHS.To() < Zero;
1735 bool IsRHSNegative = RHS.To() < Zero;
1736
1737 // Check if both ranges have the same sign.
1738 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1739 (IsLHSNegative && IsRHSNegative)) {
1740 // The result is definitely less or equal than any of the operands.
1741 const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
1742
1743 // We conservatively estimate lower bound to be the smallest positive
1744 // or negative value corresponding to the sign of the operands.
1745 const llvm::APSInt &Min = IsLHSNegative
1746 ? ValueFactory.getMinValue(ResultType)
1747 : ValueFactory.getValue(Zero);
1748
1749 return {RangeFactory, Min, Max};
1750 }
1751
1752 // Otherwise, let's check if at least one of the operands is positive.
1753 if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
1754 // This makes result definitely positive.
1755 //
1756 // We can also reason about a maximal value by finding the maximal
1757 // value of the positive operand.
1758 const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
1759
1760 // The minimal value on the other hand is much harder to reason about.
1761 // The only thing we know for sure is that the result is positive.
1762 return {RangeFactory, ValueFactory.getValue(Zero),
1763 ValueFactory.getValue(Max)};
1764 }
1765
1766 // Nothing much else to do here.
1767 return infer(T);
1768}
1769
1770template <>
1771RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
1772 Range RHS,
1773 QualType T) {
1774 llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
1775
1776 Range ConservativeRange = getSymmetricalRange(RHS, T);
1777
1778 llvm::APSInt Max = ConservativeRange.To();
1779 llvm::APSInt Min = ConservativeRange.From();
1780
1781 if (Max == Zero) {
1782 // It's an undefined behaviour to divide by 0 and it seems like we know
1783 // for sure that RHS is 0. Let's say that the resulting range is
1784 // simply infeasible for that matter.
1785 return RangeFactory.getEmptySet();
1786 }
1787
1788 // At this point, our conservative range is closed. The result, however,
1789 // couldn't be greater than the RHS' maximal absolute value. Because of
1790 // this reason, we turn the range into open (or half-open in case of
1791 // unsigned integers).
1792 //
1793 // While we operate on integer values, an open interval (a, b) can be easily
1794 // represented by the closed interval [a + 1, b - 1]. And this is exactly
1795 // what we do next.
1796 //
1797 // If we are dealing with unsigned case, we shouldn't move the lower bound.
1798 if (Min.isSigned()) {
1799 ++Min;
1800 }
1801 --Max;
1802
1803 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1804 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1805
1806 // Remainder operator results with negative operands is implementation
1807 // defined. Positive cases are much easier to reason about though.
1808 if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
1809 // If maximal value of LHS is less than maximal value of RHS,
1810 // the result won't get greater than LHS.To().
1811 Max = std::min(LHS.To(), Max);
1812 // We want to check if it is a situation similar to the following:
1813 //
1814 // <------------|---[ LHS ]--------[ RHS ]----->
1815 // -INF 0 +INF
1816 //
1817 // In this situation, we can conclude that (LHS / RHS) == 0 and
1818 // (LHS % RHS) == LHS.
1819 Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
1820 }
1821
1822 // Nevertheless, the symmetrical range for RHS is a conservative estimate
1823 // for any sign of either LHS, or RHS.
1824 return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
1825}
1826
1827RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
1829 RangeSet RHS, QualType T) {
1830 // We should propagate information about unfeasbility of one of the
1831 // operands to the resulting range.
1832 if (LHS.isEmpty() || RHS.isEmpty()) {
1833 return RangeFactory.getEmptySet();
1834 }
1835
1836 switch (Op) {
1837 case BO_NE:
1838 return VisitBinaryOperator<BO_NE>(LHS, RHS, T);
1839 case BO_Or:
1840 return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
1841 case BO_And:
1842 return VisitBinaryOperator<BO_And>(LHS, RHS, T);
1843 case BO_Rem:
1844 return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
1845 default:
1846 return infer(T);
1847 }
1848}
1849
1850//===----------------------------------------------------------------------===//
1851// Constraint manager implementation details
1852//===----------------------------------------------------------------------===//
1853
1854class RangeConstraintManager : public RangedConstraintManager {
1855public:
1856 RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
1857 : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
1858
1859 //===------------------------------------------------------------------===//
1860 // Implementation for interface from ConstraintManager.
1861 //===------------------------------------------------------------------===//
1862
1863 bool haveEqualConstraints(ProgramStateRef S1,
1864 ProgramStateRef S2) const override {
1865 // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1866 // so comparing constraint ranges and class maps should be
1867 // sufficient.
1868 return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
1869 S1->get<ClassMap>() == S2->get<ClassMap>();
1870 }
1871
1872 bool canReasonAbout(SVal X) const override;
1873
1874 ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
1875
1876 const llvm::APSInt *getSymVal(ProgramStateRef State,
1877 SymbolRef Sym) const override;
1878
1879 ProgramStateRef removeDeadBindings(ProgramStateRef State,
1880 SymbolReaper &SymReaper) override;
1881
1882 void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
1883 unsigned int Space = 0, bool IsDot = false) const override;
1884 void printValue(raw_ostream &Out, ProgramStateRef State,
1885 SymbolRef Sym) override;
1886 void printConstraints(raw_ostream &Out, ProgramStateRef State,
1887 const char *NL = "\n", unsigned int Space = 0,
1888 bool IsDot = false) const;
1889 void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
1890 const char *NL = "\n", unsigned int Space = 0,
1891 bool IsDot = false) const;
1892 void printDisequalities(raw_ostream &Out, ProgramStateRef State,
1893 const char *NL = "\n", unsigned int Space = 0,
1894 bool IsDot = false) const;
1895
1896 //===------------------------------------------------------------------===//
1897 // Implementation for interface from RangedConstraintManager.
1898 //===------------------------------------------------------------------===//
1899
1900 ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
1901 const llvm::APSInt &V,
1902 const llvm::APSInt &Adjustment) override;
1903
1904 ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
1905 const llvm::APSInt &V,
1906 const llvm::APSInt &Adjustment) override;
1907
1908 ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
1909 const llvm::APSInt &V,
1910 const llvm::APSInt &Adjustment) override;
1911
1912 ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
1913 const llvm::APSInt &V,
1914 const llvm::APSInt &Adjustment) override;
1915
1916 ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
1917 const llvm::APSInt &V,
1918 const llvm::APSInt &Adjustment) override;
1919
1920 ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
1921 const llvm::APSInt &V,
1922 const llvm::APSInt &Adjustment) override;
1923
1924 ProgramStateRef assumeSymWithinInclusiveRange(
1925 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1926 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1927
1928 ProgramStateRef assumeSymOutsideInclusiveRange(
1929 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1930 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1931
1932private:
1934
1936 RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
1937 ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
1938 RangeSet Range);
1939 ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
1940 RangeSet Range);
1941
1942 RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
1943 const llvm::APSInt &Int,
1944 const llvm::APSInt &Adjustment);
1945 RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
1946 const llvm::APSInt &Int,
1947 const llvm::APSInt &Adjustment);
1948 RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
1949 const llvm::APSInt &Int,
1950 const llvm::APSInt &Adjustment);
1951 RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
1952 const llvm::APSInt &Int,
1953 const llvm::APSInt &Adjustment);
1954 RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
1955 const llvm::APSInt &Int,
1956 const llvm::APSInt &Adjustment);
1957};
1958
1959//===----------------------------------------------------------------------===//
1960// Constraint assignment logic
1961//===----------------------------------------------------------------------===//
1962
1963/// ConstraintAssignorBase is a small utility class that unifies visitor
1964/// for ranges with a visitor for constraints (rangeset/range/constant).
1965///
1966/// It is designed to have one derived class, but generally it can have more.
1967/// Derived class can control which types we handle by defining methods of the
1968/// following form:
1969///
1970/// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
1971/// CONSTRAINT Constraint);
1972///
1973/// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
1974/// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
1975/// return value signifies whether we should try other handle methods
1976/// (i.e. false would mean to stop right after calling this method)
1977template <class Derived> class ConstraintAssignorBase {
1978public:
1979 using Const = const llvm::APSInt &;
1980
1981#define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
1982
1983#define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
1984 if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
1985 return false
1986
1987 void assign(SymbolRef Sym, RangeSet Constraint) {
1988 assignImpl(Sym, Constraint);
1989 }
1990
1991 bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
1992 switch (Sym->getKind()) {
1993#define SYMBOL(Id, Parent) \
1994 case SymExpr::Id##Kind: \
1995 DISPATCH(Id);
1996#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
1997 }
1998 llvm_unreachable("Unknown SymExpr kind!");
1999 }
2000
2001#define DEFAULT_ASSIGN(Id) \
2002 bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
2003 return true; \
2004 } \
2005 bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
2006 bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
2007
2008 // When we dispatch for constraint types, we first try to check
2009 // if the new constraint is the constant and try the corresponding
2010 // assignor methods. If it didn't interrupt, we can proceed to the
2011 // range, and finally to the range set.
2012#define CONSTRAINT_DISPATCH(Id) \
2013 if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
2014 ASSIGN(Id, Const, Sym, *Const); \
2015 } \
2016 if (Constraint.size() == 1) { \
2017 ASSIGN(Id, Range, Sym, *Constraint.begin()); \
2018 } \
2019 ASSIGN(Id, RangeSet, Sym, Constraint)
2020
2021 // Our internal assign method first tries to call assignor methods for all
2022 // constraint types that apply. And if not interrupted, continues with its
2023 // parent class.
2024#define SYMBOL(Id, Parent) \
2025 bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
2026 CONSTRAINT_DISPATCH(Id); \
2027 DISPATCH(Parent); \
2028 } \
2029 DEFAULT_ASSIGN(Id)
2030#define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
2031#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2032
2033 // Default implementations for the top class that doesn't have parents.
2034 bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
2036 return true;
2037 }
2039
2040#undef DISPATCH
2041#undef CONSTRAINT_DISPATCH
2042#undef DEFAULT_ASSIGN
2043#undef ASSIGN
2044};
2045
2046/// A little component aggregating all of the reasoning we have about
2047/// assigning new constraints to symbols.
2048///
2049/// The main purpose of this class is to associate constraints to symbols,
2050/// and impose additional constraints on other symbols, when we can imply
2051/// them.
2052///
2053/// It has a nice symmetry with SymbolicRangeInferrer. When the latter
2054/// can provide more precise ranges by looking into the operands of the
2055/// expression in question, ConstraintAssignor looks into the operands
2056/// to see if we can imply more from the new constraint.
2057class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
2058public:
2059 template <class ClassOrSymbol>
2060 [[nodiscard]] static ProgramStateRef
2061 assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
2062 ClassOrSymbol CoS, RangeSet NewConstraint) {
2063 if (!State || NewConstraint.isEmpty())
2064 return nullptr;
2065
2066 ConstraintAssignor Assignor{State, Builder, F};
2067 return Assignor.assign(CoS, NewConstraint);
2068 }
2069
2070 /// Handle expressions like: a % b != 0.
2071 template <typename SymT>
2072 bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
2073 if (Sym->getOpcode() != BO_Rem)
2074 return true;
2075 // a % b != 0 implies that a != 0.
2076 if (!Constraint.containsZero()) {
2077 SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
2078 if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
2079 State = State->assume(*NonLocSymSVal, true);
2080 if (!State)
2081 return false;
2082 }
2083 }
2084 return true;
2085 }
2086
2087 inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
2088 inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
2089 RangeSet Constraint) {
2090 return handleRemainderOp(Sym, Constraint);
2091 }
2092 inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2093 RangeSet Constraint);
2094
2095private:
2096 ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
2098 : State(State), Builder(Builder), RangeFactory(F) {}
2099 using Base = ConstraintAssignorBase<ConstraintAssignor>;
2100
2101 /// Base method for handling new constraints for symbols.
2102 [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
2103 // All constraints are actually associated with equivalence classes, and
2104 // that's what we are going to do first.
2105 State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
2106 if (!State)
2107 return nullptr;
2108
2109 // And after that we can check what other things we can get from this
2110 // constraint.
2111 Base::assign(Sym, NewConstraint);
2112 return State;
2113 }
2114
2115 /// Base method for handling new constraints for classes.
2116 [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
2117 RangeSet NewConstraint) {
2118 // There is a chance that we might need to update constraints for the
2119 // classes that are known to be disequal to Class.
2120 //
2121 // In order for this to be even possible, the new constraint should
2122 // be simply a constant because we can't reason about range disequalities.
2123 if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
2124
2125 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2126 ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
2127
2128 // Add new constraint.
2129 Constraints = CF.add(Constraints, Class, NewConstraint);
2130
2131 for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
2132 RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
2133 RangeFactory, State, DisequalClass);
2134
2135 UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
2136
2137 // If we end up with at least one of the disequal classes to be
2138 // constrained with an empty range-set, the state is infeasible.
2139 if (UpdatedConstraint.isEmpty())
2140 return nullptr;
2141
2142 Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
2143 }
2144 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2145 "a state with infeasible constraints");
2146
2147 return setConstraints(State, Constraints);
2148 }
2149
2150 return setConstraint(State, Class, NewConstraint);
2151 }
2152
2153 ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
2154 SymbolRef RHS) {
2155 return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
2156 }
2157
2158 ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
2159 SymbolRef RHS) {
2160 return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
2161 }
2162
2163 [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) {
2164 assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
2165
2166 if (Constraint.getConcreteValue())
2167 return !Constraint.getConcreteValue()->isZero();
2168
2169 if (!Constraint.containsZero())
2170 return true;
2171
2172 return std::nullopt;
2173 }
2174
2175 ProgramStateRef State;
2176 SValBuilder &Builder;
2177 RangeSet::Factory &RangeFactory;
2178};
2179
2180bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
2181 const llvm::APSInt &Constraint) {
2182 llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
2183 // Iterate over all equivalence classes and try to simplify them.
2184 ClassMembersTy Members = State->get<ClassMembers>();
2185 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
2186 EquivalenceClass Class = ClassToSymbolSet.first;
2187 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2188 if (!State)
2189 return false;
2190 SimplifiedClasses.insert(Class);
2191 }
2192
2193 // Trivial equivalence classes (those that have only one symbol member) are
2194 // not stored in the State. Thus, we must skim through the constraints as
2195 // well. And we try to simplify symbols in the constraints.
2196 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2197 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2198 EquivalenceClass Class = ClassConstraint.first;
2199 if (SimplifiedClasses.count(Class)) // Already simplified.
2200 continue;
2201 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2202 if (!State)
2203 return false;
2204 }
2205
2206 // We may have trivial equivalence classes in the disequality info as
2207 // well, and we need to simplify them.
2208 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2209 for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
2210 DisequalityInfo) {
2211 EquivalenceClass Class = DisequalityEntry.first;
2212 ClassSet DisequalClasses = DisequalityEntry.second;
2213 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2214 if (!State)
2215 return false;
2216 }
2217
2218 return true;
2219}
2220
2221bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2222 RangeSet Constraint) {
2223 if (!handleRemainderOp(Sym, Constraint))
2224 return false;
2225
2226 std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
2227
2228 if (!ConstraintAsBool)
2229 return true;
2230
2231 if (std::optional<bool> Equality = meansEquality(Sym)) {
2232 // Here we cover two cases:
2233 // * if Sym is equality and the new constraint is true -> Sym's operands
2234 // should be marked as equal
2235 // * if Sym is disequality and the new constraint is false -> Sym's
2236 // operands should be also marked as equal
2237 if (*Equality == *ConstraintAsBool) {
2238 State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
2239 } else {
2240 // Other combinations leave as with disequal operands.
2241 State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
2242 }
2243
2244 if (!State)
2245 return false;
2246 }
2247
2248 return true;
2249}
2250
2251} // end anonymous namespace
2252
2253std::unique_ptr<ConstraintManager>
2255 ExprEngine *Eng) {
2256 return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
2257}
2258
2260 ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
2261 ConstraintMap Result = F.getEmptyMap();
2262
2263 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2264 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2265 EquivalenceClass Class = ClassConstraint.first;
2266 SymbolSet ClassMembers = Class.getClassMembers(State);
2267 assert(!ClassMembers.isEmpty() &&
2268 "Class must always have at least one member!");
2269
2270 SymbolRef Representative = *ClassMembers.begin();
2271 Result = F.add(Result, Representative, ClassConstraint.second);
2272 }
2273
2274 return Result;
2275}
2276
2277//===----------------------------------------------------------------------===//
2278// EqualityClass implementation details
2279//===----------------------------------------------------------------------===//
2280
2281LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
2282 raw_ostream &os) const {
2283 SymbolSet ClassMembers = getClassMembers(State);
2284 for (const SymbolRef &MemberSym : ClassMembers) {
2285 MemberSym->dump();
2286 os << "\n";
2287 }
2288}
2289
2290inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
2291 SymbolRef Sym) {
2292 assert(State && "State should not be null");
2293 assert(Sym && "Symbol should not be null");
2294 // We store far from all Symbol -> Class mappings
2295 if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
2296 return *NontrivialClass;
2297
2298 // This is a trivial class of Sym.
2299 return Sym;
2300}
2301
2302inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2303 ProgramStateRef State,
2305 SymbolRef Second) {
2306 EquivalenceClass FirstClass = find(State, First);
2307 EquivalenceClass SecondClass = find(State, Second);
2308
2309 return FirstClass.merge(F, State, SecondClass);
2310}
2311
2312inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2313 ProgramStateRef State,
2314 EquivalenceClass Other) {
2315 // It is already the same class.
2316 if (*this == Other)
2317 return State;
2318
2319 // FIXME: As of now, we support only equivalence classes of the same type.
2320 // This limitation is connected to the lack of explicit casts in
2321 // our symbolic expression model.
2322 //
2323 // That means that for `int x` and `char y` we don't distinguish
2324 // between these two very different cases:
2325 // * `x == y`
2326 // * `(char)x == y`
2327 //
2328 // The moment we introduce symbolic casts, this restriction can be
2329 // lifted.
2330 if (getType() != Other.getType())
2331 return State;
2332
2333 SymbolSet Members = getClassMembers(State);
2334 SymbolSet OtherMembers = Other.getClassMembers(State);
2335
2336 // We estimate the size of the class by the height of tree containing
2337 // its members. Merging is not a trivial operation, so it's easier to
2338 // merge the smaller class into the bigger one.
2339 if (Members.getHeight() >= OtherMembers.getHeight()) {
2340 return mergeImpl(F, State, Members, Other, OtherMembers);
2341 } else {
2342 return Other.mergeImpl(F, State, OtherMembers, *this, Members);
2343 }
2344}
2345
2346inline ProgramStateRef
2347EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
2348 ProgramStateRef State, SymbolSet MyMembers,
2349 EquivalenceClass Other, SymbolSet OtherMembers) {
2350 // Essentially what we try to recreate here is some kind of union-find
2351 // data structure. It does have certain limitations due to persistence
2352 // and the need to remove elements from classes.
2353 //
2354 // In this setting, EquialityClass object is the representative of the class
2355 // or the parent element. ClassMap is a mapping of class members to their
2356 // parent. Unlike the union-find structure, they all point directly to the
2357 // class representative because we don't have an opportunity to actually do
2358 // path compression when dealing with immutability. This means that we
2359 // compress paths every time we do merges. It also means that we lose
2360 // the main amortized complexity benefit from the original data structure.
2361 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2362 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2363
2364 // 1. If the merged classes have any constraints associated with them, we
2365 // need to transfer them to the class we have left.
2366 //
2367 // Intersection here makes perfect sense because both of these constraints
2368 // must hold for the whole new class.
2369 if (std::optional<RangeSet> NewClassConstraint =
2370 intersect(RangeFactory, getConstraint(State, *this),
2371 getConstraint(State, Other))) {
2372 // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
2373 // range inferrer shouldn't generate ranges incompatible with
2374 // equivalence classes. However, at the moment, due to imperfections
2375 // in the solver, it is possible and the merge function can also
2376 // return infeasible states aka null states.
2377 if (NewClassConstraint->isEmpty())
2378 // Infeasible state
2379 return nullptr;
2380
2381 // No need in tracking constraints of a now-dissolved class.
2382 Constraints = CRF.remove(Constraints, Other);
2383 // Assign new constraints for this class.
2384 Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
2385
2386 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2387 "a state with infeasible constraints");
2388
2389 State = State->set<ConstraintRange>(Constraints);
2390 }
2391
2392 // 2. Get ALL equivalence-related maps
2393 ClassMapTy Classes = State->get<ClassMap>();
2394 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2395
2396 ClassMembersTy Members = State->get<ClassMembers>();
2397 ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
2398
2399 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2400 DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
2401
2402 ClassSet::Factory &CF = State->get_context<ClassSet>();
2403 SymbolSet::Factory &F = getMembersFactory(State);
2404
2405 // 2. Merge members of the Other class into the current class.
2406 SymbolSet NewClassMembers = MyMembers;
2407 for (SymbolRef Sym : OtherMembers) {
2408 NewClassMembers = F.add(NewClassMembers, Sym);
2409 // *this is now the class for all these new symbols.
2410 Classes = CMF.add(Classes, Sym, *this);
2411 }
2412
2413 // 3. Adjust member mapping.
2414 //
2415 // No need in tracking members of a now-dissolved class.
2416 Members = MF.remove(Members, Other);
2417 // Now only the current class is mapped to all the symbols.
2418 Members = MF.add(Members, *this, NewClassMembers);
2419
2420 // 4. Update disequality relations
2421 ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
2422 // We are about to merge two classes but they are already known to be
2423 // non-equal. This is a contradiction.
2424 if (DisequalToOther.contains(*this))
2425 return nullptr;
2426
2427 if (!DisequalToOther.isEmpty()) {
2428 ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
2429 DisequalityInfo = DF.remove(DisequalityInfo, Other);
2430
2431 for (EquivalenceClass DisequalClass : DisequalToOther) {
2432 DisequalToThis = CF.add(DisequalToThis, DisequalClass);
2433
2434 // Disequality is a symmetric relation meaning that if
2435 // DisequalToOther not null then the set for DisequalClass is not
2436 // empty and has at least Other.
2437 ClassSet OriginalSetLinkedToOther =
2438 *DisequalityInfo.lookup(DisequalClass);
2439
2440 // Other will be eliminated and we should replace it with the bigger
2441 // united class.
2442 ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
2443 NewSet = CF.add(NewSet, *this);
2444
2445 DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
2446 }
2447
2448 DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
2449 State = State->set<DisequalityMap>(DisequalityInfo);
2450 }
2451
2452 // 5. Update the state
2453 State = State->set<ClassMap>(Classes);
2454 State = State->set<ClassMembers>(Members);
2455
2456 return State;
2457}
2458
2459inline SymbolSet::Factory &
2460EquivalenceClass::getMembersFactory(ProgramStateRef State) {
2461 return State->get_context<SymbolSet>();
2462}
2463
2464SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
2465 if (const SymbolSet *Members = State->get<ClassMembers>(*this))
2466 return *Members;
2467
2468 // This class is trivial, so we need to construct a set
2469 // with just that one symbol from the class.
2470 SymbolSet::Factory &F = getMembersFactory(State);
2471 return F.add(F.getEmptySet(), getRepresentativeSymbol());
2472}
2473
2474bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
2475 return State->get<ClassMembers>(*this) == nullptr;
2476}
2477
2478bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
2479 SymbolReaper &Reaper) const {
2480 return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
2481}
2482
2483inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2484 ProgramStateRef State,
2486 SymbolRef Second) {
2487 return markDisequal(RF, State, find(State, First), find(State, Second));
2488}
2489
2490inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2491 ProgramStateRef State,
2492 EquivalenceClass First,
2493 EquivalenceClass Second) {
2494 return First.markDisequal(RF, State, Second);
2495}
2496
2497inline ProgramStateRef
2498EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
2499 EquivalenceClass Other) const {
2500 // If we know that two classes are equal, we can only produce an infeasible
2501 // state.
2502 if (*this == Other) {
2503 return nullptr;
2504 }
2505
2506 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2507 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2508
2509 // Disequality is a symmetric relation, so if we mark A as disequal to B,
2510 // we should also mark B as disequalt to A.
2511 if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
2512 Other) ||
2513 !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
2514 *this))
2515 return nullptr;
2516
2517 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2518 "a state with infeasible constraints");
2519
2520 State = State->set<DisequalityMap>(DisequalityInfo);
2521 State = State->set<ConstraintRange>(Constraints);
2522
2523 return State;
2524}
2525
2526inline bool EquivalenceClass::addToDisequalityInfo(
2527 DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
2528 RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
2529 EquivalenceClass Second) {
2530
2531 // 1. Get all of the required factories.
2532 DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
2533 ClassSet::Factory &CF = State->get_context<ClassSet>();
2534 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2535
2536 // 2. Add Second to the set of classes disequal to First.
2537 const ClassSet *CurrentSet = Info.lookup(First);
2538 ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
2539 NewSet = CF.add(NewSet, Second);
2540
2541 Info = F.add(Info, First, NewSet);
2542
2543 // 3. If Second is known to be a constant, we can delete this point
2544 // from the constraint asociated with First.
2545 //
2546 // So, if Second == 10, it means that First != 10.
2547 // At the same time, the same logic does not apply to ranges.
2548 if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
2549 if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
2550
2551 RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
2552 RF, State, First.getRepresentativeSymbol());
2553
2554 FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
2555
2556 // If the First class is about to be constrained with an empty
2557 // range-set, the state is infeasible.
2558 if (FirstConstraint.isEmpty())
2559 return false;
2560
2561 Constraints = CRF.add(Constraints, First, FirstConstraint);
2562 }
2563
2564 return true;
2565}
2566
2567inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2568 SymbolRef FirstSym,
2569 SymbolRef SecondSym) {
2570 return EquivalenceClass::areEqual(State, find(State, FirstSym),
2571 find(State, SecondSym));
2572}
2573
2574inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2575 EquivalenceClass First,
2576 EquivalenceClass Second) {
2577 // The same equivalence class => symbols are equal.
2578 if (First == Second)
2579 return true;
2580
2581 // Let's check if we know anything about these two classes being not equal to
2582 // each other.
2583 ClassSet DisequalToFirst = First.getDisequalClasses(State);
2584 if (DisequalToFirst.contains(Second))
2585 return false;
2586
2587 // It is not clear.
2588 return std::nullopt;
2589}
2590
2591[[nodiscard]] ProgramStateRef
2592EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
2593
2594 SymbolSet ClsMembers = getClassMembers(State);
2595 assert(ClsMembers.contains(Old));
2596
2597 // Remove `Old`'s Class->Sym relation.
2598 SymbolSet::Factory &F = getMembersFactory(State);
2599 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2600 ClsMembers = F.remove(ClsMembers, Old);
2601 // Ensure another precondition of the removeMember function (we can check
2602 // this only with isEmpty, thus we have to do the remove first).
2603 assert(!ClsMembers.isEmpty() &&
2604 "Class should have had at least two members before member removal");
2605 // Overwrite the existing members assigned to this class.
2606 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2607 ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
2608 State = State->set<ClassMembers>(ClassMembersMap);
2609
2610 // Remove `Old`'s Sym->Class relation.
2611 ClassMapTy Classes = State->get<ClassMap>();
2612 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2613 Classes = CMF.remove(Classes, Old);
2614 State = State->set<ClassMap>(Classes);
2615
2616 return State;
2617}
2618
2619// Re-evaluate an SVal with top-level `State->assume` logic.
2620[[nodiscard]] ProgramStateRef
2621reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
2622 if (!Constraint)
2623 return State;
2624
2625 const auto DefinedVal = TheValue.castAs<DefinedSVal>();
2626
2627 // If the SVal is 0, we can simply interpret that as `false`.
2628 if (Constraint->encodesFalseRange())
2629 return State->assume(DefinedVal, false);
2630
2631 // If the constraint does not encode 0 then we can interpret that as `true`
2632 // AND as a Range(Set).
2633 if (Constraint->encodesTrueRange()) {
2634 State = State->assume(DefinedVal, true);
2635 if (!State)
2636 return nullptr;
2637 // Fall through, re-assume based on the range values as well.
2638 }
2639 // Overestimate the individual Ranges with the RangeSet' lowest and
2640 // highest values.
2641 return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
2642 Constraint->getMaxValue(), true);
2643}
2644
2645// Iterate over all symbols and try to simplify them. Once a symbol is
2646// simplified then we check if we can merge the simplified symbol's equivalence
2647// class to this class. This way, we simplify not just the symbols but the
2648// classes as well: we strive to keep the number of the classes to be the
2649// absolute minimum.
2650[[nodiscard]] ProgramStateRef
2651EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
2652 ProgramStateRef State, EquivalenceClass Class) {
2653 SymbolSet ClassMembers = Class.getClassMembers(State);
2654 for (const SymbolRef &MemberSym : ClassMembers) {
2655
2656 const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
2657 const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
2658
2659 // The symbol is collapsed to a constant, check if the current State is
2660 // still feasible.
2661 if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
2662 const llvm::APSInt &SV = CI->getValue();
2663 const RangeSet *ClassConstraint = getConstraint(State, Class);
2664 // We have found a contradiction.
2665 if (ClassConstraint && !ClassConstraint->contains(SV))
2666 return nullptr;
2667 }
2668
2669 if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
2670 // The simplified symbol should be the member of the original Class,
2671 // however, it might be in another existing class at the moment. We
2672 // have to merge these classes.
2673 ProgramStateRef OldState = State;
2674 State = merge(F, State, MemberSym, SimplifiedMemberSym);
2675 if (!State)
2676 return nullptr;
2677 // No state change, no merge happened actually.
2678 if (OldState == State)
2679 continue;
2680
2681 // Be aware that `SimplifiedMemberSym` might refer to an already dead
2682 // symbol. In that case, the eqclass of that might not be the same as the
2683 // eqclass of `MemberSym`. This is because the dead symbols are not
2684 // preserved in the `ClassMap`, hence
2685 // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
2686 // compared to the eqclass of `MemberSym`.
2687 // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
2688 // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
2689 //
2690 // Note that `MemberSym` must be alive here since that is from the
2691 // `ClassMembers` where all the symbols are alive.
2692
2693 // Remove the old and more complex symbol.
2694 State = find(State, MemberSym).removeMember(State, MemberSym);
2695
2696 // Query the class constraint again b/c that may have changed during the
2697 // merge above.
2698 const RangeSet *ClassConstraint = getConstraint(State, Class);
2699
2700 // Re-evaluate an SVal with top-level `State->assume`, this ignites
2701 // a RECURSIVE algorithm that will reach a FIXPOINT.
2702 //
2703 // About performance and complexity: Let us assume that in a State we
2704 // have N non-trivial equivalence classes and that all constraints and
2705 // disequality info is related to non-trivial classes. In the worst case,
2706 // we can simplify only one symbol of one class in each iteration. The
2707 // number of symbols in one class cannot grow b/c we replace the old
2708 // symbol with the simplified one. Also, the number of the equivalence
2709 // classes can decrease only, b/c the algorithm does a merge operation
2710 // optionally. We need N iterations in this case to reach the fixpoint.
2711 // Thus, the steps needed to be done in the worst case is proportional to
2712 // N*N.
2713 //
2714 // This worst case scenario can be extended to that case when we have
2715 // trivial classes in the constraints and in the disequality map. This
2716 // case can be reduced to the case with a State where there are only
2717 // non-trivial classes. This is because a merge operation on two trivial
2718 // classes results in one non-trivial class.
2719 State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
2720 if (!State)
2721 return nullptr;
2722 }
2723 }
2724 return State;
2725}
2726
2727inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
2728 SymbolRef Sym) {
2729 return find(State, Sym).getDisequalClasses(State);
2730}
2731
2732inline ClassSet
2733EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
2734 return getDisequalClasses(State->get<DisequalityMap>(),
2735 State->get_context<ClassSet>());
2736}
2737
2738inline ClassSet
2739EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
2740 ClassSet::Factory &Factory) const {
2741 if (const ClassSet *DisequalClasses = Map.lookup(*this))
2742 return *DisequalClasses;
2743
2744 return Factory.getEmptySet();
2745}
2746
2747bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
2748 ClassMembersTy Members = State->get<ClassMembers>();
2749
2750 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
2751 for (SymbolRef Member : ClassMembersPair.second) {
2752 // Every member of the class should have a mapping back to the class.
2753 if (find(State, Member) == ClassMembersPair.first) {
2754 continue;
2755 }
2756
2757 return false;
2758 }
2759 }
2760
2761 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2762 for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
2763 EquivalenceClass Class = DisequalityInfo.first;
2764 ClassSet DisequalClasses = DisequalityInfo.second;
2765
2766 // There is no use in keeping empty sets in the map.
2767 if (DisequalClasses.isEmpty())
2768 return false;
2769
2770 // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2771 // B != A should also be true.
2772 for (EquivalenceClass DisequalClass : DisequalClasses) {
2773 const ClassSet *DisequalToDisequalClasses =
2774 Disequalities.lookup(DisequalClass);
2775
2776 // It should be a set of at least one element: Class
2777 if (!DisequalToDisequalClasses ||
2778 !DisequalToDisequalClasses->contains(Class))
2779 return false;
2780 }
2781 }
2782
2783 return true;
2784}
2785
2786//===----------------------------------------------------------------------===//
2787// RangeConstraintManager implementation
2788//===----------------------------------------------------------------------===//
2789
2790bool RangeConstraintManager::canReasonAbout(SVal X) const {
2791 std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
2792 if (SymVal && SymVal->isExpression()) {
2793 const SymExpr *SE = SymVal->getSymbol();
2794
2795 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
2796 switch (SIE->getOpcode()) {
2797 // We don't reason yet about bitwise-constraints on symbolic values.
2798 case BO_And:
2799 case BO_Or:
2800 case BO_Xor:
2801 return false;
2802 // We don't reason yet about these arithmetic constraints on
2803 // symbolic values.
2804 case BO_Mul:
2805 case BO_Div:
2806 case BO_Rem:
2807 case BO_Shl:
2808 case BO_Shr:
2809 return false;
2810 // All other cases.
2811 default:
2812 return true;
2813 }
2814 }
2815
2816 if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
2817 // FIXME: Handle <=> here.
2820 // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2821 // We've recently started producing Loc <> NonLoc comparisons (that
2822 // result from casts of one of the operands between eg. intptr_t and
2823 // void *), but we can't reason about them yet.
2824 if (Loc::isLocType(SSE->getLHS()->getType())) {
2825 return Loc::isLocType(SSE->getRHS()->getType());
2826 }
2827 }
2828 }
2829
2830 return false;
2831 }
2832
2833 return true;
2834}
2835
2836ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
2837 SymbolRef Sym) {
2838 const RangeSet *Ranges = getConstraint(State, Sym);
2839
2840 // If we don't have any information about this symbol, it's underconstrained.
2841 if (!Ranges)
2842 return ConditionTruthVal();
2843
2844 // If we have a concrete value, see if it's zero.
2845 if (const llvm::APSInt *Value = Ranges->getConcreteValue())
2846 return *Value == 0;
2847
2848 BasicValueFactory &BV = getBasicVals();
2849 APSIntType IntType = BV.getAPSIntType(Sym->getType());
2850 llvm::APSInt Zero = IntType.getZeroValue();
2851
2852 // Check if zero is in the set of possible values.
2853 if (!Ranges->contains(Zero))
2854 return false;
2855
2856 // Zero is a possible value, but it is not the /only/ possible value.
2857 return ConditionTruthVal();
2858}
2859
2860const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
2861 SymbolRef Sym) const {
2862 const RangeSet *T = getConstraint(St, Sym);
2863 return T ? T->getConcreteValue() : nullptr;
2864}
2865
2866//===----------------------------------------------------------------------===//
2867// Remove dead symbols from existing constraints
2868//===----------------------------------------------------------------------===//
2869
2870/// Scan all symbols referenced by the constraints. If the symbol is not alive
2871/// as marked in LSymbols, mark it as dead in DSymbols.
2873RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
2874 SymbolReaper &SymReaper) {
2875 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2876 ClassMembersTy NewClassMembersMap = ClassMembersMap;
2877 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2878 SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
2879
2880 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2881 ConstraintRangeTy NewConstraints = Constraints;
2882 ConstraintRangeTy::Factory &ConstraintFactory =
2883 State->get_context<ConstraintRange>();
2884
2885 ClassMapTy Map = State->get<ClassMap>();
2886 ClassMapTy NewMap = Map;
2887 ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
2888
2889 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2890 DisequalityMapTy::Factory &DisequalityFactory =
2891 State->get_context<DisequalityMap>();
2892 ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
2893
2894 bool ClassMapChanged = false;
2895 bool MembersMapChanged = false;
2896 bool ConstraintMapChanged = false;
2897 bool DisequalitiesChanged = false;
2898
2899 auto removeDeadClass = [&](EquivalenceClass Class) {
2900 // Remove associated constraint ranges.
2901 Constraints = ConstraintFactory.remove(Constraints, Class);
2902 ConstraintMapChanged = true;
2903
2904 // Update disequality information to not hold any information on the
2905 // removed class.
2906 ClassSet DisequalClasses =
2907 Class.getDisequalClasses(Disequalities, ClassSetFactory);
2908 if (!DisequalClasses.isEmpty()) {
2909 for (EquivalenceClass DisequalClass : DisequalClasses) {
2910 ClassSet DisequalToDisequalSet =
2911 DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
2912 // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2913 // disequality info.
2914 assert(!DisequalToDisequalSet.isEmpty());
2915 ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
2916
2917 // No need in keeping an empty set.
2918 if (NewSet.isEmpty()) {
2919 Disequalities =
2920 DisequalityFactory.remove(Disequalities, DisequalClass);
2921 } else {
2922 Disequalities =
2923 DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
2924 }
2925 }
2926 // Remove the data for the class
2927 Disequalities = DisequalityFactory.remove(Disequalities, Class);
2928 DisequalitiesChanged = true;
2929 }
2930 };
2931
2932 // 1. Let's see if dead symbols are trivial and have associated constraints.
2933 for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
2934 Constraints) {
2935 EquivalenceClass Class = ClassConstraintPair.first;
2936 if (Class.isTriviallyDead(State, SymReaper)) {
2937 // If this class is trivial, we can remove its constraints right away.
2938 removeDeadClass(Class);
2939 }
2940 }
2941
2942 // 2. We don't need to track classes for dead symbols.
2943 for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
2944 SymbolRef Sym = SymbolClassPair.first;
2945
2946 if (SymReaper.isDead(Sym)) {
2947 ClassMapChanged = true;
2948 NewMap = ClassFactory.remove(NewMap, Sym);
2949 }
2950 }
2951
2952 // 3. Remove dead members from classes and remove dead non-trivial classes
2953 // and their constraints.
2954 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
2955 ClassMembersMap) {
2956 EquivalenceClass Class = ClassMembersPair.first;
2957 SymbolSet LiveMembers = ClassMembersPair.second;
2958 bool MembersChanged = false;
2959
2960 for (SymbolRef Member : ClassMembersPair.second) {
2961 if (SymReaper.isDead(Member)) {
2962 MembersChanged = true;
2963 LiveMembers = SetFactory.remove(LiveMembers, Member);
2964 }
2965 }
2966
2967 // Check if the class changed.
2968 if (!MembersChanged)
2969 continue;
2970
2971 MembersMapChanged = true;
2972
2973 if (LiveMembers.isEmpty()) {
2974 // The class is dead now, we need to wipe it out of the members map...
2975 NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
2976
2977 // ...and remove all of its constraints.
2978 removeDeadClass(Class);
2979 } else {
2980 // We need to change the members associated with the class.
2981 NewClassMembersMap =
2982 EMFactory.add(NewClassMembersMap, Class, LiveMembers);
2983 }
2984 }
2985
2986 // 4. Update the state with new maps.
2987 //
2988 // Here we try to be humble and update a map only if it really changed.
2989 if (ClassMapChanged)
2990 State = State->set<ClassMap>(NewMap);
2991
2992 if (MembersMapChanged)
2993 State = State->set<ClassMembers>(NewClassMembersMap);
2994
2995 if (ConstraintMapChanged)
2996 State = State->set<ConstraintRange>(Constraints);
2997
2998 if (DisequalitiesChanged)
2999 State = State->set<DisequalityMap>(Disequalities);
3000
3001 assert(EquivalenceClass::isClassDataConsistent(State));
3002
3003 return State;
3004}
3005
3006RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
3007 SymbolRef Sym) {
3008 return SymbolicRangeInferrer::inferRange(F, State, Sym);
3009}
3010
3011ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
3012 SymbolRef Sym,
3013 RangeSet Range) {
3014 return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
3015}
3016
3017//===------------------------------------------------------------------------===
3018// assumeSymX methods: protected interface for RangeConstraintManager.
3019//===------------------------------------------------------------------------===/
3020
3021// The syntax for ranges below is mathematical, using [x, y] for closed ranges
3022// and (x, y) for open ranges. These ranges are modular, corresponding with
3023// a common treatment of C integer overflow. This means that these methods
3024// do not have to worry about overflow; RangeSet::Intersect can handle such a
3025// "wraparound" range.
3026// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
3027// UINT_MAX, 0, 1, and 2.
3028
3030RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
3031 const llvm::APSInt &Int,
3032 const llvm::APSInt &Adjustment) {
3033 // Before we do any real work, see if the value can even show up.
3034 APSIntType AdjustmentType(Adjustment);
3035 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
3036 return St;
3037
3038 llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
3039 RangeSet New = getRange(St, Sym);
3040 New = F.deletePoint(New, Point);
3041
3042 return setRange(St, Sym, New);
3043}
3044
3046RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
3047 const llvm::APSInt &Int,
3048 const llvm::APSInt &Adjustment) {
3049 // Before we do any real work, see if the value can even show up.
3050 APSIntType AdjustmentType(Adjustment);
3051 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
3052 return nullptr;
3053
3054 // [Int-Adjustment, Int-Adjustment]
3055 llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
3056 RangeSet New = getRange(St, Sym);
3057 New = F.intersect(New, AdjInt);
3058
3059 return setRange(St, Sym, New);
3060}
3061
3062RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
3063 SymbolRef Sym,
3064 const llvm::APSInt &Int,
3065 const llvm::APSInt &Adjustment) {
3066 // Before we do any real work, see if the value can even show up.
3067 APSIntType AdjustmentType(Adjustment);
3068 switch (AdjustmentType.testInRange(Int, true)) {
3070 return F.getEmptySet();
3072 break;
3074 return getRange(St, Sym);
3075 }
3076
3077 // Special case for Int == Min. This is always false.
3078 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3079 llvm::APSInt Min = AdjustmentType.getMinValue();
3080 if (ComparisonVal == Min)
3081 return F.getEmptySet();
3082
3083 llvm::APSInt Lower = Min - Adjustment;
3084 llvm::APSInt Upper = ComparisonVal - Adjustment;
3085 --Upper;
3086
3087 RangeSet Result = getRange(St, Sym);
3088 return F.intersect(Result, Lower, Upper);
3089}
3090
3092RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
3093 const llvm::APSInt &Int,
3094 const llvm::APSInt &Adjustment) {
3095 RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
3096 return setRange(St, Sym, New);
3097}
3098
3099RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
3100 SymbolRef Sym,
3101 const llvm::APSInt &Int,
3102 const llvm::APSInt &Adjustment) {
3103 // Before we do any real work, see if the value can even show up.
3104 APSIntType AdjustmentType(Adjustment);
3105 switch (AdjustmentType.testInRange(Int, true)) {
3107 return getRange(St, Sym);
3109 break;
3111 return F.getEmptySet();
3112 }
3113
3114 // Special case for Int == Max. This is always false.
3115 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3116 llvm::APSInt Max = AdjustmentType.getMaxValue();
3117 if (ComparisonVal == Max)
3118 return F.getEmptySet();
3119
3120 llvm::APSInt Lower = ComparisonVal - Adjustment;
3121 llvm::APSInt Upper = Max - Adjustment;
3122 ++Lower;
3123
3124 RangeSet SymRange = getRange(St, Sym);
3125 return F.intersect(SymRange, Lower, Upper);
3126}
3127
3129RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
3130 const llvm::APSInt &Int,
3131 const llvm::APSInt &Adjustment) {
3132 RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
3133 return setRange(St, Sym, New);
3134}
3135
3136RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
3137 SymbolRef Sym,
3138 const llvm::APSInt &Int,
3139 const llvm::APSInt &Adjustment) {
3140 // Before we do any real work, see if the value can even show up.
3141 APSIntType AdjustmentType(Adjustment);
3142 switch (AdjustmentType.testInRange(Int, true)) {
3144 return getRange(St, Sym);
3146 break;
3148 return F.getEmptySet();
3149 }
3150
3151 // Special case for Int == Min. This is always feasible.
3152 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3153 llvm::APSInt Min = AdjustmentType.getMinValue();
3154 if (ComparisonVal == Min)
3155 return getRange(St, Sym);
3156
3157 llvm::APSInt Max = AdjustmentType.getMaxValue();
3158 llvm::APSInt Lower = ComparisonVal - Adjustment;
3159 llvm::APSInt Upper = Max - Adjustment;
3160
3161 RangeSet SymRange = getRange(St, Sym);
3162 return F.intersect(SymRange, Lower, Upper);
3163}
3164
3166RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
3167 const llvm::APSInt &Int,
3168 const llvm::APSInt &Adjustment) {
3169 RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
3170 return setRange(St, Sym, New);
3171}
3172
3174RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
3175 const llvm::APSInt &Int,
3176 const llvm::APSInt &Adjustment) {
3177 // Before we do any real work, see if the value can even show up.
3178 APSIntType AdjustmentType(Adjustment);
3179 switch (AdjustmentType.testInRange(Int, true)) {
3181 return F.getEmptySet();
3183 break;
3185 return RS();
3186 }
3187
3188 // Special case for Int == Max. This is always feasible.
3189 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3190 llvm::APSInt Max = AdjustmentType.getMaxValue();
3191 if (ComparisonVal == Max)
3192 return RS();
3193
3194 llvm::APSInt Min = AdjustmentType.getMinValue();
3195 llvm::APSInt Lower = Min - Adjustment;
3196 llvm::APSInt Upper = ComparisonVal - Adjustment;
3197
3198 RangeSet Default = RS();
3199 return F.intersect(Default, Lower, Upper);
3200}
3201
3202RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
3203 SymbolRef Sym,
3204 const llvm::APSInt &Int,
3205 const llvm::APSInt &Adjustment) {
3206 return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
3207}
3208
3210RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
3211 const llvm::APSInt &Int,
3212 const llvm::APSInt &Adjustment) {
3213 RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
3214 return setRange(St, Sym, New);
3215}
3216
3217ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
3218 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3219 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3220 RangeSet New = getSymGERange(State, Sym, From, Adjustment);
3221 if (New.isEmpty())
3222 return nullptr;
3223 RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
3224 return setRange(State, Sym, Out);
3225}
3226
3227ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
3228 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3229 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3230 RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
3231 RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
3232 RangeSet New(F.add(RangeLT, RangeGT));
3233 return setRange(State, Sym, New);
3234}
3235
3236//===----------------------------------------------------------------------===//
3237// Pretty-printing.
3238//===----------------------------------------------------------------------===//
3239
3240void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
3241 const char *NL, unsigned int Space,
3242 bool IsDot) const {
3243 printConstraints(Out, State, NL, Space, IsDot);
3244 printEquivalenceClasses(Out, State, NL, Space, IsDot);
3245 printDisequalities(Out, State, NL, Space, IsDot);
3246}
3247
3248void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
3249 SymbolRef Sym) {
3250 const RangeSet RS = getRange(State, Sym);
3251 Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
3252 RS.dump(Out);
3253}
3254
3255static std::string toString(const SymbolRef &Sym) {
3256 std::string S;
3257 llvm::raw_string_ostream O(S);
3258 Sym->dumpToStream(O);
3259 return O.str();
3260}
3261
3262void RangeConstraintManager::printConstraints(raw_ostream &Out,
3263 ProgramStateRef State,
3264 const char *NL,
3265 unsigned int Space,
3266 bool IsDot) const {
3267 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
3268
3269 Indent(Out, Space, IsDot) << "\"constraints\": ";
3270 if (Constraints.isEmpty()) {
3271 Out << "null," << NL;
3272 return;
3273 }
3274
3275 std::map<std::string, RangeSet> OrderedConstraints;
3276 for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
3277 SymbolSet ClassMembers = P.first.getClassMembers(State);
3278 for (const SymbolRef &ClassMember : ClassMembers) {
3279 bool insertion_took_place;
3280 std::tie(std::ignore, insertion_took_place) =
3281 OrderedConstraints.insert({toString(ClassMember), P.second});
3282 assert(insertion_took_place &&
3283 "two symbols should not have the same dump");
3284 }
3285 }
3286
3287 ++Space;
3288 Out << '[' << NL;
3289 bool First = true;
3290 for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
3291 if (First) {
3292 First = false;
3293 } else {
3294 Out << ',';
3295 Out << NL;
3296 }
3297 Indent(Out, Space, IsDot)
3298 << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
3299 P.second.dump(Out);
3300 Out << "\" }";
3301 }
3302 Out << NL;
3303
3304 --Space;
3305 Indent(Out, Space, IsDot) << "]," << NL;
3306}
3307
3308static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
3309 SymbolSet ClassMembers = Class.getClassMembers(State);
3310 llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
3311 ClassMembers.end());
3312 llvm::sort(ClassMembersSorted,
3313 [](const SymbolRef &LHS, const SymbolRef &RHS) {
3314 return toString(LHS) < toString(RHS);
3315 });
3316
3317 bool FirstMember = true;
3318
3319 std::string Str;
3320 llvm::raw_string_ostream Out(Str);
3321 Out << "[ ";
3322 for (SymbolRef ClassMember : ClassMembersSorted) {
3323 if (FirstMember)
3324 FirstMember = false;
3325 else
3326 Out << ", ";
3327 Out << "\"" << ClassMember << "\"";
3328 }
3329 Out << " ]";
3330 return Out.str();
3331}
3332
3333void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
3334 ProgramStateRef State,
3335 const char *NL,
3336 unsigned int Space,
3337 bool IsDot) const {
3338 ClassMembersTy Members = State->get<ClassMembers>();
3339
3340 Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
3341 if (Members.isEmpty()) {
3342 Out << "null," << NL;
3343 return;
3344 }
3345
3346 std::set<std::string> MembersStr;
3347 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
3348 MembersStr.insert(toString(State, ClassToSymbolSet.first));
3349
3350 ++Space;
3351 Out << '[' << NL;
3352 bool FirstClass = true;
3353 for (const std::string &Str : MembersStr) {
3354 if (FirstClass) {
3355 FirstClass = false;
3356 } else {
3357 Out << ',';
3358 Out << NL;
3359 }
3360 Indent(Out, Space, IsDot);
3361 Out << Str;
3362 }
3363 Out << NL;
3364
3365 --Space;
3366 Indent(Out, Space, IsDot) << "]," << NL;
3367}
3368
3369void RangeConstraintManager::printDisequalities(raw_ostream &Out,
3370 ProgramStateRef State,
3371 const char *NL,
3372 unsigned int Space,
3373 bool IsDot) const {
3374 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
3375
3376 Indent(Out, Space, IsDot) << "\"disequality_info\": ";
3377 if (Disequalities.isEmpty()) {
3378 Out << "null," << NL;
3379 return;
3380 }
3381
3382 // Transform the disequality info to an ordered map of
3383 // [string -> (ordered set of strings)]
3384 using EqClassesStrTy = std::set<std::string>;
3385 using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
3386 DisequalityInfoStrTy DisequalityInfoStr;
3387 for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
3388 EquivalenceClass Class = ClassToDisEqSet.first;
3389 ClassSet DisequalClasses = ClassToDisEqSet.second;
3390 EqClassesStrTy MembersStr;
3391 for (EquivalenceClass DisEqClass : DisequalClasses)
3392 MembersStr.insert(toString(State, DisEqClass));
3393 DisequalityInfoStr.insert({toString(State, Class), MembersStr});
3394 }
3395
3396 ++Space;
3397 Out << '[' << NL;
3398 bool FirstClass = true;
3399 for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
3400 DisequalityInfoStr) {
3401 const std::string &Class = ClassToDisEqSet.first;
3402 if (FirstClass) {
3403 FirstClass = false;
3404 } else {
3405 Out << ',';
3406 Out << NL;
3407 }
3408 Indent(Out, Space, IsDot) << "{" << NL;
3409 unsigned int DisEqSpace = Space + 1;
3410 Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
3411 Out << Class;
3412 const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
3413 if (!DisequalClasses.empty()) {
3414 Out << "," << NL;
3415 Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
3416 unsigned int DisEqClassSpace = DisEqSpace + 1;
3417 Indent(Out, DisEqClassSpace, IsDot);
3418 bool FirstDisEqClass = true;
3419 for (const std::string &DisEqClass : DisequalClasses) {
3420 if (FirstDisEqClass) {
3421 FirstDisEqClass = false;
3422 } else {
3423 Out << ',' << NL;
3424 Indent(Out, DisEqClassSpace, IsDot);
3425 }
3426 Out << DisEqClass;
3427 }
3428 Out << "]" << NL;
3429 }
3430 Indent(Out, Space, IsDot) << "}";
3431 }
3432 Out << NL;
3433
3434 --Space;
3435 Indent(Out, Space, IsDot) << "]," << NL;
3436}
#define V(N, I)
Definition: ASTContext.h:3233
StringRef P
llvm::APSInt APSInt
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
#define X(type, name)
Definition: Value.h:142
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
#define CONSTRAINT_DISPATCH(Id)
ProgramStateRef reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue)
#define DEFAULT_ASSIGN(Id)
void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
static CharSourceRange getRange(const CharSourceRange &EditRange, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeMacroExpansion)
Definition: SourceCode.cpp:104
static BinaryOperatorKind getOpFromIndex(size_t Index)
constexpr size_t getCmpOpCount() const
TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP, BinaryOperatorKind QueriedOP) const
TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const
bool isComparisonOp() const
Definition: Expr.h:3934
bool isRelationalOp() const
Definition: Expr.h:3928
static Opcode negateComparisonOp(Opcode Opc)
Definition: Expr.h:3939
static Opcode reverseComparisonOp(Opcode Opc)
Definition: Expr.h:3952
bool isEqualityOp() const
Definition: Expr.h:3931
A (possibly-)qualified type.
Definition: Type.h:736
The type-property cache.
Definition: Type.cpp:4120
The base class of the type hierarchy.
Definition: Type.h:1597
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2108
bool isUnsignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is unsigned or an enumeration types whose underlying ...
Definition: Type.cpp:2158
bool isReferenceType() const
Definition: Type.h:7011
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:7420
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:19
bool isUnsigned() const
Definition: APSIntType.h:31
llvm::APSInt getZeroValue() const LLVM_READONLY
Returns an all-zero value for this type.
Definition: APSIntType.h:55
RangeTestResultKind
Used to classify whether a value is representable using this type.
Definition: APSIntType.h:76
@ RTR_Within
Value is representable using this type.
Definition: APSIntType.h:78
@ RTR_Below
Value is less than the minimum representable value.
Definition: APSIntType.h:77
@ RTR_Above
Value is greater than the maximum representable value.
Definition: APSIntType.h:79
uint32_t getBitWidth() const
Definition: APSIntType.h:30
RangeTestResultKind testInRange(const llvm::APSInt &Val, bool AllowMixedSign) const LLVM_READONLY
Tests whether a given value is losslessly representable using this type.
Definition: APSIntType.cpp:15
void apply(llvm::APSInt &Value) const
Convert a given APSInt, in place, to match this type.
Definition: APSIntType.h:37
llvm::APSInt getMinValue() const LLVM_READONLY
Returns the minimum value for this type.
Definition: APSIntType.h:60
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Definition: APSIntType.h:48
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:69
APSIntType getAPSIntType(QualType T) const
Returns the type of the APSInt used to store values of the given QualType.
Template implementation for all binary symbolic expressions.
QualType getType() const override
BinaryOperator::Opcode getOpcode() const
static bool isLocType(QualType T)
Definition: SVals.h:285
RangeSet unite(RangeSet LHS, RangeSet RHS)
Create a new set which is a union of two given ranges.
RangeSet negate(RangeSet What)
Negate the given range set.
RangeSet intersect(RangeSet LHS, RangeSet RHS)
Intersect the given range sets.
RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point)
Delete the given point from the range set.
RangeSet getRangeSet(Range Origin)
Create a new set with just one range.
RangeSet add(RangeSet LHS, RangeSet RHS)
Create a new set with all ranges from both LHS and RHS.
RangeSet castTo(RangeSet What, APSIntType Ty)
Performs promotions, truncations and conversions of the given set.
persistent set of non-overlapping ranges.
const_iterator end() const
APSIntType getAPSIntType() const
const llvm::APSInt & getMaxValue() const
Get the maximal value covered by the ranges in the set.
bool encodesTrueRange() const
Test if the range doesn't contain zero.
bool encodesFalseRange() const
Test if the range is the [0,0] range.
const_iterator begin() const
const llvm::APSInt & getMinValue() const
Get the minimal value covered by the ranges in the set.
ImplType::const_iterator const_iterator
bool contains(llvm::APSInt Point) const
Test whether the given point is contained by any of the ranges.
void dump(raw_ostream &OS) const
const llvm::APSInt * getConcreteValue() const
getConcreteValue - If a symbol is constrained to equal a specific integer constant then this method r...
A Range represents the closed range [from, to].
const llvm::APSInt & From() const
void dump(raw_ostream &OS) const
bool Includes(const llvm::APSInt &Point) const
const llvm::APSInt & To() const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:73
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:104
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:104
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:100
SymExprVisitor - this class implements a simple visitor for SymExpr subclasses.
Definition: SValVisitor.h:75
Symbolic value.
Definition: SymExpr.h:30
virtual void dumpToStream(raw_ostream &os) const
Definition: SymExpr.h:61
virtual QualType getType() const =0
Kind getKind() const
Definition: SymExpr.h:57
const SymSymExpr * getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t)
A class responsible for cleaning up unused symbols.
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
Represents a symbolic expression involving a unary operator.
QualType getType() const override
UnaryOperator::Opcode getOpcode() const
const SymExpr * getOperand() const
Value representing integer constant.
Definition: SVals.h:325
Represents symbolic expression that isn't a location.
Definition: SVals.h:300
SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym)
Try to simplify a given symbolic expression's associated SVal based on the constraints in State.
llvm::ImmutableMap< SymbolRef, RangeSet > ConstraintMap
SymbolRef simplify(ProgramStateRef State, SymbolRef Sym)
Try to simplify a given symbolic expression based on the constraints in State.
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
@ CF
Indicates that the tracked object is a CF object.
std::unique_ptr< ConstraintManager > CreateRangeConstraintManager(ProgramStateManager &statemgr, ExprEngine *exprengine)
ConstraintMap getConstraintMap(ProgramStateRef State)
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1587
bool Const(InterpState &S, CodePtr OpPC, const T &Arg)
Definition: Interp.h:856
BinaryOperatorKind
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:207
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Result
The result type of a method or function.
bool operator!=(CanQual< T > x, CanQual< U > y)
__UINTPTR_TYPE__ uintptr_t
An unsigned integer type with the property that any valid pointer to void can be converted to this ty...