1// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
14#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
15#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
16#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
18#include <optional>
19
20using namespace clang;
21using namespace ento;
22
23namespace {
24class SimpleSValBuilder : public SValBuilder {
25
26  // Query the constraint manager whether the SVal has only one possible
27  // (integer) value. If that is the case, the value is returned. Otherwise,
28  // returns NULL.
29  // This is an implementation detail. Checkers should use `getKnownValue()`
30  // instead.
31  static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
32
33  // Helper function that returns the value stored in a nonloc::ConcreteInt or
34  // loc::ConcreteInt.
35  static const llvm::APSInt *getConcreteValue(SVal V);
36
37  // With one `simplifySValOnce` call, a compound symbols might collapse to
38  // simpler symbol tree that is still possible to further simplify. Thus, we
39  // do the simplification on a new symbol tree until we reach the simplest
40  // form, i.e. the fixpoint.
41  // Consider the following symbol `(b * b) * b * b` which has this tree:
42  //       *
43  //      / \
44  //     *   b
45  //    /  \
46  //   /    b
47  // (b * b)
48  // Now, if the `b * b == 1` new constraint is added then during the first
49  // iteration we have the following transformations:
50  //       *                  *
51  //      / \                / \
52  //     *   b     -->      b   b
53  //    /  \
54  //   /    b
55  //  1
56  // We need another iteration to reach the final result `1`.
57  SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
58
59  // Recursively descends into symbolic expressions and replaces symbols
60  // with their known values (in the sense of the getConstValue() method).
61  // We traverse the symbol tree and query the constraint values for the
62  // sub-trees and if a value is a constant we do the constant folding.
63  SVal simplifySValOnce(ProgramStateRef State, SVal V);
64
65public:
66  SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
67                    ProgramStateManager &stateMgr)
68      : SValBuilder(alloc, context, stateMgr) {}
69  ~SimpleSValBuilder() override {}
70
71  SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
72                   NonLoc lhs, NonLoc rhs, QualType resultTy) override;
73  SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
74                   Loc lhs, Loc rhs, QualType resultTy) override;
75  SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
76                   Loc lhs, NonLoc rhs, QualType resultTy) override;
77
78  /// Evaluates a given SVal by recursively evaluating and
79  /// simplifying the children SVals. If the SVal has only one possible
80  /// (integer) value, that value is returned. Otherwise, returns NULL.
81  const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
82
83  /// Evaluates a given SVal by recursively evaluating and simplifying the
84  /// children SVals, then returns its minimal possible (integer) value. If the
85  /// constraint manager cannot provide a meaningful answer, this returns NULL.
86  const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override;
87
88  /// Evaluates a given SVal by recursively evaluating and simplifying the
89  /// children SVals, then returns its maximal possible (integer) value. If the
90  /// constraint manager cannot provide a meaningful answer, this returns NULL.
91  const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override;
92
93  SVal simplifySVal(ProgramStateRef State, SVal V) override;
94
95  SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
96                     const llvm::APSInt &RHS, QualType resultTy);
97};
98} // end anonymous namespace
99
100SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
101                                           ASTContext &context,
102                                           ProgramStateManager &stateMgr) {
103  return new SimpleSValBuilder(alloc, context, stateMgr);
104}
105
106// Checks if the negation the value and flipping sign preserve
107// the semantics on the operation in the resultType
108static bool isNegationValuePreserving(const llvm::APSInt &Value,
109                                      APSIntType ResultType) {
110  const unsigned ValueBits = Value.getSignificantBits();
111  if (ValueBits == ResultType.getBitWidth()) {
112    // The value is the lowest negative value that is representable
113    // in signed integer with bitWith of result type. The
114    // negation is representable if resultType is unsigned.
115    return ResultType.isUnsigned();
116  }
117
118  // If resultType bitWith is higher that number of bits required
119  // to represent RHS, the sign flip produce same value.
120  return ValueBits < ResultType.getBitWidth();
121}
122
123//===----------------------------------------------------------------------===//
124// Transfer function for binary operators.
125//===----------------------------------------------------------------------===//
126
127SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
128                                    BinaryOperator::Opcode op,
129                                    const llvm::APSInt &RHS,
130                                    QualType resultTy) {
131  bool isIdempotent = false;
132
133  // Check for a few special cases with known reductions first.
134  switch (op) {
135  default:
136    // We can't reduce this case; just treat it normally.
137    break;
138  case BO_Mul:
139    // a*0 and a*1
140    if (RHS == 0)
141      return makeIntVal(0, resultTy);
142    else if (RHS == 1)
143      isIdempotent = true;
144    break;
145  case BO_Div:
146    // a/0 and a/1
147    if (RHS == 0)
148      // This is also handled elsewhere.
149      return UndefinedVal();
150    else if (RHS == 1)
151      isIdempotent = true;
152    break;
153  case BO_Rem:
154    // a%0 and a%1
155    if (RHS == 0)
156      // This is also handled elsewhere.
157      return UndefinedVal();
158    else if (RHS == 1)
159      return makeIntVal(0, resultTy);
160    break;
161  case BO_Add:
162  case BO_Sub:
163  case BO_Shl:
164  case BO_Shr:
165  case BO_Xor:
166    // a+0, a-0, a<<0, a>>0, a^0
167    if (RHS == 0)
168      isIdempotent = true;
169    break;
170  case BO_And:
171    // a&0 and a&(~0)
172    if (RHS == 0)
173      return makeIntVal(0, resultTy);
174    else if (RHS.isAllOnes())
175      isIdempotent = true;
176    break;
177  case BO_Or:
178    // a|0 and a|(~0)
179    if (RHS == 0)
180      isIdempotent = true;
181    else if (RHS.isAllOnes()) {
182      const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
183      return nonloc::ConcreteInt(Result);
184    }
185    break;
186  }
187
188  // Idempotent ops (like a*1) can still change the type of an expression.
189  // Wrap the LHS up in a NonLoc again and let evalCast do the
190  // dirty work.
191  if (isIdempotent)
192    return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
193
194  // If we reach this point, the expression cannot be simplified.
195  // Make a SymbolVal for the entire expression, after converting the RHS.
196  const llvm::APSInt *ConvertedRHS = &RHS;
197  if (BinaryOperator::isComparisonOp(op)) {
198    // We're looking for a type big enough to compare the symbolic value
199    // with the given constant.
200    // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
201    ASTContext &Ctx = getContext();
202    QualType SymbolType = LHS->getType();
203    uint64_t ValWidth = RHS.getBitWidth();
204    uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
205
206    if (ValWidth < TypeWidth) {
207      // If the value is too small, extend it.
208      ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
209    } else if (ValWidth == TypeWidth) {
210      // If the value is signed but the symbol is unsigned, do the comparison
211      // in unsigned space. [C99 6.3.1.8]
212      // (For the opposite case, the value is already unsigned.)
213      if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
214        ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
215    }
216  } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) {
217    // Change a+(-N) into a-N, and a-(-N) into a+N
218    // Adjust addition/subtraction of negative value, to
219    // subtraction/addition of the negated value.
220    APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy);
221    if (isNegationValuePreserving(RHS, resultIntTy)) {
222      ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS));
223      op = (op == BO_Add) ? BO_Sub : BO_Add;
224    } else {
225      ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
226    }
227  } else
228    ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
229
230  return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
231}
232
233// See if Sym is known to be a relation Rel with Bound.
234static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
235                         llvm::APSInt Bound, ProgramStateRef State) {
236  SValBuilder &SVB = State->getStateManager().getSValBuilder();
237  SVal Result =
238      SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
239                      nonloc::ConcreteInt(Bound), SVB.getConditionType());
240  if (auto DV = Result.getAs<DefinedSVal>()) {
241    return !State->assume(*DV, false);
242  }
243  return false;
244}
245
246// See if Sym is known to be within [min/4, max/4], where min and max
247// are the bounds of the symbol's integral type. With such symbols,
248// some manipulations can be performed without the risk of overflow.
249// assume() doesn't cause infinite recursion because we should be dealing
250// with simpler symbols on every recursive call.
251static bool isWithinConstantOverflowBounds(SymbolRef Sym,
252                                           ProgramStateRef State) {
253  SValBuilder &SVB = State->getStateManager().getSValBuilder();
254  BasicValueFactory &BV = SVB.getBasicValueFactory();
255
256  QualType T = Sym->getType();
257  assert(T->isSignedIntegerOrEnumerationType() &&
258         "This only works with signed integers!");
259  APSIntType AT = BV.getAPSIntType(T);
260
261  llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
262  return isInRelation(BO_LE, Sym, Max, State) &&
263         isInRelation(BO_GE, Sym, Min, State);
264}
265
266// Same for the concrete integers: see if I is within [min/4, max/4].
267static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
268  APSIntType AT(I);
269  assert(!AT.isUnsigned() &&
270         "This only works with signed integers!");
271
272  llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
273  return (I <= Max) && (I >= -Max);
274}
275
276static std::pair<SymbolRef, llvm::APSInt>
277decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) {
278  if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
279    if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
280      return std::make_pair(SymInt->getLHS(),
281                            (SymInt->getOpcode() == BO_Add) ?
282                            (SymInt->getRHS()) :
283                            (-SymInt->getRHS()));
284
285  // Fail to decompose: "reduce" the problem to the "$x + 0" case.
286  return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
287}
288
289// Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
290// same signed integral type and no overflows occur (which should be checked
291// by the caller).
292static NonLoc doRearrangeUnchecked(ProgramStateRef State,
293                                   BinaryOperator::Opcode Op,
294                                   SymbolRef LSym, llvm::APSInt LInt,
295                                   SymbolRef RSym, llvm::APSInt RInt) {
296  SValBuilder &SVB = State->getStateManager().getSValBuilder();
297  BasicValueFactory &BV = SVB.getBasicValueFactory();
298  SymbolManager &SymMgr = SVB.getSymbolManager();
299
300  QualType SymTy = LSym->getType();
301  assert(SymTy == RSym->getType() &&
302         "Symbols are not of the same type!");
303  assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
304         "Integers are not of the same type as symbols!");
305  assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
306         "Integers are not of the same type as symbols!");
307
308  QualType ResultTy;
309  if (BinaryOperator::isComparisonOp(Op))
310    ResultTy = SVB.getConditionType();
311  else if (BinaryOperator::isAdditiveOp(Op))
312    ResultTy = SymTy;
313  else
314    llvm_unreachable("Operation not suitable for unchecked rearrangement!");
315
316  if (LSym == RSym)
317    return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
318                           nonloc::ConcreteInt(RInt), ResultTy)
319        .castAs<NonLoc>();
320
321  SymbolRef ResultSym = nullptr;
322  BinaryOperator::Opcode ResultOp;
323  llvm::APSInt ResultInt;
324  if (BinaryOperator::isComparisonOp(Op)) {
325    // Prefer comparing to a non-negative number.
326    // FIXME: Maybe it'd be better to have consistency in
327    // "$x - $y" vs. "$y - $x" because those are solver's keys.
328    if (LInt > RInt) {
329      ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
330      ResultOp = BinaryOperator::reverseComparisonOp(Op);
331      ResultInt = LInt - RInt; // Opposite order!
332    } else {
333      ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
334      ResultOp = Op;
335      ResultInt = RInt - LInt; // Opposite order!
336    }
337  } else {
338    ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
339    ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
340    ResultOp = BO_Add;
341    // Bring back the cosmetic difference.
342    if (ResultInt < 0) {
343      ResultInt = -ResultInt;
344      ResultOp = BO_Sub;
345    } else if (ResultInt == 0) {
346      // Shortcut: Simplify "$x + 0" to "$x".
347      return nonloc::SymbolVal(ResultSym);
348    }
349  }
350  const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
351  return nonloc::SymbolVal(
352      SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
353}
354
355// Rearrange if symbol type matches the result type and if the operator is a
356// comparison operator, both symbol and constant must be within constant
357// overflow bounds.
358static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
359                            SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
360  return Sym->getType() == Ty &&
361    (!BinaryOperator::isComparisonOp(Op) ||
362     (isWithinConstantOverflowBounds(Sym, State) &&
363      isWithinConstantOverflowBounds(Int)));
364}
365
366static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
367                                          BinaryOperator::Opcode Op, NonLoc Lhs,
368                                          NonLoc Rhs, QualType ResultTy) {
369  ProgramStateManager &StateMgr = State->getStateManager();
370  SValBuilder &SVB = StateMgr.getSValBuilder();
371
372  // We expect everything to be of the same type - this type.
373  QualType SingleTy;
374
375  // FIXME: After putting complexity threshold to the symbols we can always
376  //        rearrange additive operations but rearrange comparisons only if
377  //        option is set.
378  if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
379    return std::nullopt;
380
381  SymbolRef LSym = Lhs.getAsSymbol();
382  if (!LSym)
383    return std::nullopt;
384
385  if (BinaryOperator::isComparisonOp(Op)) {
386    SingleTy = LSym->getType();
387    if (ResultTy != SVB.getConditionType())
388      return std::nullopt;
389    // Initialize SingleTy later with a symbol's type.
390  } else if (BinaryOperator::isAdditiveOp(Op)) {
391    SingleTy = ResultTy;
392    if (LSym->getType() != SingleTy)
393      return std::nullopt;
394  } else {
395    // Don't rearrange other operations.
396    return std::nullopt;
397  }
398
399  assert(!SingleTy.isNull() && "We should have figured out the type by now!");
400
401  // Rearrange signed symbolic expressions only
402  if (!SingleTy->isSignedIntegerOrEnumerationType())
403    return std::nullopt;
404
405  SymbolRef RSym = Rhs.getAsSymbol();
406  if (!RSym || RSym->getType() != SingleTy)
407    return std::nullopt;
408
409  BasicValueFactory &BV = State->getBasicVals();
410  llvm::APSInt LInt, RInt;
411  std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
412  std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
413  if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
414      !shouldRearrange(State, Op, RSym, RInt, SingleTy))
415    return std::nullopt;
416
417  // We know that no overflows can occur anymore.
418  return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
419}
420
421SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
422                                  BinaryOperator::Opcode op,
423                                  NonLoc lhs, NonLoc rhs,
424                                  QualType resultTy)  {
425  NonLoc InputLHS = lhs;
426  NonLoc InputRHS = rhs;
427
428  // Constraints may have changed since the creation of a bound SVal. Check if
429  // the values can be simplified based on those new constraints.
430  SVal simplifiedLhs = simplifySVal(state, lhs);
431  SVal simplifiedRhs = simplifySVal(state, rhs);
432  if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
433    lhs = *simplifiedLhsAsNonLoc;
434  if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
435    rhs = *simplifiedRhsAsNonLoc;
436
437  // Handle trivial case where left-side and right-side are the same.
438  if (lhs == rhs)
439    switch (op) {
440      default:
441        break;
442      case BO_EQ:
443      case BO_LE:
444      case BO_GE:
445        return makeTruthVal(true, resultTy);
446      case BO_LT:
447      case BO_GT:
448      case BO_NE:
449        return makeTruthVal(false, resultTy);
450      case BO_Xor:
451      case BO_Sub:
452        if (resultTy->isIntegralOrEnumerationType())
453          return makeIntVal(0, resultTy);
454        return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
455                        QualType{});
456      case BO_Or:
457      case BO_And:
458        return evalCast(lhs, resultTy, QualType{});
459    }
460
461  while (true) {
462    switch (lhs.getKind()) {
463    default:
464      return makeSymExprValNN(op, lhs, rhs, resultTy);
465    case nonloc::PointerToMemberKind: {
466      assert(rhs.getKind() == nonloc::PointerToMemberKind &&
467             "Both SVals should have pointer-to-member-type");
468      auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
469           RPTM = rhs.castAs<nonloc::PointerToMember>();
470      auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
471      switch (op) {
472        case BO_EQ:
473          return makeTruthVal(LPTMD == RPTMD, resultTy);
474        case BO_NE:
475          return makeTruthVal(LPTMD != RPTMD, resultTy);
476        default:
477          return UnknownVal();
478      }
479    }
480    case nonloc::LocAsIntegerKind: {
481      Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
482      switch (rhs.getKind()) {
483      case nonloc::LocAsIntegerKind:
484        // FIXME: at the moment the implementation
485        // of modeling "pointers as integers" is not complete.
486        if (!BinaryOperator::isComparisonOp(op))
487          return UnknownVal();
488        return evalBinOpLL(state, op, lhsL,
489                           rhs.castAs<nonloc::LocAsInteger>().getLoc(),
490                           resultTy);
491      case nonloc::ConcreteIntKind: {
492        // FIXME: at the moment the implementation
493        // of modeling "pointers as integers" is not complete.
494        if (!BinaryOperator::isComparisonOp(op))
495          return UnknownVal();
496        // Transform the integer into a location and compare.
497        // FIXME: This only makes sense for comparisons. If we want to, say,
498        // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
499        // then pack it back into a LocAsInteger.
500        llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
501        // If the region has a symbolic base, pay attention to the type; it
502        // might be coming from a non-default address space. For non-symbolic
503        // regions it doesn't matter that much because such comparisons would
504        // most likely evaluate to concrete false anyway. FIXME: We might
505        // still need to handle the non-comparison case.
506        if (SymbolRef lSym = lhs.getAsLocSymbol(true))
507          BasicVals.getAPSIntType(lSym->getType()).apply(i);
508        else
509          BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
510        return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
511      }
512        default:
513          switch (op) {
514            case BO_EQ:
515              return makeTruthVal(false, resultTy);
516            case BO_NE:
517              return makeTruthVal(true, resultTy);
518            default:
519              // This case also handles pointer arithmetic.
520              return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
521          }
522        }
523    }
524    case nonloc::ConcreteIntKind: {
525      llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
526
527      // If we're dealing with two known constants, just perform the operation.
528      if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) {
529        llvm::APSInt RHSValue = *KnownRHSValue;
530        if (BinaryOperator::isComparisonOp(op)) {
531          // We're looking for a type big enough to compare the two values.
532          // FIXME: This is not correct. char + short will result in a promotion
533          // to int. Unfortunately we have lost types by this point.
534          APSIntType CompareType = std::max(APSIntType(LHSValue),
535                                            APSIntType(RHSValue));
536          CompareType.apply(LHSValue);
537          CompareType.apply(RHSValue);
538        } else if (!BinaryOperator::isShiftOp(op)) {
539          APSIntType IntType = BasicVals.getAPSIntType(resultTy);
540          IntType.apply(LHSValue);
541          IntType.apply(RHSValue);
542        }
543
544        const llvm::APSInt *Result =
545          BasicVals.evalAPSInt(op, LHSValue, RHSValue);
546        if (!Result) {
547          if (op == BO_Shl || op == BO_Shr) {
548            // FIXME: At this point the constant folding claims that the result
549            // of a bitwise shift is undefined. However, constant folding
550            // relies on the inaccurate type information that is stored in the
551            // bit size of APSInt objects, and if we reached this point, then
552            // the checker core.BitwiseShift already determined that the shift
553            // is valid (in a PreStmt callback, by querying the real type from
554            // the AST node).
555            // To avoid embarrassing false positives, let's just say that we
556            // don't know anything about the result of the shift.
557            return UnknownVal();
558          }
559          return UndefinedVal();
560        }
561
562        return nonloc::ConcreteInt(*Result);
563      }
564
565      // Swap the left and right sides and flip the operator if doing so
566      // allows us to better reason about the expression (this is a form
567      // of expression canonicalization).
568      // While we're at it, catch some special cases for non-commutative ops.
569      switch (op) {
570      case BO_LT:
571      case BO_GT:
572      case BO_LE:
573      case BO_GE:
574        op = BinaryOperator::reverseComparisonOp(op);
575        [[fallthrough]];
576      case BO_EQ:
577      case BO_NE:
578      case BO_Add:
579      case BO_Mul:
580      case BO_And:
581      case BO_Xor:
582      case BO_Or:
583        std::swap(lhs, rhs);
584        continue;
585      case BO_Shr:
586        // (~0)>>a
587        if (LHSValue.isAllOnes() && LHSValue.isSigned())
588          return evalCast(lhs, resultTy, QualType{});
589        [[fallthrough]];
590      case BO_Shl:
591        // 0<<a and 0>>a
592        if (LHSValue == 0)
593          return evalCast(lhs, resultTy, QualType{});
594        return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
595      case BO_Div:
596        // 0 / x == 0
597      case BO_Rem:
598        // 0 % x == 0
599        if (LHSValue == 0)
600          return makeZeroVal(resultTy);
601        [[fallthrough]];
602      default:
603        return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
604      }
605    }
606    case nonloc::SymbolValKind: {
607      // We only handle LHS as simple symbols or SymIntExprs.
608      SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
609
610      // LHS is a symbolic expression.
611      if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
612
613        // Is this a logical not? (!x is represented as x == 0.)
614        if (op == BO_EQ && rhs.isZeroConstant()) {
615          // We know how to negate certain expressions. Simplify them here.
616
617          BinaryOperator::Opcode opc = symIntExpr->getOpcode();
618          switch (opc) {
619          default:
620            // We don't know how to negate this operation.
621            // Just handle it as if it were a normal comparison to 0.
622            break;
623          case BO_LAnd:
624          case BO_LOr:
625            llvm_unreachable("Logical operators handled by branching logic.");
626          case BO_Assign:
627          case BO_MulAssign:
628          case BO_DivAssign:
629          case BO_RemAssign:
630          case BO_AddAssign:
631          case BO_SubAssign:
632          case BO_ShlAssign:
633          case BO_ShrAssign:
634          case BO_AndAssign:
635          case BO_XorAssign:
636          case BO_OrAssign:
637          case BO_Comma:
638            llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
639          case BO_PtrMemD:
640          case BO_PtrMemI:
641            llvm_unreachable("Pointer arithmetic not handled here.");
642          case BO_LT:
643          case BO_GT:
644          case BO_LE:
645          case BO_GE:
646          case BO_EQ:
647          case BO_NE:
648            assert(resultTy->isBooleanType() ||
649                   resultTy == getConditionType());
650            assert(symIntExpr->getType()->isBooleanType() ||
651                   getContext().hasSameUnqualifiedType(symIntExpr->getType(),
652                                                       getConditionType()));
653            // Negate the comparison and make a value.
654            opc = BinaryOperator::negateComparisonOp(opc);
655            return makeNonLoc(symIntExpr->getLHS(), opc,
656                symIntExpr->getRHS(), resultTy);
657          }
658        }
659
660        // For now, only handle expressions whose RHS is a constant.
661        if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) {
662          // If both the LHS and the current expression are additive,
663          // fold their constants and try again.
664          if (BinaryOperator::isAdditiveOp(op)) {
665            BinaryOperator::Opcode lop = symIntExpr->getOpcode();
666            if (BinaryOperator::isAdditiveOp(lop)) {
667              // Convert the two constants to a common type, then combine them.
668
669              // resultTy may not be the best type to convert to, but it's
670              // probably the best choice in expressions with mixed type
671              // (such as x+1U+2LL). The rules for implicit conversions should
672              // choose a reasonable type to preserve the expression, and will
673              // at least match how the value is going to be used.
674              APSIntType IntType = BasicVals.getAPSIntType(resultTy);
675              const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
676              const llvm::APSInt &second = IntType.convert(*RHSValue);
677
678              // If the op and lop agrees, then we just need to
679              // sum the constants. Otherwise, we change to operation
680              // type if substraction would produce negative value
681              // (and cause overflow for unsigned integers),
682              // as consequence x+1U-10 produces x-9U, instead
683              // of x+4294967287U, that would be produced without this
684              // additional check.
685              const llvm::APSInt *newRHS;
686              if (lop == op) {
687                newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
688              } else if (first >= second) {
689                newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
690                op = lop;
691              } else {
692                newRHS = BasicVals.evalAPSInt(BO_Sub, second, first);
693              }
694
695              assert(newRHS && "Invalid operation despite common type!");
696              rhs = nonloc::ConcreteInt(*newRHS);
697              lhs = nonloc::SymbolVal(symIntExpr->getLHS());
698              continue;
699            }
700          }
701
702          // Otherwise, make a SymIntExpr out of the expression.
703          return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
704        }
705      }
706
707      // Is the RHS a constant?
708      if (const llvm::APSInt *RHSValue = getConstValue(state, rhs))
709        return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
710
711      if (std::optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
712        return *V;
713
714      // Give up -- this is not a symbolic expression we can handle.
715      return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
716    }
717    }
718  }
719}
720
721static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
722                                            const FieldRegion *RightFR,
723                                            BinaryOperator::Opcode op,
724                                            QualType resultTy,
725                                            SimpleSValBuilder &SVB) {
726  // Only comparisons are meaningful here!
727  if (!BinaryOperator::isComparisonOp(op))
728    return UnknownVal();
729
730  // Next, see if the two FRs have the same super-region.
731  // FIXME: This doesn't handle casts yet, and simply stripping the casts
732  // doesn't help.
733  if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
734    return UnknownVal();
735
736  const FieldDecl *LeftFD = LeftFR->getDecl();
737  const FieldDecl *RightFD = RightFR->getDecl();
738  const RecordDecl *RD = LeftFD->getParent();
739
740  // Make sure the two FRs are from the same kind of record. Just in case!
741  // FIXME: This is probably where inheritance would be a problem.
742  if (RD != RightFD->getParent())
743    return UnknownVal();
744
745  // We know for sure that the two fields are not the same, since that
746  // would have given us the same SVal.
747  if (op == BO_EQ)
748    return SVB.makeTruthVal(false, resultTy);
749  if (op == BO_NE)
750    return SVB.makeTruthVal(true, resultTy);
751
752  // Iterate through the fields and see which one comes first.
753  // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
754  // members and the units in which bit-fields reside have addresses that
755  // increase in the order in which they are declared."
756  bool leftFirst = (op == BO_LT || op == BO_LE);
757  for (const auto *I : RD->fields()) {
758    if (I == LeftFD)
759      return SVB.makeTruthVal(leftFirst, resultTy);
760    if (I == RightFD)
761      return SVB.makeTruthVal(!leftFirst, resultTy);
762  }
763
764  llvm_unreachable("Fields not found in parent record's definition");
765}
766
767// This is used in debug builds only for now because some downstream users
768// may hit this assert in their subsequent merges.
769// There are still places in the analyzer where equal bitwidth Locs
770// are compared, and need to be found and corrected. Recent previous fixes have
771// addressed the known problems of making NULLs with specific bitwidths
772// for Loc comparisons along with deprecation of APIs for the same purpose.
773//
774static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
775                                 Loc LhsLoc) {
776  // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
777  ASTContext &Ctx = State->getStateManager().getContext();
778  uint64_t RhsBitwidth =
779      RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
780  uint64_t LhsBitwidth =
781      LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
782  if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) {
783    assert(RhsBitwidth == LhsBitwidth &&
784           "RhsLoc and LhsLoc bitwidth must be same!");
785  }
786}
787
788// FIXME: all this logic will change if/when we have MemRegion::getLocation().
789SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
790                                  BinaryOperator::Opcode op,
791                                  Loc lhs, Loc rhs,
792                                  QualType resultTy) {
793
794  // Assert that bitwidth of lhs and rhs are the same.
795  // This can happen if two different address spaces are used,
796  // and the bitwidths of the address spaces are different.
797  // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
798  // FIXME: See comment above in the function assertEqualBitWidths
799  assertEqualBitWidths(state, rhs, lhs);
800
801  // Only comparisons and subtractions are valid operations on two pointers.
802  // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
803  // However, if a pointer is casted to an integer, evalBinOpNN may end up
804  // calling this function with another operation (PR7527). We don't attempt to
805  // model this for now, but it could be useful, particularly when the
806  // "location" is actually an integer value that's been passed through a void*.
807  if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
808    return UnknownVal();
809
810  // Special cases for when both sides are identical.
811  if (lhs == rhs) {
812    switch (op) {
813    default:
814      llvm_unreachable("Unimplemented operation for two identical values");
815    case BO_Sub:
816      return makeZeroVal(resultTy);
817    case BO_EQ:
818    case BO_LE:
819    case BO_GE:
820      return makeTruthVal(true, resultTy);
821    case BO_NE:
822    case BO_LT:
823    case BO_GT:
824      return makeTruthVal(false, resultTy);
825    }
826  }
827
828  switch (lhs.getKind()) {
829  default:
830    llvm_unreachable("Ordering not implemented for this Loc.");
831
832  case loc::GotoLabelKind:
833    // The only thing we know about labels is that they're non-null.
834    if (rhs.isZeroConstant()) {
835      switch (op) {
836      default:
837        break;
838      case BO_Sub:
839        return evalCast(lhs, resultTy, QualType{});
840      case BO_EQ:
841      case BO_LE:
842      case BO_LT:
843        return makeTruthVal(false, resultTy);
844      case BO_NE:
845      case BO_GT:
846      case BO_GE:
847        return makeTruthVal(true, resultTy);
848      }
849    }
850    // There may be two labels for the same location, and a function region may
851    // have the same address as a label at the start of the function (depending
852    // on the ABI).
853    // FIXME: we can probably do a comparison against other MemRegions, though.
854    // FIXME: is there a way to tell if two labels refer to the same location?
855    return UnknownVal();
856
857  case loc::ConcreteIntKind: {
858    auto L = lhs.castAs<loc::ConcreteInt>();
859
860    // If one of the operands is a symbol and the other is a constant,
861    // build an expression for use by the constraint manager.
862    if (SymbolRef rSym = rhs.getAsLocSymbol()) {
863      // We can only build expressions with symbols on the left,
864      // so we need a reversible operator.
865      if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
866        return UnknownVal();
867
868      op = BinaryOperator::reverseComparisonOp(op);
869      return makeNonLoc(rSym, op, L.getValue(), resultTy);
870    }
871
872    // If both operands are constants, just perform the operation.
873    if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
874      assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
875
876      if (const auto *ResultInt =
877              BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue()))
878        return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
879      return UnknownVal();
880    }
881
882    // Special case comparisons against NULL.
883    // This must come after the test if the RHS is a symbol, which is used to
884    // build constraints. The address of any non-symbolic region is guaranteed
885    // to be non-NULL, as is any label.
886    assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
887    if (lhs.isZeroConstant()) {
888      switch (op) {
889      default:
890        break;
891      case BO_EQ:
892      case BO_GT:
893      case BO_GE:
894        return makeTruthVal(false, resultTy);
895      case BO_NE:
896      case BO_LT:
897      case BO_LE:
898        return makeTruthVal(true, resultTy);
899      }
900    }
901
902    // Comparing an arbitrary integer to a region or label address is
903    // completely unknowable.
904    return UnknownVal();
905  }
906  case loc::MemRegionValKind: {
907    if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
908      // If one of the operands is a symbol and the other is a constant,
909      // build an expression for use by the constraint manager.
910      if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
911        if (BinaryOperator::isComparisonOp(op))
912          return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
913        return UnknownVal();
914      }
915      // Special case comparisons to NULL.
916      // This must come after the test if the LHS is a symbol, which is used to
917      // build constraints. The address of any non-symbolic region is guaranteed
918      // to be non-NULL.
919      if (rInt->isZeroConstant()) {
920        if (op == BO_Sub)
921          return evalCast(lhs, resultTy, QualType{});
922
923        if (BinaryOperator::isComparisonOp(op)) {
924          QualType boolType = getContext().BoolTy;
925          NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
926          NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
927          return evalBinOpNN(state, op, l, r, resultTy);
928        }
929      }
930
931      // Comparing a region to an arbitrary integer is completely unknowable.
932      return UnknownVal();
933    }
934
935    // Get both values as regions, if possible.
936    const MemRegion *LeftMR = lhs.getAsRegion();
937    assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
938
939    const MemRegion *RightMR = rhs.getAsRegion();
940    if (!RightMR)
941      // The RHS is probably a label, which in theory could address a region.
942      // FIXME: we can probably make a more useful statement about non-code
943      // regions, though.
944      return UnknownVal();
945
946    const MemRegion *LeftBase = LeftMR->getBaseRegion();
947    const MemRegion *RightBase = RightMR->getBaseRegion();
948    const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
949    const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
950    const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
951
952    // If the two regions are from different known memory spaces they cannot be
953    // equal. Also, assume that no symbolic region (whose memory space is
954    // unknown) is on the stack.
955    if (LeftMS != RightMS &&
956        ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
957         (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
958      switch (op) {
959      default:
960        return UnknownVal();
961      case BO_EQ:
962        return makeTruthVal(false, resultTy);
963      case BO_NE:
964        return makeTruthVal(true, resultTy);
965      }
966    }
967
968    // If both values wrap regions, see if they're from different base regions.
969    // Note, heap base symbolic regions are assumed to not alias with
970    // each other; for example, we assume that malloc returns different address
971    // on each invocation.
972    // FIXME: ObjC object pointers always reside on the heap, but currently
973    // we treat their memory space as unknown, because symbolic pointers
974    // to ObjC objects may alias. There should be a way to construct
975    // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
976    // guesses memory space for ObjC object pointers manually instead of
977    // relying on us.
978    if (LeftBase != RightBase &&
979        ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
980         (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
981      switch (op) {
982      default:
983        return UnknownVal();
984      case BO_EQ:
985        return makeTruthVal(false, resultTy);
986      case BO_NE:
987        return makeTruthVal(true, resultTy);
988      }
989    }
990
991    // Handle special cases for when both regions are element regions.
992    const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
993    const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
994    if (RightER && LeftER) {
995      // Next, see if the two ERs have the same super-region and matching types.
996      // FIXME: This should do something useful even if the types don't match,
997      // though if both indexes are constant the RegionRawOffset path will
998      // give the correct answer.
999      if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1000          LeftER->getElementType() == RightER->getElementType()) {
1001        // Get the left index and cast it to the correct type.
1002        // If the index is unknown or undefined, bail out here.
1003        SVal LeftIndexVal = LeftER->getIndex();
1004        std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1005        if (!LeftIndex)
1006          return UnknownVal();
1007        LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
1008        LeftIndex = LeftIndexVal.getAs<NonLoc>();
1009        if (!LeftIndex)
1010          return UnknownVal();
1011
1012        // Do the same for the right index.
1013        SVal RightIndexVal = RightER->getIndex();
1014        std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1015        if (!RightIndex)
1016          return UnknownVal();
1017        RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
1018        RightIndex = RightIndexVal.getAs<NonLoc>();
1019        if (!RightIndex)
1020          return UnknownVal();
1021
1022        // Actually perform the operation.
1023        // evalBinOpNN expects the two indexes to already be the right type.
1024        return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
1025      }
1026    }
1027
1028    // Special handling of the FieldRegions, even with symbolic offsets.
1029    const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1030    const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1031    if (RightFR && LeftFR) {
1032      SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1033                                               *this);
1034      if (!R.isUnknown())
1035        return R;
1036    }
1037
1038    // Compare the regions using the raw offsets.
1039    RegionOffset LeftOffset = LeftMR->getAsOffset();
1040    RegionOffset RightOffset = RightMR->getAsOffset();
1041
1042    if (LeftOffset.getRegion() != nullptr &&
1043        LeftOffset.getRegion() == RightOffset.getRegion() &&
1044        !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1045      int64_t left = LeftOffset.getOffset();
1046      int64_t right = RightOffset.getOffset();
1047
1048      switch (op) {
1049        default:
1050          return UnknownVal();
1051        case BO_LT:
1052          return makeTruthVal(left < right, resultTy);
1053        case BO_GT:
1054          return makeTruthVal(left > right, resultTy);
1055        case BO_LE:
1056          return makeTruthVal(left <= right, resultTy);
1057        case BO_GE:
1058          return makeTruthVal(left >= right, resultTy);
1059        case BO_EQ:
1060          return makeTruthVal(left == right, resultTy);
1061        case BO_NE:
1062          return makeTruthVal(left != right, resultTy);
1063      }
1064    }
1065
1066    // At this point we're not going to get a good answer, but we can try
1067    // conjuring an expression instead.
1068    SymbolRef LHSSym = lhs.getAsLocSymbol();
1069    SymbolRef RHSSym = rhs.getAsLocSymbol();
1070    if (LHSSym && RHSSym)
1071      return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1072
1073    // If we get here, we have no way of comparing the regions.
1074    return UnknownVal();
1075  }
1076  }
1077}
1078
1079SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1080                                    BinaryOperator::Opcode op, Loc lhs,
1081                                    NonLoc rhs, QualType resultTy) {
1082  if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1083    if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1084      if (PTMSV->isNullMemberPointer())
1085        return UndefinedVal();
1086
1087      auto getFieldLValue = [&](const auto *FD) -> SVal {
1088        SVal Result = lhs;
1089
1090        for (const auto &I : *PTMSV)
1091          Result = StateMgr.getStoreManager().evalDerivedToBase(
1092              Result, I->getType(), I->isVirtual());
1093
1094        return state->getLValue(FD, Result);
1095      };
1096
1097      if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1098        return getFieldLValue(FD);
1099      }
1100      if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1101        return getFieldLValue(FD);
1102      }
1103    }
1104
1105    return rhs;
1106  }
1107
1108  assert(!BinaryOperator::isComparisonOp(op) &&
1109         "arguments to comparison ops must be of the same type");
1110
1111  // Special case: rhs is a zero constant.
1112  if (rhs.isZeroConstant())
1113    return lhs;
1114
1115  // Perserve the null pointer so that it can be found by the DerefChecker.
1116  if (lhs.isZeroConstant())
1117    return lhs;
1118
1119  // We are dealing with pointer arithmetic.
1120
1121  // Handle pointer arithmetic on constant values.
1122  if (std::optional<nonloc::ConcreteInt> rhsInt =
1123          rhs.getAs<nonloc::ConcreteInt>()) {
1124    if (std::optional<loc::ConcreteInt> lhsInt =
1125            lhs.getAs<loc::ConcreteInt>()) {
1126      const llvm::APSInt &leftI = lhsInt->getValue();
1127      assert(leftI.isUnsigned());
1128      llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1129
1130      // Convert the bitwidth of rightI.  This should deal with overflow
1131      // since we are dealing with concrete values.
1132      rightI = rightI.extOrTrunc(leftI.getBitWidth());
1133
1134      // Offset the increment by the pointer size.
1135      llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1136      QualType pointeeType = resultTy->getPointeeType();
1137      Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1138      rightI *= Multiplicand;
1139
1140      // Compute the adjusted pointer.
1141      switch (op) {
1142        case BO_Add:
1143          rightI = leftI + rightI;
1144          break;
1145        case BO_Sub:
1146          rightI = leftI - rightI;
1147          break;
1148        default:
1149          llvm_unreachable("Invalid pointer arithmetic operation");
1150      }
1151      return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1152    }
1153  }
1154
1155  // Handle cases where 'lhs' is a region.
1156  if (const MemRegion *region = lhs.getAsRegion()) {
1157    rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1158    SVal index = UnknownVal();
1159    const SubRegion *superR = nullptr;
1160    // We need to know the type of the pointer in order to add an integer to it.
1161    // Depending on the type, different amount of bytes is added.
1162    QualType elementType;
1163
1164    if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1165      assert(op == BO_Add || op == BO_Sub);
1166      index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1167                          getArrayIndexType());
1168      superR = cast<SubRegion>(elemReg->getSuperRegion());
1169      elementType = elemReg->getElementType();
1170    }
1171    else if (isa<SubRegion>(region)) {
1172      assert(op == BO_Add || op == BO_Sub);
1173      index = (op == BO_Add) ? rhs : evalMinus(rhs);
1174      superR = cast<SubRegion>(region);
1175      // TODO: Is this actually reliable? Maybe improving our MemRegion
1176      // hierarchy to provide typed regions for all non-void pointers would be
1177      // better. For instance, we cannot extend this towards LocAsInteger
1178      // operations, where result type of the expression is integer.
1179      if (resultTy->isAnyPointerType())
1180        elementType = resultTy->getPointeeType();
1181    }
1182
1183    // Represent arithmetic on void pointers as arithmetic on char pointers.
1184    // It is fine when a TypedValueRegion of char value type represents
1185    // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1186    if (elementType->isVoidType())
1187      elementType = getContext().CharTy;
1188
1189    if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1190      return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1191                                                       superR, getContext()));
1192    }
1193  }
1194  return UnknownVal();
1195}
1196
1197const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1198                                                     SVal V) {
1199  if (const llvm::APSInt *Res = getConcreteValue(V))
1200    return Res;
1201
1202  if (SymbolRef Sym = V.getAsSymbol())
1203    return state->getConstraintManager().getSymVal(state, Sym);
1204
1205  return nullptr;
1206}
1207
1208const llvm::APSInt *SimpleSValBuilder::getConcreteValue(SVal V) {
1209  if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1210    return &X->getValue();
1211
1212  if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1213    return &X->getValue();
1214
1215  return nullptr;
1216}
1217
1218const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1219                                                     SVal V) {
1220  return getConstValue(state, simplifySVal(state, V));
1221}
1222
1223const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state,
1224                                                   SVal V) {
1225  V = simplifySVal(state, V);
1226
1227  if (const llvm::APSInt *Res = getConcreteValue(V))
1228    return Res;
1229
1230  if (SymbolRef Sym = V.getAsSymbol())
1231    return state->getConstraintManager().getSymMinVal(state, Sym);
1232
1233  return nullptr;
1234}
1235
1236const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state,
1237                                                   SVal V) {
1238  V = simplifySVal(state, V);
1239
1240  if (const llvm::APSInt *Res = getConcreteValue(V))
1241    return Res;
1242
1243  if (SymbolRef Sym = V.getAsSymbol())
1244    return state->getConstraintManager().getSymMaxVal(state, Sym);
1245
1246  return nullptr;
1247}
1248
1249SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1250  SVal SimplifiedVal = simplifySValOnce(State, Val);
1251  while (SimplifiedVal != Val) {
1252    Val = SimplifiedVal;
1253    SimplifiedVal = simplifySValOnce(State, Val);
1254  }
1255  return SimplifiedVal;
1256}
1257
1258SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1259  return simplifyUntilFixpoint(State, V);
1260}
1261
1262SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1263  // For now, this function tries to constant-fold symbols inside a
1264  // nonloc::SymbolVal, and does nothing else. More simplifications should
1265  // be possible, such as constant-folding an index in an ElementRegion.
1266
1267  class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1268    ProgramStateRef State;
1269    SValBuilder &SVB;
1270
1271    // Cache results for the lifetime of the Simplifier. Results change every
1272    // time new constraints are added to the program state, which is the whole
1273    // point of simplifying, and for that very reason it's pointless to maintain
1274    // the same cache for the duration of the whole analysis.
1275    llvm::DenseMap<SymbolRef, SVal> Cached;
1276
1277    static bool isUnchanged(SymbolRef Sym, SVal Val) {
1278      return Sym == Val.getAsSymbol();
1279    }
1280
1281    SVal cache(SymbolRef Sym, SVal V) {
1282      Cached[Sym] = V;
1283      return V;
1284    }
1285
1286    SVal skip(SymbolRef Sym) {
1287      return cache(Sym, SVB.makeSymbolVal(Sym));
1288    }
1289
1290    // Return the known const value for the Sym if available, or return Undef
1291    // otherwise.
1292    SVal getConst(SymbolRef Sym) {
1293      const llvm::APSInt *Const =
1294          State->getConstraintManager().getSymVal(State, Sym);
1295      if (Const)
1296        return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1297                                              : (SVal)SVB.makeIntVal(*Const);
1298      return UndefinedVal();
1299    }
1300
1301    SVal getConstOrVisit(SymbolRef Sym) {
1302      const SVal Ret = getConst(Sym);
1303      if (Ret.isUndef())
1304        return Visit(Sym);
1305      return Ret;
1306    }
1307
1308  public:
1309    Simplifier(ProgramStateRef State)
1310        : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1311
1312    SVal VisitSymbolData(const SymbolData *S) {
1313      // No cache here.
1314      if (const llvm::APSInt *I =
1315              State->getConstraintManager().getSymVal(State, S))
1316        return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1317                                            : (SVal)SVB.makeIntVal(*I);
1318      return SVB.makeSymbolVal(S);
1319    }
1320
1321    SVal VisitSymIntExpr(const SymIntExpr *S) {
1322      auto I = Cached.find(S);
1323      if (I != Cached.end())
1324        return I->second;
1325
1326      SVal LHS = getConstOrVisit(S->getLHS());
1327      if (isUnchanged(S->getLHS(), LHS))
1328        return skip(S);
1329
1330      SVal RHS;
1331      // By looking at the APSInt in the right-hand side of S, we cannot
1332      // figure out if it should be treated as a Loc or as a NonLoc.
1333      // So make our guess by recalling that we cannot multiply pointers
1334      // or compare a pointer to an integer.
1335      if (Loc::isLocType(S->getLHS()->getType()) &&
1336          BinaryOperator::isComparisonOp(S->getOpcode())) {
1337        // The usual conversion of $sym to &SymRegion{$sym}, as they have
1338        // the same meaning for Loc-type symbols, but the latter form
1339        // is preferred in SVal computations for being Loc itself.
1340        if (SymbolRef Sym = LHS.getAsSymbol()) {
1341          assert(Loc::isLocType(Sym->getType()));
1342          LHS = SVB.makeLoc(Sym);
1343        }
1344        RHS = SVB.makeIntLocVal(S->getRHS());
1345      } else {
1346        RHS = SVB.makeIntVal(S->getRHS());
1347      }
1348
1349      return cache(
1350          S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1351    }
1352
1353    SVal VisitIntSymExpr(const IntSymExpr *S) {
1354      auto I = Cached.find(S);
1355      if (I != Cached.end())
1356        return I->second;
1357
1358      SVal RHS = getConstOrVisit(S->getRHS());
1359      if (isUnchanged(S->getRHS(), RHS))
1360        return skip(S);
1361
1362      SVal LHS = SVB.makeIntVal(S->getLHS());
1363      return cache(
1364          S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1365    }
1366
1367    SVal VisitSymSymExpr(const SymSymExpr *S) {
1368      auto I = Cached.find(S);
1369      if (I != Cached.end())
1370        return I->second;
1371
1372      // For now don't try to simplify mixed Loc/NonLoc expressions
1373      // because they often appear from LocAsInteger operations
1374      // and we don't know how to combine a LocAsInteger
1375      // with a concrete value.
1376      if (Loc::isLocType(S->getLHS()->getType()) !=
1377          Loc::isLocType(S->getRHS()->getType()))
1378        return skip(S);
1379
1380      SVal LHS = getConstOrVisit(S->getLHS());
1381      SVal RHS = getConstOrVisit(S->getRHS());
1382
1383      if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1384        return skip(S);
1385
1386      return cache(
1387          S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1388    }
1389
1390    SVal VisitSymbolCast(const SymbolCast *S) {
1391      auto I = Cached.find(S);
1392      if (I != Cached.end())
1393        return I->second;
1394      const SymExpr *OpSym = S->getOperand();
1395      SVal OpVal = getConstOrVisit(OpSym);
1396      if (isUnchanged(OpSym, OpVal))
1397        return skip(S);
1398
1399      return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType()));
1400    }
1401
1402    SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1403      auto I = Cached.find(S);
1404      if (I != Cached.end())
1405        return I->second;
1406      SVal Op = getConstOrVisit(S->getOperand());
1407      if (isUnchanged(S->getOperand(), Op))
1408        return skip(S);
1409
1410      return cache(
1411          S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType()));
1412    }
1413
1414    SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1415
1416    SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1417
1418    SVal VisitSymbolVal(nonloc::SymbolVal V) {
1419      // Simplification is much more costly than computing complexity.
1420      // For high complexity, it may be not worth it.
1421      return Visit(V.getSymbol());
1422    }
1423
1424    SVal VisitSVal(SVal V) { return V; }
1425  };
1426
1427  SVal SimplifiedV = Simplifier(State).Visit(V);
1428  return SimplifiedV;
1429}
1430