// Reassociate.cpp — FreeBSD contrib/llvm annotated source, SVN revision 285830.
1296417Sdim//===- Reassociate.cpp - Reassociate binary expressions -------------------===// 2283627Sdim// 3283627Sdim// The LLVM Compiler Infrastructure 4283627Sdim// 5283627Sdim// This file is distributed under the University of Illinois Open Source 6283627Sdim// License. See LICENSE.TXT for details. 7283627Sdim// 8283627Sdim//===----------------------------------------------------------------------===// 9283627Sdim// 10283627Sdim// This pass reassociates commutative expressions in an order that is designed 11283627Sdim// to promote better constant propagation, GCSE, LICM, PRE, etc. 12283627Sdim// 13283627Sdim// For example: 4 + (x + 5) -> x + (4 + 5) 14283627Sdim// 15283627Sdim// In the implementation of this algorithm, constants are assigned rank = 0, 16283627Sdim// function arguments are rank = 1, and other values are assigned ranks 17283627Sdim// corresponding to the reverse post order traversal of current function 18283627Sdim// (starting at 2), which effectively gives values in deep loops higher rank 19283627Sdim// than values not in loops. 
20283627Sdim// 21283627Sdim//===----------------------------------------------------------------------===// 22283627Sdim 23296417Sdim#define DEBUG_TYPE "reassociate" 24283627Sdim#include "llvm/Transforms/Scalar.h" 25283627Sdim#include "llvm/ADT/DenseMap.h" 26283627Sdim#include "llvm/ADT/PostOrderIterator.h" 27283627Sdim#include "llvm/ADT/STLExtras.h" 28283627Sdim#include "llvm/ADT/SetVector.h" 29283627Sdim#include "llvm/ADT/Statistic.h" 30283627Sdim#include "llvm/Assembly/Writer.h" 31284734Sdim#include "llvm/IR/Constants.h" 32296417Sdim#include "llvm/IR/DerivedTypes.h" 33283627Sdim#include "llvm/IR/Function.h" 34285181Sdim#include "llvm/IR/IRBuilder.h" 35283627Sdim#include "llvm/IR/Instructions.h" 36283627Sdim#include "llvm/IR/IntrinsicInst.h" 37283627Sdim#include "llvm/Pass.h" 38283627Sdim#include "llvm/Support/CFG.h" 39285181Sdim#include "llvm/Support/Debug.h" 40283627Sdim#include "llvm/Support/ValueHandle.h" 41283627Sdim#include "llvm/Support/raw_ostream.h" 42283627Sdim#include "llvm/Transforms/Utils/Local.h" 43283627Sdim#include <algorithm> 44283627Sdimusing namespace llvm; 45283627Sdim 46283627SdimSTATISTIC(NumChanged, "Number of insts reassociated"); 47285181SdimSTATISTIC(NumAnnihil, "Number of expr tree annihilated"); 48283627SdimSTATISTIC(NumFactor , "Number of multiplies factored"); 49283627Sdim 50283627Sdimnamespace { 51283627Sdim struct ValueEntry { 52283627Sdim unsigned Rank; 53283627Sdim Value *Op; 54283627Sdim ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} 55283627Sdim }; 56285181Sdim inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) { 57283627Sdim return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start. 58283627Sdim } 59283627Sdim} 60283627Sdim 61285181Sdim#ifndef NDEBUG 62283627Sdim/// PrintOps - Print out the expression identified in the Ops list. 
63283627Sdim/// 64283627Sdimstatic void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { 65283627Sdim Module *M = I->getParent()->getParent()->getParent(); 66283627Sdim dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " 67283627Sdim << *Ops[0].Op->getType() << '\t'; 68283627Sdim for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 69285181Sdim dbgs() << "[ "; 70283627Sdim WriteAsOperand(dbgs(), Ops[i].Op, false, M); 71283627Sdim dbgs() << ", #" << Ops[i].Rank << "] "; 72283627Sdim } 73283627Sdim} 74283627Sdim#endif 75283627Sdim 76283627Sdimnamespace { 77283627Sdim /// \brief Utility class representing a base and exponent pair which form one 78285181Sdim /// factor of some product. 79283627Sdim struct Factor { 80283627Sdim Value *Base; 81283627Sdim unsigned Power; 82283627Sdim 83283627Sdim Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {} 84283627Sdim 85283627Sdim /// \brief Sort factors by their Base. 86285181Sdim struct BaseSorter { 87283627Sdim bool operator()(const Factor &LHS, const Factor &RHS) { 88283627Sdim return LHS.Base < RHS.Base; 89283627Sdim } 90283627Sdim }; 91283627Sdim 92283627Sdim /// \brief Compare factors for equal bases. 93283627Sdim struct BaseEqual { 94283627Sdim bool operator()(const Factor &LHS, const Factor &RHS) { 95285181Sdim return LHS.Base == RHS.Base; 96283627Sdim } 97283627Sdim }; 98283627Sdim 99283627Sdim /// \brief Sort factors in descending order by their power. 100283627Sdim struct PowerDescendingSorter { 101283627Sdim bool operator()(const Factor &LHS, const Factor &RHS) { 102283627Sdim return LHS.Power > RHS.Power; 103285181Sdim } 104283627Sdim }; 105283627Sdim 106283627Sdim /// \brief Compare factors for equal powers. 
107283627Sdim struct PowerEqual { 108283627Sdim bool operator()(const Factor &LHS, const Factor &RHS) { 109283627Sdim return LHS.Power == RHS.Power; 110283627Sdim } 111283627Sdim }; 112285181Sdim }; 113283627Sdim 114283627Sdim /// Utility class representing a non-constant Xor-operand. We classify 115283627Sdim /// non-constant Xor-Operands into two categories: 116283627Sdim /// C1) The operand is in the form "X & C", where C is a constant and C != ~0 117283627Sdim /// C2) 118283627Sdim /// C2.1) The operand is in the form of "X | C", where C is a non-zero 119283627Sdim /// constant. 120285181Sdim /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this 121283627Sdim /// operand as "E | 0" 122283627Sdim class XorOpnd { 123283627Sdim public: 124283627Sdim XorOpnd(Value *V); 125283627Sdim 126283627Sdim bool isInvalid() const { return SymbolicPart == 0; } 127283627Sdim bool isOrExpr() const { return isOr; } 128283627Sdim Value *getValue() const { return OrigVal; } 129285181Sdim Value *getSymbolicPart() const { return SymbolicPart; } 130283627Sdim unsigned getSymbolicRank() const { return SymbolicRank; } 131283627Sdim const APInt &getConstPart() const { return ConstPart; } 132283627Sdim 133283627Sdim void Invalidate() { SymbolicPart = OrigVal = 0; } 134283627Sdim void setSymbolicRank(unsigned R) { SymbolicRank = R; } 135283627Sdim 136283627Sdim // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. 137285181Sdim // The purpose is twofold: 138283627Sdim // 1) Cluster together the operands sharing the same symbolic-value. 139283627Sdim // 2) Operand having smaller symbolic-value-rank is permuted earlier, which 140283627Sdim // could potentially shorten crital path, and expose more loop-invariants. 141283627Sdim // Note that values' rank are basically defined in RPO order (FIXME). 142283627Sdim // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier 143283627Sdim // than Y which is defined earlier than Z. 
Permute "x | 1", "Y & 2", 144283627Sdim // "z" in the order of X-Y-Z is better than any other orders. 145283627Sdim struct PtrSortFunctor { 146285181Sdim bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) { 147283627Sdim return LHS->getSymbolicRank() < RHS->getSymbolicRank(); 148283627Sdim } 149283627Sdim }; 150283627Sdim private: 151283627Sdim Value *OrigVal; 152283627Sdim Value *SymbolicPart; 153283627Sdim APInt ConstPart; 154285181Sdim unsigned SymbolicRank; 155283627Sdim bool isOr; 156283627Sdim }; 157283627Sdim} 158283627Sdim 159283627Sdimnamespace { 160283627Sdim class Reassociate : public FunctionPass { 161283627Sdim DenseMap<BasicBlock*, unsigned> RankMap; 162283627Sdim DenseMap<AssertingVH<Value>, unsigned> ValueRankMap; 163285181Sdim SetVector<AssertingVH<Instruction> > RedoInsts; 164283627Sdim bool MadeChange; 165283627Sdim public: 166283627Sdim static char ID; // Pass identification, replacement for typeid 167283627Sdim Reassociate() : FunctionPass(ID) { 168283627Sdim initializeReassociatePass(*PassRegistry::getPassRegistry()); 169283627Sdim } 170283627Sdim 171285181Sdim bool runOnFunction(Function &F); 172283627Sdim 173283627Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 174283627Sdim AU.setPreservesCFG(); 175283627Sdim } 176283627Sdim private: 177283627Sdim void BuildRankMap(Function &F); 178283627Sdim unsigned getRank(Value *V); 179283627Sdim void ReassociateExpression(BinaryOperator *I); 180285181Sdim void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); 181283627Sdim Value *OptimizeExpression(BinaryOperator *I, 182283627Sdim SmallVectorImpl<ValueEntry> &Ops); 183283627Sdim Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); 184283627Sdim Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); 185283627Sdim bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, 186283627Sdim Value *&Res); 187283627Sdim bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, 
XorOpnd *Opnd2, 188285181Sdim APInt &ConstOpnd, Value *&Res); 189283627Sdim bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, 190283627Sdim SmallVectorImpl<Factor> &Factors); 191283627Sdim Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, 192283627Sdim SmallVectorImpl<Factor> &Factors); 193283627Sdim Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); 194283627Sdim Value *RemoveFactorFromExpression(Value *V, Value *Factor); 195283627Sdim void EraseInst(Instruction *I); 196283627Sdim void OptimizeInst(Instruction *I); 197285181Sdim }; 198283627Sdim} 199283627Sdim 200283627SdimXorOpnd::XorOpnd(Value *V) { 201283627Sdim assert(!isa<ConstantInt>(V) && "No ConstantInt"); 202283627Sdim OrigVal = V; 203283627Sdim Instruction *I = dyn_cast<Instruction>(V); 204283627Sdim SymbolicRank = 0; 205285181Sdim 206283627Sdim if (I && (I->getOpcode() == Instruction::Or || 207283627Sdim I->getOpcode() == Instruction::And)) { 208283627Sdim Value *V0 = I->getOperand(0); 209283627Sdim Value *V1 = I->getOperand(1); 210283627Sdim if (isa<ConstantInt>(V0)) 211283627Sdim std::swap(V0, V1); 212283627Sdim 213283627Sdim if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) { 214285181Sdim ConstPart = C->getValue(); 215283627Sdim SymbolicPart = V0; 216283627Sdim isOr = (I->getOpcode() == Instruction::Or); 217283627Sdim return; 218283627Sdim } 219283627Sdim } 220283627Sdim 221283627Sdim // view the operand as "V | 0" 222283627Sdim SymbolicPart = V; 223285181Sdim ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); 224283627Sdim isOr = true; 225283627Sdim} 226283627Sdim 227283627Sdimchar Reassociate::ID = 0; 228283627SdimINITIALIZE_PASS(Reassociate, "reassociate", 229283627Sdim "Reassociate expressions", false, false) 230283627Sdim 231283627Sdim// Public interface to the Reassociate pass 232285181SdimFunctionPass *llvm::createReassociatePass() { return new Reassociate(); } 233283627Sdim 234283627Sdim/// isReassociableOp - Return true if V is an 
instruction of the specified 235283627Sdim/// opcode and if it only has one use. 236283627Sdimstatic BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { 237283627Sdim if (V->hasOneUse() && isa<Instruction>(V) && 238283627Sdim cast<Instruction>(V)->getOpcode() == Opcode) 239283627Sdim return cast<BinaryOperator>(V); 240285181Sdim return 0; 241283627Sdim} 242283627Sdim 243283627Sdimstatic bool isUnmovableInstruction(Instruction *I) { 244283627Sdim switch (I->getOpcode()) { 245283627Sdim case Instruction::PHI: 246283627Sdim case Instruction::LandingPad: 247283627Sdim case Instruction::Alloca: 248283627Sdim case Instruction::Load: 249285181Sdim case Instruction::Invoke: 250283627Sdim case Instruction::UDiv: 251283627Sdim case Instruction::SDiv: 252283627Sdim case Instruction::FDiv: 253283627Sdim case Instruction::URem: 254283627Sdim case Instruction::SRem: 255283627Sdim case Instruction::FRem: 256283627Sdim return true; 257285181Sdim case Instruction::Call: 258283627Sdim return !isa<DbgInfoIntrinsic>(I); 259283627Sdim default: 260283627Sdim return false; 261283627Sdim } 262283627Sdim} 263283627Sdim 264283627Sdimvoid Reassociate::BuildRankMap(Function &F) { 265283627Sdim unsigned i = 2; 266285181Sdim 267283627Sdim // Assign distinct ranks to function arguments 268283627Sdim for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) 269283627Sdim ValueRankMap[&*I] = ++i; 270283627Sdim 271283627Sdim ReversePostOrderTraversal<Function*> RPOT(&F); 272283627Sdim for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), 273283627Sdim E = RPOT.end(); I != E; ++I) { 274285181Sdim BasicBlock *BB = *I; 275283627Sdim unsigned BBRank = RankMap[BB] = ++i << 16; 276283627Sdim 277283627Sdim // Walk the basic block, adding precomputed ranks for any instructions that 278283627Sdim // we cannot move. This ensures that the ranks for these instructions are 279283627Sdim // all different in the block. 
280283627Sdim for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) 281283627Sdim if (isUnmovableInstruction(I)) 282283627Sdim ValueRankMap[&*I] = ++BBRank; 283285181Sdim } 284283627Sdim} 285283627Sdim 286283627Sdimunsigned Reassociate::getRank(Value *V) { 287283627Sdim Instruction *I = dyn_cast<Instruction>(V); 288283627Sdim if (I == 0) { 289283627Sdim if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. 290283627Sdim return 0; // Otherwise it's a global or constant, rank 0. 291285181Sdim } 292283627Sdim 293283627Sdim if (unsigned Rank = ValueRankMap[I]) 294283627Sdim return Rank; // Rank already known? 295283627Sdim 296283627Sdim // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that 297283627Sdim // we can reassociate expressions for code motion! Since we do not recurse 298283627Sdim // for PHI nodes, we cannot have infinite recursion here, because there 299283627Sdim // cannot be loops in the value graph that do not go through PHI nodes. 300285181Sdim unsigned Rank = 0, MaxRank = RankMap[I->getParent()]; 301283627Sdim for (unsigned i = 0, e = I->getNumOperands(); 302283627Sdim i != e && Rank != MaxRank; ++i) 303283627Sdim Rank = std::max(Rank, getRank(I->getOperand(i))); 304283627Sdim 305283627Sdim // If this is a not or neg instruction, do not count it for rank. This 306283627Sdim // assures us that X and ~X will have the same rank. 307283627Sdim if (!I->getType()->isIntegerTy() || 308285181Sdim (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) 309283627Sdim ++Rank; 310283627Sdim 311283627Sdim //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " 312283627Sdim // << Rank << "\n"); 313283627Sdim 314283627Sdim return ValueRankMap[I] = Rank; 315283627Sdim} 316283627Sdim 317285181Sdim/// LowerNegateToMultiply - Replace 0-X with X*-1. 
318283627Sdim/// 319283627Sdimstatic BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { 320283627Sdim Constant *Cst = Constant::getAllOnesValue(Neg->getType()); 321283627Sdim 322283627Sdim BinaryOperator *Res = 323283627Sdim BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); 324283627Sdim Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop use of op. 325285181Sdim Res->takeName(Neg); 326283627Sdim Neg->replaceAllUsesWith(Res); 327283627Sdim Res->setDebugLoc(Neg->getDebugLoc()); 328283627Sdim return Res; 329283627Sdim} 330283627Sdim 331283627Sdim/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda 332283627Sdim/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for 333283627Sdim/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic. 334285181Sdim/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every 335283627Sdim/// even x in Bitwidth-bit arithmetic. 336283627Sdimstatic unsigned CarmichaelShift(unsigned Bitwidth) { 337283627Sdim if (Bitwidth < 3) 338283627Sdim return Bitwidth - 1; 339283627Sdim return Bitwidth - 2; 340283627Sdim} 341283627Sdim 342285181Sdim/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS', 343283627Sdim/// reducing the combined weight using any special properties of the operation. 344283627Sdim/// The existing weight LHS represents the computation X op X op ... op X where 345283627Sdim/// X occurs LHS times. The combined weight represents X op X op ... op X with 346283627Sdim/// X occurring LHS + RHS times. If op is "Xor" for example then the combined 347283627Sdim/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even; 348283627Sdim/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second. 
349283627Sdimstatic void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { 350283627Sdim // If we were working with infinite precision arithmetic then the combined 351296417Sdim // weight would be LHS + RHS. But we are using finite precision arithmetic, 352296417Sdim // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct 353296417Sdim // for nilpotent operations and addition, but not for idempotent operations 354296417Sdim // and multiplication), so it is important to correctly reduce the combined 355296417Sdim // weight back into range if wrapping would be wrong. 356296417Sdim 357296417Sdim // If RHS is zero then the weight didn't change. 358296417Sdim if (RHS.isMinValue()) 359296417Sdim return; 360296417Sdim // If LHS is zero then the combined weight is RHS. 361296417Sdim if (LHS.isMinValue()) { 362296417Sdim LHS = RHS; 363296417Sdim return; 364296417Sdim } 365296417Sdim // From this point on we know that neither LHS nor RHS is zero. 366296417Sdim 367296417Sdim if (Instruction::isIdempotent(Opcode)) { 368296417Sdim // Idempotent means X op X === X, so any non-zero weight is equivalent to a 369296417Sdim // weight of 1. Keeping weights at zero or one also means that wrapping is 370296417Sdim // not a problem. 371296417Sdim assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); 372296417Sdim return; // Return a weight of 1. 373296417Sdim } 374296417Sdim if (Instruction::isNilpotent(Opcode)) { 375296417Sdim // Nilpotent means X op X === 0, so reduce weights modulo 2. 376296417Sdim assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); 377296417Sdim LHS = 0; // 1 + 1 === 0 modulo 2. 378296417Sdim return; 379296417Sdim } 380296417Sdim if (Opcode == Instruction::Add) { 381296417Sdim // TODO: Reduce the weight by exploiting nsw/nuw? 
382296417Sdim LHS += RHS; 383296417Sdim return; 384296417Sdim } 385296417Sdim 386296417Sdim assert(Opcode == Instruction::Mul && "Unknown associative operation!"); 387296417Sdim unsigned Bitwidth = LHS.getBitWidth(); 388296417Sdim // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth 389296417Sdim // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth 390296417Sdim // bit number x, since either x is odd in which case x^CM = 1, or x is even in 391296417Sdim // which case both x^W and x^(W - CM) are zero. By subtracting off multiples 392296417Sdim // of CM like this weights can always be reduced to the range [0, CM+Bitwidth) 393296417Sdim // which by a happy accident means that they can always be represented using 394296417Sdim // Bitwidth bits. 395296417Sdim // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than 396296417Sdim // the Carmichael number). 397296417Sdim if (Bitwidth > 3) { 398296417Sdim /// CM - The value of Carmichael's lambda function. 399296417Sdim APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth)); 400296417Sdim // Any weight W >= Threshold can be replaced with W - CM. 401296417Sdim APInt Threshold = CM + Bitwidth; 402296417Sdim assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!"); 403296417Sdim // For Bitwidth 4 or more the following sum does not overflow. 404296417Sdim LHS += RHS; 405296417Sdim while (LHS.uge(Threshold)) 406296417Sdim LHS -= CM; 407296417Sdim } else { 408296417Sdim // To avoid problems with overflow do everything the same as above but using 409296417Sdim // a larger type. 
410296417Sdim unsigned CM = 1U << CarmichaelShift(Bitwidth); 411296417Sdim unsigned Threshold = CM + Bitwidth; 412296417Sdim assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold && 413296417Sdim "Weights not reduced!"); 414296417Sdim unsigned Total = LHS.getZExtValue() + RHS.getZExtValue(); 415296417Sdim while (Total >= Threshold) 416296417Sdim Total -= CM; 417296417Sdim LHS = Total; 418296417Sdim } 419296417Sdim} 420296417Sdim 421296417Sdimtypedef std::pair<Value*, APInt> RepeatedValue; 422296417Sdim 423296417Sdim/// LinearizeExprTree - Given an associative binary expression, return the leaf 424296417Sdim/// nodes in Ops along with their weights (how many times the leaf occurs). The 425296417Sdim/// original expression is the same as 426296417Sdim/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times 427296417Sdim/// op 428296417Sdim/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times 429296417Sdim/// op 430296417Sdim/// ... 431296417Sdim/// op 432296417Sdim/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times 433296417Sdim/// 434296417Sdim/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct. 435296417Sdim/// 436296417Sdim/// This routine may modify the function, in which case it returns 'true'. The 437296417Sdim/// changes it makes may well be destructive, changing the value computed by 'I' 438296417Sdim/// to something completely different. Thus if the routine returns 'true' then 439296417Sdim/// you MUST either replace I with a new expression computed from the Ops array, 440296417Sdim/// or use RewriteExprTree to put the values back in. 441296417Sdim/// 442296417Sdim/// A leaf node is either not a binary operation of the same kind as the root 443296417Sdim/// node 'I' (i.e. 
is not a binary operator at all, or is, but with a different 444296417Sdim/// opcode), or is the same kind of binary operator but has a use which either 445296417Sdim/// does not belong to the expression, or does belong to the expression but is 446296417Sdim/// a leaf node. Every leaf node has at least one use that is a non-leaf node 447296417Sdim/// of the expression, while for non-leaf nodes (except for the root 'I') every 448296417Sdim/// use is a non-leaf node of the expression. 449296417Sdim/// 450296417Sdim/// For example: 451296417Sdim/// expression graph node names 452296417Sdim/// 453296417Sdim/// + | I 454296417Sdim/// / \ | 455296417Sdim/// + + | A, B 456296417Sdim/// / \ / \ | 457296417Sdim/// * + * | C, D, E 458296417Sdim/// / \ / \ / \ | 459296417Sdim/// + * | F, G 460296417Sdim/// 461296417Sdim/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in 462296417Sdim/// that order) (C, 1), (E, 1), (F, 2), (G, 2). 463296417Sdim/// 464296417Sdim/// The expression is maximal: if some instruction is a binary operator of the 465296417Sdim/// same kind as 'I', and all of its uses are non-leaf nodes of the expression, 466296417Sdim/// then the instruction also belongs to the expression, is not a leaf node of 467296417Sdim/// it, and its operands also belong to the expression (but may be leaf nodes). 468296417Sdim/// 469296417Sdim/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in 470296417Sdim/// order to ensure that every non-root node in the expression has *exactly one* 471296417Sdim/// use by a non-leaf node of the expression. This destruction means that the 472296417Sdim/// caller MUST either replace 'I' with a new expression or use something like 473296417Sdim/// RewriteExprTree to put the values back in if the routine indicates that it 474296417Sdim/// made a change by returning 'true'. 
475296417Sdim/// 476296417Sdim/// In the above example either the right operand of A or the left operand of B 477296417Sdim/// will be replaced by undef. If it is B's operand then this gives: 478296417Sdim/// 479296417Sdim/// + | I 480296417Sdim/// / \ | 481296417Sdim/// + + | A, B - operand of B replaced with undef 482296417Sdim/// / \ \ | 483296417Sdim/// * + * | C, D, E 484296417Sdim/// / \ / \ / \ | 485296417Sdim/// + * | F, G 486296417Sdim/// 487296417Sdim/// Note that such undef operands can only be reached by passing through 'I'. 488296417Sdim/// For example, if you visit operands recursively starting from a leaf node 489296417Sdim/// then you will never see such an undef operand unless you get back to 'I', 490296417Sdim/// which requires passing through a phi node. 491296417Sdim/// 492296417Sdim/// Note that this routine may also mutate binary operators of the wrong type 493296417Sdim/// that have all uses inside the expression (i.e. only used by non-leaf nodes 494296417Sdim/// of the expression) if it can turn them into binary operators of the right 495296417Sdim/// type and thus make the expression bigger. 496296417Sdim 497296417Sdimstatic bool LinearizeExprTree(BinaryOperator *I, 498296417Sdim SmallVectorImpl<RepeatedValue> &Ops) { 499296417Sdim DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); 500296417Sdim unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); 501296417Sdim unsigned Opcode = I->getOpcode(); 502296417Sdim assert(Instruction::isAssociative(Opcode) && 503296417Sdim Instruction::isCommutative(Opcode) && 504296417Sdim "Expected an associative and commutative operation!"); 505296417Sdim 506296417Sdim // Visit all operands of the expression, keeping track of their weight (the 507296417Sdim // number of paths from the expression root to the operand, or if you like 508296417Sdim // the number of times that operand occurs in the linearized expression). 
509296417Sdim // For example, if I = X + A, where X = A + B, then I, X and B have weight 1 510296417Sdim // while A has weight two. 511296417Sdim 512296417Sdim // Worklist of non-leaf nodes (their operands are in the expression too) along 513296417Sdim // with their weights, representing a certain number of paths to the operator. 514296417Sdim // If an operator occurs in the worklist multiple times then we found multiple 515296417Sdim // ways to get to it. 516296417Sdim SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight) 517296417Sdim Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1))); 518296417Sdim bool MadeChange = false; 519296417Sdim 520296417Sdim // Leaves of the expression are values that either aren't the right kind of 521296417Sdim // operation (eg: a constant, or a multiply in an add tree), or are, but have 522296417Sdim // some uses that are not inside the expression. For example, in I = X + X, 523296417Sdim // X = A + B, the value X has two uses (by I) that are in the expression. If 524296417Sdim // X has any other uses, for example in a return instruction, then we consider 525296417Sdim // X to be a leaf, and won't analyze it further. When we first visit a value, 526296417Sdim // if it has more than one use then at first we conservatively consider it to 527296417Sdim // be a leaf. Later, as the expression is explored, we may discover some more 528296417Sdim // uses of the value from inside the expression. If all uses turn out to be 529296417Sdim // from within the expression (and the value is a binary operator of the right 530296417Sdim // kind) then the value is no longer considered to be a leaf, and its operands 531296417Sdim // are explored. 532296417Sdim 533296417Sdim // Leaves - Keeps track of the set of putative leaves as well as the number of 534296417Sdim // paths to each leaf seen so far. 535296417Sdim typedef DenseMap<Value*, APInt> LeafMap; 536296417Sdim LeafMap Leaves; // Leaf -> Total weight so far. 
537296417Sdim SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order. 538296417Sdim 539296417Sdim#ifndef NDEBUG 540296417Sdim SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme. 541296417Sdim#endif 542296417Sdim while (!Worklist.empty()) { 543296417Sdim std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val(); 544296417Sdim I = P.first; // We examine the operands of this binary operator. 545296417Sdim 546296417Sdim for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands. 547296417Sdim Value *Op = I->getOperand(OpIdx); 548296417Sdim APInt Weight = P.second; // Number of paths to this operand. 549296417Sdim DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n"); 550296417Sdim assert(!Op->use_empty() && "No uses, so how did we get to it?!"); 551296417Sdim 552296417Sdim // If this is a binary operation of the right kind with only one use then 553296417Sdim // add its operands to the expression. 554296417Sdim if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { 555296417Sdim assert(Visited.insert(Op) && "Not first visit!"); 556296417Sdim DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n"); 557296417Sdim Worklist.push_back(std::make_pair(BO, Weight)); 558296417Sdim continue; 559296417Sdim } 560296417Sdim 561296417Sdim // Appears to be a leaf. Is the operand already in the set of leaves? 562296417Sdim LeafMap::iterator It = Leaves.find(Op); 563296417Sdim if (It == Leaves.end()) { 564296417Sdim // Not in the leaf map. Must be the first time we saw this operand. 565296417Sdim assert(Visited.insert(Op) && "Not first visit!"); 566296417Sdim if (!Op->hasOneUse()) { 567296417Sdim // This value has uses not accounted for by the expression, so it is 568296417Sdim // not safe to modify. Mark it as being a leaf. 
569296417Sdim DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n"); 570296417Sdim LeafOrder.push_back(Op); 571296417Sdim Leaves[Op] = Weight; 572296417Sdim continue; 573296417Sdim } 574296417Sdim // No uses outside the expression, try morphing it. 575296417Sdim } else if (It != Leaves.end()) { 576296417Sdim // Already in the leaf map. 577296417Sdim assert(Visited.count(Op) && "In leaf map but not visited!"); 578296417Sdim 579296417Sdim // Update the number of paths to the leaf. 580296417Sdim IncorporateWeight(It->second, Weight, Opcode); 581296417Sdim 582296417Sdim#if 0 // TODO: Re-enable once PR13021 is fixed. 583296417Sdim // The leaf already has one use from inside the expression. As we want 584296417Sdim // exactly one such use, drop this new use of the leaf. 585296417Sdim assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); 586296417Sdim I->setOperand(OpIdx, UndefValue::get(I->getType())); 587296417Sdim MadeChange = true; 588296417Sdim 589296417Sdim // If the leaf is a binary operation of the right kind and we now see 590296417Sdim // that its multiple original uses were in fact all by nodes belonging 591296417Sdim // to the expression, then no longer consider it to be a leaf and add 592296417Sdim // its operands to the expression. 593296417Sdim if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { 594296417Sdim DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n"); 595296417Sdim Worklist.push_back(std::make_pair(BO, It->second)); 596296417Sdim Leaves.erase(It); 597296417Sdim continue; 598296417Sdim } 599296417Sdim#endif 600296417Sdim 601296417Sdim // If we still have uses that are not accounted for by the expression 602296417Sdim // then it is not safe to modify the value. 603296417Sdim if (!Op->hasOneUse()) 604296417Sdim continue; 605296417Sdim 606296417Sdim // No uses outside the expression, try morphing it. 
607296417Sdim Weight = It->second; 608296417Sdim Leaves.erase(It); // Since the value may be morphed below. 609296417Sdim } 610296417Sdim 611296417Sdim // At this point we have a value which, first of all, is not a binary 612296417Sdim // expression of the right kind, and secondly, is only used inside the 613296417Sdim // expression. This means that it can safely be modified. See if we 614296417Sdim // can usefully morph it into an expression of the right kind. 615296417Sdim assert((!isa<Instruction>(Op) || 616296417Sdim cast<Instruction>(Op)->getOpcode() != Opcode) && 617296417Sdim "Should have been handled above!"); 618296417Sdim assert(Op->hasOneUse() && "Has uses outside the expression tree!"); 619296417Sdim 620296417Sdim // If this is a multiply expression, turn any internal negations into 621296417Sdim // multiplies by -1 so they can be reassociated. 622296417Sdim BinaryOperator *BO = dyn_cast<BinaryOperator>(Op); 623296417Sdim if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) { 624296417Sdim DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); 625296417Sdim BO = LowerNegateToMultiply(BO); 626296417Sdim DEBUG(dbgs() << *BO << 'n'); 627296417Sdim Worklist.push_back(std::make_pair(BO, Weight)); 628296417Sdim MadeChange = true; 629296417Sdim continue; 630296417Sdim } 631296417Sdim 632296417Sdim // Failed to morph into an expression of the right type. This really is 633296417Sdim // a leaf. 634296417Sdim DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n"); 635296417Sdim assert(!isReassociableOp(Op, Opcode) && "Value was morphed?"); 636296417Sdim LeafOrder.push_back(Op); 637296417Sdim Leaves[Op] = Weight; 638296417Sdim } 639296417Sdim } 640296417Sdim 641296417Sdim // The leaves, repeated according to their weights, represent the linearized 642296417Sdim // form of the expression. 
643296417Sdim for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) { 644296417Sdim Value *V = LeafOrder[i]; 645296417Sdim LeafMap::iterator It = Leaves.find(V); 646296417Sdim if (It == Leaves.end()) 647296417Sdim // Node initially thought to be a leaf wasn't. 648296417Sdim continue; 649296417Sdim assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!"); 650296417Sdim APInt Weight = It->second; 651296417Sdim if (Weight.isMinValue()) 652296417Sdim // Leaf already output or weight reduction eliminated it. 653296417Sdim continue; 654296417Sdim // Ensure the leaf is only output once. 655296417Sdim It->second = 0; 656296417Sdim Ops.push_back(std::make_pair(V, Weight)); 657296417Sdim } 658296417Sdim 659296417Sdim // For nilpotent operations or addition there may be no operands, for example 660296417Sdim // because the expression was "X xor X" or consisted of 2^Bitwidth additions: 661296417Sdim // in both cases the weight reduces to 0 causing the value to be skipped. 662296417Sdim if (Ops.empty()) { 663296417Sdim Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); 664296417Sdim assert(Identity && "Associative operation without identity!"); 665296417Sdim Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1))); 666296417Sdim } 667296417Sdim 668296417Sdim return MadeChange; 669296417Sdim} 670296417Sdim 671296417Sdim// RewriteExprTree - Now that the operands for this expression tree are 672296417Sdim// linearized and optimized, emit them in-order. 
void Reassociate::RewriteExprTree(BinaryOperator *I,
                                  SmallVectorImpl<ValueEntry> &Ops) {
  assert(Ops.size() > 1 && "Single values should be used directly!");

  // Since our optimizations should never increase the number of operations, the
  // new expression can usually be written reusing the existing binary operators
  // from the original expression tree, without creating any new instructions,
  // though the rewritten expression may have a completely different topology.
  // We take care to not change anything if the new expression will be the same
  // as the original.  If more than trivial changes (like commuting operands)
  // were made then we are obliged to clear out any optional subclass data like
  // nsw flags.

  /// NodesToRewrite - Nodes from the original expression available for writing
  /// the new expression into.
  SmallVector<BinaryOperator*, 8> NodesToRewrite;
  unsigned Opcode = I->getOpcode();
  BinaryOperator *Op = I;

  /// NotRewritable - The operands being written will be the leaves of the new
  /// expression and must not be used as inner nodes (via NodesToRewrite) by
  /// mistake.  Inner nodes are always reassociable, and usually leaves are not
  /// (if they were they would have been incorporated into the expression and so
  /// would not be leaves), so most of the time there is no danger of this.  But
  /// in rare cases a leaf may become reassociable if an optimization kills uses
  /// of it, or it may momentarily become reassociable during rewriting (below)
  /// due it being removed as an operand of one of its uses.  Ensure that misuse
  /// of leaf nodes as inner nodes cannot occur by remembering all of the future
  /// leaves and refusing to reuse any of them as inner nodes.
  SmallPtrSet<Value*, 8> NotRewritable;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    NotRewritable.insert(Ops[i].Op);

  // ExpressionChanged - Non-null if the rewritten expression differs from the
  // original in some non-trivial way, requiring the clearing of optional flags.
  // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
  BinaryOperator *ExpressionChanged = 0;
  for (unsigned i = 0; ; ++i) {
    // The last operation (which comes earliest in the IR) is special as both
    // operands will come from Ops, rather than just one with the other being
    // a subexpression.
    if (i+2 == Ops.size()) {
      Value *NewLHS = Ops[i].Op;
      Value *NewRHS = Ops[i+1].Op;
      Value *OldLHS = Op->getOperand(0);
      Value *OldRHS = Op->getOperand(1);

      if (NewLHS == OldLHS && NewRHS == OldRHS)
        // Nothing changed, leave it alone.
        break;

      if (NewLHS == OldRHS && NewRHS == OldLHS) {
        // The order of the operands was reversed.  Swap them.
        DEBUG(dbgs() << "RA: " << *Op << '\n');
        Op->swapOperands();
        DEBUG(dbgs() << "TO: " << *Op << '\n');
        MadeChange = true;
        ++NumChanged;
        break;
      }

      // The new operation differs non-trivially from the original.  Overwrite
      // the old operands with the new ones.
      DEBUG(dbgs() << "RA: " << *Op << '\n');
      if (NewLHS != OldLHS) {
        // The displaced old LHS may be a reusable inner node of the original
        // tree; remember it for recycling unless it is a future leaf.
        BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
        if (BO && !NotRewritable.count(BO))
          NodesToRewrite.push_back(BO);
        Op->setOperand(0, NewLHS);
      }
      if (NewRHS != OldRHS) {
        BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
        if (BO && !NotRewritable.count(BO))
          NodesToRewrite.push_back(BO);
        Op->setOperand(1, NewRHS);
      }
      DEBUG(dbgs() << "TO: " << *Op << '\n');

      ExpressionChanged = Op;
      MadeChange = true;
      ++NumChanged;

      break;
    }

    // Not the last operation.  The left-hand side will be a sub-expression
    // while the right-hand side will be the current element of Ops.
    Value *NewRHS = Ops[i].Op;
    if (NewRHS != Op->getOperand(1)) {
      DEBUG(dbgs() << "RA: " << *Op << '\n');
      if (NewRHS == Op->getOperand(0)) {
        // The new right-hand side was already present as the left operand.  If
        // we are lucky then swapping the operands will sort out both of them.
        Op->swapOperands();
      } else {
        // Overwrite with the new right-hand side, recycling the displaced
        // operand as an inner node if possible.
        BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
        if (BO && !NotRewritable.count(BO))
          NodesToRewrite.push_back(BO);
        Op->setOperand(1, NewRHS);
        ExpressionChanged = Op;
      }
      DEBUG(dbgs() << "TO: " << *Op << '\n');
      MadeChange = true;
      ++NumChanged;
    }

    // Now deal with the left-hand side.  If this is already an operation node
    // from the original expression then just rewrite the rest of the expression
    // into it.
    BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
    if (BO && !NotRewritable.count(BO)) {
      Op = BO;
      continue;
    }

    // Otherwise, grab a spare node from the original expression and use that as
    // the left-hand side.  If there are no nodes left then the optimizers made
    // an expression with more nodes than the original!  This usually means that
    // they did something stupid but it might mean that the problem was just too
    // hard (finding the minimal number of multiplications needed to realize a
    // multiplication expression is NP-complete).  Whatever the reason, smart or
    // stupid, create a new node if there are none left.
    BinaryOperator *NewOp;
    if (NodesToRewrite.empty()) {
      // No spare nodes: create a fresh one with undef operands; the operands
      // are filled in on later iterations.
      Constant *Undef = UndefValue::get(I->getType());
      NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
                                     Undef, Undef, "", I);
    } else {
      NewOp = NodesToRewrite.pop_back_val();
    }

    DEBUG(dbgs() << "RA: " << *Op << '\n');
    Op->setOperand(0, NewOp);
    DEBUG(dbgs() << "TO: " << *Op << '\n');
    ExpressionChanged = Op;
    MadeChange = true;
    ++NumChanged;
    Op = NewOp;
  }

  // If the expression changed non-trivially then clear out all subclass data
  // starting from the operator specified in ExpressionChanged, and compactify
  // the operators to just before the expression root to guarantee that the
  // expression tree is dominated by all of Ops.
  if (ExpressionChanged)
    do {
      ExpressionChanged->clearSubclassOptionalData();
      if (ExpressionChanged == I)
        break;
      ExpressionChanged->moveBefore(I);
      // Walk up to the (sole, inner) user of this node in the rewritten tree.
      ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin());
    } while (1);

  // Throw away any left over nodes from the original expression.
  for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
    RedoInsts.insert(NodesToRewrite[i]);
}

/// NegateValue - Insert instructions before the instruction pointed to by BI,
/// that computes the negative version of the value specified.  The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
static Value *NegateValue(Value *V, Instruction *BI) {
  // Constants can be negated without emitting any instruction.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getNeg(C);

  // We are trying to expose opportunity for reassociation.  One of the things
  // that we want to do to achieve this is to push a negation as deep into an
  // expression chain as possible, to expose the add instructions.  In practice,
  // this means that we turn this:
  //   X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
  // the constants.  We assume that instcombine will clean up the mess later if
  // we introduce tons of unnecessary negation instructions.
  //
  if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) {
    // Push the negates through the add.
    I->setOperand(0, NegateValue(I->getOperand(0), BI));
    I->setOperand(1, NegateValue(I->getOperand(1), BI));

    // We must move the add instruction here, because the neg instructions do
    // not dominate the old add instruction in general.  By moving it, we are
    // assured that the neg instructions we just inserted dominate the
    // instruction we are about to insert after them.
    //
    I->moveBefore(BI);
    I->setName(I->getName()+".neg");
    return I;
  }

  // Okay, we need to materialize a negated version of V with an instruction.
  // Scan the use lists of V to see if we have one already.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    User *U = *UI;
    if (!BinaryOperator::isNeg(U)) continue;

    // We found one!  Now we have to make sure that the definition dominates
    // this use.  We do this by moving it to the entry block (if it is a
    // non-instruction value) or right after the definition.  These negates will
    // be zapped by reassociate later, so we don't need much finesse here.
    BinaryOperator *TheNeg = cast<BinaryOperator>(U);

    // Verify that the negate is in this function, V might be a constant expr.
    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
      continue;

    BasicBlock::iterator InsertPt;
    if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
      if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
        // An invoke's result is only available in its normal destination.
        InsertPt = II->getNormalDest()->begin();
      } else {
        // Place the negate immediately after the definition of V.
        InsertPt = InstInput;
        ++InsertPt;
      }
      // Instructions may not be inserted among phi nodes; skip past them.
      while (isa<PHINode>(InsertPt)) ++InsertPt;
    } else {
      // V is an argument or other non-instruction; the function entry block
      // dominates every use.
      InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
    }
    TheNeg->moveBefore(InsertPt);
    return TheNeg;
  }

  // Insert a 'neg' instruction that subtracts the value from zero to get the
  // negation.
  return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
}

/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
  // If this is a negation, we can't split it up!
  if (BinaryOperator::isNeg(Sub))
    return false;

  // Don't bother to break this up unless either an operand is a reassociable
  // add or subtract, or the subtract's only use is itself an add or subtract.
  if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
      isReassociableOp(Sub->getOperand(0), Instruction::Sub))
    return true;
  if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
      isReassociableOp(Sub->getOperand(1), Instruction::Sub))
    return true;
  if (Sub->hasOneUse() &&
      (isReassociableOp(Sub->use_back(), Instruction::Add) ||
       isReassociableOp(Sub->use_back(), Instruction::Sub)))
    return true;

  return false;
}

/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
  // Convert a subtract into an add and a neg instruction.  This allows sub
  // instructions to be commuted with other add instructions.
  //
  // Calculate the negative value of Operand 1 of the sub instruction,
  // and set it as the RHS of the add instruction we just made.
  //
  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
  BinaryOperator *New =
    BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
  New->takeName(Sub);

  // Everyone now refers to the add instruction.
  Sub->replaceAllUsesWith(New);
  New->setDebugLoc(Sub->getDebugLoc());

  DEBUG(dbgs() << "Negated: " << *New << '\n');
  return New;
}

/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
  // The multiplier is 1 << ShAmt, obtained by constant-folding the shift.
  Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
  MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));

  BinaryOperator *Mul =
    BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
  Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
  Mul->takeName(Shl);
  Shl->replaceAllUsesWith(Mul);
  Mul->setDebugLoc(Shl->getDebugLoc());
  return Mul;
}

/// FindInOperandList - Scan backwards and forwards among values with the same
/// rank as element i to see if X exists.  If X does not exist, return i.  This
/// is useful when scanning for 'x' when we see '-x' because they both get the
/// same rank.
static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
                                  Value *X) {
  unsigned XRank = Ops[i].Rank;
  unsigned e = Ops.size();
  // Scan forwards; Ops is sorted by rank so equal-rank entries are contiguous.
  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
    if (Ops[j].Op == X)
      return j;
  // Scan backwards (j wraps to ~0U when it would go below zero).
  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
    if (Ops[j].Op == X)
      return j;
  return i;
}

/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
/// and returning the result.  Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I,
                                  SmallVectorImpl<WeakVH> &Ops){
  // Base case: a single value is its own sum.
  if (Ops.size() == 1) return Ops.back();

  // Pop one value and add it to the recursively-built sum of the rest,
  // inserting each add immediately before I.
  Value *V1 = Ops.back();
  Ops.pop_back();
  Value *V2 = EmitAddTreeOfValues(I, Ops);
  return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
}

/// RemoveFactorFromExpression - If V is an expression tree that is a
/// multiplication sequence, and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
  if (!BO) return 0;

  // Flatten the multiply tree into (value, weight) pairs, then expand each
  // weight into that many repeated ValueEntry factors.
  SmallVector<RepeatedValue, 8> Tree;
  MadeChange |= LinearizeExprTree(BO, Tree);
  SmallVector<ValueEntry, 8> Factors;
  Factors.reserve(Tree.size());
  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
    RepeatedValue E = Tree[i];
    Factors.append(E.second.getZExtValue(),
                   ValueEntry(getRank(E.first), E.first));
  }

  bool FoundFactor = false;
  bool NeedsNegate = false;
  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
    if (Factors[i].Op == Factor) {
      FoundFactor = true;
      Factors.erase(Factors.begin()+i);
      break;
    }

    // If this is a negative version of this factor, remove it.
    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
        if (FC1->getValue() == -FC2->getValue()) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin()+i);
          break;
        }
  }

  if (!FoundFactor) {
    // Make sure to restore the operands to the expression tree.
    RewriteExprTree(BO, Factors);
    return 0;
  }

  BasicBlock::iterator InsertPt = BO; ++InsertPt;

  // If this was just a single multiply, remove the multiply and return the only
  // remaining operand.
  if (Factors.size() == 1) {
    RedoInsts.insert(BO);
    V = Factors[0].Op;
  } else {
    RewriteExprTree(BO, Factors);
    V = BO;
  }

  // We removed the negative constant's positive twin, so negate the result.
  if (NeedsNegate)
    V = BinaryOperator::CreateNeg(V, "neg", InsertPt);

  return V;
}

/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
/// add its operands as factors, otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
                                         SmallVectorImpl<Value*> &Factors,
                                       const SmallVectorImpl<ValueEntry> &Ops) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
  if (!BO) {
    // Not a reassociable multiply: V is itself a factor.
    Factors.push_back(V);
    return;
  }

  // Otherwise, add the LHS and RHS to the list of factors.
  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops);
  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
}

/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
/// instruction.  This optimizes based on identities.  If it can be reduced to
/// a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
                               SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    // First, check for X and ~X in the operand list.
    assert(i < Ops.size());
    if (BinaryOperator::isNot(Ops[i].Op)) {    // Cannot occur for ^.
      Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
      unsigned FoundX = FindInOperandList(Ops, i, X);
      if (FoundX != i) {
        if (Opcode == Instruction::And)   // ...&X&~X = 0
          return Constant::getNullValue(X->getType());

        if (Opcode == Instruction::Or)    // ...|X|~X = -1
          return Constant::getAllOnesValue(X->getType());
      }
    }

    // Next, check for duplicate pairs of values, which we assume are next to
    // each other, due to our sorting criteria.
    assert(i < Ops.size());
    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
        // Drop duplicate values for And and Or (idempotent operations).
        Ops.erase(Ops.begin()+i);
        --i; --e;
        ++NumAnnihil;
        continue;
      }

      // Drop pairs of values for Xor.
      assert(Opcode == Instruction::Xor);
      if (e == 2)
        return Constant::getNullValue(Ops[0].Op->getType());

      // Y ^ X^X -> Y
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
      i -= 1; e -= 2;
      ++NumAnnihil;
    }
  }
  return 0;
}

/// Helper function of CombineXorOpnd().  It creates a bitwise-and
/// instruction with the given two operands, and return the resulting
/// instruction.  There are two special cases: 1) if the constant operand is 0,
/// it will return NULL.  2) if the constant is ~0, the symbolic operand will
/// be returned.
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
                             const APInt &ConstOpnd) {
  if (ConstOpnd != 0) {
    if (!ConstOpnd.isAllOnesValue()) {
      LLVMContext &Ctx = Opnd->getType()->getContext();
      Instruction *I;
      I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
                                    "and.ra", InsertBefore);
      I->setDebugLoc(InsertBefore->getDebugLoc());
      return I;
    }
    // x & ~0 == x: no instruction needed.
    return Opnd;
  }
  // x & 0 == 0: signalled to the caller by returning null.
  return 0;
}

// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
// into "R ^ C", where C would be 0, and R is a symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
// and both "Res" and "ConstOpnd" remain unchanged.
//
bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
                                 APInt &ConstOpnd, Value *&Res) {
  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
  //                           = ((x | c1) ^ c1) ^ (c1 ^ c2)
  //                           = (x & ~c1) ^ (c1 ^ c2)
  // It is useful only when c1 == c2.
  if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
    // The rule rewrites (x | c1) itself, so it must have no other users.
    if (!Opnd1->getValue()->hasOneUse())
      return false;

    const APInt &C1 = Opnd1->getConstPart();
    if (C1 != ConstOpnd)
      return false;

    Value *X = Opnd1->getSymbolicPart();
    Res = createAndInstr(I, X, ~C1);
    // ConstOpnd was C2, now C1 ^ C2.
    ConstOpnd ^= C1;

    if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
      RedoInsts.insert(T);
    return true;
  }
  return false;
}


// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
// symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively (If the entire expression is
// evaluated to a constant, the Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
                                 APInt &ConstOpnd, Value *&Res) {
  // The two operands must share a symbolic part for any rule to apply.
  Value *X = Opnd1->getSymbolicPart();
  if (X != Opnd2->getSymbolicPart())
    return false;

  // This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
  int DeadInstNum = 1;
  if (Opnd1->getValue()->hasOneUse())
    DeadInstNum++;
  if (Opnd2->getValue()->hasOneUse())
    DeadInstNum++;

  // Xor-Rule 2:
  //  (x | c1) ^ (x & c2)
  //   = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
  //   = (x & ~c1) ^ (x & c2) ^ c1               // Xor-Rule 1
  //   = (x & c3) ^ c1, where c3 = ~c1 ^ c2      // Xor-rule 3
  //
  if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
    // Canonicalize so Opnd1 is the Or-expression.
    if (Opnd2->isOrExpr())
      std::swap(Opnd1, Opnd2);

    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3((~C1) ^ C2);

    // Do not increase code size!
    if (C3 != 0 && !C3.isAllOnesValue()) {
      int NewInstNum = ConstOpnd != 0 ? 1 : 2;
      if (NewInstNum > DeadInstNum)
        return false;
    }

    Res = createAndInstr(I, X, C3);
    ConstOpnd ^= C1;

  } else if (Opnd1->isOrExpr()) {
    // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
    //
    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3 = C1 ^ C2;

    // Do not increase code size
    if (C3 != 0 && !C3.isAllOnesValue()) {
      int NewInstNum = ConstOpnd != 0 ? 1 : 2;
      if (NewInstNum > DeadInstNum)
        return false;
    }

    Res = createAndInstr(I, X, C3);
    ConstOpnd ^= C3;
  } else {
    // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
    //
    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3 = C1 ^ C2;
    Res = createAndInstr(I, X, C3);
  }

  // Put the original operands in the Redo list; hope they will be deleted
  // as dead code.
  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
    RedoInsts.insert(T);
  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
    RedoInsts.insert(T);

  return true;
}

/// Optimize a series of operands to an 'xor' instruction.  If it can be reduced
/// to a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
Value *Reassociate::OptimizeXor(Instruction *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
  // First apply the generic and/or/xor identities (X^X elimination etc).
  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
    return V;

  if (Ops.size() == 1)
    return 0;

  SmallVector<XorOpnd, 8> Opnds;
  SmallVector<XorOpnd*, 8> OpndPtrs;
  Type *Ty = Ops[0].Op->getType();
  APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);

  // Step 1: Convert ValueEntry to XorOpnd.  Constants are folded together
  // into ConstOpnd instead of becoming XorOpnds.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *V = Ops[i].Op;
    if (!isa<ConstantInt>(V)) {
      XorOpnd O(V);
      O.setSymbolicRank(getRank(O.getSymbolicPart()));
      Opnds.push_back(O);
    } else
      ConstOpnd ^= cast<ConstantInt>(V)->getValue();
  }

  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
  // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
  // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
  // with the previous loop --- the iterator of the "Opnds" may be invalidated
  // when new elements are added to the vector.
  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
    OpndPtrs.push_back(&Opnds[i]);

  // Step 2: Sort the Xor-Operands in a way such that the operands containing
  // the same symbolic value cluster together. For instance, the input operand
  // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
  // ("x | 123", "x & 789", "y & 456").
  std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());

  // Step 3: Combine adjacent operands
  XorOpnd *PrevOpnd = 0;
  bool Changed = false;
  for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
    XorOpnd *CurrOpnd = OpndPtrs[i];
    // The combined value
    Value *CV;

    // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
    if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
      Changed = true;
      if (CV)
        *CurrOpnd = XorOpnd(CV);
      else {
        CurrOpnd->Invalidate();
        continue;
      }
    }

    // Operands with different symbolic parts cannot combine; remember this
    // one as the candidate partner for the next operand.
    if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
      PrevOpnd = CurrOpnd;
      continue;
    }

    // Step 3.2: When previous and current operands share the same symbolic
    // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
    //
    if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
      // Remove previous operand
      PrevOpnd->Invalidate();
      if (CV) {
        *CurrOpnd = XorOpnd(CV);
        PrevOpnd = CurrOpnd;
      } else {
        CurrOpnd->Invalidate();
        PrevOpnd = 0;
      }
      Changed = true;
    }
  }

  // Step 4: Reassemble the Ops
  if (Changed) {
    Ops.clear();
    for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
      XorOpnd &O = Opnds[i];
      if (O.isInvalid())
        continue;
      ValueEntry VE(getRank(O.getValue()), O.getValue());
      Ops.push_back(VE);
    }
    if (ConstOpnd != 0) {
      Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
      ValueEntry VE(getRank(C), C);
      Ops.push_back(VE);
    }
    int Sz = Ops.size();
    if (Sz == 1)
      return Ops.back().Op;
    else if (Sz == 0) {
      // Everything cancelled: the expression is the zero constant.
      assert(ConstOpnd == 0);
      return ConstantInt::get(Ty->getContext(), ConstOpnd);
    }
  }

  return 0;
}

/// OptimizeAdd - Optimize a series of operands to an 'add' instruction.  This
/// optimizes based on identities.
/// If it can be reduced to a single Value, it is returned, otherwise the Ops
/// list is mutated as necessary.
///
/// \param I    the add instruction whose linearized operand list is in Ops;
///             used as the insertion point for any new instructions.
/// \param Ops  the rank-sorted operand list; mutated in place.
/// \returns a single replacement Value if the whole expression collapsed,
///          or null (0) if Ops was merely simplified.
Value *Reassociate::OptimizeAdd(Instruction *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and -X pairs.  If we find any, we
  // can simplify the expression. X+-X == 0.  While we're at it, scan for any
  // duplicates.  We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
  //
  // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
  //
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *TheOp = Ops[i].Op;
    // Check to see if we've seen this operand before.  If so, we factor all
    // instances of the operand together.  Due to our sorting criteria, we know
    // that these need to be next to each other in the vector.
    if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
      // Rescan the list, remove all instances of this operand from the expr.
      unsigned NumFound = 0;
      do {
        Ops.erase(Ops.begin()+i);
        ++NumFound;
      } while (i != Ops.size() && Ops[i].Op == TheOp);

      DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
      ++NumFactor;

      // Insert a new multiply: TheOp * NumFound, placed before I.
      Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
      Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);

      // Now that we have inserted a multiply, optimize it. This allows us to
      // handle cases that require multiple factoring steps, such as this:
      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
      RedoInsts.insert(cast<Instruction>(Mul));

      // If every add operand was a duplicate, return the multiply.
      if (Ops.empty())
        return Mul;

      // Otherwise, we had some input that didn't have the dupe, such as
      // "A + A + B" -> "A*2 + B".  Add the new multiply to the list of
      // things being added by this operation.
      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));

      // Adjust the index and bound to account for the erased duplicates and
      // the operand just inserted at the front, then rescan.
      --i;
      e = Ops.size();
      continue;
    }

    // Check for X and -X in the operand list.
    if (!BinaryOperator::isNeg(TheOp))
      continue;

    Value *X = BinaryOperator::getNegArgument(TheOp);
    unsigned FoundX = FindInOperandList(Ops, i, X);
    if (FoundX == i)
      continue;

    // Remove X and -X from the operand list.  If those were the only two
    // operands, the whole sum is zero.
    if (Ops.size() == 2)
      return Constant::getNullValue(X->getType());

    Ops.erase(Ops.begin()+i);
    if (i < FoundX)
      --FoundX;     // The erase above shifted X's slot down by one.
    else
      --i;          // Need to back up an extra one.
    Ops.erase(Ops.begin()+FoundX);
    ++NumAnnihil;
    --i;            // Revisit element.
    e -= 2;         // Removed two elements.
  }

  // Scan the operand list, checking to see if there are any common factors
  // between operands.  Consider something like A*A+A*B*C+D.  We would like to
  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
  // To efficiently find this, we count the number of times a factor occurs
  // for any ADD operands that are MULs.
  DenseMap<Value*, unsigned> FactorOccurrences;

  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
  // where they are actually the same multiply.
  unsigned MaxOcc = 0;
  Value *MaxOccVal = 0;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
    if (!BOp)
      continue;

    // Compute all of the factors of this added value.
    SmallVector<Value*, 8> Factors;
    FindSingleUseMultiplyFactors(BOp, Factors, Ops);
    assert(Factors.size() > 1 && "Bad linearize!");

    // Add one to FactorOccurrences for each unique factor in this op.
    // (The inner 'i'/'e' deliberately shadow the outer loop's; the inner loop
    // only walks Factors.)
    SmallPtrSet<Value*, 8> Duplicates;
    for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
      Value *Factor = Factors[i];
      // Skip factors already counted for this operand (insert returns false
      // when the value was already present).
      if (!Duplicates.insert(Factor)) continue;

      unsigned Occ = ++FactorOccurrences[Factor];
      if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }

      // If Factor is a negative constant, add the negated value as a factor
      // because we can percolate the negate out.  Watch for minint, which
      // cannot be positivified.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
        if (CI->isNegative() && !CI->isMinValue(true)) {
          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
          assert(!Duplicates.count(Factor) &&
                 "Shouldn't have two constant factors, missed a canonicalize");

          unsigned Occ = ++FactorOccurrences[Factor];
          if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
        }
    }
  }

  // If any factor occurred more than one time, we can pull it out.
  if (MaxOcc > 1) {
    DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
    ++NumFactor;

    // Create a new instruction that uses the MaxOccVal twice.  If we don't do
    // this, we could otherwise run into situations where removing a factor
    // from an expression will drop a use of maxocc, and this can cause
    // RemoveFactorFromExpression on successive values to behave differently.
    Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
    // NOTE(review): entries are WeakVH — presumably so they are updated/nulled
    // if an extracted value is replaced while later operands are processed;
    // confirm against RemoveFactorFromExpression.
    SmallVector<WeakVH, 4> NewMulOps;
    for (unsigned i = 0; i != Ops.size(); ++i) {
      // Only try to remove factors from expressions we're allowed to.
      BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
      if (!BOp)
        continue;

      if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
        // The factorized operand may occur several times.  Convert them all in
        // one fell swoop.  Walk backwards so erasing doesn't shift the
        // not-yet-visited slots.
        for (unsigned j = Ops.size(); j != i;) {
          --j;
          if (Ops[j].Op == Ops[i].Op) {
            NewMulOps.push_back(V);
            Ops.erase(Ops.begin()+j);
          }
        }
        --i;
      }
    }

    // No need for extra uses anymore.
    delete DummyInst;

    unsigned NumAddedValues = NewMulOps.size();
    Value *V = EmitAddTreeOfValues(I, NewMulOps);

    // Now that we have inserted the add tree, optimize it. This allows us to
    // handle cases that require multiple factoring steps, such as this:
    // A*A*B + A*A*C   -->   A*(A*B+A*C)   -->   A*(A*(B+C))
    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
    (void)NumAddedValues;
    if (Instruction *VI = dyn_cast<Instruction>(V))
      RedoInsts.insert(VI);

    // Create the multiply.
    Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);

    // Rerun associate on the multiply in case the inner expression turned into
    // a multiply.  We want to make sure that we keep things in canonical form.
    RedoInsts.insert(V2);

    // If every add operand included the factor (e.g. "A*B + A*C"), then the
    // entire result expression is just the multiply "A*(B+C)".
    if (Ops.empty())
      return V2;

    // Otherwise, we had some input that didn't have the factor, such as
    // "A*B + A*C + D" -> "A*(B+C) + D".  Add the new multiply to the list of
    // things being added by this operation.
    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
  }

  return 0;
}

namespace {
  /// \brief Predicate tests whether a ValueEntry's op is in a map.
  // NOTE(review): not referenced anywhere in this portion of the file;
  // verify it is still used elsewhere before removing.
  struct IsValueInMap {
    const DenseMap<Value *, unsigned> &Map;

    IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}

    bool operator()(const ValueEntry &Entry) {
      return Map.find(Entry.Op) != Map.end();
    }
  };
}

/// \brief Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
///      (x*x)          -> [(x, 2)]
///     ((x*x)*x)       -> [(x, 3)]
///   ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
                                         SmallVectorImpl<Factor> &Factors) {
  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
  // Compute the sum of powers of simplifiable factors.  Ops is rank-sorted,
  // so equal operands sit in adjacent runs.
  unsigned FactorPowerSum = 0;
  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    // Track for simplification all factors which occur 2 or more times.
    if (Count > 1)
      FactorPowerSum += Count;
  }

  // We can only simplify factors if the sum of the powers of our simplifiable
  // factors is 4 or higher. When that is the case, we will *always* have
  // a simplification. This is an important invariant to prevent cyclicly
  // trying to simplify already minimal formations.
  if (FactorPowerSum < 4)
    return false;

  // Now gather the simplifiable factors, removing them from Ops.
  FactorPowerSum = 0;
  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    if (Count == 1)
      continue;
    // Move an even number of occurrences to Factors; any odd leftover copy
    // stays behind in Ops.
    Count &= ~1U;
    // Rewind Idx to the start of the run being erased so that, after the
    // outer ++Idx, scanning resumes right after the removed elements.
    Idx -= Count;
    FactorPowerSum += Count;
    Factors.push_back(Factor(Op, Count));
    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
  }

  // None of the adjustments above should have reduced the sum of factor powers
  // below our mininum of '4'.
  assert(FactorPowerSum >= 4);

  std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
  return true;
}

/// \brief Build a tree of multiplies, computing the product of Ops.
/// Consumes Ops back-to-front; Ops must be non-empty.
static Value *buildMultiplyTree(IRBuilder<> &Builder,
                                SmallVectorImpl<Value*> &Ops) {
  if (Ops.size() == 1)
    return Ops.back();

  Value *LHS = Ops.pop_back_val();
  do {
    LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
  } while (!Ops.empty());

  return LHS;
}

/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
///
/// Given a vector of values raised to various powers, where no two values are
/// equal and the powers are sorted in decreasing order, compute the minimal
/// DAG of multiplies to compute the final product, and return that product
/// value.
Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
                                            SmallVectorImpl<Factor> &Factors) {
  assert(Factors[0].Power);
  SmallVector<Value *, 4> OuterProduct;
  // First pass: merge runs of factors that share the same power into a single
  // factor whose base is their product.
  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
       Idx < Size && Factors[Idx].Power > 0; ++Idx) {
    if (Factors[Idx].Power != Factors[LastIdx].Power) {
      LastIdx = Idx;
      continue;
    }

    // We want to multiply across all the factors with the same power so that
    // we can raise them to that power as a single entity. Build a mini tree
    // for that.
    SmallVector<Value *, 4> InnerProduct;
    InnerProduct.push_back(Factors[LastIdx].Base);
    do {
      InnerProduct.push_back(Factors[Idx].Base);
      ++Idx;
    } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);

    // Reset the base value of the first factor to the new expression tree.
    // We'll remove all the factors with the same power in a second pass.
    Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
    if (Instruction *MI = dyn_cast<Instruction>(M))
      RedoInsts.insert(MI);

    LastIdx = Idx;
  }
  // Unique factors with equal powers -- we've folded them into the first one's
  // base.
  Factors.erase(std::unique(Factors.begin(), Factors.end(),
                            Factor::PowerEqual()),
                Factors.end());

  // Iteratively collect the base of each factor with an add power into the
  // outer product, and halve each power in preparation for squaring the
  // expression.  Halving the powers and multiplying the recursive result in
  // twice below computes each base^power with O(log power) multiplies
  // (repeated squaring).
  for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
    if (Factors[Idx].Power & 1)
      OuterProduct.push_back(Factors[Idx].Base);
    Factors[Idx].Power >>= 1;
  }
  if (Factors[0].Power) {
    Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
    OuterProduct.push_back(SquareRoot);
    OuterProduct.push_back(SquareRoot);
  }
  if (OuterProduct.size() == 1)
    return OuterProduct.front();

  Value *V = buildMultiplyTree(Builder, OuterProduct);
  return V;
}

/// Optimize a chain of multiplies by rebuilding it as a minimal DAG with
/// shared sub-expressions.  Returns the replacement value if the whole
/// expression was consumed, otherwise mutates Ops and returns null.
Value *Reassociate::OptimizeMul(BinaryOperator *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
  // We can only optimize the multiplies when there is a chain of more than
  // three, such that a balanced tree might require fewer total multiplies.
  if (Ops.size() < 4)
    return 0;

  // Try to turn linear trees of multiplies without other uses of the
  // intermediate stages into minimal multiply DAGs with perfect sub-expression
  // re-use.
  SmallVector<Factor, 4> Factors;
  if (!collectMultiplyFactors(Ops, Factors))
    return 0; // All distinct factors, so nothing left for us to do.

  IRBuilder<> Builder(I);
  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
  if (Ops.empty())
    return V;

  // Splice the DAG's result back into the (still rank-sorted) operand list;
  // the caller continues with the mutated Ops.
  ValueEntry NewEntry = ValueEntry(getRank(V), V);
  Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
  return 0;
}

/// Top-level driver over the linearized, rank-sorted operand list: folds
/// constants, then dispatches to the per-opcode optimizers.
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
                                       SmallVectorImpl<ValueEntry> &Ops) {
  // Now that we have the linearized expression tree, try to optimize it.
  // Start by folding any constants that we found.  Constants sort to the end
  // of the list (rank 0), so pop them off the back and fold them together.
  Constant *Cst = 0;
  unsigned Opcode = I->getOpcode();
  while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
    Constant *C = cast<Constant>(Ops.pop_back_val().Op);
    Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
  }
  // If there was nothing but constants then we are done.
  if (Ops.empty())
    return Cst;

  // Put the combined constant back at the end of the operand list, except if
  // there is no point.  For example, an add of 0 gets dropped here, while a
  // multiplication by zero turns the whole expression into zero.
  if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
    if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
      return Cst;
    Ops.push_back(ValueEntry(0, Cst));
  }

  if (Ops.size() == 1) return Ops[0].Op;

  // Handle destructive annihilation due to identities between elements in the
  // argument list here.
  unsigned NumOps = Ops.size();
  switch (Opcode) {
  default: break;
  case Instruction::And:
  case Instruction::Or:
    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
      return Result;
    break;

  case Instruction::Xor:
    if (Value *Result = OptimizeXor(I, Ops))
      return Result;
    break;

  case Instruction::Add:
    if (Value *Result = OptimizeAdd(I, Ops))
      return Result;
    break;

  case Instruction::Mul:
    if (Value *Result = OptimizeMul(I, Ops))
      return Result;
    break;
  }

  // If an optimizer shrank the operand list, re-run the whole optimization
  // over the smaller expression.
  if (Ops.size() != NumOps)
    return OptimizeExpression(I, Ops);
  return 0;
}

/// EraseInst - Zap the given instruction, adding interesting operands to the
/// work list.
void Reassociate::EraseInst(Instruction *I) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  // Snapshot the operands before the instruction (and its uses) go away.
  SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
  // Erase the dead instruction.
  ValueRankMap.erase(I);
  RedoInsts.remove(I);
  I->eraseFromParent();
  // Optimize its operands.
  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
      // If this is a node in an expression tree, climb to the expression root
      // and add that since that's where optimization actually happens.
      // Visited breaks cycles in self-referential (unreachable) code.
      unsigned Opcode = Op->getOpcode();
      while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
             Visited.insert(Op))
        Op = Op->use_back();
      RedoInsts.insert(Op);
    }
}

/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void Reassociate::OptimizeInst(Instruction *I) {
  // Only consider operations that we understand.
  if (!isa<BinaryOperator>(I))
    return;

  if (I->getOpcode() == Instruction::Shl &&
      isa<ConstantInt>(I->getOperand(1)))
    // If an operand of this shift is a reassociable multiply, or if the shift
    // is used by a reassociable multiply or add, turn into a multiply.
    if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
        (I->hasOneUse() &&
         (isReassociableOp(I->use_back(), Instruction::Mul) ||
          isReassociableOp(I->use_back(), Instruction::Add)))) {
      Instruction *NI = ConvertShiftToMul(I);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;  // Continue below with the replacement multiply.
    }

  // Floating point binary operators are not associative, but we can still
  // commute (some) of them, to canonicalize the order of their operands.
  // This can potentially expose more CSE opportunities, and makes writing
  // other transformations simpler.
  if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) {
    // FAdd and FMul can be commuted.
    if (I->getOpcode() != Instruction::FMul &&
        I->getOpcode() != Instruction::FAdd)
      return;

    Value *LHS = I->getOperand(0);
    Value *RHS = I->getOperand(1);
    unsigned LHSRank = getRank(LHS);
    unsigned RHSRank = getRank(RHS);

    // Sort the operands by rank.
    if (RHSRank < LHSRank) {
      I->setOperand(0, RHS);
      I->setOperand(1, LHS);
    }

    return;
  }

  // Do not reassociate boolean (i1) expressions.  We want to preserve the
  // original order of evaluation for short-circuited comparisons that
  // SimplifyCFG has folded to AND/OR expressions.  If the expression
  // is not further optimized, it is likely to be transformed back to a
  // short-circuited form for code gen, and the source order may have been
  // optimized for the most likely conditions.
  if (I->getType()->isIntegerTy(1))
    return;

  // If this is a subtract instruction which is not already in negate form,
  // see if we can convert it to X+-Y.
  if (I->getOpcode() == Instruction::Sub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (BinaryOperator::isNeg(I)) {
      // Otherwise, this is a negation.  See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->use_back(), Instruction::Mul))) {
        Instruction *NI = LowerNegateToMultiply(I);
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  }

  // If this instruction is an associative binary operator, process it.
  if (!I->isAssociative()) return;
  BinaryOperator *BO = cast<BinaryOperator>(I);

  // If this is an interior node of a reassociable tree, ignore it until we
  // get to the root of the tree, to avoid N^2 analysis.
  unsigned Opcode = BO->getOpcode();
  if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode)
    return;

  // If this is an add tree that is used by a sub instruction, ignore it
  // until we process the subtract.
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
      cast<Instruction>(BO->use_back())->getOpcode() == Instruction::Sub)
    return;

  ReassociateExpression(BO);
}

/// Reassociate the expression tree rooted at I: linearize it, sort the
/// operands by rank, run the per-opcode optimizers, and rewrite the tree.
void Reassociate::ReassociateExpression(BinaryOperator *I) {

  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  SmallVector<RepeatedValue, 8> Tree;
  MadeChange |= LinearizeExprTree(I, Tree);
  SmallVector<ValueEntry, 8> Ops;
  Ops.reserve(Tree.size());
  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
    RepeatedValue E = Tree[i];
    // An operand that occurs k times appears k times in the flat list.
    Ops.append(E.second.getZExtValue(),
               ValueEntry(getRank(E.first), E.first));
  }

  DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

  // Now that we have linearized the tree to a list and have gathered all of
  // the operands and their ranks, sort the operands by their rank.  Use a
  // stable_sort so that values with equal ranks will have their relative
  // positions maintained (and so the compiler is deterministic).  Note that
  // this sorts so that the highest ranking values end up at the beginning of
  // the vector.
  std::stable_sort(Ops.begin(), Ops.end());

  // OptimizeExpression - Now that we have the expression tree in a convenient
  // sorted form, optimize it globally if possible.
  if (Value *V = OptimizeExpression(I, Ops)) {
    if (V == I)
      // Self-referential expression in unreachable code.
      return;
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
    I->replaceAllUsesWith(V);
    if (Instruction *VI = dyn_cast<Instruction>(V))
      VI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    ++NumAnnihil;
    return;
  }

  // We want to sink immediates as deeply as possible except in the case where
  // this is a multiply tree used only by an add, and the immediate is a -1.
  // In this case we reassociate to put the negation on the outside so that we
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
  if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(Ops.back().Op) &&
      cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
    ValueEntry Tmp = Ops.pop_back_val();
    Ops.insert(Ops.begin(), Tmp);
  }

  DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

  if (Ops.size() == 1) {
    if (Ops[0].Op == I)
      // Self-referential expression in unreachable code.
      return;

    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    I->replaceAllUsesWith(Ops[0].Op);
    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
      OI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    return;
  }

  // Now that we ordered and optimized the expressions, splat them back into
  // the expression tree, removing any unneeded nodes.
  RewriteExprTree(I, Ops);
}

/// Pass entry point: rank every value in F, then optimize each instruction,
/// draining the redo worklist after every block.
bool Reassociate::runOnFunction(Function &F) {
  // Calculate the rank map for F
  BuildRankMap(F);

  MadeChange = false;
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Optimize every instruction in the basic block.
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
      if (isInstructionTriviallyDead(II)) {
        // Advance the iterator before the instruction is erased.
        EraseInst(II++);
      } else {
        OptimizeInst(II);
        assert(II->getParent() == BI && "Moved to a different block!");
        ++II;
      }

    // If this produced extra instructions to optimize, handle them now.
    while (!RedoInsts.empty()) {
      Instruction *I = RedoInsts.pop_back_val();
      if (isInstructionTriviallyDead(I))
        EraseInst(I);
      else
        OptimizeInst(I);
    }
  }

  // We are done with the rank map.
  RankMap.clear();
  ValueRankMap.clear();

  return MadeChange;
}