Reassociate.cpp revision 234982
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass reassociates commutative expressions in an order that is designed 11// to promote better constant propagation, GCSE, LICM, PRE, etc. 12// 13// For example: 4 + (x + 5) -> x + (4 + 5) 14// 15// In the implementation of this algorithm, constants are assigned rank = 0, 16// function arguments are rank = 1, and other values are assigned ranks 17// corresponding to the reverse post order traversal of current function 18// (starting at 2), which effectively gives values in deep loops higher rank 19// than values not in loops. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "reassociate" 24#include "llvm/Transforms/Scalar.h" 25#include "llvm/Transforms/Utils/Local.h" 26#include "llvm/Constants.h" 27#include "llvm/DerivedTypes.h" 28#include "llvm/Function.h" 29#include "llvm/Instructions.h" 30#include "llvm/IntrinsicInst.h" 31#include "llvm/Pass.h" 32#include "llvm/Assembly/Writer.h" 33#include "llvm/Support/CFG.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/ValueHandle.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/ADT/PostOrderIterator.h" 38#include "llvm/ADT/Statistic.h" 39#include "llvm/ADT/DenseMap.h" 40#include <algorithm> 41using namespace llvm; 42 43STATISTIC(NumLinear , "Number of insts linearized"); 44STATISTIC(NumChanged, "Number of insts reassociated"); 45STATISTIC(NumAnnihil, "Number of expr tree annihilated"); 46STATISTIC(NumFactor , "Number of multiplies factored"); 47 48namespace { 49 struct ValueEntry { 50 unsigned Rank; 51 Value *Op; 52 ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} 53 }; 54 inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) { 55 return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start. 56 } 57} 58 59#ifndef NDEBUG 60/// PrintOps - Print out the expression identified in the Ops list. 61/// 62static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { 63 Module *M = I->getParent()->getParent()->getParent(); 64 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " 65 << *Ops[0].Op->getType() << '\t'; 66 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 67 dbgs() << "[ "; 68 WriteAsOperand(dbgs(), Ops[i].Op, false, M); 69 dbgs() << ", #" << Ops[i].Rank << "] "; 70 } 71} 72#endif 73 74namespace { 75 class Reassociate : public FunctionPass { 76 DenseMap<BasicBlock*, unsigned> RankMap; 77 DenseMap<AssertingVH<Value>, unsigned> ValueRankMap; 78 SmallVector<WeakVH, 8> RedoInsts; 79 SmallVector<WeakVH, 8> DeadInsts; 80 bool MadeChange; 81 public: 82 static char ID; // Pass identification, replacement for typeid 83 Reassociate() : FunctionPass(ID) { 84 initializeReassociatePass(*PassRegistry::getPassRegistry()); 85 } 86 87 bool runOnFunction(Function &F); 88 89 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 90 AU.setPreservesCFG(); 91 } 92 private: 93 void BuildRankMap(Function &F); 94 unsigned getRank(Value *V); 95 Value *ReassociateExpression(BinaryOperator *I); 96 void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops, 97 unsigned Idx = 0); 98 Value *OptimizeExpression(BinaryOperator *I, 99 SmallVectorImpl<ValueEntry> &Ops); 100 Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); 101 void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); 102 void LinearizeExpr(BinaryOperator *I); 103 Value *RemoveFactorFromExpression(Value *V, Value *Factor); 104 void ReassociateInst(BasicBlock::iterator &BBI); 105 106 void RemoveDeadBinaryOp(Value *V); 107 }; 108} 109 110char Reassociate::ID = 0; 111INITIALIZE_PASS(Reassociate, "reassociate", 112 "Reassociate expressions", false, false) 113 114// Public interface to the Reassociate pass 115FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } 116 117void Reassociate::RemoveDeadBinaryOp(Value *V) { 118 Instruction *Op = dyn_cast<Instruction>(V); 119 if (!Op || !isa<BinaryOperator>(Op)) 120 return; 121 122 Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); 123 124 ValueRankMap.erase(Op); 125 DeadInsts.push_back(Op); 126 RemoveDeadBinaryOp(LHS); 127 RemoveDeadBinaryOp(RHS); 128} 129 130 131static bool isUnmovableInstruction(Instruction *I) { 132 if (I->getOpcode() == Instruction::PHI || 133 I->getOpcode() == Instruction::Alloca || 134 I->getOpcode() == Instruction::Load || 135 I->getOpcode() == Instruction::Invoke || 136 (I->getOpcode() == Instruction::Call && 137 !isa<DbgInfoIntrinsic>(I)) || 138 I->getOpcode() == Instruction::UDiv || 139 I->getOpcode() == Instruction::SDiv || 140 I->getOpcode() == Instruction::FDiv || 141 I->getOpcode() == Instruction::URem || 142 I->getOpcode() == Instruction::SRem || 143 I->getOpcode() == Instruction::FRem) 144 return true; 145 return false; 146} 147 148void Reassociate::BuildRankMap(Function &F) { 149 unsigned i = 2; 150 151 // Assign distinct ranks to function arguments 152 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) 153 ValueRankMap[&*I] = ++i; 154 155 ReversePostOrderTraversal<Function*> RPOT(&F); 156 for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), 157 E = RPOT.end(); I != E; ++I) { 158 BasicBlock *BB = *I; 159 unsigned BBRank = RankMap[BB] = ++i << 16; 160 161 // Walk the basic block, adding precomputed ranks for any instructions that 162 // we cannot move. This ensures that the ranks for these instructions are 163 // all different in the block. 164 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) 165 if (isUnmovableInstruction(I)) 166 ValueRankMap[&*I] = ++BBRank; 167 } 168} 169 170unsigned Reassociate::getRank(Value *V) { 171 Instruction *I = dyn_cast<Instruction>(V); 172 if (I == 0) { 173 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. 174 return 0; // Otherwise it's a global or constant, rank 0. 175 } 176 177 if (unsigned Rank = ValueRankMap[I]) 178 return Rank; // Rank already known? 179 180 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that 181 // we can reassociate expressions for code motion! Since we do not recurse 182 // for PHI nodes, we cannot have infinite recursion here, because there 183 // cannot be loops in the value graph that do not go through PHI nodes. 184 unsigned Rank = 0, MaxRank = RankMap[I->getParent()]; 185 for (unsigned i = 0, e = I->getNumOperands(); 186 i != e && Rank != MaxRank; ++i) 187 Rank = std::max(Rank, getRank(I->getOperand(i))); 188 189 // If this is a not or neg instruction, do not count it for rank. This 190 // assures us that X and ~X will have the same rank. 191 if (!I->getType()->isIntegerTy() || 192 (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) 193 ++Rank; 194 195 //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " 196 // << Rank << "\n"); 197 198 return ValueRankMap[I] = Rank; 199} 200 201/// isReassociableOp - Return true if V is an instruction of the specified 202/// opcode and if it only has one use. 203static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { 204 if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) && 205 cast<Instruction>(V)->getOpcode() == Opcode) 206 return cast<BinaryOperator>(V); 207 return 0; 208} 209 210/// LowerNegateToMultiply - Replace 0-X with X*-1. 211/// 212static Instruction *LowerNegateToMultiply(Instruction *Neg, 213 DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) { 214 Constant *Cst = Constant::getAllOnesValue(Neg->getType()); 215 216 Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); 217 ValueRankMap.erase(Neg); 218 Res->takeName(Neg); 219 Neg->replaceAllUsesWith(Res); 220 Res->setDebugLoc(Neg->getDebugLoc()); 221 Neg->eraseFromParent(); 222 return Res; 223} 224 225// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'. 226// Note that if D is also part of the expression tree that we recurse to 227// linearize it as well. Besides that case, this does not recurse into A,B, or 228// C. 229void Reassociate::LinearizeExpr(BinaryOperator *I) { 230 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 231 BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1)); 232 assert(isReassociableOp(LHS, I->getOpcode()) && 233 isReassociableOp(RHS, I->getOpcode()) && 234 "Not an expression that needs linearization?"); 235 236 DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); 237 238 // Move the RHS instruction to live immediately before I, avoiding breaking 239 // dominator properties. 240 RHS->moveBefore(I); 241 242 // Move operands around to do the linearization. 243 I->setOperand(1, RHS->getOperand(0)); 244 RHS->setOperand(0, LHS); 245 I->setOperand(0, RHS); 246 247 // Conservatively clear all the optional flags, which may not hold 248 // after the reassociation. 249 I->clearSubclassOptionalData(); 250 LHS->clearSubclassOptionalData(); 251 RHS->clearSubclassOptionalData(); 252 253 ++NumLinear; 254 MadeChange = true; 255 DEBUG(dbgs() << "Linearized: " << *I << '\n'); 256 257 // If D is part of this expression tree, tail recurse. 258 if (isReassociableOp(I->getOperand(1), I->getOpcode())) 259 LinearizeExpr(I); 260} 261 262 263/// LinearizeExprTree - Given an associative binary expression tree, traverse 264/// all of the uses putting it into canonical form. This forces a left-linear 265/// form of the expression (((a+b)+c)+d), and collects information about the 266/// rank of the non-tree operands. 267/// 268/// NOTE: These intentionally destroys the expression tree operands (turning 269/// them into undef values) to reduce #uses of the values. This means that the 270/// caller MUST use something like RewriteExprTree to put the values back in. 271/// 272void Reassociate::LinearizeExprTree(BinaryOperator *I, 273 SmallVectorImpl<ValueEntry> &Ops) { 274 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 275 unsigned Opcode = I->getOpcode(); 276 277 // First step, linearize the expression if it is in ((A+B)+(C+D)) form. 278 BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); 279 BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode); 280 281 // If this is a multiply expression tree and it contains internal negations, 282 // transform them into multiplies by -1 so they can be reassociated. 283 if (I->getOpcode() == Instruction::Mul) { 284 if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { 285 LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); 286 LHSBO = isReassociableOp(LHS, Opcode); 287 } 288 if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { 289 RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); 290 RHSBO = isReassociableOp(RHS, Opcode); 291 } 292 } 293 294 if (!LHSBO) { 295 if (!RHSBO) { 296 // Neither the LHS or RHS as part of the tree, thus this is a leaf. As 297 // such, just remember these operands and their rank. 298 Ops.push_back(ValueEntry(getRank(LHS), LHS)); 299 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 300 301 // Clear the leaves out. 302 I->setOperand(0, UndefValue::get(I->getType())); 303 I->setOperand(1, UndefValue::get(I->getType())); 304 return; 305 } 306 307 // Turn X+(Y+Z) -> (Y+Z)+X 308 std::swap(LHSBO, RHSBO); 309 std::swap(LHS, RHS); 310 bool Success = !I->swapOperands(); 311 assert(Success && "swapOperands failed"); 312 (void)Success; 313 MadeChange = true; 314 } else if (RHSBO) { 315 // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not 316 // part of the expression tree. 317 LinearizeExpr(I); 318 LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0)); 319 RHS = I->getOperand(1); 320 RHSBO = 0; 321 } 322 323 // Okay, now we know that the LHS is a nested expression and that the RHS is 324 // not. Perform reassociation. 325 assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!"); 326 327 // Move LHS right before I to make sure that the tree expression dominates all 328 // values. 329 LHSBO->moveBefore(I); 330 331 // Linearize the expression tree on the LHS. 332 LinearizeExprTree(LHSBO, Ops); 333 334 // Remember the RHS operand and its rank. 335 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 336 337 // Clear the RHS leaf out. 338 I->setOperand(1, UndefValue::get(I->getType())); 339} 340 341// RewriteExprTree - Now that the operands for this expression tree are 342// linearized and optimized, emit them in-order. This function is written to be 343// tail recursive. 344void Reassociate::RewriteExprTree(BinaryOperator *I, 345 SmallVectorImpl<ValueEntry> &Ops, 346 unsigned i) { 347 if (i+2 == Ops.size()) { 348 if (I->getOperand(0) != Ops[i].Op || 349 I->getOperand(1) != Ops[i+1].Op) { 350 Value *OldLHS = I->getOperand(0); 351 DEBUG(dbgs() << "RA: " << *I << '\n'); 352 I->setOperand(0, Ops[i].Op); 353 I->setOperand(1, Ops[i+1].Op); 354 355 // Clear all the optional flags, which may not hold after the 356 // reassociation if the expression involved more than just this operation. 357 if (Ops.size() != 2) 358 I->clearSubclassOptionalData(); 359 360 DEBUG(dbgs() << "TO: " << *I << '\n'); 361 MadeChange = true; 362 ++NumChanged; 363 364 // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3) 365 // delete the extra, now dead, nodes. 366 RemoveDeadBinaryOp(OldLHS); 367 } 368 return; 369 } 370 assert(i+2 < Ops.size() && "Ops index out of range!"); 371 372 if (I->getOperand(1) != Ops[i].Op) { 373 DEBUG(dbgs() << "RA: " << *I << '\n'); 374 I->setOperand(1, Ops[i].Op); 375 376 // Conservatively clear all the optional flags, which may not hold 377 // after the reassociation. 378 I->clearSubclassOptionalData(); 379 380 DEBUG(dbgs() << "TO: " << *I << '\n'); 381 MadeChange = true; 382 ++NumChanged; 383 } 384 385 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 386 assert(LHS->getOpcode() == I->getOpcode() && 387 "Improper expression tree!"); 388 389 // Compactify the tree instructions together with each other to guarantee 390 // that the expression tree is dominated by all of Ops. 391 LHS->moveBefore(I); 392 RewriteExprTree(LHS, Ops, i+1); 393} 394 395 396 397// NegateValue - Insert instructions before the instruction pointed to by BI, 398// that computes the negative version of the value specified. The negative 399// version of the value is returned, and BI is left pointing at the instruction 400// that should be processed next by the reassociation pass. 401// 402static Value *NegateValue(Value *V, Instruction *BI) { 403 if (Constant *C = dyn_cast<Constant>(V)) 404 return ConstantExpr::getNeg(C); 405 406 // We are trying to expose opportunity for reassociation. One of the things 407 // that we want to do to achieve this is to push a negation as deep into an 408 // expression chain as possible, to expose the add instructions. In practice, 409 // this means that we turn this: 410 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D 411 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate 412 // the constants. We assume that instcombine will clean up the mess later if 413 // we introduce tons of unnecessary negation instructions. 414 // 415 if (Instruction *I = dyn_cast<Instruction>(V)) 416 if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { 417 // Push the negates through the add. 418 I->setOperand(0, NegateValue(I->getOperand(0), BI)); 419 I->setOperand(1, NegateValue(I->getOperand(1), BI)); 420 421 // We must move the add instruction here, because the neg instructions do 422 // not dominate the old add instruction in general. By moving it, we are 423 // assured that the neg instructions we just inserted dominate the 424 // instruction we are about to insert after them. 425 // 426 I->moveBefore(BI); 427 I->setName(I->getName()+".neg"); 428 return I; 429 } 430 431 // Okay, we need to materialize a negated version of V with an instruction. 432 // Scan the use lists of V to see if we have one already. 433 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 434 User *U = *UI; 435 if (!BinaryOperator::isNeg(U)) continue; 436 437 // We found one! Now we have to make sure that the definition dominates 438 // this use. We do this by moving it to the entry block (if it is a 439 // non-instruction value) or right after the definition. These negates will 440 // be zapped by reassociate later, so we don't need much finesse here. 441 BinaryOperator *TheNeg = cast<BinaryOperator>(U); 442 443 // Verify that the negate is in this function, V might be a constant expr. 444 if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) 445 continue; 446 447 BasicBlock::iterator InsertPt; 448 if (Instruction *InstInput = dyn_cast<Instruction>(V)) { 449 if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { 450 InsertPt = II->getNormalDest()->begin(); 451 } else { 452 InsertPt = InstInput; 453 ++InsertPt; 454 } 455 while (isa<PHINode>(InsertPt)) ++InsertPt; 456 } else { 457 InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); 458 } 459 TheNeg->moveBefore(InsertPt); 460 return TheNeg; 461 } 462 463 // Insert a 'neg' instruction that subtracts the value from zero to get the 464 // negation. 465 return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); 466} 467 468/// ShouldBreakUpSubtract - Return true if we should break up this subtract of 469/// X-Y into (X + -Y). 470static bool ShouldBreakUpSubtract(Instruction *Sub) { 471 // If this is a negation, we can't split it up! 472 if (BinaryOperator::isNeg(Sub)) 473 return false; 474 475 // Don't bother to break this up unless either the LHS is an associable add or 476 // subtract or if this is only used by one. 477 if (isReassociableOp(Sub->getOperand(0), Instruction::Add) || 478 isReassociableOp(Sub->getOperand(0), Instruction::Sub)) 479 return true; 480 if (isReassociableOp(Sub->getOperand(1), Instruction::Add) || 481 isReassociableOp(Sub->getOperand(1), Instruction::Sub)) 482 return true; 483 if (Sub->hasOneUse() && 484 (isReassociableOp(Sub->use_back(), Instruction::Add) || 485 isReassociableOp(Sub->use_back(), Instruction::Sub))) 486 return true; 487 488 return false; 489} 490 491/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is 492/// only used by an add, transform this into (X+(0-Y)) to promote better 493/// reassociation. 494static Instruction *BreakUpSubtract(Instruction *Sub, 495 DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) { 496 // Convert a subtract into an add and a neg instruction. This allows sub 497 // instructions to be commuted with other add instructions. 498 // 499 // Calculate the negative value of Operand 1 of the sub instruction, 500 // and set it as the RHS of the add instruction we just made. 501 // 502 Value *NegVal = NegateValue(Sub->getOperand(1), Sub); 503 Instruction *New = 504 BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); 505 New->takeName(Sub); 506 507 // Everyone now refers to the add instruction. 508 ValueRankMap.erase(Sub); 509 Sub->replaceAllUsesWith(New); 510 New->setDebugLoc(Sub->getDebugLoc()); 511 Sub->eraseFromParent(); 512 513 DEBUG(dbgs() << "Negated: " << *New << '\n'); 514 return New; 515} 516 517/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used 518/// by one, change this into a multiply by a constant to assist with further 519/// reassociation. 520static Instruction *ConvertShiftToMul(Instruction *Shl, 521 DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) { 522 // If an operand of this shift is a reassociable multiply, or if the shift 523 // is used by a reassociable multiply or add, turn into a multiply. 524 if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || 525 (Shl->hasOneUse() && 526 (isReassociableOp(Shl->use_back(), Instruction::Mul) || 527 isReassociableOp(Shl->use_back(), Instruction::Add)))) { 528 Constant *MulCst = ConstantInt::get(Shl->getType(), 1); 529 MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); 530 531 Instruction *Mul = 532 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); 533 ValueRankMap.erase(Shl); 534 Mul->takeName(Shl); 535 Shl->replaceAllUsesWith(Mul); 536 Mul->setDebugLoc(Shl->getDebugLoc()); 537 Shl->eraseFromParent(); 538 return Mul; 539 } 540 return 0; 541} 542 543// Scan backwards and forwards among values with the same rank as element i to 544// see if X exists. If X does not exist, return i. This is useful when 545// scanning for 'x' when we see '-x' because they both get the same rank. 546static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, 547 Value *X) { 548 unsigned XRank = Ops[i].Rank; 549 unsigned e = Ops.size(); 550 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) 551 if (Ops[j].Op == X) 552 return j; 553 // Scan backwards. 554 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) 555 if (Ops[j].Op == X) 556 return j; 557 return i; 558} 559 560/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together 561/// and returning the result. Insert the tree before I. 562static Value *EmitAddTreeOfValues(Instruction *I, 563 SmallVectorImpl<WeakVH> &Ops){ 564 if (Ops.size() == 1) return Ops.back(); 565 566 Value *V1 = Ops.back(); 567 Ops.pop_back(); 568 Value *V2 = EmitAddTreeOfValues(I, Ops); 569 return BinaryOperator::CreateAdd(V2, V1, "tmp", I); 570} 571 572/// RemoveFactorFromExpression - If V is an expression tree that is a 573/// multiplication sequence, and if this sequence contains a multiply by Factor, 574/// remove Factor from the tree and return the new tree. 575Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { 576 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); 577 if (!BO) return 0; 578 579 SmallVector<ValueEntry, 8> Factors; 580 LinearizeExprTree(BO, Factors); 581 582 bool FoundFactor = false; 583 bool NeedsNegate = false; 584 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 585 if (Factors[i].Op == Factor) { 586 FoundFactor = true; 587 Factors.erase(Factors.begin()+i); 588 break; 589 } 590 591 // If this is a negative version of this factor, remove it. 592 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) 593 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op)) 594 if (FC1->getValue() == -FC2->getValue()) { 595 FoundFactor = NeedsNegate = true; 596 Factors.erase(Factors.begin()+i); 597 break; 598 } 599 } 600 601 if (!FoundFactor) { 602 // Make sure to restore the operands to the expression tree. 603 RewriteExprTree(BO, Factors); 604 return 0; 605 } 606 607 BasicBlock::iterator InsertPt = BO; ++InsertPt; 608 609 // If this was just a single multiply, remove the multiply and return the only 610 // remaining operand. 611 if (Factors.size() == 1) { 612 ValueRankMap.erase(BO); 613 DeadInsts.push_back(BO); 614 V = Factors[0].Op; 615 } else { 616 RewriteExprTree(BO, Factors); 617 V = BO; 618 } 619 620 if (NeedsNegate) 621 V = BinaryOperator::CreateNeg(V, "neg", InsertPt); 622 623 return V; 624} 625 626/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively 627/// add its operands as factors, otherwise add V to the list of factors. 628/// 629/// Ops is the top-level list of add operands we're trying to factor. 630static void FindSingleUseMultiplyFactors(Value *V, 631 SmallVectorImpl<Value*> &Factors, 632 const SmallVectorImpl<ValueEntry> &Ops, 633 bool IsRoot) { 634 BinaryOperator *BO; 635 if (!(V->hasOneUse() || V->use_empty()) || // More than one use. 636 !(BO = dyn_cast<BinaryOperator>(V)) || 637 BO->getOpcode() != Instruction::Mul) { 638 Factors.push_back(V); 639 return; 640 } 641 642 // If this value has a single use because it is another input to the add 643 // tree we're reassociating and we dropped its use, it actually has two 644 // uses and we can't factor it. 645 if (!IsRoot) { 646 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 647 if (Ops[i].Op == V) { 648 Factors.push_back(V); 649 return; 650 } 651 } 652 653 654 // Otherwise, add the LHS and RHS to the list of factors. 655 FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); 656 FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); 657} 658 659/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' 660/// instruction. This optimizes based on identities. If it can be reduced to 661/// a single Value, it is returned, otherwise the Ops list is mutated as 662/// necessary. 663static Value *OptimizeAndOrXor(unsigned Opcode, 664 SmallVectorImpl<ValueEntry> &Ops) { 665 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs. 666 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1. 667 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 668 // First, check for X and ~X in the operand list. 669 assert(i < Ops.size()); 670 if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^. 671 Value *X = BinaryOperator::getNotArgument(Ops[i].Op); 672 unsigned FoundX = FindInOperandList(Ops, i, X); 673 if (FoundX != i) { 674 if (Opcode == Instruction::And) // ...&X&~X = 0 675 return Constant::getNullValue(X->getType()); 676 677 if (Opcode == Instruction::Or) // ...|X|~X = -1 678 return Constant::getAllOnesValue(X->getType()); 679 } 680 } 681 682 // Next, check for duplicate pairs of values, which we assume are next to 683 // each other, due to our sorting criteria. 684 assert(i < Ops.size()); 685 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { 686 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 687 // Drop duplicate values for And and Or. 688 Ops.erase(Ops.begin()+i); 689 --i; --e; 690 ++NumAnnihil; 691 continue; 692 } 693 694 // Drop pairs of values for Xor. 695 assert(Opcode == Instruction::Xor); 696 if (e == 2) 697 return Constant::getNullValue(Ops[0].Op->getType()); 698 699 // Y ^ X^X -> Y 700 Ops.erase(Ops.begin()+i, Ops.begin()+i+2); 701 i -= 1; e -= 2; 702 ++NumAnnihil; 703 } 704 } 705 return 0; 706} 707 708/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This 709/// optimizes based on identities. If it can be reduced to a single Value, it 710/// is returned, otherwise the Ops list is mutated as necessary. 711Value *Reassociate::OptimizeAdd(Instruction *I, 712 SmallVectorImpl<ValueEntry> &Ops) { 713 // Scan the operand lists looking for X and -X pairs. If we find any, we 714 // can simplify the expression. X+-X == 0. While we're at it, scan for any 715 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z. 716 // 717 // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1". 718 // 719 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 720 Value *TheOp = Ops[i].Op; 721 // Check to see if we've seen this operand before. If so, we factor all 722 // instances of the operand together. Due to our sorting criteria, we know 723 // that these need to be next to each other in the vector. 724 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) { 725 // Rescan the list, remove all instances of this operand from the expr. 726 unsigned NumFound = 0; 727 do { 728 Ops.erase(Ops.begin()+i); 729 ++NumFound; 730 } while (i != Ops.size() && Ops[i].Op == TheOp); 731 732 DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n'); 733 ++NumFactor; 734 735 // Insert a new multiply. 736 Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound); 737 Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); 738 739 // Now that we have inserted a multiply, optimize it. This allows us to 740 // handle cases that require multiple factoring steps, such as this: 741 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 742 RedoInsts.push_back(Mul); 743 744 // If every add operand was a duplicate, return the multiply. 745 if (Ops.empty()) 746 return Mul; 747 748 // Otherwise, we had some input that didn't have the dupe, such as 749 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of 750 // things being added by this operation. 751 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul)); 752 753 --i; 754 e = Ops.size(); 755 continue; 756 } 757 758 // Check for X and -X in the operand list. 759 if (!BinaryOperator::isNeg(TheOp)) 760 continue; 761 762 Value *X = BinaryOperator::getNegArgument(TheOp); 763 unsigned FoundX = FindInOperandList(Ops, i, X); 764 if (FoundX == i) 765 continue; 766 767 // Remove X and -X from the operand list. 768 if (Ops.size() == 2) 769 return Constant::getNullValue(X->getType()); 770 771 Ops.erase(Ops.begin()+i); 772 if (i < FoundX) 773 --FoundX; 774 else 775 --i; // Need to back up an extra one. 776 Ops.erase(Ops.begin()+FoundX); 777 ++NumAnnihil; 778 --i; // Revisit element. 779 e -= 2; // Removed two elements. 780 } 781 782 // Scan the operand list, checking to see if there are any common factors 783 // between operands. Consider something like A*A+A*B*C+D. We would like to 784 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. 785 // To efficiently find this, we count the number of times a factor occurs 786 // for any ADD operands that are MULs. 787 DenseMap<Value*, unsigned> FactorOccurrences; 788 789 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) 790 // where they are actually the same multiply. 791 unsigned MaxOcc = 0; 792 Value *MaxOccVal = 0; 793 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 794 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); 795 if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) 796 continue; 797 798 // Compute all of the factors of this added value. 799 SmallVector<Value*, 8> Factors; 800 FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); 801 assert(Factors.size() > 1 && "Bad linearize!"); 802 803 // Add one to FactorOccurrences for each unique factor in this op. 804 SmallPtrSet<Value*, 8> Duplicates; 805 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 806 Value *Factor = Factors[i]; 807 if (!Duplicates.insert(Factor)) continue; 808 809 unsigned Occ = ++FactorOccurrences[Factor]; 810 if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } 811 812 // If Factor is a negative constant, add the negated value as a factor 813 // because we can percolate the negate out. Watch for minint, which 814 // cannot be positivified. 815 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) 816 if (CI->isNegative() && !CI->isMinValue(true)) { 817 Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); 818 assert(!Duplicates.count(Factor) && 819 "Shouldn't have two constant factors, missed a canonicalize"); 820 821 unsigned Occ = ++FactorOccurrences[Factor]; 822 if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } 823 } 824 } 825 } 826 827 // If any factor occurred more than one time, we can pull it out. 828 if (MaxOcc > 1) { 829 DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n'); 830 ++NumFactor; 831 832 // Create a new instruction that uses the MaxOccVal twice. If we don't do 833 // this, we could otherwise run into situations where removing a factor 834 // from an expression will drop a use of maxocc, and this can cause 835 // RemoveFactorFromExpression on successive values to behave differently. 836 Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); 837 SmallVector<WeakVH, 4> NewMulOps; 838 for (unsigned i = 0; i != Ops.size(); ++i) { 839 // Only try to remove factors from expressions we're allowed to. 840 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); 841 if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) 842 continue; 843 844 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { 845 // The factorized operand may occur several times. Convert them all in 846 // one fell swoop. 847 for (unsigned j = Ops.size(); j != i;) { 848 --j; 849 if (Ops[j].Op == Ops[i].Op) { 850 NewMulOps.push_back(V); 851 Ops.erase(Ops.begin()+j); 852 } 853 } 854 --i; 855 } 856 } 857 858 // No need for extra uses anymore. 859 delete DummyInst; 860 861 unsigned NumAddedValues = NewMulOps.size(); 862 Value *V = EmitAddTreeOfValues(I, NewMulOps); 863 864 // Now that we have inserted the add tree, optimize it. This allows us to 865 // handle cases that require multiple factoring steps, such as this: 866 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) 867 assert(NumAddedValues > 1 && "Each occurrence should contribute a value"); 868 (void)NumAddedValues; 869 V = ReassociateExpression(cast<BinaryOperator>(V)); 870 871 // Create the multiply. 872 Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); 873 874 // Rerun associate on the multiply in case the inner expression turned into 875 // a multiply. We want to make sure that we keep things in canonical form. 876 V2 = ReassociateExpression(cast<BinaryOperator>(V2)); 877 878 // If every add operand included the factor (e.g. "A*B + A*C"), then the 879 // entire result expression is just the multiply "A*(B+C)". 880 if (Ops.empty()) 881 return V2; 882 883 // Otherwise, we had some input that didn't have the factor, such as 884 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of 885 // things being added by this operation. 886 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); 887 } 888 889 return 0; 890} 891 892Value *Reassociate::OptimizeExpression(BinaryOperator *I, 893 SmallVectorImpl<ValueEntry> &Ops) { 894 // Now that we have the linearized expression tree, try to optimize it. 895 // Start by folding any constants that we found. 896 bool IterateOptimization = false; 897 if (Ops.size() == 1) return Ops[0].Op; 898 899 unsigned Opcode = I->getOpcode(); 900 901 if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op)) 902 if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) { 903 Ops.pop_back(); 904 Ops.back().Op = ConstantExpr::get(Opcode, V1, V2); 905 return OptimizeExpression(I, Ops); 906 } 907 908 // Check for destructive annihilation due to a constant being used. 909 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op)) 910 switch (Opcode) { 911 default: break; 912 case Instruction::And: 913 if (CstVal->isZero()) // X & 0 -> 0 914 return CstVal; 915 if (CstVal->isAllOnesValue()) // X & -1 -> X 916 Ops.pop_back(); 917 break; 918 case Instruction::Mul: 919 if (CstVal->isZero()) { // X * 0 -> 0 920 ++NumAnnihil; 921 return CstVal; 922 } 923 924 if (cast<ConstantInt>(CstVal)->isOne()) 925 Ops.pop_back(); // X * 1 -> X 926 break; 927 case Instruction::Or: 928 if (CstVal->isAllOnesValue()) // X | -1 -> -1 929 return CstVal; 930 // FALLTHROUGH! 931 case Instruction::Add: 932 case Instruction::Xor: 933 if (CstVal->isZero()) // X [|^+] 0 -> X 934 Ops.pop_back(); 935 break; 936 } 937 if (Ops.size() == 1) return Ops[0].Op; 938 939 // Handle destructive annihilation due to identities between elements in the 940 // argument list here. 941 switch (Opcode) { 942 default: break; 943 case Instruction::And: 944 case Instruction::Or: 945 case Instruction::Xor: { 946 unsigned NumOps = Ops.size(); 947 if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) 948 return Result; 949 IterateOptimization |= Ops.size() != NumOps; 950 break; 951 } 952 953 case Instruction::Add: { 954 unsigned NumOps = Ops.size(); 955 if (Value *Result = OptimizeAdd(I, Ops)) 956 return Result; 957 IterateOptimization |= Ops.size() != NumOps; 958 } 959 960 break; 961 //case Instruction::Mul: 962 } 963 964 if (IterateOptimization) 965 return OptimizeExpression(I, Ops); 966 return 0; 967} 968 969 970/// ReassociateInst - Inspect and reassociate the instruction at the 971/// given position, post-incrementing the position. 972void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { 973 Instruction *BI = BBI++; 974 if (BI->getOpcode() == Instruction::Shl && 975 isa<ConstantInt>(BI->getOperand(1))) 976 if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { 977 MadeChange = true; 978 BI = NI; 979 } 980 981 // Reject cases where it is pointless to do this. 982 if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || 983 BI->getType()->isVectorTy()) 984 return; // Floating point ops are not associative. 985 986 // Do not reassociate boolean (i1) expressions. We want to preserve the 987 // original order of evaluation for short-circuited comparisons that 988 // SimplifyCFG has folded to AND/OR expressions. If the expression 989 // is not further optimized, it is likely to be transformed back to a 990 // short-circuited form for code gen, and the source order may have been 991 // optimized for the most likely conditions. 992 if (BI->getType()->isIntegerTy(1)) 993 return; 994 995 // If this is a subtract instruction which is not already in negate form, 996 // see if we can convert it to X+-Y. 997 if (BI->getOpcode() == Instruction::Sub) { 998 if (ShouldBreakUpSubtract(BI)) { 999 BI = BreakUpSubtract(BI, ValueRankMap); 1000 // Reset the BBI iterator in case BreakUpSubtract changed the 1001 // instruction it points to. 1002 BBI = BI; 1003 ++BBI; 1004 MadeChange = true; 1005 } else if (BinaryOperator::isNeg(BI)) { 1006 // Otherwise, this is a negation. See if the operand is a multiply tree 1007 // and if this is not an inner node of a multiply tree. 1008 if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && 1009 (!BI->hasOneUse() || 1010 !isReassociableOp(BI->use_back(), Instruction::Mul))) { 1011 BI = LowerNegateToMultiply(BI, ValueRankMap); 1012 MadeChange = true; 1013 } 1014 } 1015 } 1016 1017 // If this instruction is a commutative binary operator, process it. 1018 if (!BI->isAssociative()) return; 1019 BinaryOperator *I = cast<BinaryOperator>(BI); 1020 1021 // If this is an interior node of a reassociable tree, ignore it until we 1022 // get to the root of the tree, to avoid N^2 analysis. 1023 if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) 1024 return; 1025 1026 // If this is an add tree that is used by a sub instruction, ignore it 1027 // until we process the subtract. 1028 if (I->hasOneUse() && I->getOpcode() == Instruction::Add && 1029 cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) 1030 return; 1031 1032 ReassociateExpression(I); 1033} 1034 1035Value *Reassociate::ReassociateExpression(BinaryOperator *I) { 1036 1037 // First, walk the expression tree, linearizing the tree, collecting the 1038 // operand information. 1039 SmallVector<ValueEntry, 8> Ops; 1040 LinearizeExprTree(I, Ops); 1041 1042 DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); 1043 1044 // Now that we have linearized the tree to a list and have gathered all of 1045 // the operands and their ranks, sort the operands by their rank. Use a 1046 // stable_sort so that values with equal ranks will have their relative 1047 // positions maintained (and so the compiler is deterministic). Note that 1048 // this sorts so that the highest ranking values end up at the beginning of 1049 // the vector. 1050 std::stable_sort(Ops.begin(), Ops.end()); 1051 1052 // OptimizeExpression - Now that we have the expression tree in a convenient 1053 // sorted form, optimize it globally if possible. 1054 if (Value *V = OptimizeExpression(I, Ops)) { 1055 // This expression tree simplified to something that isn't a tree, 1056 // eliminate it. 1057 DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); 1058 I->replaceAllUsesWith(V); 1059 if (Instruction *VI = dyn_cast<Instruction>(V)) 1060 VI->setDebugLoc(I->getDebugLoc()); 1061 RemoveDeadBinaryOp(I); 1062 ++NumAnnihil; 1063 return V; 1064 } 1065 1066 // We want to sink immediates as deeply as possible except in the case where 1067 // this is a multiply tree used only by an add, and the immediate is a -1. 1068 // In this case we reassociate to put the negation on the outside so that we 1069 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y 1070 if (I->getOpcode() == Instruction::Mul && I->hasOneUse() && 1071 cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add && 1072 isa<ConstantInt>(Ops.back().Op) && 1073 cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) { 1074 ValueEntry Tmp = Ops.pop_back_val(); 1075 Ops.insert(Ops.begin(), Tmp); 1076 } 1077 1078 DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); 1079 1080 if (Ops.size() == 1) { 1081 // This expression tree simplified to something that isn't a tree, 1082 // eliminate it. 1083 I->replaceAllUsesWith(Ops[0].Op); 1084 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) 1085 OI->setDebugLoc(I->getDebugLoc()); 1086 RemoveDeadBinaryOp(I); 1087 return Ops[0].Op; 1088 } 1089 1090 // Now that we ordered and optimized the expressions, splat them back into 1091 // the expression tree, removing any unneeded nodes. 1092 RewriteExprTree(I, Ops); 1093 return I; 1094} 1095 1096 1097bool Reassociate::runOnFunction(Function &F) { 1098 // Recalculate the rank map for F 1099 BuildRankMap(F); 1100 1101 MadeChange = false; 1102 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) 1103 for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) 1104 ReassociateInst(BBI); 1105 1106 // Now that we're done, revisit any instructions which are likely to 1107 // have secondary reassociation opportunities. 1108 while (!RedoInsts.empty()) 1109 if (Value *V = RedoInsts.pop_back_val()) { 1110 BasicBlock::iterator BBI = cast<Instruction>(V); 1111 ReassociateInst(BBI); 1112 } 1113 1114 // Now that we're done, delete any instructions which are no longer used. 1115 while (!DeadInsts.empty()) 1116 if (Value *V = DeadInsts.pop_back_val()) 1117 RecursivelyDeleteTriviallyDeadInstructions(V); 1118 1119 // We are done with the rank map. 1120 RankMap.clear(); 1121 ValueRankMap.clear(); 1122 return MadeChange; 1123} 1124 1125