GlobalOpt.cpp revision 210299
1206084Srdivacky//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2198092Srdivacky// 3198092Srdivacky// The LLVM Compiler Infrastructure 4198092Srdivacky// 5198092Srdivacky// This file is distributed under the University of Illinois Open Source 6198092Srdivacky// License. See LICENSE.TXT for details. 7198092Srdivacky// 8198092Srdivacky//===----------------------------------------------------------------------===// 9198092Srdivacky// 10206084Srdivacky// This pass transforms simple global variables that never have their address 11198092Srdivacky// taken. If obviously true, it marks read/write globals as constant, deletes 12198092Srdivacky// variables only stored to, etc. 13198092Srdivacky// 14206084Srdivacky//===----------------------------------------------------------------------===// 15249423Sdim 16249423Sdim#define DEBUG_TYPE "globalopt" 17198092Srdivacky#include "llvm/Transforms/IPO.h" 18198092Srdivacky#include "llvm/CallingConv.h" 19218893Sdim#include "llvm/Constants.h" 20198092Srdivacky#include "llvm/DerivedTypes.h" 21198092Srdivacky#include "llvm/Instructions.h" 22198092Srdivacky#include "llvm/IntrinsicInst.h" 23224145Sdim#include "llvm/Module.h" 24249423Sdim#include "llvm/Pass.h" 25249423Sdim#include "llvm/Analysis/ConstantFolding.h" 26249423Sdim#include "llvm/Analysis/MemoryBuiltins.h" 27207619Srdivacky#include "llvm/Target/TargetData.h" 28207619Srdivacky#include "llvm/Support/CallSite.h" 29198092Srdivacky#include "llvm/Support/Debug.h" 30198092Srdivacky#include "llvm/Support/ErrorHandling.h" 31198092Srdivacky#include "llvm/Support/GetElementPtrTypeIterator.h" 32218893Sdim#include "llvm/Support/MathExtras.h" 33206084Srdivacky#include "llvm/Support/raw_ostream.h" 34206084Srdivacky#include "llvm/ADT/DenseMap.h" 35206084Srdivacky#include "llvm/ADT/SmallPtrSet.h" 36206084Srdivacky#include "llvm/ADT/SmallVector.h" 37218893Sdim#include "llvm/ADT/Statistic.h" 38226633Sdim#include "llvm/ADT/STLExtras.h" 39206084Srdivacky#include <algorithm> 40218893Sdimusing namespace llvm; 41218893Sdim 42218893SdimSTATISTIC(NumMarked , "Number of globals marked constant"); 43218893SdimSTATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 44218893SdimSTATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); 45218893SdimSTATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 46218893SdimSTATISTIC(NumDeleted , "Number of globals deleted"); 47218893SdimSTATISTIC(NumFnDeleted , "Number of functions deleted"); 48218893SdimSTATISTIC(NumGlobUses , "Number of global uses devirtualized"); 49218893SdimSTATISTIC(NumLocalized , "Number of globals localized"); 50218893SdimSTATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 51218893SdimSTATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); 52218893SdimSTATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); 53218893SdimSTATISTIC(NumNestRemoved , "Number of nest attributes removed"); 54218893SdimSTATISTIC(NumAliasesResolved, "Number of global aliases resolved"); 55224145SdimSTATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); 56206084Srdivacky 57218893Sdimnamespace { 58218893Sdim struct GlobalOpt : public ModulePass { 59206084Srdivacky virtual void getAnalysisUsage(AnalysisUsage &AU) const { 60218893Sdim } 61218893Sdim static char ID; // Pass identification, replacement for typeid 62218893Sdim GlobalOpt() : ModulePass(&ID) {} 63218893Sdim 64218893Sdim bool runOnModule(Module &M); 65218893Sdim 66218893Sdim private: 67218893Sdim GlobalVariable *FindGlobalCtors(Module &M); 68218893Sdim bool OptimizeFunctions(Module &M); 69218893Sdim bool OptimizeGlobalVars(Module &M); 70218893Sdim bool OptimizeGlobalAliases(Module &M); 71218893Sdim bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); 72218893Sdim bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); 73208600Srdivacky }; 74212904Sdim} 75212904Sdim 76212904Sdimchar GlobalOpt::ID = 0; 77218893Sdimstatic RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer"); 78206084Srdivacky 79206084SrdivackyModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } 80206084Srdivacky 81221345Sdimnamespace { 82221345Sdim 83221345Sdim/// GlobalStatus - As we analyze each global, keep track of some information 84206084Srdivacky/// about it. If we find out that the address of the global is taken, none of 85206084Srdivacky/// this info will be accurate. 86206084Srdivackystruct GlobalStatus { 87206084Srdivacky /// isLoaded - True if the global is ever loaded. If the global isn't ever 88221345Sdim /// loaded it can be deleted. 89221345Sdim bool isLoaded; 90221345Sdim 91221345Sdim /// StoredType - Keep track of what stores to the global look like. 92221345Sdim /// 93221345Sdim enum StoredType { 94221345Sdim /// NotStored - There is no store to this global. It can thus be marked 95221345Sdim /// constant. 96221345Sdim NotStored, 97221345Sdim 98221345Sdim /// isInitializerStored - This global is stored to, but the only thing 99221345Sdim /// stored is the constant it was initialized with. This is only tracked 100206084Srdivacky /// for scalar globals. 101218893Sdim isInitializerStored, 102206084Srdivacky 103218893Sdim /// isStoredOnce - This global is stored to, but only its initializer and 104218893Sdim /// one other value is ever stored to it. If this global isStoredOnce, we 105206084Srdivacky /// track the value stored to it in StoredOnceValue below. This is only 106207619Srdivacky /// tracked for scalar globals. 107207619Srdivacky isStoredOnce, 108224145Sdim 109224145Sdim /// isStored - This global is stored to by multiple values or something else 110207619Srdivacky /// that we cannot track. 111206084Srdivacky isStored 112206084Srdivacky } StoredType; 113206084Srdivacky 114249423Sdim /// StoredOnceValue - If only one value (besides the initializer constant) is 115249423Sdim /// ever stored to this global, keep track of what value it is. 116249423Sdim Value *StoredOnceValue; 117249423Sdim 118249423Sdim /// AccessingFunction/HasMultipleAccessingFunctions - These start out 119249423Sdim /// null/false. When the first accessing function is noticed, it is recorded. 120206084Srdivacky /// When a second different accessing function is noticed, 121206084Srdivacky /// HasMultipleAccessingFunctions is set to true. 122206084Srdivacky const Function *AccessingFunction; 123206084Srdivacky bool HasMultipleAccessingFunctions; 124218893Sdim 125234353Sdim /// HasNonInstructionUser - Set to true if this global has a user that is not 126218893Sdim /// an instruction (e.g. a constant expr or GV initializer). 127218893Sdim bool HasNonInstructionUser; 128218893Sdim 129218893Sdim /// HasPHIUser - Set to true if this global has a user that is a PHI node. 130234353Sdim bool HasPHIUser; 131218893Sdim 132218893Sdim GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), 133218893Sdim AccessingFunction(0), HasMultipleAccessingFunctions(false), 134234353Sdim HasNonInstructionUser(false), HasPHIUser(false) {} 135218893Sdim}; 136234353Sdim 137234353Sdim} 138234353Sdim 139234353Sdim// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used 140234353Sdim// by constants itself. Note that constants cannot be cyclic, so this test is 141218893Sdim// pretty easy to implement recursively. 142208600Srdivacky// 143234353Sdimstatic bool SafeToDestroyConstant(const Constant *C) { 144218893Sdim if (isa<GlobalValue>(C)) return false; 145208600Srdivacky 146218893Sdim for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; 147234353Sdim ++UI) 148208600Srdivacky if (const Constant *CU = dyn_cast<Constant>(*UI)) { 149206084Srdivacky if (!SafeToDestroyConstant(CU)) return false; 150218893Sdim } else 151218893Sdim return false; 152218893Sdim return true; 153206084Srdivacky} 154206084Srdivacky 155206084Srdivacky 156206084Srdivacky/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus 157206084Srdivacky/// structure. If the global has its address taken, return true to indicate we 158206084Srdivacky/// can't do anything with it. 159206084Srdivacky/// 160206084Srdivackystatic bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, 161224145Sdim SmallPtrSet<const PHINode*, 16> &PHIUsers) { 162206084Srdivacky for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; 163206084Srdivacky ++UI) { 164206084Srdivacky const User *U = *UI; 165218893Sdim if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 166206084Srdivacky GS.HasNonInstructionUser = true; 167221345Sdim if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; 168221345Sdim } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 169221345Sdim if (!GS.HasMultipleAccessingFunctions) { 170221345Sdim const Function *F = I->getParent()->getParent(); 171221345Sdim if (GS.AccessingFunction == 0) 172221345Sdim GS.AccessingFunction = F; 173218893Sdim else if (GS.AccessingFunction != F) 174218893Sdim GS.HasMultipleAccessingFunctions = true; 175224145Sdim } 176218893Sdim if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { 177206084Srdivacky GS.isLoaded = true; 178218893Sdim if (LI->isVolatile()) return true; // Don't hack on volatile loads. 179206084Srdivacky } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { 180206084Srdivacky // Don't allow a store OF the address, only stores TO the address. 181206084Srdivacky if (SI->getOperand(0) == V) return true; 182221345Sdim 183206084Srdivacky if (SI->isVolatile()) return true; // Don't hack on volatile stores. 184226633Sdim 185206084Srdivacky // If this is a direct store to the global (i.e., the global is a scalar 186218893Sdim // value, not an aggregate), keep more specific information about 187218893Sdim // stores. 188218893Sdim if (GS.StoredType != GlobalStatus::isStored) { 189218893Sdim if (const GlobalVariable *GV = dyn_cast<GlobalVariable>( 190212904Sdim SI->getOperand(1))) { 191206084Srdivacky Value *StoredVal = SI->getOperand(0); 192212904Sdim if (StoredVal == GV->getInitializer()) { 193206084Srdivacky if (GS.StoredType < GlobalStatus::isInitializerStored) 194206084Srdivacky GS.StoredType = GlobalStatus::isInitializerStored; 195206084Srdivacky } else if (isa<LoadInst>(StoredVal) && 196218893Sdim cast<LoadInst>(StoredVal)->getOperand(0) == GV) { 197218893Sdim if (GS.StoredType < GlobalStatus::isInitializerStored) 198221345Sdim GS.StoredType = GlobalStatus::isInitializerStored; 199249423Sdim } else if (GS.StoredType < GlobalStatus::isStoredOnce) { 200206084Srdivacky GS.StoredType = GlobalStatus::isStoredOnce; 201206084Srdivacky GS.StoredOnceValue = StoredVal; 202206084Srdivacky } else if (GS.StoredType == GlobalStatus::isStoredOnce && 203206084Srdivacky GS.StoredOnceValue == StoredVal) { 204206084Srdivacky // noop. 205206084Srdivacky } else { 206206084Srdivacky GS.StoredType = GlobalStatus::isStored; 207198092Srdivacky } 208218893Sdim } else { 209198092Srdivacky GS.StoredType = GlobalStatus::isStored; 210221345Sdim } 211243830Sdim } 212198092Srdivacky } else if (isa<GetElementPtrInst>(I)) { 213198092Srdivacky if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 214198092Srdivacky } else if (isa<SelectInst>(I)) { 215198092Srdivacky if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 216198092Srdivacky } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { 217198092Srdivacky // PHI nodes we can check just like select or GEP instructions, but we 218198092Srdivacky // have to be careful about infinite recursion. 219198092Srdivacky if (PHIUsers.insert(PN)) // Not already visited. 220198092Srdivacky if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 221198092Srdivacky GS.HasPHIUser = true; 222198092Srdivacky } else if (isa<CmpInst>(I)) { 223221345Sdim // Nothing to analyse. 224218893Sdim } else if (isa<MemTransferInst>(I)) { 225198092Srdivacky const MemTransferInst *MTI = cast<MemTransferInst>(I); 226218893Sdim if (MTI->getArgOperand(0) == V) 227218893Sdim GS.StoredType = GlobalStatus::isStored; 228218893Sdim if (MTI->getArgOperand(1) == V) 229218893Sdim GS.isLoaded = true; 230198092Srdivacky } else if (isa<MemSetInst>(I)) { 231198092Srdivacky assert(cast<MemSetInst>(I)->getArgOperand(0) == V && 232198092Srdivacky "Memset only takes one pointer!"); 233198092Srdivacky GS.StoredType = GlobalStatus::isStored; 234212904Sdim } else { 235249423Sdim return true; // Any other non-load instruction might take address! 236249423Sdim } 237249423Sdim } else if (const Constant *C = dyn_cast<Constant>(U)) { 238249423Sdim GS.HasNonInstructionUser = true; 239226633Sdim // We might have a dead and dangling constant hanging off of here. 240219077Sdim if (!SafeToDestroyConstant(C)) 241243830Sdim return true; 242219077Sdim } else { 243207619Srdivacky GS.HasNonInstructionUser = true; 244223017Sdim // Otherwise must be some other user. 245207619Srdivacky return true; 246249423Sdim } 247207619Srdivacky } 248207619Srdivacky 249207619Srdivacky return false; 250207619Srdivacky} 251207619Srdivacky 252207619Srdivackystatic Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { 253207619Srdivacky ConstantInt *CI = dyn_cast<ConstantInt>(Idx); 254207619Srdivacky if (!CI) return 0; 255207619Srdivacky unsigned IdxV = CI->getZExtValue(); 256249423Sdim 257207619Srdivacky if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) { 258207619Srdivacky if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); 259249423Sdim } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) { 260249423Sdim if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); 261249423Sdim } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { 262249423Sdim if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); 263243830Sdim } else if (isa<ConstantAggregateZero>(Agg)) { 264249423Sdim if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 265218893Sdim if (IdxV < STy->getNumElements()) 266218893Sdim return Constant::getNullValue(STy->getElementType(IdxV)); 267249423Sdim } else if (const SequentialType *STy = 268249423Sdim dyn_cast<SequentialType>(Agg->getType())) { 269207619Srdivacky return Constant::getNullValue(STy->getElementType()); 270249423Sdim } 271249423Sdim } else if (isa<UndefValue>(Agg)) { 272249423Sdim if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 273249423Sdim if (IdxV < STy->getNumElements()) 274249423Sdim return UndefValue::get(STy->getElementType(IdxV)); 275249423Sdim } else if (const SequentialType *STy = 276249423Sdim dyn_cast<SequentialType>(Agg->getType())) { 277249423Sdim return UndefValue::get(STy->getElementType()); 278224145Sdim } 279251662Sdim } 280249423Sdim return 0; 281249423Sdim} 282249423Sdim 283249423Sdim 284207619Srdivacky/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all 285249423Sdim/// users of the global, cleaning up the obvious ones. This is largely just a 286249423Sdim/// quick scan over the use list to clean up the easy and obvious cruft. This 287249423Sdim/// returns true if it made a change. 288207619Srdivackystatic bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { 289249423Sdim bool Changed = false; 290249423Sdim for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { 291249423Sdim User *U = *UI++; 292207619Srdivacky 293249423Sdim if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 294249423Sdim if (Init) { 295249423Sdim // Replace the load with the initializer. 296249423Sdim LI->replaceAllUsesWith(Init); 297207619Srdivacky LI->eraseFromParent(); 298249423Sdim Changed = true; 299249423Sdim } 300249423Sdim } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 301207619Srdivacky // Store must be unreachable or storing Init into the global. 302249423Sdim SI->eraseFromParent(); 303249423Sdim Changed = true; 304249423Sdim } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 305218893Sdim if (CE->getOpcode() == Instruction::GetElementPtr) { 306249423Sdim Constant *SubInit = 0; 307249423Sdim if (Init) 308249423Sdim SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 309249423Sdim Changed |= CleanupConstantGlobalUsers(CE, SubInit); 310249423Sdim } else if (CE->getOpcode() == Instruction::BitCast && 311249423Sdim CE->getType()->isPointerTy()) { 312249423Sdim // Pointer cast, delete any stores and memsets to the global. 313249423Sdim Changed |= CleanupConstantGlobalUsers(CE, 0); 314249423Sdim } 315249423Sdim 316207619Srdivacky if (CE->use_empty()) { 317249423Sdim CE->destroyConstant(); 318249423Sdim Changed = true; 319249423Sdim } 320249423Sdim } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { 321207619Srdivacky // Do not transform "gepinst (gep constexpr (GV))" here, because forming 322249423Sdim // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold 323249423Sdim // and will invalidate our notion of what Init is. 324249423Sdim Constant *SubInit = 0; 325249423Sdim if (!isa<ConstantExpr>(GEP->getOperand(0))) { 326207619Srdivacky ConstantExpr *CE = 327249423Sdim dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); 328249423Sdim if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) 329249423Sdim SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 330249423Sdim } 331249423Sdim Changed |= CleanupConstantGlobalUsers(GEP, SubInit); 332249423Sdim 333249423Sdim if (GEP->use_empty()) { 334249423Sdim GEP->eraseFromParent(); 335207619Srdivacky Changed = true; 336249423Sdim } 337249423Sdim } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 338249423Sdim if (MI->getRawDest() == V) { 339249423Sdim MI->eraseFromParent(); 340249423Sdim Changed = true; 341249423Sdim } 342249423Sdim 343249423Sdim } else if (Constant *C = dyn_cast<Constant>(U)) { 344249423Sdim // If we have a chain of dead constantexprs or other things dangling from 345249423Sdim // us, and if they are all dead, nuke them without remorse. 346249423Sdim if (SafeToDestroyConstant(C)) { 347249423Sdim C->destroyConstant(); 348249423Sdim // This could have invalidated UI, start over from scratch. 349249423Sdim CleanupConstantGlobalUsers(V, Init); 350249423Sdim return true; 351249423Sdim } 352249423Sdim } 353249423Sdim } 354249423Sdim return Changed; 355251662Sdim} 356249423Sdim 357249423Sdim/// isSafeSROAElementUse - Return true if the specified instruction is a safe 358249423Sdim/// user of a derived expression from a global that we want to SROA. 359249423Sdimstatic bool isSafeSROAElementUse(Value *V) { 360249423Sdim // We might have a dead and dangling constant hanging off of here. 361249423Sdim if (Constant *C = dyn_cast<Constant>(V)) 362249423Sdim return SafeToDestroyConstant(C); 363221345Sdim 364221345Sdim Instruction *I = dyn_cast<Instruction>(V); 365249423Sdim if (!I) return false; 366249423Sdim 367198092Srdivacky // Loads are ok. 368249423Sdim if (isa<LoadInst>(I)) return true; 369249423Sdim 370249423Sdim // Stores *to* the pointer are ok. 371249423Sdim if (StoreInst *SI = dyn_cast<StoreInst>(I)) 372249423Sdim return SI->getOperand(0) != V; 373249423Sdim 374249423Sdim // Otherwise, it must be a GEP. 375249423Sdim GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); 376249423Sdim if (GEPI == 0) return false; 377249423Sdim 378198092Srdivacky if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || 379249423Sdim !cast<Constant>(GEPI->getOperand(1))->isNullValue()) 380249423Sdim return false; 381198092Srdivacky 382198092Srdivacky for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); 383198092Srdivacky I != E; ++I) 384218893Sdim if (!isSafeSROAElementUse(*I)) 385198092Srdivacky return false; 386198092Srdivacky return true; 387198092Srdivacky} 388198092Srdivacky 389249423Sdim 390198092Srdivacky/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. 391212904Sdim/// Look at it and its uses and decide whether it is safe to SROA this global. 392206084Srdivacky/// 393219077Sdimstatic bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { 394219077Sdim // The user of the global must be a GEP Inst or a ConstantExpr GEP. 395219077Sdim if (!isa<GetElementPtrInst>(U) && 396219077Sdim (!isa<ConstantExpr>(U) || 397198092Srdivacky cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) 398224145Sdim return false; 399218893Sdim 400198092Srdivacky // Check to see if this ConstantExpr GEP is SRA'able. In particular, we 401198092Srdivacky // don't like < 3 operand CE's, and we don't like non-constant integer 402198092Srdivacky // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some 403218893Sdim // value of C. 404198092Srdivacky if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || 405198092Srdivacky !cast<Constant>(U->getOperand(1))->isNullValue() || 406198092Srdivacky !isa<ConstantInt>(U->getOperand(2))) 407198092Srdivacky return false; 408218893Sdim 409218893Sdim gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); 410218893Sdim ++GEPI; // Skip over the pointer index. 411218893Sdim 412218893Sdim // If this is a use of an array allocation, do a bit more checking for sanity. 413219077Sdim if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { 414218893Sdim uint64_t NumElements = AT->getNumElements(); 415218893Sdim ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); 416198092Srdivacky 417198092Srdivacky // Check to make sure that index falls within the array. If not, 418198092Srdivacky // something funny is going on, so we won't do the optimization. 419198092Srdivacky // 420218893Sdim if (Idx->getZExtValue() >= NumElements) 421218893Sdim return false; 422198092Srdivacky 423218893Sdim // We cannot scalar repl this level of the array unless any array 424221345Sdim // sub-indices are in-range constants. In particular, consider: 425221345Sdim // A[0][i]. We cannot know that the user isn't doing invalid things like 426221345Sdim // allowing i to index an out-of-range subscript that accesses A[1]. 427221345Sdim // 428221345Sdim // Scalar replacing *just* the outer index of the array is probably not 429221345Sdim // going to be a win anyway, so just give up. 430221345Sdim for (++GEPI; // Skip array index. 431221345Sdim GEPI != E; 432198092Srdivacky ++GEPI) { 433198092Srdivacky uint64_t NumElements; 434198092Srdivacky if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) 435198092Srdivacky NumElements = SubArrayTy->getNumElements(); 436218893Sdim else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) 437198092Srdivacky NumElements = SubVectorTy->getNumElements(); 438198092Srdivacky else { 439218893Sdim assert((*GEPI)->isStructTy() && 440218893Sdim "Indexed GEP type is not array, vector, or struct!"); 441198092Srdivacky continue; 442221345Sdim } 443198092Srdivacky 444198092Srdivacky ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); 445198092Srdivacky if (!IdxVal || IdxVal->getZExtValue() >= NumElements) 446224145Sdim return false; 447207619Srdivacky } 448207619Srdivacky } 449249423Sdim 450207619Srdivacky for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) 451226633Sdim if (!isSafeSROAElementUse(*I)) 452207619Srdivacky return false; 453207619Srdivacky return true; 454207619Srdivacky} 455207619Srdivacky 456207619Srdivacky/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it 457249423Sdim/// is safe for us to perform this transformation. 458251662Sdim/// 459249423Sdimstatic bool GlobalUsersSafeToSRA(GlobalValue *GV) { 460249423Sdim for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); 461249423Sdim UI != E; ++UI) { 462224145Sdim if (!IsUserOfGlobalSafeForSRA(*UI, GV)) 463221345Sdim return false; 464221345Sdim } 465207619Srdivacky return true; 466207619Srdivacky} 467249423Sdim 468249423Sdim 469249423Sdim/// SRAGlobal - Perform scalar replacement of aggregates on the specified global 470207619Srdivacky/// variable. This opens the door for other optimizations by exposing the 471207619Srdivacky/// behavior of the program in a more fine-grained way. We have determined that 472207619Srdivacky/// this transformation is safe already. We return the first global variable we 473207619Srdivacky/// insert so that the caller can reprocess it. 474224145Sdimstatic GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { 475207619Srdivacky // Make sure this global only has simple uses that we can SRA. 476207619Srdivacky if (!GlobalUsersSafeToSRA(GV)) 477198092Srdivacky return 0; 478198092Srdivacky 479198092Srdivacky assert(GV->hasLocalLinkage() && !GV->isConstant()); 480218893Sdim Constant *Init = GV->getInitializer(); 481198092Srdivacky const Type *Ty = Init->getType(); 482224145Sdim 483218893Sdim std::vector<GlobalVariable*> NewGlobals; 484218893Sdim Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); 485198092Srdivacky 486218893Sdim // Get the alignment of the global, either explicit or target-specific. 487234353Sdim unsigned StartAlignment = GV->getAlignment(); 488206084Srdivacky if (StartAlignment == 0) 489218893Sdim StartAlignment = TD.getABITypeAlignment(GV->getType()); 490218893Sdim 491218893Sdim if (const StructType *STy = dyn_cast<StructType>(Ty)) { 492218893Sdim NewGlobals.reserve(STy->getNumElements()); 493198092Srdivacky const StructLayout &Layout = *TD.getStructLayout(STy); 494224145Sdim for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 495198092Srdivacky Constant *In = getAggregateConstantElement(Init, 496218893Sdim ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); 497207619Srdivacky assert(In && "Couldn't get element of initializer?"); 498198092Srdivacky GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, 499234353Sdim GlobalVariable::InternalLinkage, 500234353Sdim In, GV->getName()+"."+Twine(i), 501234353Sdim GV->isThreadLocal(), 502234353Sdim GV->getType()->getAddressSpace()); 503234353Sdim Globals.insert(GV, NGV); 504218893Sdim NewGlobals.push_back(NGV); 505206084Srdivacky 506218893Sdim // Calculate the known alignment of the field. If the original aggregate 507243830Sdim // had 256 byte alignment for example, something might depend on that: 508218893Sdim // propagate info to each field. 509243830Sdim uint64_t FieldOffset = Layout.getElementOffset(i); 510198092Srdivacky unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); 511218893Sdim if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) 512198092Srdivacky NGV->setAlignment(NewAlign); 513198092Srdivacky } 514218893Sdim } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { 515218893Sdim unsigned NumElements = 0; 516218893Sdim if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) 517218893Sdim NumElements = ATy->getNumElements(); 518198092Srdivacky else 519198092Srdivacky NumElements = cast<VectorType>(STy)->getNumElements(); 520198092Srdivacky 521198092Srdivacky if (NumElements > 16 && GV->hasNUsesOrMore(16)) 522218893Sdim return 0; // It's not worth it. 523218893Sdim NewGlobals.reserve(NumElements); 524198092Srdivacky 525218893Sdim uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); 526198092Srdivacky unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); 527198092Srdivacky for (unsigned i = 0, e = NumElements; i != e; ++i) { 528218893Sdim Constant *In = getAggregateConstantElement(Init, 529198092Srdivacky ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); 530198092Srdivacky assert(In && "Couldn't get element of initializer?"); 531218893Sdim 532234353Sdim GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, 533218893Sdim GlobalVariable::InternalLinkage, 534203955Srdivacky In, GV->getName()+"."+Twine(i), 535218893Sdim GV->isThreadLocal(), 536199482Srdivacky GV->getType()->getAddressSpace()); 537206084Srdivacky Globals.insert(GV, NGV); 538198092Srdivacky NewGlobals.push_back(NGV); 539218893Sdim 540218893Sdim // Calculate the known alignment of the field. If the original aggregate 541218893Sdim // had 256 byte alignment for example, something might depend on that: 542198092Srdivacky // propagate info to each field. 543198092Srdivacky unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); 544234353Sdim if (NewAlign > EltAlign) 545218893Sdim NGV->setAlignment(NewAlign); 546218893Sdim } 547221345Sdim } 548221345Sdim 549218893Sdim if (NewGlobals.empty()) 550208600Srdivacky return 0; 551218893Sdim 552218893Sdim DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); 553208600Srdivacky 554221345Sdim Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); 555221345Sdim 556221345Sdim // Loop over all of the uses of the global, replacing the constantexpr geps, 557234353Sdim // with smaller constantexpr geps or direct references. 558234353Sdim while (!GV->use_empty()) { 559234353Sdim User *GEP = GV->use_back(); 560218893Sdim assert(((isa<ConstantExpr>(GEP) && 561221345Sdim cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| 562234353Sdim isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); 563218893Sdim 564218893Sdim // Ignore the 1th operand, which has to be zero or else the program is quite 565234353Sdim // broken (undefined). Get the 2nd operand, which is the structure or array 566218893Sdim // index. 567218893Sdim unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 568234353Sdim if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. 569218893Sdim 570218893Sdim Value *NewPtr = NewGlobals[Val]; 571218893Sdim 572218893Sdim // Form a shorter GEP if needed. 573218893Sdim if (GEP->getNumOperands() > 3) { 574218893Sdim if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { 575218893Sdim SmallVector<Constant*, 8> Idxs; 576218893Sdim Idxs.push_back(NullInt); 577208600Srdivacky for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) 578208600Srdivacky Idxs.push_back(CE->getOperand(i)); 579234353Sdim NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), 580234353Sdim &Idxs[0], Idxs.size()); 581218893Sdim } else { 582234353Sdim GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); 583218893Sdim SmallVector<Value*, 8> Idxs; 584208600Srdivacky Idxs.push_back(NullInt); 585234353Sdim for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) 586218893Sdim Idxs.push_back(GEPI->getOperand(i)); 587218893Sdim NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), 588218893Sdim GEPI->getName()+"."+Twine(Val),GEPI); 589234353Sdim } 590208600Srdivacky } 591218893Sdim GEP->replaceAllUsesWith(NewPtr); 592218893Sdim 593218893Sdim if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) 594218893Sdim GEPI->eraseFromParent(); 595234353Sdim else 596234353Sdim cast<ConstantExpr>(GEP)->destroyConstant(); 597218893Sdim } 598234353Sdim 599208600Srdivacky // Delete the old global, now that it is dead. 600208600Srdivacky Globals.erase(GV); 601234353Sdim ++NumSRA; 602234353Sdim 603234353Sdim // Loop over the new globals array deleting any globals that are obviously 604234353Sdim // dead. This can arise due to scalarization of a structure or an array that 605234353Sdim // has elements that are dead. 606234353Sdim unsigned FirstGlobal = 0; 607234353Sdim for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) 608234353Sdim if (NewGlobals[i]->use_empty()) { 609234353Sdim Globals.erase(NewGlobals[i]); 610234353Sdim if (FirstGlobal == i) ++FirstGlobal; 611234353Sdim } 612234353Sdim 613234353Sdim return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; 614234353Sdim} 615234353Sdim 616234353Sdim/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified 617234353Sdim/// value will trap if the value is dynamically null. PHIs keeps track of any 618234353Sdim/// phi nodes we've seen to avoid reprocessing them. 619234353Sdimstatic bool AllUsesOfValueWillTrapIfNull(const Value *V, 620234353Sdim SmallPtrSet<const PHINode*, 8> &PHIs) { 621234353Sdim for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; 622218893Sdim ++UI) { 623234353Sdim const User *U = *UI; 624218893Sdim 625218893Sdim if (isa<LoadInst>(U)) { 626218893Sdim // Will trap. 627218893Sdim } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 628218893Sdim if (SI->getOperand(0) == V) { 629218893Sdim //cerr << "NONTRAPPING USE: " << *U; 630218893Sdim return false; // Storing the value. 631218893Sdim } 632218893Sdim } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { 633218893Sdim if (CI->getCalledValue() != V) { 634218893Sdim //cerr << "NONTRAPPING USE: " << *U; 635218893Sdim return false; // Not calling the ptr 636218893Sdim } 637218893Sdim } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { 638218893Sdim if (II->getCalledValue() != V) { 639234353Sdim //cerr << "NONTRAPPING USE: " << *U; 640234353Sdim return false; // Not calling the ptr 641218893Sdim } 642218893Sdim } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { 643218893Sdim if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; 644218893Sdim } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 645218893Sdim if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 646218893Sdim } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { 647218893Sdim // If we've already seen this phi node, ignore it, it has already been 648234353Sdim // checked. 649234353Sdim if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) 650218893Sdim return false; 651234353Sdim } else if (isa<ICmpInst>(U) && 652218893Sdim isa<ConstantPointerNull>(UI->getOperand(1))) { 653218893Sdim // Ignore icmp X, null 654234353Sdim } else { 655208600Srdivacky //cerr << "NONTRAPPING USE: " << *U; 656208600Srdivacky return false; 657208600Srdivacky } 658208600Srdivacky } 659234353Sdim return true; 660234353Sdim} 661234353Sdim 662234353Sdim/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads 663234353Sdim/// from GV will trap if the loaded value is null. Note that this also permits 664208600Srdivacky/// comparisons of the loaded value against null, as a special case. 665234353Sdimstatic bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { 666234353Sdim for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); 667208600Srdivacky UI != E; ++UI) { 668234353Sdim const User *U = *UI; 669234353Sdim 670239462Sdim if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 671234353Sdim SmallPtrSet<const PHINode*, 8> PHIs; 672234353Sdim if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 673234353Sdim return false; 674234353Sdim } else if (isa<StoreInst>(U)) { 675239462Sdim // Ignore stores to the global. 676239462Sdim } else { 677239462Sdim // We don't know or understand this user, bail out. 678239462Sdim //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; 679239462Sdim return false; 680239462Sdim } 681239462Sdim } 682234353Sdim return true; 683234353Sdim} 684234353Sdim 685201361Srdivackystatic bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { 686208600Srdivacky bool Changed = false; 687208600Srdivacky for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { 688208600Srdivacky Instruction *I = cast<Instruction>(*UI++); 689208600Srdivacky if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 690208600Srdivacky LI->setOperand(0, NewV); 691208600Srdivacky Changed = true; 692208600Srdivacky } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 693208600Srdivacky if (SI->getOperand(1) == V) { 694208600Srdivacky SI->setOperand(1, NewV); 695208600Srdivacky Changed = true; 696208600Srdivacky } 697218893Sdim } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 698208600Srdivacky CallSite CS(I); 699208600Srdivacky if (CS.getCalledValue() == V) { 700234353Sdim // Calling through the pointer! Turn into a direct call, but be careful 701234353Sdim // that the pointer is not also being passed as an argument. 702208600Srdivacky CS.setCalledFunction(NewV); 703234353Sdim Changed = true; 704234353Sdim bool PassedAsArg = false; 705234353Sdim for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 706234353Sdim if (CS.getArgument(i) == V) { 707234353Sdim PassedAsArg = true; 708234353Sdim CS.setArgument(i, NewV); 709234353Sdim } 710234353Sdim 711234353Sdim if (PassedAsArg) { 712234353Sdim // Being passed as an argument also. Be careful to not invalidate UI! 713201361Srdivacky UI = V->use_begin(); 714201361Srdivacky } 715218893Sdim } 716218893Sdim } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 717218893Sdim Changed |= OptimizeAwayTrappingUsesOfValue(CI, 718218893Sdim ConstantExpr::getCast(CI->getOpcode(), 719218893Sdim NewV, CI->getType())); 720218893Sdim if (CI->use_empty()) { 721218893Sdim Changed = true; 722218893Sdim CI->eraseFromParent(); 723218893Sdim } 724218893Sdim } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 725218893Sdim // Should handle GEP here. 726218893Sdim SmallVector<Constant*, 8> Idxs; 727218893Sdim Idxs.reserve(GEPI->getNumOperands()-1); 728218893Sdim for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); 729218893Sdim i != e; ++i) 730218893Sdim if (Constant *C = dyn_cast<Constant>(*i)) 731218893Sdim Idxs.push_back(C); 732218893Sdim else 733218893Sdim break; 734218893Sdim if (Idxs.size() == GEPI->getNumOperands()-1) 735218893Sdim Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, 736218893Sdim ConstantExpr::getGetElementPtr(NewV, &Idxs[0], 737218893Sdim Idxs.size())); 738218893Sdim if (GEPI->use_empty()) { 739218893Sdim Changed = true; 740218893Sdim GEPI->eraseFromParent(); 741218893Sdim } 742218893Sdim } 743224145Sdim } 744226633Sdim 745226633Sdim return Changed; 746224145Sdim} 747218893Sdim 748224145Sdim 749224145Sdim/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null 750218893Sdim/// value stored into it. If there are uses of the loaded value that would trap 751218893Sdim/// if the loaded value is dynamically null, then we know that they cannot be 752218893Sdim/// reachable with a null optimize away the load. 753218893Sdimstatic bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { 754218893Sdim bool Changed = false; 755198092Srdivacky 756198092Srdivacky // Keep track of whether we are able to remove all the uses of the global 757218893Sdim // other than the store that defines it. 758198092Srdivacky bool AllNonStoreUsesGone = true; 759198092Srdivacky 760198092Srdivacky // Replace all uses of loads with uses of uses of the stored value. 761218893Sdim for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ 762218893Sdim User *GlobalUser = *GUI++; 763234353Sdim if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { 764234353Sdim Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); 765206084Srdivacky // If we were able to delete all uses of the loads 766198092Srdivacky if (LI->use_empty()) { 767221345Sdim LI->eraseFromParent(); 768221345Sdim Changed = true; 769249423Sdim } else { 770249423Sdim AllNonStoreUsesGone = false; 771249423Sdim } 772221345Sdim } else if (isa<StoreInst>(GlobalUser)) { 773221345Sdim // Ignore the store that stores "LV" to the global. 774221345Sdim assert(GlobalUser->getOperand(1) == GV && 775223017Sdim "Must be storing *to* the global"); 776221345Sdim } else { 777221345Sdim AllNonStoreUsesGone = false; 778221345Sdim 779221345Sdim // If we get here we could have other crazy uses that are transitively 780221345Sdim // loaded. 781249423Sdim assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || 782249423Sdim isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!"); 783249423Sdim } 784249423Sdim } 785249423Sdim 786249423Sdim if (Changed) { 787249423Sdim DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); 788249423Sdim ++NumGlobUses; 789249423Sdim } 790249423Sdim 791249423Sdim // If we nuked all of the loads, then none of the stores are needed either, 792249423Sdim // nor is the global. 793249423Sdim if (AllNonStoreUsesGone) { 794249423Sdim DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); 795249423Sdim CleanupConstantGlobalUsers(GV, 0); 796249423Sdim if (GV->use_empty()) { 797249423Sdim GV->eraseFromParent(); 798249423Sdim ++NumDeleted; 799249423Sdim } 800249423Sdim Changed = true; 801198092Srdivacky } 802198092Srdivacky return Changed; 803198092Srdivacky} 804198092Srdivacky 805198092Srdivacky/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the 806198092Srdivacky/// instructions that are foldable. 807218893Sdimstatic void ConstantPropUsersOf(Value *V) { 808218893Sdim for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) 809218893Sdim if (Instruction *I = dyn_cast<Instruction>(*UI++)) 810218893Sdim if (Constant *NewC = ConstantFoldInstruction(I)) { 811218893Sdim I->replaceAllUsesWith(NewC); 812218893Sdim 813218893Sdim // Advance UI to the next non-I use to avoid invalidating it! 814218893Sdim // Instructions could multiply use V. 815234353Sdim while (UI != E && *UI == I) 816234353Sdim ++UI; 817251662Sdim I->eraseFromParent(); 818234353Sdim } 819234353Sdim} 820234353Sdim 821234353Sdim/// OptimizeGlobalAddressOfMalloc - This function takes the specified global 822234353Sdim/// variable, and transforms the program as if it always contained the result of 823234353Sdim/// the specified malloc. Because it is always the result of the specified 824234353Sdim/// malloc, there is no reason to actually DO the malloc. Instead, turn the 825234353Sdim/// malloc into a global, and any loads of GV as uses of the new global. 826234353Sdimstatic GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, 827234353Sdim CallInst *CI, 828218893Sdim const Type *AllocTy, 829218893Sdim ConstantInt *NElements, 830198092Srdivacky TargetData* TD) { 831221345Sdim DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); 832198092Srdivacky 833198092Srdivacky const Type *GlobalType; 834198092Srdivacky if (NElements->getZExtValue() == 1) 835198092Srdivacky GlobalType = AllocTy; 836221345Sdim else 837221345Sdim // If we have an array allocation, the global variable is of an array. 838198092Srdivacky GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); 839221345Sdim 840198092Srdivacky // Create the new global variable. The contents of the malloc'd memory is 841218893Sdim // undefined, so initialize with an undef value. 842218893Sdim GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 843200583Srdivacky GlobalType, false, 844221345Sdim GlobalValue::InternalLinkage, 845200583Srdivacky UndefValue::get(GlobalType), 846200583Srdivacky GV->getName()+".body", 847200583Srdivacky GV, 848206084Srdivacky GV->isThreadLocal()); 849221345Sdim 850198092Srdivacky // If there are bitcast users of the malloc (which is typical, usually we have 851198092Srdivacky // a malloc + bitcast) then replace them with uses of the new global. Update 852198092Srdivacky // other users to use the global as well. 853218893Sdim BitCastInst *TheBC = 0; 854224145Sdim while (!CI->use_empty()) { 855218893Sdim Instruction *User = cast<Instruction>(CI->use_back()); 856243830Sdim if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { 857198092Srdivacky if (BCI->getType() == NewGV->getType()) { 858218893Sdim BCI->replaceAllUsesWith(NewGV); 859198092Srdivacky BCI->eraseFromParent(); 860218893Sdim } else { 861198092Srdivacky BCI->setOperand(0, NewGV); 862198092Srdivacky } 863218893Sdim } else { 864218893Sdim if (TheBC == 0) 865218893Sdim TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); 866198092Srdivacky User->replaceUsesOfWith(CI, TheBC); 867198092Srdivacky } 868234353Sdim } 869234353Sdim 870198092Srdivacky Constant *RepValue = NewGV; 871234353Sdim if (NewGV->getType() != GV->getType()->getElementType()) 872234353Sdim RepValue = ConstantExpr::getBitCast(RepValue, 873234353Sdim GV->getType()->getElementType()); 874234353Sdim 875234353Sdim // If there is a comparison against null, we will insert a global bool to 876234353Sdim // keep track of whether the global was initialized yet or not. 877234353Sdim GlobalVariable *InitBool = 878198092Srdivacky new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, 879234353Sdim GlobalValue::InternalLinkage, 880234353Sdim ConstantInt::getFalse(GV->getContext()), 881198092Srdivacky GV->getName()+".init", GV->isThreadLocal()); 882234353Sdim bool InitBoolUsed = false; 883234353Sdim 884234353Sdim // Loop over all uses of GV, processing them in turn. 885234353Sdim while (!GV->use_empty()) { 886198092Srdivacky if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) { 887198092Srdivacky // The global is initialized when the store to it occurs. 888221345Sdim new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); 889221345Sdim SI->eraseFromParent(); 890221345Sdim continue; 891221345Sdim } 892221345Sdim 893221345Sdim LoadInst *LI = cast<LoadInst>(GV->use_back()); 894221345Sdim while (!LI->use_empty()) { 895221345Sdim Use &LoadUse = LI->use_begin().getUse(); 896221345Sdim if (!isa<ICmpInst>(LoadUse.getUser())) { 897221345Sdim LoadUse = RepValue; 898221345Sdim continue; 899221345Sdim } 900221345Sdim 901221345Sdim ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); 902221345Sdim // Replace the cmp X, 0 with a use of the bool value. 903221345Sdim Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); 904221345Sdim InitBoolUsed = true; 905221345Sdim switch (ICI->getPredicate()) { 906224145Sdim default: llvm_unreachable("Unknown ICmp Predicate!"); 907218893Sdim case ICmpInst::ICMP_ULT: 908198092Srdivacky case ICmpInst::ICMP_SLT: // X < null -> always false 909224145Sdim LV = ConstantInt::getFalse(GV->getContext()); 910218893Sdim break; 911218893Sdim case ICmpInst::ICMP_ULE: 912198092Srdivacky case ICmpInst::ICMP_SLE: 913218893Sdim case ICmpInst::ICMP_EQ: 914218893Sdim LV = BinaryOperator::CreateNot(LV, "notinit", ICI); 915218893Sdim break; 916218893Sdim case ICmpInst::ICMP_NE: 917218893Sdim case ICmpInst::ICMP_UGE: 918218893Sdim case ICmpInst::ICMP_SGE: 919218893Sdim case ICmpInst::ICMP_UGT: 920198092Srdivacky case ICmpInst::ICMP_SGT: 921218893Sdim break; // no change. 922198092Srdivacky } 923198092Srdivacky ICI->replaceAllUsesWith(LV); 924226633Sdim ICI->eraseFromParent(); 925198092Srdivacky } 926218893Sdim LI->eraseFromParent(); 927198092Srdivacky } 928243830Sdim 929198092Srdivacky // If the initialization boolean was used, insert it, otherwise delete it. 930198092Srdivacky if (!InitBoolUsed) { 931218893Sdim while (!InitBool->use_empty()) // Delete initializations 932218893Sdim cast<StoreInst>(InitBool->use_back())->eraseFromParent(); 933218893Sdim delete InitBool; 934218893Sdim } else 935218893Sdim GV->getParent()->getGlobalList().insert(GV, InitBool); 936218893Sdim 937218893Sdim // Now the GV is dead, nuke it and the malloc.. 938218893Sdim GV->eraseFromParent(); 939218893Sdim CI->eraseFromParent(); 940218893Sdim 941218893Sdim // To further other optimizations, loop over all users of NewGV and try to 942218893Sdim // constant prop them. This will promote GEP instructions with constant 943212904Sdim // indices into GEP constant-exprs, which will allow global-opt to hack on it. 944198092Srdivacky ConstantPropUsersOf(NewGV); 945218893Sdim if (RepValue != NewGV) 946198092Srdivacky ConstantPropUsersOf(RepValue); 947198092Srdivacky 948198092Srdivacky return NewGV; 949234353Sdim} 950198092Srdivacky 951198092Srdivacky/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking 952218893Sdim/// to make sure that there are no complex uses of V. We permit simple things 953198092Srdivacky/// like dereferencing the pointer, but not storing through the address, unless 954218893Sdim/// it is to the specified global. 955212904Sdimstatic bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, 956218893Sdim const GlobalVariable *GV, 957218893Sdim SmallPtrSet<const PHINode*, 8> &PHIs) { 958203955Srdivacky for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); 959218893Sdim UI != E; ++UI) { 960218893Sdim const Instruction *Inst = cast<Instruction>(*UI); 961218893Sdim 962208600Srdivacky if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { 963208600Srdivacky continue; // Fine, ignore. 964206084Srdivacky } 965224145Sdim 966224145Sdim if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 967206084Srdivacky if (SI->getOperand(0) == V && SI->getOperand(1) != GV) 968198092Srdivacky return false; // Storing the pointer itself... bad. 969198092Srdivacky continue; // Otherwise, storing through it, or storing into GV... fine. 970198092Srdivacky } 971224145Sdim 972198092Srdivacky // Must index into the array and into the struct. 973218893Sdim if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { 974224145Sdim if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) 975234353Sdim return false; 976218893Sdim continue; 977218893Sdim } 978218893Sdim 979218893Sdim if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { 980206084Srdivacky // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI 981218893Sdim // cycles. 982218893Sdim if (PHIs.insert(PN)) 983206084Srdivacky if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) 984218893Sdim return false; 985218893Sdim continue; 986208600Srdivacky } 987198092Srdivacky 988218893Sdim if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { 989198092Srdivacky if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) 990198092Srdivacky return false; 991218893Sdim continue; 992198092Srdivacky } 993207619Srdivacky 994234353Sdim return false; 995207619Srdivacky } 996207619Srdivacky return true; 997207619Srdivacky} 998207619Srdivacky 999207619Srdivacky/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV 1000207619Srdivacky/// somewhere. Transform all uses of the allocation into loads from the 1001207619Srdivacky/// global and uses of the resultant pointer. Further, delete the store into 1002207619Srdivacky/// GV. This assumes that these value pass the 1003207619Srdivacky/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. 1004218893Sdimstatic void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 1005218893Sdim GlobalVariable *GV) { 1006218893Sdim while (!Alloc->use_empty()) { 1007243830Sdim Instruction *U = cast<Instruction>(*Alloc->use_begin()); 1008207619Srdivacky Instruction *InsertPt = U; 1009207619Srdivacky if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 1010218893Sdim // If this is the store of the allocation into the global, remove it. 1011218893Sdim if (SI->getOperand(1) == GV) { 1012218893Sdim SI->eraseFromParent(); 1013218893Sdim continue; 1014218893Sdim } 1015218893Sdim } else if (PHINode *PN = dyn_cast<PHINode>(U)) { 1016218893Sdim // Insert the load in the corresponding predecessor, not right before the 1017218893Sdim // PHI. 1018218893Sdim InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); 1019218893Sdim } else if (isa<BitCastInst>(U)) { 1020243830Sdim // Must be bitcast between the malloc and store to initialize the global. 1021218893Sdim ReplaceUsesOfMallocWithGlobal(U, GV); 1022218893Sdim U->eraseFromParent(); 1023218893Sdim continue; 1024207619Srdivacky } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1025226633Sdim // If this is a "GEP bitcast" and the user is a store to the global, then 1026207619Srdivacky // just process it as a bitcast. 1027243830Sdim if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) 1028207619Srdivacky if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back())) 1029207619Srdivacky if (SI->getOperand(1) == GV) { 1030207619Srdivacky // Must be bitcast GEP between the malloc and store to initialize 1031221345Sdim // the global. 1032243830Sdim ReplaceUsesOfMallocWithGlobal(GEPI, GV); 1033207619Srdivacky GEPI->eraseFromParent(); 1034207619Srdivacky continue; 1035207619Srdivacky } 1036207619Srdivacky } 1037207619Srdivacky 1038207619Srdivacky // Insert a load from the global, and use it instead of the malloc. 1039207619Srdivacky Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); 1040207619Srdivacky U->replaceUsesOfWith(Alloc, NL); 1041207619Srdivacky } 1042221345Sdim} 1043207619Srdivacky 1044207619Srdivacky/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi 1045207619Srdivacky/// of a load) are simple enough to perform heap SRA on. This permits GEP's 1046221345Sdim/// that index through the array and struct field, icmps of null, and PHIs. 1047221345Sdimstatic bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, 1048221345Sdim SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, 1049223017Sdim SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { 1050221345Sdim // We permit two users of the load: setcc comparing against the null 1051221345Sdim // pointer, and a getelementptr of a specific form. 1052221345Sdim for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; 1053221345Sdim ++UI) { 1054221345Sdim const Instruction *User = cast<Instruction>(*UI); 1055221345Sdim 1056207619Srdivacky // Comparison against null is ok. 1057221345Sdim if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { 1058221345Sdim if (!isa<ConstantPointerNull>(ICI->getOperand(1))) 1059207619Srdivacky return false; 1060221345Sdim continue; 1061249423Sdim } 1062249423Sdim 1063249423Sdim // getelementptr is also ok, but only a simple form. 1064249423Sdim if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1065249423Sdim // Must index into the array and into the struct. 1066207619Srdivacky if (GEPI->getNumOperands() < 3) 1067249423Sdim return false; 1068207619Srdivacky 1069249423Sdim // Otherwise the GEP is ok. 1070249423Sdim continue; 1071249423Sdim } 1072249423Sdim 1073249423Sdim if (const PHINode *PN = dyn_cast<PHINode>(User)) { 1074249423Sdim if (!LoadUsingPHIsPerLoad.insert(PN)) 1075249423Sdim // This means some phi nodes are dependent on each other. 1076249423Sdim // Avoid infinite looping! 1077249423Sdim return false; 1078249423Sdim if (!LoadUsingPHIs.insert(PN)) 1079249423Sdim // If we have already analyzed this PHI, then it is safe. 1080249423Sdim continue; 1081249423Sdim 1082249423Sdim // Make sure all uses of the PHI are simple enough to transform. 1083249423Sdim if (!LoadUsesSimpleEnoughForHeapSRA(PN, 1084249423Sdim LoadUsingPHIs, LoadUsingPHIsPerLoad)) 1085249423Sdim return false; 1086249423Sdim 1087249423Sdim continue; 1088249423Sdim } 1089249423Sdim 1090207619Srdivacky // Otherwise we don't know what this is, not ok. 1091249423Sdim return false; 1092249423Sdim } 1093249423Sdim 1094207619Srdivacky return true; 1095207619Srdivacky} 1096207619Srdivacky 1097206084Srdivacky 1098198092Srdivacky/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from 1099207619Srdivacky/// GV are simple enough to perform HeapSRA, return true. 1100226633Sdimstatic bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, 1101207619Srdivacky Instruction *StoredVal) { 1102218893Sdim SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; 1103218893Sdim SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; 1104218893Sdim for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); 1105212904Sdim UI != E; ++UI) 1106207619Srdivacky if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 1107207619Srdivacky if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, 1108207619Srdivacky LoadUsingPHIsPerLoad)) 1109207619Srdivacky return false; 1110207619Srdivacky LoadUsingPHIsPerLoad.clear(); 1111207619Srdivacky } 1112207619Srdivacky 1113207619Srdivacky // If we reach here, we know that all uses of the loads and transitive uses 1114207619Srdivacky // (through PHI nodes) are simple enough to transform. However, we don't know 1115207619Srdivacky // that all inputs the to the PHI nodes are in the same equivalence sets. 1116207619Srdivacky // Check to verify that all operands of the PHIs are either PHIS that can be 1117207619Srdivacky // transformed, loads from GV, or MI itself. 1118207619Srdivacky for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() 1119207619Srdivacky , E = LoadUsingPHIs.end(); I != E; ++I) { 1120207619Srdivacky const PHINode *PN = *I; 1121207619Srdivacky for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { 1122207619Srdivacky Value *InVal = PN->getIncomingValue(op); 1123207619Srdivacky 1124207619Srdivacky // PHI of the stored value itself is ok. 1125207619Srdivacky if (InVal == StoredVal) continue; 1126207619Srdivacky 1127207619Srdivacky if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { 1128207619Srdivacky // One of the PHIs in our set is (optimistically) ok. 1129207619Srdivacky if (LoadUsingPHIs.count(InPN)) 1130207619Srdivacky continue; 1131207619Srdivacky return false; 1132207619Srdivacky } 1133207619Srdivacky 1134226633Sdim // Load from GV is ok. 1135249423Sdim if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) 1136249423Sdim if (LI->getOperand(0) == GV) 1137249423Sdim continue; 1138249423Sdim 1139249423Sdim // UNDEF? NULL? 1140249423Sdim 1141207619Srdivacky // Anything else is rejected. 1142207619Srdivacky return false; 1143207619Srdivacky } 1144207619Srdivacky } 1145207619Srdivacky 1146 return true; 1147} 1148 1149static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, 1150 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1151 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1152 std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; 1153 1154 if (FieldNo >= FieldVals.size()) 1155 FieldVals.resize(FieldNo+1); 1156 1157 // If we already have this value, just reuse the previously scalarized 1158 // version. 1159 if (Value *FieldVal = FieldVals[FieldNo]) 1160 return FieldVal; 1161 1162 // Depending on what instruction this is, we have several cases. 1163 Value *Result; 1164 if (LoadInst *LI = dyn_cast<LoadInst>(V)) { 1165 // This is a scalarized version of the load from the global. Just create 1166 // a new Load of the scalarized global. 1167 Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, 1168 InsertedScalarizedValues, 1169 PHIsToRewrite), 1170 LI->getName()+".f"+Twine(FieldNo), LI); 1171 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 1172 // PN's type is pointer to struct. Make a new PHI of pointer to struct 1173 // field. 1174 const StructType *ST = 1175 cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); 1176 1177 Result = 1178 PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), 1179 PN->getName()+".f"+Twine(FieldNo), PN); 1180 PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); 1181 } else { 1182 llvm_unreachable("Unknown usable value"); 1183 Result = 0; 1184 } 1185 1186 return FieldVals[FieldNo] = Result; 1187} 1188 1189/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from 1190/// the load, rewrite the derived value to use the HeapSRoA'd load. 1191static void RewriteHeapSROALoadUser(Instruction *LoadUser, 1192 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1193 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1194 // If this is a comparison against null, handle it. 1195 if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { 1196 assert(isa<ConstantPointerNull>(SCI->getOperand(1))); 1197 // If we have a setcc of the loaded pointer, we can use a setcc of any 1198 // field. 1199 Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, 1200 InsertedScalarizedValues, PHIsToRewrite); 1201 1202 Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, 1203 Constant::getNullValue(NPtr->getType()), 1204 SCI->getName()); 1205 SCI->replaceAllUsesWith(New); 1206 SCI->eraseFromParent(); 1207 return; 1208 } 1209 1210 // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' 1211 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { 1212 assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) 1213 && "Unexpected GEPI!"); 1214 1215 // Load the pointer for this field. 1216 unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 1217 Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, 1218 InsertedScalarizedValues, PHIsToRewrite); 1219 1220 // Create the new GEP idx vector. 1221 SmallVector<Value*, 8> GEPIdx; 1222 GEPIdx.push_back(GEPI->getOperand(1)); 1223 GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); 1224 1225 Value *NGEPI = GetElementPtrInst::Create(NewPtr, 1226 GEPIdx.begin(), GEPIdx.end(), 1227 GEPI->getName(), GEPI); 1228 GEPI->replaceAllUsesWith(NGEPI); 1229 GEPI->eraseFromParent(); 1230 return; 1231 } 1232 1233 // Recursively transform the users of PHI nodes. This will lazily create the 1234 // PHIs that are needed for individual elements. Keep track of what PHIs we 1235 // see in InsertedScalarizedValues so that we don't get infinite loops (very 1236 // antisocial). If the PHI is already in InsertedScalarizedValues, it has 1237 // already been seen first by another load, so its uses have already been 1238 // processed. 1239 PHINode *PN = cast<PHINode>(LoadUser); 1240 bool Inserted; 1241 DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; 1242 tie(InsertPos, Inserted) = 1243 InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); 1244 if (!Inserted) return; 1245 1246 // If this is the first time we've seen this PHI, recursively process all 1247 // users. 1248 for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { 1249 Instruction *User = cast<Instruction>(*UI++); 1250 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1251 } 1252} 1253 1254/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr 1255/// is a value loaded from the global. Eliminate all uses of Ptr, making them 1256/// use FieldGlobals instead. All uses of loaded values satisfy 1257/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 1258static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 1259 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1260 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1261 for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); 1262 UI != E; ) { 1263 Instruction *User = cast<Instruction>(*UI++); 1264 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1265 } 1266 1267 if (Load->use_empty()) { 1268 Load->eraseFromParent(); 1269 InsertedScalarizedValues.erase(Load); 1270 } 1271} 1272 1273/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break 1274/// it up into multiple allocations of arrays of the fields. 1275static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, 1276 Value* NElems, TargetData *TD) { 1277 DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); 1278 const Type* MAT = getMallocAllocatedType(CI); 1279 const StructType *STy = cast<StructType>(MAT); 1280 1281 // There is guaranteed to be at least one use of the malloc (storing 1282 // it into GV). If there are other uses, change them to be uses of 1283 // the global to simplify later code. This also deletes the store 1284 // into GV. 1285 ReplaceUsesOfMallocWithGlobal(CI, GV); 1286 1287 // Okay, at this point, there are no users of the malloc. Insert N 1288 // new mallocs at the same place as CI, and N globals. 1289 std::vector<Value*> FieldGlobals; 1290 std::vector<Value*> FieldMallocs; 1291 1292 for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ 1293 const Type *FieldTy = STy->getElementType(FieldNo); 1294 const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); 1295 1296 GlobalVariable *NGV = 1297 new GlobalVariable(*GV->getParent(), 1298 PFieldTy, false, GlobalValue::InternalLinkage, 1299 Constant::getNullValue(PFieldTy), 1300 GV->getName() + ".f" + Twine(FieldNo), GV, 1301 GV->isThreadLocal()); 1302 FieldGlobals.push_back(NGV); 1303 1304 unsigned TypeSize = TD->getTypeAllocSize(FieldTy); 1305 if (const StructType *ST = dyn_cast<StructType>(FieldTy)) 1306 TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); 1307 const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); 1308 Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, 1309 ConstantInt::get(IntPtrTy, TypeSize), 1310 NElems, 0, 1311 CI->getName() + ".f" + Twine(FieldNo)); 1312 FieldMallocs.push_back(NMI); 1313 new StoreInst(NMI, NGV, CI); 1314 } 1315 1316 // The tricky aspect of this transformation is handling the case when malloc 1317 // fails. In the original code, malloc failing would set the result pointer 1318 // of malloc to null. In this case, some mallocs could succeed and others 1319 // could fail. As such, we emit code that looks like this: 1320 // F0 = malloc(field0) 1321 // F1 = malloc(field1) 1322 // F2 = malloc(field2) 1323 // if (F0 == 0 || F1 == 0 || F2 == 0) { 1324 // if (F0) { free(F0); F0 = 0; } 1325 // if (F1) { free(F1); F1 = 0; } 1326 // if (F2) { free(F2); F2 = 0; } 1327 // } 1328 // The malloc can also fail if its argument is too large. 1329 Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); 1330 Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), 1331 ConstantZero, "isneg"); 1332 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { 1333 Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], 1334 Constant::getNullValue(FieldMallocs[i]->getType()), 1335 "isnull"); 1336 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); 1337 } 1338 1339 // Split the basic block at the old malloc. 1340 BasicBlock *OrigBB = CI->getParent(); 1341 BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); 1342 1343 // Create the block to check the first condition. Put all these blocks at the 1344 // end of the function as they are unlikely to be executed. 1345 BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), 1346 "malloc_ret_null", 1347 OrigBB->getParent()); 1348 1349 // Remove the uncond branch from OrigBB to ContBB, turning it into a cond 1350 // branch on RunningOr. 1351 OrigBB->getTerminator()->eraseFromParent(); 1352 BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); 1353 1354 // Within the NullPtrBlock, we need to emit a comparison and branch for each 1355 // pointer, because some may be null while others are not. 1356 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1357 Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); 1358 Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 1359 Constant::getNullValue(GVVal->getType()), 1360 "tmp"); 1361 BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", 1362 OrigBB->getParent()); 1363 BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", 1364 OrigBB->getParent()); 1365 Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, 1366 Cmp, NullPtrBlock); 1367 1368 // Fill in FreeBlock. 1369 CallInst::CreateFree(GVVal, BI); 1370 new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], 1371 FreeBlock); 1372 BranchInst::Create(NextBlock, FreeBlock); 1373 1374 NullPtrBlock = NextBlock; 1375 } 1376 1377 BranchInst::Create(ContBB, NullPtrBlock); 1378 1379 // CI is no longer needed, remove it. 1380 CI->eraseFromParent(); 1381 1382 /// InsertedScalarizedLoads - As we process loads, if we can't immediately 1383 /// update all uses of the load, keep track of what scalarized loads are 1384 /// inserted for a given load. 1385 DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; 1386 InsertedScalarizedValues[GV] = FieldGlobals; 1387 1388 std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; 1389 1390 // Okay, the malloc site is completely handled. All of the uses of GV are now 1391 // loads, and all uses of those loads are simple. Rewrite them to use loads 1392 // of the per-field globals instead. 1393 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { 1394 Instruction *User = cast<Instruction>(*UI++); 1395 1396 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1397 RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); 1398 continue; 1399 } 1400 1401 // Must be a store of null. 1402 StoreInst *SI = cast<StoreInst>(User); 1403 assert(isa<ConstantPointerNull>(SI->getOperand(0)) && 1404 "Unexpected heap-sra user!"); 1405 1406 // Insert a store of null into each global. 1407 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1408 const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); 1409 Constant *Null = Constant::getNullValue(PT->getElementType()); 1410 new StoreInst(Null, FieldGlobals[i], SI); 1411 } 1412 // Erase the original store. 1413 SI->eraseFromParent(); 1414 } 1415 1416 // While we have PHIs that are interesting to rewrite, do it. 1417 while (!PHIsToRewrite.empty()) { 1418 PHINode *PN = PHIsToRewrite.back().first; 1419 unsigned FieldNo = PHIsToRewrite.back().second; 1420 PHIsToRewrite.pop_back(); 1421 PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); 1422 assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); 1423 1424 // Add all the incoming values. This can materialize more phis. 1425 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1426 Value *InVal = PN->getIncomingValue(i); 1427 InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, 1428 PHIsToRewrite); 1429 FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); 1430 } 1431 } 1432 1433 // Drop all inter-phi links and any loads that made it this far. 1434 for (DenseMap<Value*, std::vector<Value*> >::iterator 1435 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1436 I != E; ++I) { 1437 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1438 PN->dropAllReferences(); 1439 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1440 LI->dropAllReferences(); 1441 } 1442 1443 // Delete all the phis and loads now that inter-references are dead. 1444 for (DenseMap<Value*, std::vector<Value*> >::iterator 1445 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1446 I != E; ++I) { 1447 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1448 PN->eraseFromParent(); 1449 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1450 LI->eraseFromParent(); 1451 } 1452 1453 // The old global is now dead, remove it. 1454 GV->eraseFromParent(); 1455 1456 ++NumHeapSRA; 1457 return cast<GlobalVariable>(FieldGlobals[0]); 1458} 1459 1460/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a 1461/// pointer global variable with a single value stored it that is a malloc or 1462/// cast of malloc. 1463static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, 1464 CallInst *CI, 1465 const Type *AllocTy, 1466 Module::global_iterator &GVI, 1467 TargetData *TD) { 1468 if (!TD) 1469 return false; 1470 1471 // If this is a malloc of an abstract type, don't touch it. 1472 if (!AllocTy->isSized()) 1473 return false; 1474 1475 // We can't optimize this global unless all uses of it are *known* to be 1476 // of the malloc value, not of the null initializer value (consider a use 1477 // that compares the global's value against zero to see if the malloc has 1478 // been reached). To do this, we check to see if all uses of the global 1479 // would trap if the global were null: this proves that they must all 1480 // happen after the malloc. 1481 if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) 1482 return false; 1483 1484 // We can't optimize this if the malloc itself is used in a complex way, 1485 // for example, being stored into multiple globals. This allows the 1486 // malloc to be stored into the specified global, loaded setcc'd, and 1487 // GEP'd. These are all things we could transform to using the global 1488 // for. 1489 SmallPtrSet<const PHINode*, 8> PHIs; 1490 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) 1491 return false; 1492 1493 // If we have a global that is only initialized with a fixed size malloc, 1494 // transform the program to use global memory instead of malloc'd memory. 1495 // This eliminates dynamic allocation, avoids an indirection accessing the 1496 // data, and exposes the resultant global to further GlobalOpt. 1497 // We cannot optimize the malloc if we cannot determine malloc array size. 1498 Value *NElems = getMallocArraySize(CI, TD, true); 1499 if (!NElems) 1500 return false; 1501 1502 if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) 1503 // Restrict this transformation to only working on small allocations 1504 // (2048 bytes currently), as we don't want to introduce a 16M global or 1505 // something. 1506 if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { 1507 GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); 1508 return true; 1509 } 1510 1511 // If the allocation is an array of structures, consider transforming this 1512 // into multiple malloc'd arrays, one for each field. This is basically 1513 // SRoA for malloc'd memory. 1514 1515 // If this is an allocation of a fixed size array of structs, analyze as a 1516 // variable size array. malloc [100 x struct],1 -> malloc struct, 100 1517 if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) 1518 if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) 1519 AllocTy = AT->getElementType(); 1520 1521 const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); 1522 if (!AllocSTy) 1523 return false; 1524 1525 // This the structure has an unreasonable number of fields, leave it 1526 // alone. 1527 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && 1528 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { 1529 1530 // If this is a fixed size array, transform the Malloc to be an alloc of 1531 // structs. malloc [100 x struct],1 -> malloc struct, 100 1532 if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { 1533 const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); 1534 unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); 1535 Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); 1536 Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); 1537 Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, 1538 AllocSize, NumElements, 1539 0, CI->getName()); 1540 Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); 1541 CI->replaceAllUsesWith(Cast); 1542 CI->eraseFromParent(); 1543 CI = dyn_cast<BitCastInst>(Malloc) ? 1544 extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); 1545 } 1546 1547 GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); 1548 return true; 1549 } 1550 1551 return false; 1552} 1553 1554// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge 1555// that only one value (besides its initializer) is ever stored to the global. 1556static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1557 Module::global_iterator &GVI, 1558 TargetData *TD) { 1559 // Ignore no-op GEPs and bitcasts. 1560 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1561 1562 // If we are dealing with a pointer global that is initialized to null and 1563 // only has one (non-null) value stored into it, then we can optimize any 1564 // users of the loaded value (often calls and loads) that would trap if the 1565 // value was null. 1566 if (GV->getInitializer()->getType()->isPointerTy() && 1567 GV->getInitializer()->isNullValue()) { 1568 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1569 if (GV->getInitializer()->getType() != SOVC->getType()) 1570 SOVC = 1571 ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); 1572 1573 // Optimize away any trapping uses of the loaded value. 1574 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) 1575 return true; 1576 } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { 1577 const Type* MallocType = getMallocAllocatedType(CI); 1578 if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 1579 GVI, TD)) 1580 return true; 1581 } 1582 } 1583 1584 return false; 1585} 1586 1587/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only 1588/// two values ever stored into GV are its initializer and OtherVal. See if we 1589/// can shrink the global into a boolean and select between the two values 1590/// whenever it is used. This exposes the values to other scalar optimizations. 1591static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1592 const Type *GVElType = GV->getType()->getElementType(); 1593 1594 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1595 // an FP value, pointer or vector, don't do this optimization because a select 1596 // between them is very expensive and unlikely to lead to later 1597 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1598 // where v1 and v2 both require constant pool loads, a big loss. 1599 if (GVElType == Type::getInt1Ty(GV->getContext()) || 1600 GVElType->isFloatingPointTy() || 1601 GVElType->isPointerTy() || GVElType->isVectorTy()) 1602 return false; 1603 1604 // Walk the use list of the global seeing if all the uses are load or store. 1605 // If there is anything else, bail out. 1606 for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ 1607 User *U = *I; 1608 if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) 1609 return false; 1610 } 1611 1612 DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); 1613 1614 // Create the new global, initializing it to false. 1615 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), 1616 false, 1617 GlobalValue::InternalLinkage, 1618 ConstantInt::getFalse(GV->getContext()), 1619 GV->getName()+".b", 1620 GV->isThreadLocal()); 1621 GV->getParent()->getGlobalList().insert(GV, NewGV); 1622 1623 Constant *InitVal = GV->getInitializer(); 1624 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && 1625 "No reason to shrink to bool!"); 1626 1627 // If initialized to zero and storing one into the global, we can use a cast 1628 // instead of a select to synthesize the desired value. 1629 bool IsOneZero = false; 1630 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) 1631 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1632 1633 while (!GV->use_empty()) { 1634 Instruction *UI = cast<Instruction>(GV->use_back()); 1635 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 1636 // Change the store into a boolean store. 1637 bool StoringOther = SI->getOperand(0) == OtherVal; 1638 // Only do this if we weren't storing a loaded value. 1639 Value *StoreVal; 1640 if (StoringOther || SI->getOperand(0) == InitVal) 1641 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), 1642 StoringOther); 1643 else { 1644 // Otherwise, we are storing a previously loaded copy. To do this, 1645 // change the copy from copying the original value to just copying the 1646 // bool. 1647 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1648 1649 // If we've already replaced the input, StoredVal will be a cast or 1650 // select instruction. If not, it will be a load of the original 1651 // global. 1652 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1653 assert(LI->getOperand(0) == GV && "Not a copy!"); 1654 // Insert a new load, to preserve the saved value. 1655 StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); 1656 } else { 1657 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1658 "This is not a form that we understand!"); 1659 StoreVal = StoredVal->getOperand(0); 1660 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1661 } 1662 } 1663 new StoreInst(StoreVal, NewGV, SI); 1664 } else { 1665 // Change the load into a load of bool then a select. 1666 LoadInst *LI = cast<LoadInst>(UI); 1667 LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); 1668 Value *NSI; 1669 if (IsOneZero) 1670 NSI = new ZExtInst(NLI, LI->getType(), "", LI); 1671 else 1672 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); 1673 NSI->takeName(LI); 1674 LI->replaceAllUsesWith(NSI); 1675 } 1676 UI->eraseFromParent(); 1677 } 1678 1679 GV->eraseFromParent(); 1680 return true; 1681} 1682 1683 1684/// ProcessInternalGlobal - Analyze the specified global variable and optimize 1685/// it if possible. If we make a change, return true. 1686bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, 1687 Module::global_iterator &GVI) { 1688 SmallPtrSet<const PHINode*, 16> PHIUsers; 1689 GlobalStatus GS; 1690 GV->removeDeadConstantUsers(); 1691 1692 if (GV->use_empty()) { 1693 DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); 1694 GV->eraseFromParent(); 1695 ++NumDeleted; 1696 return true; 1697 } 1698 1699 if (!AnalyzeGlobal(GV, GS, PHIUsers)) { 1700#if 0 1701 DEBUG(dbgs() << "Global: " << *GV); 1702 DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); 1703 DEBUG(dbgs() << " StoredType = "); 1704 switch (GS.StoredType) { 1705 case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; 1706 case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); 1707 break; 1708 case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; 1709 case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; 1710 } 1711 if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) 1712 DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); 1713 if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) 1714 DEBUG(dbgs() << " AccessingFunction = " 1715 << GS.AccessingFunction->getName() << "\n"); 1716 DEBUG(dbgs() << " HasMultipleAccessingFunctions = " 1717 << GS.HasMultipleAccessingFunctions << "\n"); 1718 DEBUG(dbgs() << " HasNonInstructionUser = " 1719 << GS.HasNonInstructionUser<<"\n"); 1720 DEBUG(dbgs() << "\n"); 1721#endif 1722 1723 // If this is a first class global and has only one accessing function 1724 // and this function is main (which we know is not recursive we can make 1725 // this global a local variable) we replace the global with a local alloca 1726 // in this function. 1727 // 1728 // NOTE: It doesn't make sense to promote non single-value types since we 1729 // are just replacing static memory to stack memory. 1730 // 1731 // If the global is in different address space, don't bring it to stack. 1732 if (!GS.HasMultipleAccessingFunctions && 1733 GS.AccessingFunction && !GS.HasNonInstructionUser && 1734 GV->getType()->getElementType()->isSingleValueType() && 1735 GS.AccessingFunction->getName() == "main" && 1736 GS.AccessingFunction->hasExternalLinkage() && 1737 GV->getType()->getAddressSpace() == 0) { 1738 DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); 1739 Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction 1740 ->getEntryBlock().begin()); 1741 const Type* ElemTy = GV->getType()->getElementType(); 1742 // FIXME: Pass Global's alignment when globals have alignment 1743 AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); 1744 if (!isa<UndefValue>(GV->getInitializer())) 1745 new StoreInst(GV->getInitializer(), Alloca, &FirstI); 1746 1747 GV->replaceAllUsesWith(Alloca); 1748 GV->eraseFromParent(); 1749 ++NumLocalized; 1750 return true; 1751 } 1752 1753 // If the global is never loaded (but may be stored to), it is dead. 1754 // Delete it now. 1755 if (!GS.isLoaded) { 1756 DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); 1757 1758 // Delete any stores we can find to the global. We may not be able to 1759 // make it completely dead though. 1760 bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1761 1762 // If the global is dead now, delete it. 1763 if (GV->use_empty()) { 1764 GV->eraseFromParent(); 1765 ++NumDeleted; 1766 Changed = true; 1767 } 1768 return Changed; 1769 1770 } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { 1771 DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); 1772 GV->setConstant(true); 1773 1774 // Clean up any obviously simplifiable users now. 1775 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1776 1777 // If the global is dead now, just nuke it. 1778 if (GV->use_empty()) { 1779 DEBUG(dbgs() << " *** Marking constant allowed us to simplify " 1780 << "all users and delete global!\n"); 1781 GV->eraseFromParent(); 1782 ++NumDeleted; 1783 } 1784 1785 ++NumMarked; 1786 return true; 1787 } else if (!GV->getInitializer()->getType()->isSingleValueType()) { 1788 if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) 1789 if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { 1790 GVI = FirstNewGV; // Don't skip the newly produced globals! 1791 return true; 1792 } 1793 } else if (GS.StoredType == GlobalStatus::isStoredOnce) { 1794 // If the initial value for the global was an undef value, and if only 1795 // one other value was stored into it, we can just change the 1796 // initializer to be the stored value, then delete all stores to the 1797 // global. This allows us to mark it constant. 1798 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1799 if (isa<UndefValue>(GV->getInitializer())) { 1800 // Change the initial value here. 1801 GV->setInitializer(SOVConstant); 1802 1803 // Clean up any obviously simplifiable users now. 1804 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1805 1806 if (GV->use_empty()) { 1807 DEBUG(dbgs() << " *** Substituting initializer allowed us to " 1808 << "simplify all users and delete global!\n"); 1809 GV->eraseFromParent(); 1810 ++NumDeleted; 1811 } else { 1812 GVI = GV; 1813 } 1814 ++NumSubstitute; 1815 return true; 1816 } 1817 1818 // Try to optimize globals based on the knowledge that only one value 1819 // (besides its initializer) is ever stored to the global. 1820 if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, 1821 getAnalysisIfAvailable<TargetData>())) 1822 return true; 1823 1824 // Otherwise, if the global was not a boolean, we can shrink it to be a 1825 // boolean. 1826 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1827 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 1828 ++NumShrunkToBool; 1829 return true; 1830 } 1831 } 1832 } 1833 return false; 1834} 1835 1836/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified 1837/// function, changing them to FastCC. 1838static void ChangeCalleesToFastCall(Function *F) { 1839 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1840 CallSite User(cast<Instruction>(*UI)); 1841 User.setCallingConv(CallingConv::Fast); 1842 } 1843} 1844 1845static AttrListPtr StripNest(const AttrListPtr &Attrs) { 1846 for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { 1847 if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0) 1848 continue; 1849 1850 // There can be only one. 1851 return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest); 1852 } 1853 1854 return Attrs; 1855} 1856 1857static void RemoveNestAttribute(Function *F) { 1858 F->setAttributes(StripNest(F->getAttributes())); 1859 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1860 CallSite User(cast<Instruction>(*UI)); 1861 User.setAttributes(StripNest(User.getAttributes())); 1862 } 1863} 1864 1865bool GlobalOpt::OptimizeFunctions(Module &M) { 1866 bool Changed = false; 1867 // Optimize functions. 1868 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { 1869 Function *F = FI++; 1870 // Functions without names cannot be referenced outside this module. 1871 if (!F->hasName() && !F->isDeclaration()) 1872 F->setLinkage(GlobalValue::InternalLinkage); 1873 F->removeDeadConstantUsers(); 1874 if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) { 1875 F->eraseFromParent(); 1876 Changed = true; 1877 ++NumFnDeleted; 1878 } else if (F->hasLocalLinkage()) { 1879 if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && 1880 !F->hasAddressTaken()) { 1881 // If this function has C calling conventions, is not a varargs 1882 // function, and is only called directly, promote it to use the Fast 1883 // calling convention. 1884 F->setCallingConv(CallingConv::Fast); 1885 ChangeCalleesToFastCall(F); 1886 ++NumFastCallFns; 1887 Changed = true; 1888 } 1889 1890 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && 1891 !F->hasAddressTaken()) { 1892 // The function is not used by a trampoline intrinsic, so it is safe 1893 // to remove the 'nest' attribute. 1894 RemoveNestAttribute(F); 1895 ++NumNestRemoved; 1896 Changed = true; 1897 } 1898 } 1899 } 1900 return Changed; 1901} 1902 1903bool GlobalOpt::OptimizeGlobalVars(Module &M) { 1904 bool Changed = false; 1905 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); 1906 GVI != E; ) { 1907 GlobalVariable *GV = GVI++; 1908 // Global variables without names cannot be referenced outside this module. 1909 if (!GV->hasName() && !GV->isDeclaration()) 1910 GV->setLinkage(GlobalValue::InternalLinkage); 1911 // Simplify the initializer. 1912 if (GV->hasInitializer()) 1913 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { 1914 TargetData *TD = getAnalysisIfAvailable<TargetData>(); 1915 Constant *New = ConstantFoldConstantExpression(CE, TD); 1916 if (New && New != CE) 1917 GV->setInitializer(New); 1918 } 1919 // Do more involved optimizations if the global is internal. 1920 if (!GV->isConstant() && GV->hasLocalLinkage() && 1921 GV->hasInitializer()) 1922 Changed |= ProcessInternalGlobal(GV, GVI); 1923 } 1924 return Changed; 1925} 1926 1927/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all 1928/// initializers have an init priority of 65535. 1929GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { 1930 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 1931 I != E; ++I) 1932 if (I->getName() == "llvm.global_ctors") { 1933 // Found it, verify it's an array of { int, void()* }. 1934 const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); 1935 if (!ATy) return 0; 1936 const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); 1937 if (!STy || STy->getNumElements() != 2 || 1938 !STy->getElementType(0)->isIntegerTy(32)) return 0; 1939 const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); 1940 if (!PFTy) return 0; 1941 const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); 1942 if (!FTy || !FTy->getReturnType()->isVoidTy() || 1943 FTy->isVarArg() || FTy->getNumParams() != 0) 1944 return 0; 1945 1946 // Verify that the initializer is simple enough for us to handle. 1947 if (!I->hasDefinitiveInitializer()) return 0; 1948 ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); 1949 if (!CA) return 0; 1950 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 1951 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { 1952 if (isa<ConstantPointerNull>(CS->getOperand(1))) 1953 continue; 1954 1955 // Must have a function or null ptr. 1956 if (!isa<Function>(CS->getOperand(1))) 1957 return 0; 1958 1959 // Init priority must be standard. 1960 ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); 1961 if (!CI || CI->getZExtValue() != 65535) 1962 return 0; 1963 } else { 1964 return 0; 1965 } 1966 1967 return I; 1968 } 1969 return 0; 1970} 1971 1972/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, 1973/// return a list of the functions and null terminator as a vector. 1974static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { 1975 ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); 1976 std::vector<Function*> Result; 1977 Result.reserve(CA->getNumOperands()); 1978 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { 1979 ConstantStruct *CS = cast<ConstantStruct>(*i); 1980 Result.push_back(dyn_cast<Function>(CS->getOperand(1))); 1981 } 1982 return Result; 1983} 1984 1985/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the 1986/// specified array, returning the new global to use. 1987static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 1988 const std::vector<Function*> &Ctors) { 1989 // If we made a change, reassemble the initializer list. 1990 std::vector<Constant*> CSVals; 1991 CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); 1992 CSVals.push_back(0); 1993 1994 // Create the new init list. 1995 std::vector<Constant*> CAList; 1996 for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { 1997 if (Ctors[i]) { 1998 CSVals[1] = Ctors[i]; 1999 } else { 2000 const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), 2001 false); 2002 const PointerType *PFTy = PointerType::getUnqual(FTy); 2003 CSVals[1] = Constant::getNullValue(PFTy); 2004 CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 2005 2147483647); 2006 } 2007 CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); 2008 } 2009 2010 // Create the array initializer. 2011 const Type *StructTy = 2012 cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); 2013 Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 2014 CAList.size()), CAList); 2015 2016 // If we didn't change the number of elements, don't create a new GV. 2017 if (CA->getType() == GCL->getInitializer()->getType()) { 2018 GCL->setInitializer(CA); 2019 return GCL; 2020 } 2021 2022 // Create the new global and insert it next to the existing list. 2023 GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), 2024 GCL->getLinkage(), CA, "", 2025 GCL->isThreadLocal()); 2026 GCL->getParent()->getGlobalList().insert(GCL, NGV); 2027 NGV->takeName(GCL); 2028 2029 // Nuke the old list, replacing any uses with the new one. 2030 if (!GCL->use_empty()) { 2031 Constant *V = NGV; 2032 if (V->getType() != GCL->getType()) 2033 V = ConstantExpr::getBitCast(V, GCL->getType()); 2034 GCL->replaceAllUsesWith(V); 2035 } 2036 GCL->eraseFromParent(); 2037 2038 if (Ctors.size()) 2039 return NGV; 2040 else 2041 return 0; 2042} 2043 2044 2045static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, 2046 Value *V) { 2047 if (Constant *CV = dyn_cast<Constant>(V)) return CV; 2048 Constant *R = ComputedValues[V]; 2049 assert(R && "Reference to an uncomputed value!"); 2050 return R; 2051} 2052 2053/// isSimpleEnoughPointerToCommit - Return true if this constant is simple 2054/// enough for us to understand. In particular, if it is a cast of something, 2055/// we punt. We basically just support direct accesses to globals and GEP's of 2056/// globals. This should be kept up to date with CommitValueTo. 2057static bool isSimpleEnoughPointerToCommit(Constant *C) { 2058 // Conservatively, avoid aggregate types. This is because we don't 2059 // want to worry about them partially overlapping other stores. 2060 if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) 2061 return false; 2062 2063 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) 2064 // Do not allow weak/linkonce/dllimport/dllexport linkage or 2065 // external globals. 2066 return GV->hasDefinitiveInitializer(); 2067 2068 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) 2069 // Handle a constantexpr gep. 2070 if (CE->getOpcode() == Instruction::GetElementPtr && 2071 isa<GlobalVariable>(CE->getOperand(0)) && 2072 cast<GEPOperator>(CE)->isInBounds()) { 2073 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2074 // Do not allow weak/linkonce/dllimport/dllexport linkage or 2075 // external globals. 2076 if (!GV->hasDefinitiveInitializer()) 2077 return false; 2078 2079 // The first index must be zero. 2080 ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin())); 2081 if (!CI || !CI->isZero()) return false; 2082 2083 // The remaining indices must be compile-time known integers within the 2084 // notional bounds of the corresponding static array types. 2085 if (!CE->isGEPWithNoNotionalOverIndexing()) 2086 return false; 2087 2088 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2089 } 2090 return false; 2091} 2092 2093/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global 2094/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. 2095/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. 2096static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, 2097 ConstantExpr *Addr, unsigned OpNo) { 2098 // Base case of the recursion. 2099 if (OpNo == Addr->getNumOperands()) { 2100 assert(Val->getType() == Init->getType() && "Type mismatch!"); 2101 return Val; 2102 } 2103 2104 std::vector<Constant*> Elts; 2105 if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { 2106 2107 // Break up the constant into its elements. 2108 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { 2109 for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) 2110 Elts.push_back(cast<Constant>(*i)); 2111 } else if (isa<ConstantAggregateZero>(Init)) { 2112 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2113 Elts.push_back(Constant::getNullValue(STy->getElementType(i))); 2114 } else if (isa<UndefValue>(Init)) { 2115 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2116 Elts.push_back(UndefValue::get(STy->getElementType(i))); 2117 } else { 2118 llvm_unreachable("This code is out of sync with " 2119 " ConstantFoldLoadThroughGEPConstantExpr"); 2120 } 2121 2122 // Replace the element that we are supposed to. 2123 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); 2124 unsigned Idx = CU->getZExtValue(); 2125 assert(Idx < STy->getNumElements() && "Struct index out of range!"); 2126 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); 2127 2128 // Return the modified struct. 2129 return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), 2130 STy->isPacked()); 2131 } else { 2132 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); 2133 const SequentialType *InitTy = cast<SequentialType>(Init->getType()); 2134 2135 uint64_t NumElts; 2136 if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) 2137 NumElts = ATy->getNumElements(); 2138 else 2139 NumElts = cast<VectorType>(InitTy)->getNumElements(); 2140 2141 2142 // Break up the array into elements. 2143 if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { 2144 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 2145 Elts.push_back(cast<Constant>(*i)); 2146 } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { 2147 for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) 2148 Elts.push_back(cast<Constant>(*i)); 2149 } else if (isa<ConstantAggregateZero>(Init)) { 2150 Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); 2151 } else { 2152 assert(isa<UndefValue>(Init) && "This code is out of sync with " 2153 " ConstantFoldLoadThroughGEPConstantExpr"); 2154 Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); 2155 } 2156 2157 assert(CI->getZExtValue() < NumElts); 2158 Elts[CI->getZExtValue()] = 2159 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); 2160 2161 if (Init->getType()->isArrayTy()) 2162 return ConstantArray::get(cast<ArrayType>(InitTy), Elts); 2163 else 2164 return ConstantVector::get(&Elts[0], Elts.size()); 2165 } 2166} 2167 2168/// CommitValueTo - We have decided that Addr (which satisfies the predicate 2169/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 2170static void CommitValueTo(Constant *Val, Constant *Addr) { 2171 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 2172 assert(GV->hasInitializer()); 2173 GV->setInitializer(Val); 2174 return; 2175 } 2176 2177 ConstantExpr *CE = cast<ConstantExpr>(Addr); 2178 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2179 GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); 2180} 2181 2182/// ComputeLoadResult - Return the value that would be computed by a load from 2183/// P after the stores reflected by 'memory' have been performed. If we can't 2184/// decide, return null. 2185static Constant *ComputeLoadResult(Constant *P, 2186 const DenseMap<Constant*, Constant*> &Memory) { 2187 // If this memory location has been recently stored, use the stored value: it 2188 // is the most up-to-date. 2189 DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); 2190 if (I != Memory.end()) return I->second; 2191 2192 // Access it. 2193 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { 2194 if (GV->hasDefinitiveInitializer()) 2195 return GV->getInitializer(); 2196 return 0; 2197 } 2198 2199 // Handle a constantexpr getelementptr. 2200 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) 2201 if (CE->getOpcode() == Instruction::GetElementPtr && 2202 isa<GlobalVariable>(CE->getOperand(0))) { 2203 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2204 if (GV->hasDefinitiveInitializer()) 2205 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2206 } 2207 2208 return 0; // don't know how to evaluate. 2209} 2210 2211/// EvaluateFunction - Evaluate a call to function F, returning true if 2212/// successful, false if we can't evaluate it. ActualArgs contains the formal 2213/// arguments for the function. 2214static bool EvaluateFunction(Function *F, Constant *&RetVal, 2215 const SmallVectorImpl<Constant*> &ActualArgs, 2216 std::vector<Function*> &CallStack, 2217 DenseMap<Constant*, Constant*> &MutatedMemory, 2218 std::vector<GlobalVariable*> &AllocaTmps) { 2219 // Check to see if this function is already executing (recursion). If so, 2220 // bail out. TODO: we might want to accept limited recursion. 2221 if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) 2222 return false; 2223 2224 CallStack.push_back(F); 2225 2226 /// Values - As we compute SSA register values, we store their contents here. 2227 DenseMap<Value*, Constant*> Values; 2228 2229 // Initialize arguments to the incoming values specified. 2230 unsigned ArgNo = 0; 2231 for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; 2232 ++AI, ++ArgNo) 2233 Values[AI] = ActualArgs[ArgNo]; 2234 2235 /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, 2236 /// we can only evaluate any one basic block at most once. This set keeps 2237 /// track of what we have executed so we can detect recursive cases etc. 2238 SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; 2239 2240 // CurInst - The current instruction we're evaluating. 2241 BasicBlock::iterator CurInst = F->begin()->begin(); 2242 2243 // This is the main evaluation loop. 2244 while (1) { 2245 Constant *InstResult = 0; 2246 2247 if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { 2248 if (SI->isVolatile()) return false; // no volatile accesses. 2249 Constant *Ptr = getVal(Values, SI->getOperand(1)); 2250 if (!isSimpleEnoughPointerToCommit(Ptr)) 2251 // If this is too complex for us to commit, reject it. 2252 return false; 2253 Constant *Val = getVal(Values, SI->getOperand(0)); 2254 MutatedMemory[Ptr] = Val; 2255 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { 2256 InstResult = ConstantExpr::get(BO->getOpcode(), 2257 getVal(Values, BO->getOperand(0)), 2258 getVal(Values, BO->getOperand(1))); 2259 } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { 2260 InstResult = ConstantExpr::getCompare(CI->getPredicate(), 2261 getVal(Values, CI->getOperand(0)), 2262 getVal(Values, CI->getOperand(1))); 2263 } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { 2264 InstResult = ConstantExpr::getCast(CI->getOpcode(), 2265 getVal(Values, CI->getOperand(0)), 2266 CI->getType()); 2267 } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { 2268 InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), 2269 getVal(Values, SI->getOperand(1)), 2270 getVal(Values, SI->getOperand(2))); 2271 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { 2272 Constant *P = getVal(Values, GEP->getOperand(0)); 2273 SmallVector<Constant*, 8> GEPOps; 2274 for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); 2275 i != e; ++i) 2276 GEPOps.push_back(getVal(Values, *i)); 2277 InstResult = cast<GEPOperator>(GEP)->isInBounds() ? 2278 ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : 2279 ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); 2280 } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { 2281 if (LI->isVolatile()) return false; // no volatile accesses. 2282 InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), 2283 MutatedMemory); 2284 if (InstResult == 0) return false; // Could not evaluate load. 2285 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { 2286 if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 2287 const Type *Ty = AI->getType()->getElementType(); 2288 AllocaTmps.push_back(new GlobalVariable(Ty, false, 2289 GlobalValue::InternalLinkage, 2290 UndefValue::get(Ty), 2291 AI->getName())); 2292 InstResult = AllocaTmps.back(); 2293 } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { 2294 2295 // Debug info can safely be ignored here. 2296 if (isa<DbgInfoIntrinsic>(CI)) { 2297 ++CurInst; 2298 continue; 2299 } 2300 2301 // Cannot handle inline asm. 2302 if (isa<InlineAsm>(CI->getCalledValue())) return false; 2303 2304 // Resolve function pointers. 2305 Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue())); 2306 if (!Callee) return false; // Cannot resolve. 2307 2308 SmallVector<Constant*, 8> Formals; 2309 CallSite CS(CI); 2310 for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); 2311 i != e; ++i) 2312 Formals.push_back(getVal(Values, *i)); 2313 2314 if (Callee->isDeclaration()) { 2315 // If this is a function we can constant fold, do it. 2316 if (Constant *C = ConstantFoldCall(Callee, Formals.data(), 2317 Formals.size())) { 2318 InstResult = C; 2319 } else { 2320 return false; 2321 } 2322 } else { 2323 if (Callee->getFunctionType()->isVarArg()) 2324 return false; 2325 2326 Constant *RetVal; 2327 // Execute the call, if successful, use the return value. 2328 if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, 2329 MutatedMemory, AllocaTmps)) 2330 return false; 2331 InstResult = RetVal; 2332 } 2333 } else if (isa<TerminatorInst>(CurInst)) { 2334 BasicBlock *NewBB = 0; 2335 if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { 2336 if (BI->isUnconditional()) { 2337 NewBB = BI->getSuccessor(0); 2338 } else { 2339 ConstantInt *Cond = 2340 dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); 2341 if (!Cond) return false; // Cannot determine. 2342 2343 NewBB = BI->getSuccessor(!Cond->getZExtValue()); 2344 } 2345 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { 2346 ConstantInt *Val = 2347 dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); 2348 if (!Val) return false; // Cannot determine. 2349 NewBB = SI->getSuccessor(SI->findCaseValue(Val)); 2350 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { 2351 Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts(); 2352 if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) 2353 NewBB = BA->getBasicBlock(); 2354 else 2355 return false; // Cannot determine. 2356 } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { 2357 if (RI->getNumOperands()) 2358 RetVal = getVal(Values, RI->getOperand(0)); 2359 2360 CallStack.pop_back(); // return from fn. 2361 return true; // We succeeded at evaluating this ctor! 2362 } else { 2363 // invoke, unwind, unreachable. 2364 return false; // Cannot handle this terminator. 2365 } 2366 2367 // Okay, we succeeded in evaluating this control flow. See if we have 2368 // executed the new block before. If so, we have a looping function, 2369 // which we cannot evaluate in reasonable time. 2370 if (!ExecutedBlocks.insert(NewBB)) 2371 return false; // looped! 2372 2373 // Okay, we have never been in this block before. Check to see if there 2374 // are any PHI nodes. If so, evaluate them with information about where 2375 // we came from. 2376 BasicBlock *OldBB = CurInst->getParent(); 2377 CurInst = NewBB->begin(); 2378 PHINode *PN; 2379 for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) 2380 Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); 2381 2382 // Do NOT increment CurInst. We know that the terminator had no value. 2383 continue; 2384 } else { 2385 // Did not know how to evaluate this! 2386 return false; 2387 } 2388 2389 if (!CurInst->use_empty()) 2390 Values[CurInst] = InstResult; 2391 2392 // Advance program counter. 2393 ++CurInst; 2394 } 2395} 2396 2397/// EvaluateStaticConstructor - Evaluate static constructors in the function, if 2398/// we can. Return true if we can, false otherwise. 2399static bool EvaluateStaticConstructor(Function *F) { 2400 /// MutatedMemory - For each store we execute, we update this map. Loads 2401 /// check this to get the most up-to-date value. If evaluation is successful, 2402 /// this state is committed to the process. 2403 DenseMap<Constant*, Constant*> MutatedMemory; 2404 2405 /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable 2406 /// to represent its body. This vector is needed so we can delete the 2407 /// temporary globals when we are done. 2408 std::vector<GlobalVariable*> AllocaTmps; 2409 2410 /// CallStack - This is used to detect recursion. In pathological situations 2411 /// we could hit exponential behavior, but at least there is nothing 2412 /// unbounded. 2413 std::vector<Function*> CallStack; 2414 2415 // Call the function. 2416 Constant *RetValDummy; 2417 bool EvalSuccess = EvaluateFunction(F, RetValDummy, 2418 SmallVector<Constant*, 0>(), CallStack, 2419 MutatedMemory, AllocaTmps); 2420 if (EvalSuccess) { 2421 // We succeeded at evaluation: commit the result. 2422 DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2423 << F->getName() << "' to " << MutatedMemory.size() 2424 << " stores.\n"); 2425 for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), 2426 E = MutatedMemory.end(); I != E; ++I) 2427 CommitValueTo(I->second, I->first); 2428 } 2429 2430 // At this point, we are done interpreting. If we created any 'alloca' 2431 // temporaries, release them now. 2432 while (!AllocaTmps.empty()) { 2433 GlobalVariable *Tmp = AllocaTmps.back(); 2434 AllocaTmps.pop_back(); 2435 2436 // If there are still users of the alloca, the program is doing something 2437 // silly, e.g. storing the address of the alloca somewhere and using it 2438 // later. Since this is undefined, we'll just make it be null. 2439 if (!Tmp->use_empty()) 2440 Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); 2441 delete Tmp; 2442 } 2443 2444 return EvalSuccess; 2445} 2446 2447 2448 2449/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. 2450/// Return true if anything changed. 2451bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { 2452 std::vector<Function*> Ctors = ParseGlobalCtors(GCL); 2453 bool MadeChange = false; 2454 if (Ctors.empty()) return false; 2455 2456 // Loop over global ctors, optimizing them when we can. 2457 for (unsigned i = 0; i != Ctors.size(); ++i) { 2458 Function *F = Ctors[i]; 2459 // Found a null terminator in the middle of the list, prune off the rest of 2460 // the list. 2461 if (F == 0) { 2462 if (i != Ctors.size()-1) { 2463 Ctors.resize(i+1); 2464 MadeChange = true; 2465 } 2466 break; 2467 } 2468 2469 // We cannot simplify external ctor functions. 2470 if (F->empty()) continue; 2471 2472 // If we can evaluate the ctor at compile time, do. 2473 if (EvaluateStaticConstructor(F)) { 2474 Ctors.erase(Ctors.begin()+i); 2475 MadeChange = true; 2476 --i; 2477 ++NumCtorsEvaluated; 2478 continue; 2479 } 2480 } 2481 2482 if (!MadeChange) return false; 2483 2484 GCL = InstallGlobalCtors(GCL, Ctors); 2485 return true; 2486} 2487 2488bool GlobalOpt::OptimizeGlobalAliases(Module &M) { 2489 bool Changed = false; 2490 2491 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 2492 I != E;) { 2493 Module::alias_iterator J = I++; 2494 // Aliases without names cannot be referenced outside this module. 2495 if (!J->hasName() && !J->isDeclaration()) 2496 J->setLinkage(GlobalValue::InternalLinkage); 2497 // If the aliasee may change at link time, nothing can be done - bail out. 2498 if (J->mayBeOverridden()) 2499 continue; 2500 2501 Constant *Aliasee = J->getAliasee(); 2502 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2503 Target->removeDeadConstantUsers(); 2504 bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); 2505 2506 // Make all users of the alias use the aliasee instead. 2507 if (!J->use_empty()) { 2508 J->replaceAllUsesWith(Aliasee); 2509 ++NumAliasesResolved; 2510 Changed = true; 2511 } 2512 2513 // If the alias is externally visible, we may still be able to simplify it. 2514 if (!J->hasLocalLinkage()) { 2515 // If the aliasee has internal linkage, give it the name and linkage 2516 // of the alias, and delete the alias. This turns: 2517 // define internal ... @f(...) 2518 // @a = alias ... @f 2519 // into: 2520 // define ... @a(...) 2521 if (!Target->hasLocalLinkage()) 2522 continue; 2523 2524 // Do not perform the transform if multiple aliases potentially target the 2525 // aliasee. This check also ensures that it is safe to replace the section 2526 // and other attributes of the aliasee with those of the alias. 2527 if (!hasOneUse) 2528 continue; 2529 2530 // Give the aliasee the name, linkage and other attributes of the alias. 2531 Target->takeName(J); 2532 Target->setLinkage(J->getLinkage()); 2533 Target->GlobalValue::copyAttributesFrom(J); 2534 } 2535 2536 // Delete the alias. 2537 M.getAliasList().erase(J); 2538 ++NumAliasesRemoved; 2539 Changed = true; 2540 } 2541 2542 return Changed; 2543} 2544 2545bool GlobalOpt::runOnModule(Module &M) { 2546 bool Changed = false; 2547 2548 // Try to find the llvm.globalctors list. 2549 GlobalVariable *GlobalCtors = FindGlobalCtors(M); 2550 2551 bool LocalChange = true; 2552 while (LocalChange) { 2553 LocalChange = false; 2554 2555 // Delete functions that are trivially dead, ccc -> fastcc 2556 LocalChange |= OptimizeFunctions(M); 2557 2558 // Optimize global_ctors list. 2559 if (GlobalCtors) 2560 LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); 2561 2562 // Optimize non-address-taken globals. 2563 LocalChange |= OptimizeGlobalVars(M); 2564 2565 // Resolve aliases, when possible. 2566 LocalChange |= OptimizeGlobalAliases(M); 2567 Changed |= LocalChange; 2568 } 2569 2570 // TODO: Move all global ctors functions to the end of the module for code 2571 // layout. 2572 2573 return Changed; 2574} 2575