//===- GlobalOpt.cpp - Optimize Global Variables -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");

namespace {
  struct GlobalStatus;
  struct GlobalOpt : public ModulePass {
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();
    }
    static char ID; // Pass identification, replacement for typeid
    GlobalOpt() : ModulePass(ID) {
      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
    }

    bool runOnModule(Module &M);

  private:
    GlobalVariable *FindGlobalCtors(Module &M);
    bool OptimizeFunctions(Module &M);
    bool OptimizeGlobalVars(Module &M);
    bool OptimizeGlobalAliases(Module &M);
    bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
    bool ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI);
    bool ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI,
                               const SmallPtrSet<const PHINode*, 16> &PHIUsers,
                               const GlobalStatus &GS);
    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);

    TargetData *TD;
    TargetLibraryInfo *TLI;
  };
}

char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

namespace {

/// GlobalStatus - As we analyze each global, keep track of some information
/// about it.  If we find out that the address of the global is taken, none of
/// this info will be accurate.
struct GlobalStatus {
  /// isCompared - True if the global's address is used in a comparison.
  bool isCompared;

  /// isLoaded - True if the global is ever loaded.  If the global isn't ever
  /// loaded it can be deleted.
  bool isLoaded;

  /// StoredType - Keep track of what stores to the global look like.
  ///
  enum StoredType {
    /// NotStored - There is no store to this global.  It can thus be marked
    /// constant.
    NotStored,

    /// isInitializerStored - This global is stored to, but the only thing
    /// stored is the constant it was initialized with.  This is only tracked
    /// for scalar globals.
    isInitializerStored,

    /// isStoredOnce - This global is stored to, but only its initializer and
    /// one other value is ever stored to it.  If this global isStoredOnce, we
    /// track the value stored to it in StoredOnceValue below.  This is only
    /// tracked for scalar globals.
    isStoredOnce,

    /// isStored - This global is stored to by multiple values or something
    /// else that we cannot track.
    isStored
  } StoredType;

  /// StoredOnceValue - If only one value (besides the initializer constant) is
  /// ever stored to this global, keep track of what value it is.
  Value *StoredOnceValue;

  /// AccessingFunction/HasMultipleAccessingFunctions - These start out
  /// null/false.  When the first accessing function is noticed, it is recorded.
  /// When a second different accessing function is noticed,
  /// HasMultipleAccessingFunctions is set to true.
  const Function *AccessingFunction;
  bool HasMultipleAccessingFunctions;

  /// HasNonInstructionUser - Set to true if this global has a user that is not
  /// an instruction (e.g. a constant expr or GV initializer).
  bool HasNonInstructionUser;

  /// HasPHIUser - Set to true if this global has a user that is a PHI node.
  bool HasPHIUser;
  /// Ordering - Set to the strongest atomic ordering requirement.
  AtomicOrdering Ordering;

  GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
                   StoredOnceValue(0), AccessingFunction(0),
                   HasMultipleAccessingFunctions(false),
                   HasNonInstructionUser(false), HasPHIUser(false),
                   Ordering(NotAtomic) {}
};

}

/// StrongerOrdering - Return the stronger of the two orderings.  If the two
/// orderings are acquire and release, then return AcquireRelease.
///
static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
  if (X == Acquire && Y == Release) return AcquireRelease;
  if (Y == Acquire && X == Release) return AcquireRelease;
  return (AtomicOrdering)std::max(X, Y);
}

/// SafeToDestroyConstant - It is safe to destroy a constant iff it is itself
/// only used by other constants.  Note that constants cannot be cyclic, so
/// this test is pretty easy to implement recursively.
///
static bool SafeToDestroyConstant(const Constant *C) {
  if (isa<GlobalValue>(C)) return false;

  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
       ++UI)
    if (const Constant *CU = dyn_cast<Constant>(*UI)) {
      if (!SafeToDestroyConstant(CU)) return false;
    } else
      return false;
  return true;
}


/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
/// structure.  If the global has its address taken, return true to indicate we
/// can't do anything with it.
///
static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
                          SmallPtrSet<const PHINode*, 16> &PHIUsers) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const User *U = *UI;
    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      GS.HasNonInstructionUser = true;

      // If the result of the constantexpr isn't a pointer type, then we won't
      // know to expect it in various places.  Just reject early.
      if (!isa<PointerType>(CE->getType())) return true;

      if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
      if (!GS.HasMultipleAccessingFunctions) {
        const Function *F = I->getParent()->getParent();
        if (GS.AccessingFunction == 0)
          GS.AccessingFunction = F;
        else if (GS.AccessingFunction != F)
          GS.HasMultipleAccessingFunctions = true;
      }
      if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
        GS.isLoaded = true;
        // Don't hack on volatile loads.
        if (LI->isVolatile()) return true;
        GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
      } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Don't allow a store OF the address, only stores TO the address.
        if (SI->getOperand(0) == V) return true;

        // Don't hack on volatile stores.
        if (SI->isVolatile()) return true;
        GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());

        // If this is a direct store to the global (i.e., the global is a scalar
        // value, not an aggregate), keep more specific information about
        // stores.
        if (GS.StoredType != GlobalStatus::isStored) {
          if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
                                                           SI->getOperand(1))) {
            Value *StoredVal = SI->getOperand(0);
            if (StoredVal == GV->getInitializer()) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (isa<LoadInst>(StoredVal) &&
                       cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
              GS.StoredType = GlobalStatus::isStoredOnce;
              GS.StoredOnceValue = StoredVal;
            } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
                       GS.StoredOnceValue == StoredVal) {
              // noop.
            } else {
              GS.StoredType = GlobalStatus::isStored;
            }
          } else {
            GS.StoredType = GlobalStatus::isStored;
          }
        }
      } else if (isa<BitCastInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (isa<GetElementPtrInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (isa<SelectInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes we can check just like select or GEP instructions, but we
        // have to be careful about infinite recursion.
        if (PHIUsers.insert(PN))  // Not already visited.
          if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
        GS.HasPHIUser = true;
      } else if (isa<CmpInst>(I)) {
        GS.isCompared = true;
      } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
        if (MTI->isVolatile()) return true;
        if (MTI->getArgOperand(0) == V)
          GS.StoredType = GlobalStatus::isStored;
        if (MTI->getArgOperand(1) == V)
          GS.isLoaded = true;
      } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
        assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
        if (MSI->isVolatile()) return true;
        GS.StoredType = GlobalStatus::isStored;
      } else {
        return true;  // Any other non-load instruction might take address!
      }
    } else if (const Constant *C = dyn_cast<Constant>(U)) {
      GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off of here.
      if (!SafeToDestroyConstant(C))
        return true;
    } else {
      GS.HasNonInstructionUser = true;
      // Otherwise must be some other user.
      return true;
    }
  }

  return false;
}

/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
/// as a root?  If so, we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(cast<PointerType>(GV->getType())->getElementType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
      default: break;
      case Type::PointerTyID: return true;
      case Type::ArrayTyID:
      case Type::VectorTyID: {
        SequentialType *STy = cast<SequentialType>(Ty);
        Types.push_back(STy->getElementType());
        break;
      }
      case Type::StructTyID: {
        StructType *STy = cast<StructType>(Ty);
        if (STy->isOpaque()) return true;
        for (StructType::element_iterator I = STy->element_begin(),
                 E = STy->element_end(); I != E; ++I) {
          Type *InnerTy = *I;
          if (isa<PointerType>(InnerTy)) return true;
          if (isa<CompositeType>(InnerTy))
            Types.push_back(InnerTy);
        }
        break;
      }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, TLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      // We only know how to walk through single-operand instructions (such as
      // casts) and all-constant GEPs; anything else ends the chain.
      return false;
    }

    V = I->getOperand(0);
  } while (1);
}

/// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
/// of the global and clean up any that obviously don't assign the global a
/// value that isn't dynamically allocated.
///
static bool CleanupPointerRootUsers(GlobalVariable *GV,
                                    const TargetLibraryInfo *TLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to whitelist the
  // memory pointed to by globals at exit.  This is popular because it also
  // solves another problem where the main thread of a C++ program may shut down
  // before other threads that are still expecting to use those globals.  To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  // Constants can't be pointers to dynamically allocated memory.
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E;) {
    User *U = *UI++;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      if (SafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        Dead.clear();
        CleanupPointerRootUsers(GV, TLI);
        return true;
      }
    }
  }

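  // Now that the use-list walk is done, remove each store whose single-use
  // computation chain is safe to delete, then erase the chain itself, walking
  // up through operand 0 until the allocation (or a non-instruction) is hit.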
  for (unsigned i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, TLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (1);
      I->eraseFromParent();
    }
  }

  return Changed;
}

/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
/// users of the global, cleaning up the obvious ones.  This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft.  This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                       TargetData *TD, TargetLibraryInfo *TLI) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
    User *U = *UI++;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = 0;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
      } else if (CE->getOpcode() == Instruction::BitCast &&
                 CE->getType()->isPointerTy()) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = 0;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE =
          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);

        // If the initializer is an all-null value and we have an inbounds GEP,
        // we already know what the result of any load from that GEP is.
        // TODO: Handle splats.
        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
          SubInit = Constant::getNullValue(GEP->getType()->getElementType());
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (SafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        CleanupConstantGlobalUsers(V, Init, TD, TLI);
        return true;
      }
    }
  }
  return Changed;
}

/// isSafeSROAElementUse - Return true if the specified instruction is a safe
/// user of a derived expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return SafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (GEPI == 0) return false;

  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
       I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}


/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
/// Look at it and its uses and decide whether it is safe to SROA this global.
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    uint64_t NumElements = AT->getNumElements();
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
    //
    if (Idx->getZExtValue() >= NumElements)
      return false;

    // We cannot scalar repl this level of the array unless any array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI; // Skip array index.
         GEPI != E;
         ++GEPI) {
      uint64_t NumElements;
      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
        NumElements = SubArrayTy->getNumElements();
      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
        NumElements = SubVectorTy->getNumElements();
      else {
        assert((*GEPI)->isStructTy() &&
               "Indexed GEP type is not array, vector, or struct!");
        continue;
      }

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
        return false;
    }
  }

  for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}

/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI) {
    if (!IsUserOfGlobalSafeForSRA(*UI, GV))
      return false;
  }
  return true;
}


/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable.  This opens the door for other optimizations by exposing the
/// behavior of the program in a more fine-grained way.  We have determined that
/// this transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
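///
/// For example (illustrative only), a global such as
///     @s = internal global { i32, float } { i32 1, float 2.0 }
/// is split into
///     @s.0 = internal global i32 1
///     @s.1 = internal global float 2.0
/// and every 'gep @s, 0, N, ...' user is rewritten to address @s.N directly.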
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return 0;

  assert(GV->hasLocalLinkage() && !GV->isConstant());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = TD.getABITypeAlignment(GV->getType());

  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    NewGlobals.reserve(STy->getNumElements());
    const StructLayout &Layout = *TD.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);
    }
  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = 0;
    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
      NumElements = ATy->getNumElements();
    else
      NumElements = cast<VectorType>(STy)->getNumElements();

    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return 0; // It's not worth it.
    NewGlobals.reserve(NumElements);

    uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
    unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
    }
  }

  if (NewGlobals.empty())
    return 0;

  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);

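  // The leading zero index used when rebuilding shorter GEPs over the new,
  // smaller globals.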
  Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->use_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore the first index operand, which must be zero or else the program
    // is quite broken (undefined).  Get the second operand, which is the
    // structure or array index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
                                           GEPI->getName()+"."+Twine(Val),GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
}

/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null.  PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const User *U = *UI;

    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(UI->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }
  }
  return true;
}

/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null.  Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI) {
    const User *U = *UI;

    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  }
  return true;
}

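/// OptimizeAwayTrappingUsesOfValue - Rewrite users of V (which is known to be
/// non-null at this point) to use NewV instead: loads and stores are
/// repointed, indirect calls through V become direct calls to NewV, and casts
/// and all-constant GEPs are folded into equivalent constant expressions.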
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->use_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // If all of the GEP's indices are constant, rewrite its users against
      // the equivalent constant-expression GEP of NewV.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
                          ConstantExpr::getGetElementPtr(NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}


/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
/// value stored into it.  If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
                                            TargetData *TD,
                                            TargetLibraryInfo *TLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of the stored value.
  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser)) &&
             "Only expect loads and stores!");
    }
  }

  if (Changed) {
    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, TLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, 0, TD, TLI);
    }
    if (GV->use_empty()) {
      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V,
                                TargetData *TD, TargetLibraryInfo *TLI) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        I->eraseFromParent();
      }
}

/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
/// variable, and transforms the program as if it always contained the result of
/// the specified malloc.  Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
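///
/// For example (illustrative only), given
///     @g = internal global i32* null
///     ...
///     %mem = call i8* @malloc(i64 4)
///     %p = bitcast i8* %mem to i32*
///     store i32* %p, i32** @g
/// the malloc is replaced by a new global '@g.body' of type i32, loads of @g
/// yield its address directly, and an '@g.init' i1 flag is introduced only if
/// the program compares the loaded pointer against null.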
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     CallInst *CI,
                                                     Type *AllocTy,
                                                     ConstantInt *NElements,
                                                     TargetData *TD,
                                                     TargetLibraryInfo *TLI) {
  DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable is of an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory are
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                             GlobalType, false,
                                             GlobalValue::InternalLinkage,
                                             UndefValue::get(GlobalType),
                                             GV->getName()+".body",
                                             GV,
                                             GV->getThreadLocalMode());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global.  Update
  // other users to use the global as well.
  BitCastInst *TheBC = 0;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->use_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (TheBC == 0)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

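  // This is the value loads of GV will be rewritten to use: the new global,
  // bitcast to GV's element type if the types differ.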
  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
    RepValue = ConstantExpr::getBitCast(RepValue,
                                        GV->getType()->getElementType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
                    SI->getOrdering(), SI->getSynchScope(), SI);
      SI->eraseFromParent();
      continue;
    }

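    // Otherwise the user must be a load of GV; rewrite each use of the loaded
    // pointer, turning comparisons against null into tests of the init flag.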
    LoadInst *LI = cast<LoadInst>(GV->use_back());
    while (!LI->use_empty()) {
      Use &LoadUse = LI->use_begin().getUse();
      if (!isa<ICmpInst>(LoadUse.getUser())) {
        LoadUse = RepValue;
        continue;
      }

      ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
                               LI->getOrdering(), LI->getSynchScope(),
                               LI->isUnordered() ? (Instruction*)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->use_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To enable further optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, TD, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, TD, TLI);

  return NewGV;
}

/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
/// to make sure that there are no complex uses of V.  We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                                      const GlobalVariable *GV,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    const Instruction *Inst = cast<Instruction>(*UI);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN))
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere.  Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer.  Further, delete the store into
/// GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->use_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}

/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const Instruction *User = cast<Instruction>(*UI);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(User)) {
      if (!LoadUsingPHIsPerLoad.insert(PN))
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN))
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}


1295/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
1296/// GV are simple enough to perform HeapSRA, return true.
1297static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
1298                                                    Instruction *StoredVal) {
1299  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
1300  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
1301  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
1302       UI != E; ++UI)
1303    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
1304      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
1305                                          LoadUsingPHIsPerLoad))
1306        return false;
1307      LoadUsingPHIsPerLoad.clear();
1308    }
1309
1310  // If we reach here, we know that all uses of the loads and transitive uses
1311  // (through PHI nodes) are simple enough to transform.  However, we don't know
1312  // that all inputs the to the PHI nodes are in the same equivalence sets.
1313  // Check to verify that all operands of the PHIs are either PHIS that can be
1314  // transformed, loads from GV, or MI itself.
  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
       , E = LoadUsingPHIs.end(); I != E; ++I) {
    const PHINode *PN = *I;
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;
      // TODO: Should undef or null incoming values be accepted here?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}

static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global.  Just create
    // a new Load of the scalarized global.
    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                           InsertedScalarizedValues,
                                           PHIsToRewrite),
                          LI->getName()+".f"+Twine(FieldNo), LI);
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
    // field.
    StructType *ST =
      cast<StructType>(cast<PointerType>(PN->getType())->getElementType());

    PHINode *NewPN =
     PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
                     PN->getNumIncomingValues(),
                     PN->getName()+".f"+Twine(FieldNo), PN);
    Result = NewPN;
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  } else {
    llvm_unreachable("Unknown usable value");
  }

  return FieldVals[FieldNo] = Result;
}

/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
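///
/// E.g. (an illustrative sketch) an "icmp eq %pair* %P, null" user becomes an
/// icmp of the field-0 pointer against null, which is equivalent because the
/// per-field pointers are null exactly when the original pointer was.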
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes.  This will lazily create the
  // PHIs that are needed for individual elements.  Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                              std::vector<Value*>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}

/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.
/// Load is a value loaded from the global.  Eliminate all uses of Load, making
/// them use the per-field globals instead.  All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
       UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
/// it up into multiple allocations of arrays of the fields.
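///
/// Roughly (an illustrative sketch; @G and the field types are made up):
///   %M = malloc [n x { i32, double }]
///   store { i32, double }* %M, { i32, double }** @G
/// becomes one malloc per field, stored into per-field globals:
///   @G.f0 = internal global i32* null       ; holds malloc [n x i32]
///   @G.f1 = internal global double* null    ; holds malloc [n x double]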
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                                            Value *NElems, TargetData *TD,
                                            const TargetLibraryInfo *TLI) {
  DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
  Type *MAT = getMallocAllocatedType(CI, TLI);
  StructType *STy = cast<StructType>(MAT);

  // There is guaranteed to be at least one use of the malloc (storing
  // it into GV).  If there are other uses, change them to be uses of
  // the global to simplify later code.  This also deletes the store
  // into GV.
  ReplaceUsesOfMallocWithGlobal(CI, GV);

  // Okay, at this point, there are no users of the malloc.  Insert N
  // new mallocs at the same place as CI, and N globals.
  std::vector<Value*> FieldGlobals;
  std::vector<Value*> FieldMallocs;

  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
    Type *FieldTy = STy->getElementType(FieldNo);
    PointerType *PFieldTy = PointerType::getUnqual(FieldTy);

    GlobalVariable *NGV =
      new GlobalVariable(*GV->getParent(),
                         PFieldTy, false, GlobalValue::InternalLinkage,
                         Constant::getNullValue(PFieldTy),
                         GV->getName() + ".f" + Twine(FieldNo), GV,
                         GV->getThreadLocalMode());
    FieldGlobals.push_back(NGV);

    unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
    if (StructType *ST = dyn_cast<StructType>(FieldTy))
      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
    Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                        ConstantInt::get(IntPtrTy, TypeSize),
                                        NElems, 0,
                                        CI->getName() + ".f" + Twine(FieldNo));
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, CI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
  // fails.  In the original code, malloc failing would set the result pointer
  // of malloc to null.  In this case, some mallocs could succeed and others
  // could fail.  As such, we emit code that looks like this:
  //    F0 = malloc(field0)
  //    F1 = malloc(field1)
  //    F2 = malloc(field2)
  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
  //      if (F0) { free(F0); F0 = 0; }
  //      if (F1) { free(F1); F1 = 0; }
  //      if (F2) { free(F2); F2 = 0; }
  //    }
  // The malloc can also fail if its argument is too large; the "isneg" check
  // below conservatively treats a negative size argument as a failed malloc.
  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
                                  ConstantZero, "isneg");
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                             Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = CI->getParent();
  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");

  // Create the block to check the first condition.  Put all these blocks at the
  // end of the function as they are unlikely to be executed.
  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                "malloc_ret_null",
                                                OrigBB->getParent());

  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
  // branch on RunningOr.
  OrigBB->getTerminator()->eraseFromParent();
  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);

  // Within the NullPtrBlock, we need to emit a comparison and branch for each
  // pointer, because some may be null while others are not.
  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                              Constant::getNullValue(GVVal->getType()));
    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
                                               OrigBB->getParent());
    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
                                               OrigBB->getParent());
    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
                                         Cmp, NullPtrBlock);

    // Fill in FreeBlock.
    CallInst::CreateFree(GVVal, BI);
    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                  FreeBlock);
    BranchInst::Create(NextBlock, FreeBlock);

    NullPtrBlock = NextBlock;
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // CI is no longer needed, remove it.
  CI->eraseFromParent();

  /// InsertedScalarizedValues - As we process loads, if we can't immediately
  /// update all uses of the load, keep track of what scalarized values are
  /// inserted for a given load.
  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
  InsertedScalarizedValues[GV] = FieldGlobals;

  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;

  // Okay, the malloc site is completely handled.  All of the uses of GV are now
  // loads, and all uses of those loads are simple.  Rewrite them to use loads
  // of the per-field globals instead.
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
      continue;
    }

    // Must be a store of null.
    StoreInst *SI = cast<StoreInst>(User);
    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
           "Unexpected heap-sra user!");

    // Insert a store of null into each global.
    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
      PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
      Constant *Null = Constant::getNullValue(PT->getElementType());
      new StoreInst(Null, FieldGlobals[i], SI);
    }
    // Erase the original store.
    SI->eraseFromParent();
  }

  // While we have PHIs that are interesting to rewrite, do it.
  while (!PHIsToRewrite.empty()) {
    PHINode *PN = PHIsToRewrite.back().first;
    unsigned FieldNo = PHIsToRewrite.back().second;
    PHIsToRewrite.pop_back();
    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 && "Already processed this phi");

    // Add all the incoming values.  This can materialize more phis.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
                               PHIsToRewrite);
      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
    }
  }

  // Drop all inter-phi links and any loads that made it this far.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->dropAllReferences();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->dropAllReferences();
  }

  // Delete all the phis and loads now that inter-references are dead.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->eraseFromParent();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->eraseFromParent();
  }

  // The old global is now dead, remove it.
  GV->eraseFromParent();

  ++NumHeapSRA;
  return cast<GlobalVariable>(FieldGlobals[0]);
}

/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored into it that is a malloc
/// or a cast of a malloc.
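///
/// That is, the pattern handled is roughly (an illustrative sketch):
///   %M = call i8* @malloc(i64 %size)
///   store i8* %M, i8** @G                   ; possibly through a bitcast
/// where @G is a pointer global whose initializer is null.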
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                               CallInst *CI,
                                               Type *AllocTy,
                                               AtomicOrdering Ordering,
                                               Module::global_iterator &GVI,
                                               TargetData *TD,
                                               TargetLibraryInfo *TLI) {
  if (!TD)
    return false;

  // If this is a malloc of an abstract type, don't touch it.
  if (!AllocTy->isSized())
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  The check below allows
  // the malloc to be stored into the specified global and to be loaded,
  // icmp'd, and GEP'd: these are all uses we know how to rewrite in terms of
  // the global.
  SmallPtrSet<const PHINode*, 8> PHIs;
  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
    return false;

  // If we have a global that is only initialized with a fixed size malloc,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  // We cannot optimize the malloc if we cannot determine its array size.
  Value *NElems = getMallocArraySize(CI, TD, TLI, true);
  if (!NElems)
    return false;

  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
    // Restrict this transformation to small allocations (currently 2048
    // bytes), as we don't want to introduce an enormous (e.g. 16M) global.
    if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
      return true;
    }

  // If the allocation is an array of structures, consider transforming this
  // into multiple malloc'd arrays, one for each field.  This is basically
  // SRoA for malloc'd memory.

  if (Ordering != NotAtomic)
    return false;

  // If this is an allocation of a fixed size array of structs, analyze as a
  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
      AllocTy = AT->getElementType();

  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
  if (!AllocSTy)
    return false;

  // If the structure has an unreasonable number of fields, leave it
  // alone.
  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {

    // If this is a fixed size array, transform the Malloc to be an alloc of
    // structs.  malloc [100 x struct],1 -> malloc struct, 100
    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
      Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
      unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
                                                   AllocSize, NumElements,
                                                   0, CI->getName());
      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
      CI->replaceAllUsesWith(Cast);
      CI->eraseFromParent();
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
        CI = cast<CallInst>(BCI->getOperand(0));
      else
        CI = cast<CallInst>(Malloc);
    }

    GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
                               TD, TLI);
    return true;
  }

  return false;
}

/// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
/// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                     AtomicOrdering Ordering,
                                     Module::global_iterator &GVI,
                                     TargetData *TD, TargetLibraryInfo *TLI) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue()) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      if (GV->getInitializer()->getType() != SOVC->getType())
        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());

      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
        return true;
    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
      Type *MallocType = getMallocAllocatedType(CI, TLI);
      if (MallocType &&
          TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
                                             TD, TLI))
        return true;
    }
  }

  return false;
}

/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
/// two values ever stored into GV are its initializer and OtherVal.  See if we
/// can shrink the global into a boolean and select between the two values
/// whenever it is used.  This exposes the values to other scalar optimizations.
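///
/// A sketch of the rewrite (with made-up values): for
///   @G = internal global i32 0              ; only 0 and 42 ever stored
/// we create "@G.b = internal global i1 false", rewrite each store to store
/// i1 true/false, and rewrite each load to:
///   %b = load i1* @G.b
///   %v = select i1 %b, i32 42, i32 0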
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  Type *GVElType = GV->getType()->getElementType();

  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
      GVElType->isFloatingPointTy() ||
      GVElType->isPointerTy() || GVElType->isVectorTy())
    return false;

  // Walk the use list of the global to see if all the uses are loads or
  // stores.  If there is anything else, bail out.
  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
    User *U = *I;
    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
      return false;
  }

  DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                             false,
                                             GlobalValue::InternalLinkage,
                                        ConstantInt::getFalse(GV->getContext()),
                                             GV->getName()+".b",
                                             GV->getThreadLocalMode());
  GV->getParent()->getGlobalList().insert(GV, NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
         "No reason to shrink to bool!");

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
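  // (For example, with initializer 0 and stored value 1, "zext i1 %b to i32"
  // reproduces both values without a select.)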
  bool IsOneZero = false;
  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
    IsOneZero = InitVal->isNullValue() && CI->isOne();

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->use_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal)
        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                    StoringOther);
      else {
        // Otherwise, we are storing a previously loaded copy.  To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction.  If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
                                  LI->getOrdering(), LI->getSynchScope(), LI);
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      new StoreInst(StoreVal, NewGV, false, 0,
                    SI->getOrdering(), SI->getSynchScope(), SI);
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
                                   LI->getOrdering(), LI->getSynchScope(), LI);
      Value *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
      NSI->takeName(LI);
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  GV->eraseFromParent();
  return true;
}


/// ProcessGlobal - Analyze the specified global variable and optimize it if
/// possible.  If we make a change, return true.
bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
                              Module::global_iterator &GVI) {
  if (!GV->isDiscardableIfUnused())
    return false;

  // Do more involved optimizations if the global is internal.
  GV->removeDeadConstantUsers();

  if (GV->use_empty()) {
    DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
    GV->eraseFromParent();
    ++NumDeleted;
    return true;
  }

  if (!GV->hasLocalLinkage())
    return false;

  SmallPtrSet<const PHINode*, 16> PHIUsers;
  GlobalStatus GS;

  if (AnalyzeGlobal(GV, GS, PHIUsers))
    return false;

  if (!GS.isCompared && !GV->hasUnnamedAddr()) {
    GV->setUnnamedAddr(true);
    ++NumUnnamed;
  }

  if (GV->isConstant() || !GV->hasInitializer())
    return false;

  return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
}

/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible.  If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
                                      Module::global_iterator &GVI,
                                const SmallPtrSet<const PHINode*, 16> &PHIUsers,
                                      const GlobalStatus &GS) {
  // If this is a first class global and has only one accessing function
  // and this function is main (which we know is not recursive), we can
  // replace the global with a local alloca in this function.
  //
  // NOTE: It doesn't make sense to promote non-single-value types since we
  // are just replacing static memory with stack memory.
  //
  // If the global is in a different address space, don't bring it to the
  // stack.
  if (!GS.HasMultipleAccessingFunctions &&
      GS.AccessingFunction && !GS.HasNonInstructionUser &&
      GV->getType()->getElementType()->isSingleValueType() &&
      GS.AccessingFunction->getName() == "main" &&
      GS.AccessingFunction->hasExternalLinkage() &&
      GV->getType()->getAddressSpace() == 0) {
    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
                                                   ->getEntryBlock().begin());
    Type *ElemTy = GV->getType()->getElementType();
    // FIXME: Pass Global's alignment when globals have alignment
    AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
    if (!isa<UndefValue>(GV->getInitializer()))
      new StoreInst(GV->getInitializer(), Alloca, &FirstI);

    GV->replaceAllUsesWith(Alloca);
    GV->eraseFromParent();
    ++NumLocalized;
    return true;
  }

  // If the global is never loaded (but may be stored to), it is dead.
  // Delete it now.
  if (!GS.isLoaded) {
    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);

    bool Changed;
    if (isLeakCheckerRoot(GV)) {
      // Delete any constant stores to the global.
      Changed = CleanupPointerRootUsers(GV, TLI);
    } else {
      // Delete any stores we can find to the global.  We may not be able to
      // make it completely dead though.
      Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
    }

    // If the global is dead now, delete it.
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
      Changed = true;
    }
    return Changed;

  } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
    GV->setConstant(true);

    // Clean up any obviously simplifiable users now.
    CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);

    // If the global is dead now, just nuke it.
    if (GV->use_empty()) {
      DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
            << "all users and delete global!\n");
      GV->eraseFromParent();
      ++NumDeleted;
    }

    ++NumMarked;
    return true;
  } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
    if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
        GVI = FirstNewGV;  // Don't skip the newly produced globals!
        return true;
      }
  } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
    // If the initial value for the global was an undef value, and if only
    // one other value was stored into it, we can just change the
    // initializer to be the stored value, then delete all stores to the
    // global.  This allows us to mark it constant.
    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
      if (isa<UndefValue>(GV->getInitializer())) {
        // Change the initial value here.
        GV->setInitializer(SOVConstant);

        // Clean up any obviously simplifiable users now.
        CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);

        if (GV->use_empty()) {
          DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
                       << "simplify all users and delete global!\n");
          GV->eraseFromParent();
          ++NumDeleted;
        } else {
          GVI = GV;
        }
        ++NumSubstitute;
        return true;
      }

    // Try to optimize globals based on the knowledge that only one value
    // (besides its initializer) is ever stored to the global.
    if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
                                 TD, TLI))
      return true;

    // Otherwise, if the global was not a boolean, we can shrink it to be a
    // boolean.
    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
      if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
        ++NumShrunkToBool;
        return true;
      }
  }

  return false;
}

/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
/// function, changing them to FastCC.
static void ChangeCalleesToFastCall(Function *F) {
  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
    if (isa<BlockAddress>(*UI))
      continue;
    CallSite User(cast<Instruction>(*UI));
    User.setCallingConv(CallingConv::Fast);
  }
}

static AttrListPtr StripNest(const AttrListPtr &Attrs) {
  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
    if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
      continue;

    // There can be only one.
    return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
  }

  return Attrs;
}

static void RemoveNestAttribute(Function *F) {
  F->setAttributes(StripNest(F->getAttributes()));
  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
    if (isa<BlockAddress>(*UI))
      continue;
    CallSite User(cast<Instruction>(*UI));
    User.setAttributes(StripNest(User.getAttributes()));
  }
}

bool GlobalOpt::OptimizeFunctions(Module &M) {
  bool Changed = false;
  // Optimize functions.
  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
    Function *F = FI++;
    // Functions without names cannot be referenced outside this module.
    if (!F->hasName() && !F->isDeclaration())
      F->setLinkage(GlobalValue::InternalLinkage);
    F->removeDeadConstantUsers();
    if (F->isDefTriviallyDead()) {
      F->eraseFromParent();
      Changed = true;
      ++NumFnDeleted;
    } else if (F->hasLocalLinkage()) {
      if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
          !F->hasAddressTaken()) {
        // If this function has C calling conventions, is not a varargs
        // function, and is only called directly, promote it to use the Fast
        // calling convention.
        F->setCallingConv(CallingConv::Fast);
        ChangeCalleesToFastCall(F);
        ++NumFastCallFns;
        Changed = true;
      }

      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
          !F->hasAddressTaken()) {
        // The function is not used by a trampoline intrinsic, so it is safe
        // to remove the 'nest' attribute.
        RemoveNestAttribute(F);
        ++NumNestRemoved;
        Changed = true;
      }
    }
  }
  return Changed;
}

bool GlobalOpt::OptimizeGlobalVars(Module &M) {
  bool Changed = false;
  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
       GVI != E; ) {
    GlobalVariable *GV = GVI++;
    // Global variables without names cannot be referenced outside this module.
    if (!GV->hasName() && !GV->isDeclaration())
      GV->setLinkage(GlobalValue::InternalLinkage);
    // Simplify the initializer.
    if (GV->hasInitializer())
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
        Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
        if (New && New != CE)
          GV->setInitializer(New);
      }

    Changed |= ProcessGlobal(GV, GVI);
  }
  return Changed;
}

/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
/// initializers have an init priority of 65535.
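///
/// The expected shape of the list is roughly (an illustrative example):
///   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
///     [{ i32, void ()* } { i32 65535, void ()* @ctor }]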
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
  if (GV == 0) return 0;

  // Verify that the initializer is simple enough for us to handle. We are
  // only allowed to optimize the initializer if it is unique.
  if (!GV->hasUniqueInitializer()) return 0;

  if (isa<ConstantAggregateZero>(GV->getInitializer()))
    return GV;
  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());

  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
    if (isa<ConstantAggregateZero>(*i))
      continue;
    ConstantStruct *CS = cast<ConstantStruct>(*i);
    if (isa<ConstantPointerNull>(CS->getOperand(1)))
      continue;

    // Must have a function or null ptr.
    if (!isa<Function>(CS->getOperand(1)))
      return 0;

    // Init priority must be standard.
    ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
    if (CI->getZExtValue() != 65535)
      return 0;
  }

  return GV;
}

/// ParseGlobalCtors - Given an llvm.global_ctors list that we can understand,
/// return the functions it contains (with null entries preserved) as a vector.
static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
  if (GV->getInitializer()->isNullValue())
    return std::vector<Function*>();
  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
  std::vector<Function*> Result;
  Result.reserve(CA->getNumOperands());
  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
    ConstantStruct *CS = cast<ConstantStruct>(*i);
    Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
  }
  return Result;
}

/// InstallGlobalCtors - Given an llvm.global_ctors list, install the specified
/// array of constructors into it, returning the new global to use.
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                          const std::vector<Function*> &Ctors) {
  // If we made a change, reassemble the initializer list.
  Constant *CSVals[2];
  CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
  CSVals[1] = 0;

  StructType *StructTy =
    cast<StructType>(
    cast<ArrayType>(GCL->getType()->getElementType())->getElementType());

  // Create the new init list.
  std::vector<Constant*> CAList;
  for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
    if (Ctors[i]) {
      CSVals[1] = Ctors[i];
    } else {
      Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
                                    false);
      PointerType *PFTy = PointerType::getUnqual(FTy);
      CSVals[1] = Constant::getNullValue(PFTy);
      CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
                                   0x7fffffff);
    }
    CAList.push_back(ConstantStruct::get(StructTy, CSVals));
  }

  // Create the array initializer.
  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                   CAList.size()), CAList);

  // If we didn't change the number of elements, don't create a new GV.
  if (CA->getType() == GCL->getInitializer()->getType()) {
    GCL->setInitializer(CA);
    return GCL;
  }

  // Create the new global and insert it next to the existing list.
  GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
                                           GCL->getLinkage(), CA, "",
                                           GCL->getThreadLocalMode());
  GCL->getParent()->getGlobalList().insert(GCL, NGV);
  NGV->takeName(GCL);

  // Nuke the old list, replacing any uses with the new one.
  if (!GCL->use_empty()) {
    Constant *V = NGV;
    if (V->getType() != GCL->getType())
      V = ConstantExpr::getBitCast(V, GCL->getType());
    GCL->replaceAllUsesWith(V);
  }
  GCL->eraseFromParent();

  if (Ctors.size())
    return NGV;
  else
    return 0;
}


static inline bool
isSimpleEnoughValueToCommit(Constant *C,
                            SmallPtrSet<Constant*, 8> &SimpleConstants,
                            const TargetData *TD);


/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
/// handled by the code generator.  We don't want to generate something like:
///   void *X = &X/42;
/// because the code generator doesn't have a relocation that can handle that.
///
/// This function should be called if C was not found (but just got inserted)
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
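///
/// By contrast, plain integers, global addresses, and global+constant-offset
/// expressions such as "getelementptr (@G, i32 0, i32 1)" are acceptable
/// (illustrative examples).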
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
                                   SmallPtrSet<Constant*, 8> &SimpleConstants,
                                   const TargetData *TD) {
  // Simple integer, undef, constant aggregate zero, global addresses, etc are
  // all supported.
  if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
      isa<GlobalValue>(C))
    return true;

  // Aggregate values are safe if all their elements are.
  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
      isa<ConstantVector>(C)) {
    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
      Constant *Op = cast<Constant>(C->getOperand(i));
      if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
        return false;
    }
    return true;
  }

  // We don't know exactly what relocations are allowed in constant expressions,
  // so we allow &global+constantoffset, which is safe and uniformly supported
  // across targets.
  ConstantExpr *CE = cast<ConstantExpr>(C);
  switch (CE->getOpcode()) {
  case Instruction::BitCast:
    // Bitcast is fine if the casted value is fine.
    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);

  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
    // int <=> ptr is fine if the int type is the same size as the
    // pointer type.
    if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
               TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
      return false;
    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);

  // GEP is fine if it is simple + constant offset.
  case Instruction::GetElementPtr:
    for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
      if (!isa<ConstantInt>(CE->getOperand(i)))
        return false;
    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);

  case Instruction::Add:
    // We allow simple+cst.
    if (!isa<ConstantInt>(CE->getOperand(1)))
      return false;
    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
  }
  return false;
}

static inline bool
isSimpleEnoughValueToCommit(Constant *C,
                            SmallPtrSet<Constant*, 8> &SimpleConstants,
                            const TargetData *TD) {
  // If we already checked this constant, we win.
  if (!SimpleConstants.insert(C)) return true;
  // Check the constant.
  return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
}


/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
/// enough for us to understand.  In particular, if it is a cast other than
/// from one pointer type to another pointer type, we punt.  We basically just
/// support direct accesses to globals and GEP's of globals.  This should be
/// kept up to date with CommitValueTo.
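///
/// Acceptable pointers therefore look like (illustrative examples):
///   @G                                                ; a global directly
///   getelementptr inbounds ([4 x i32]* @A, i32 0, i32 2)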
static bool isSimpleEnoughPointerToCommit(Constant *C) {
  // Conservatively, avoid aggregate types. This is because we don't
  // want to worry about them partially overlapping other stores.
  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
    return false;

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
    // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
    // external globals.
    return GV->hasUniqueInitializer();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Handle a constantexpr gep.
    if (CE->getOpcode() == Instruction::GetElementPtr &&
        isa<GlobalVariable>(CE->getOperand(0)) &&
        cast<GEPOperator>(CE)->isInBounds()) {
      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
      // external globals.
      if (!GV->hasUniqueInitializer())
        return false;

      // The first index must be zero.
      ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
      if (!CI || !CI->isZero()) return false;

      // The remaining indices must be compile-time known integers within the
      // notional bounds of the corresponding static array types.
      if (!CE->isGEPWithNoNotionalOverIndexing())
        return false;

      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);

    // A constantexpr bitcast from a pointer to another pointer is a no-op,
    // and we know how to evaluate it by moving the bitcast from the pointer
    // operand to the value operand.
    } else if (CE->getOpcode() == Instruction::BitCast &&
               isa<GlobalVariable>(CE->getOperand(0))) {
      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
      // external globals.
      return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
    }
  }

  return false;
}

/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
/// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
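///
/// E.g. (a sketch) storing i32 7 through
///   getelementptr ({ i32, [2 x i32] }* @G, i32 0, i32 1, i32 0)
/// rebuilds @G's initializer with element 0 of the array field replaced by
/// i32 7 and everything else unchanged.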
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
                                   ConstantExpr *Addr, unsigned OpNo) {
  // Base case of the recursion.
  if (OpNo == Addr->getNumOperands()) {
    assert(Val->getType() == Init->getType() && "Type mismatch!");
    return Val;
  }

  SmallVector<Constant*, 32> Elts;
  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
    // Break up the constant into its elements.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
      Elts.push_back(Init->getAggregateElement(i));

    // Replace the element that we are supposed to.
    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
    unsigned Idx = CU->getZExtValue();
    assert(Idx < STy->getNumElements() && "Struct index out of range!");
    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);

    // Return the modified struct.
    return ConstantStruct::get(STy, Elts);
  }

  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
  SequentialType *InitTy = cast<SequentialType>(Init->getType());

  uint64_t NumElts;
  if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
    NumElts = ATy->getNumElements();
  else
    NumElts = InitTy->getVectorNumElements();

  // Break up the array into elements.
  for (uint64_t i = 0, e = NumElts; i != e; ++i)
    Elts.push_back(Init->getAggregateElement(i));

  assert(CI->getZExtValue() < NumElts);
  Elts[CI->getZExtValue()] =
    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);

  if (Init->getType()->isArrayTy())
    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
  return ConstantVector::get(Elts);
}

/// CommitValueTo - We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
static void CommitValueTo(Constant *Val, Constant *Addr) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    assert(GV->hasInitializer());
    GV->setInitializer(Val);
    return;
  }

  ConstantExpr *CE = cast<ConstantExpr>(Addr);
  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}

namespace {

/// Evaluator - This class evaluates LLVM IR, producing the Constant
/// representing each SSA instruction.  Changes to global variables are stored
/// in a mapping that can be iterated over after the evaluation is complete.
/// Once an evaluation call fails, the evaluation object should not be reused.
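///
/// The typical protocol (a sketch) is: construct an Evaluator, call
/// EvaluateFunction on a static constructor, and on success commit each
/// (pointer, value) pair from getMutatedMemory() into the module with
/// CommitValueTo.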
class Evaluator {
public:
  Evaluator(const TargetData *TD, const TargetLibraryInfo *TLI)
    : TD(TD), TLI(TLI) {
    ValueStack.push_back(new DenseMap<Value*, Constant*>);
  }

  ~Evaluator() {
    DeleteContainerPointers(ValueStack);
    while (!AllocaTmps.empty()) {
      GlobalVariable *Tmp = AllocaTmps.back();
      AllocaTmps.pop_back();

      // If there are still users of the alloca, the program is doing something
      // silly, e.g. storing the address of the alloca somewhere and using it
      // later.  Since this is undefined, we'll just make it be null.
      if (!Tmp->use_empty())
        Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
      delete Tmp;
    }
  }

  /// EvaluateFunction - Evaluate a call to function F, returning true if
  /// successful, false if we can't evaluate it.  ActualArgs contains the
  /// constant values of the actual arguments for the call.
  bool EvaluateFunction(Function *F, Constant *&RetVal,
                        const SmallVectorImpl<Constant*> &ActualArgs);

  /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
  /// successful, false if we can't evaluate it.  NextBB returns the next BB
  /// that control flows into, or null upon return.
  bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);

  Constant *getVal(Value *V) {
    if (Constant *CV = dyn_cast<Constant>(V)) return CV;
    Constant *R = ValueStack.back()->lookup(V);
    assert(R && "Reference to an uncomputed value!");
    return R;
  }

  void setVal(Value *V, Constant *C) {
    ValueStack.back()->operator[](V) = C;
  }

  const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
    return MutatedMemory;
  }

  const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
    return Invariants;
  }

private:
  Constant *ComputeLoadResult(Constant *P);

  /// ValueStack - As we compute SSA register values, we store their contents
  /// here. The back of the vector contains the current function and the stack
  /// contains the values in the calling frames.
  SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;

  /// CallStack - This is used to detect recursion.  In pathological situations
  /// we could hit exponential behavior, but at least there is nothing
  /// unbounded.
  SmallVector<Function*, 4> CallStack;

  /// MutatedMemory - For each store we execute, we update this map.  Loads
  /// check this to get the most up-to-date value.  If evaluation is
  /// successful, this state is committed to the module.
  DenseMap<Constant*, Constant*> MutatedMemory;

  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
  /// to represent its body.  This vector is needed so we can delete the
  /// temporary globals when we are done.
  SmallVector<GlobalVariable*, 32> AllocaTmps;

  /// Invariants - These global variables have been marked invariant by the
  /// static constructor.
  SmallPtrSet<GlobalVariable*, 8> Invariants;

  /// SimpleConstants - These are constants we have checked and know to be
  /// simple enough to live in a static initializer of a global.
  SmallPtrSet<Constant*, 8> SimpleConstants;

  const TargetData *TD;
  const TargetLibraryInfo *TLI;
};

}  // anonymous namespace

/// ComputeLoadResult - Return the value that would be computed by a load from
/// P after the stores reflected by MutatedMemory have been performed.  If we
/// can't decide, return null.
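///
/// E.g. after evaluating "store i32 5, i32* @X", a subsequent load of @X
/// returns i32 5 from MutatedMemory rather than @X's static initializer.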
Constant *Evaluator::ComputeLoadResult(Constant *P) {
  // If this memory location has been recently stored, use the stored value: it
  // is the most up-to-date.
  DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
  if (I != MutatedMemory.end()) return I->second;

  // Access it.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
    if (GV->hasDefinitiveInitializer())
      return GV->getInitializer();
    return 0;
  }

  // Handle a constantexpr getelementptr.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
    if (CE->getOpcode() == Instruction::GetElementPtr &&
        isa<GlobalVariable>(CE->getOperand(0))) {
      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
      if (GV->hasDefinitiveInitializer())
        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
    }

  return 0;  // don't know how to evaluate.
}

/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
/// successful, false if we can't evaluate it.  NextBB returns the next BB
/// that control flows into, or null upon return.
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
                              BasicBlock *&NextBB) {
  // This is the main evaluation loop.
  while (1) {
    Constant *InstResult = 0;

    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
      if (!SI->isSimple()) return false;  // no volatile/atomic accesses.
      Constant *Ptr = getVal(SI->getOperand(1));
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
        Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
      if (!isSimpleEnoughPointerToCommit(Ptr))
        // If this is too complex for us to commit, reject it.
        return false;

      Constant *Val = getVal(SI->getOperand(0));

      // If this might be too difficult for the backend to handle (e.g. the addr
      // of one global variable divided by another) then we can't commit it.
      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
        return false;

      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
        if (CE->getOpcode() == Instruction::BitCast) {
          // If we're evaluating a store through a bitcast, then we need
          // to pull the bitcast off the pointer type and push it onto the
          // stored value.
          Ptr = CE->getOperand(0);

          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();

          // In order to push the bitcast onto the stored value, a bitcast
          // from NewTy to Val's type must be legal.  If it's not, we can try
          // introspecting NewTy to find a legal conversion.
          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
            // If NewTy is a struct, we can convert the pointer to the struct
            // into a pointer to its first member.
            // FIXME: This could be extended to support arrays as well.
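            // For example (illustrative IR): with %struct.S = type { i32, i8 },
            // a store of an i32 through a pointer bitcast from %struct.S* is
            // redirected to getelementptr(@s, i32 0, i32 0), i.e. to the
            // leading i32 member, which the loop below reaches in one step.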
            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
              NewTy = STy->getTypeAtIndex(0U);

              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
              Constant * const IdxList[] = {IdxZero, IdxZero};

              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
              if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
                Ptr = ConstantFoldConstantExpression(CE, TD, TLI);

            // If we can't improve the situation by introspecting NewTy,
            // we have to give up.
            } else {
              return false;
            }
          }

          // If we found compatible types, go ahead and push the bitcast
          // onto the stored value.
          Val = ConstantExpr::getBitCast(Val, NewTy);
        }

      MutatedMemory[Ptr] = Val;
    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
      InstResult = ConstantExpr::get(BO->getOpcode(),
                                     getVal(BO->getOperand(0)),
                                     getVal(BO->getOperand(1)));
    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
                                            getVal(CI->getOperand(0)),
                                            getVal(CI->getOperand(1)));
    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
      InstResult = ConstantExpr::getCast(CI->getOpcode(),
                                         getVal(CI->getOperand(0)),
                                         CI->getType());
    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
      InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
                                           getVal(SI->getOperand(1)),
                                           getVal(SI->getOperand(2)));
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
      Constant *P = getVal(GEP->getOperand(0));
      SmallVector<Constant*, 8> GEPOps;
      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
           i != e; ++i)
        GEPOps.push_back(getVal(*i));
      InstResult =
        ConstantExpr::getGetElementPtr(P, GEPOps,
                                       cast<GEPOperator>(GEP)->isInBounds());
    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
      if (!LI->isSimple()) return false;  // no volatile/atomic accesses.
      Constant *Ptr = getVal(LI->getOperand(0));
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
        Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
      InstResult = ComputeLoadResult(Ptr);
      if (InstResult == 0) return false; // Could not evaluate load.
    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
      if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.
      Type *Ty = AI->getType()->getElementType();
      AllocaTmps.push_back(new GlobalVariable(Ty, false,
                                              GlobalValue::InternalLinkage,
                                              UndefValue::get(Ty),
                                              AI->getName()));
      InstResult = AllocaTmps.back();
    } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
      CallSite CS(CurInst);

      // Debug info can safely be ignored here.
      if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
        ++CurInst;
        continue;
      }

      // Cannot handle inline asm.
      if (isa<InlineAsm>(CS.getCalledValue())) return false;

      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
        if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
          if (MSI->isVolatile()) return false;
          Constant *Ptr = getVal(MSI->getDest());
          Constant *Val = getVal(MSI->getValue());
          Constant *DestVal = ComputeLoadResult(Ptr);
          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
            // This memset is a no-op.
            ++CurInst;
            continue;
          }
        }

        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          ++CurInst;
          continue;
        }

        if (II->getIntrinsicID() == Intrinsic::invariant_start) {
          // We don't insert an entry into Values, as it doesn't have a
          // meaningful return value.
          if (!II->use_empty())
            return false;
          ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
          Value *PtrArg = getVal(II->getArgOperand(1));
          Value *Ptr = PtrArg->stripPointerCasts();
          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
            Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
            // Only mark the global invariant if the size is known (not -1)
            // and covers the whole global.  TD may be unavailable, in which
            // case we conservatively do nothing.
            if (TD && !Size->isAllOnesValue() &&
                Size->getValue().getLimitedValue() >=
                TD->getTypeStoreSize(ElemTy))
              Invariants.insert(GV);
          }
          // Continue even if we do nothing.
          ++CurInst;
          continue;
        }
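        // Some other intrinsic we don't know how to evaluate: give up.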
        return false;
      }

      // Resolve function pointers.
      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
      if (!Callee || Callee->mayBeOverridden())
        return false;  // Cannot resolve.

      SmallVector<Constant*, 8> Formals;
      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
        Formals.push_back(getVal(*i));

      if (Callee->isDeclaration()) {
        // If this is a function we can constant fold, do it.
        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
          InstResult = C;
        } else {
          return false;
        }
      } else {
        if (Callee->getFunctionType()->isVarArg())
          return false;

        Constant *RetVal;
        // Execute the call; if successful, use the return value.
        ValueStack.push_back(new DenseMap<Value*, Constant*>);
        if (!EvaluateFunction(Callee, RetVal, Formals))
          return false;
        delete ValueStack.pop_back_val();
        InstResult = RetVal;
      }
    } else if (isa<TerminatorInst>(CurInst)) {
      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
        if (BI->isUnconditional()) {
          NextBB = BI->getSuccessor(0);
        } else {
          ConstantInt *Cond =
            dyn_cast<ConstantInt>(getVal(BI->getCondition()));
          if (!Cond) return false;  // Cannot determine.

          NextBB = BI->getSuccessor(!Cond->getZExtValue());
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
        ConstantInt *Val =
          dyn_cast<ConstantInt>(getVal(SI->getCondition()));
        if (!Val) return false;  // Cannot determine.
        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
      } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
        Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
        if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
          NextBB = BA->getBasicBlock();
        else
          return false;  // Cannot determine.
      } else if (isa<ReturnInst>(CurInst)) {
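        // A return instruction: signal completion by leaving NextBB null.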
        NextBB = 0;
      } else {
        // invoke, unwind, resume, unreachable.
        return false;  // Cannot handle this terminator.
      }

      // We succeeded at evaluating this block!
      return true;
    } else {
      // Did not know how to evaluate this!
      return false;
    }

    if (!CurInst->use_empty()) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
        InstResult = ConstantFoldConstantExpression(CE, TD, TLI);

      setVal(CurInst, InstResult);
    }

    // If we just processed an invoke, we finished evaluating the block.
    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
      NextBB = II->getNormalDest();
      return true;
    }

    // Advance program counter.
    ++CurInst;
  }
}

/// EvaluateFunction - Evaluate a call to function F, returning true if
/// successful, false if we can't evaluate it.  ActualArgs contains the
/// constant argument values to bind to F's formal parameters.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
                                 const SmallVectorImpl<Constant*> &ActualArgs) {
  // Check to see if this function is already executing (recursion).  If so,
  // bail out.  TODO: we might want to accept limited recursion.
  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
    return false;

  CallStack.push_back(F);

  // Initialize arguments to the incoming values specified.
  unsigned ArgNo = 0;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
       ++AI, ++ArgNo)
    setVal(AI, ActualArgs[ArgNo]);

  // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
  // we can only evaluate any one basic block at most once.  This set keeps
  // track of what we have executed so we can detect recursive cases etc.
  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;

  // CurBB - The current basic block we're evaluating.
  BasicBlock *CurBB = F->begin();

  BasicBlock::iterator CurInst = CurBB->begin();

  while (1) {
    BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
    if (!EvaluateBlock(CurInst, NextBB))
      return false;

    if (NextBB == 0) {
      // Successfully running until there's no next block means that we found
      // the return.  Fill in the return value and pop the call stack.
      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
      if (RI->getNumOperands())
        RetVal = getVal(RI->getOperand(0));
      CallStack.pop_back();
      return true;
    }

    // Okay, we succeeded in evaluating this control flow.  See if we have
    // executed the new block before.  If so, we have a looping function,
    // which we cannot evaluate in reasonable time.
    if (!ExecutedBlocks.insert(NextBB))
      return false;  // looped!

    // Okay, we have never been in this block before.  Check to see if there
    // are any PHI nodes.  If so, evaluate them with information about where
    // we came from.
    PHINode *PN = 0;
    for (CurInst = NextBB->begin();
         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));

    // Advance to the next block.
    CurBB = NextBB;
  }
}

/// EvaluateStaticConstructor - Evaluate the given static constructor function
/// F, if we can.  Return true if we can, false otherwise.
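///
/// For example (illustrative C++), the initializer "int x = f();" at global
/// scope compiles to a ctor; if f() folds to a constant, the ctor is executed
/// here at compile time and CommitValueTo turns its stores into static
/// initializers.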
static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
                                      const TargetLibraryInfo *TLI) {
  // Call the function.
  Evaluator Eval(TD, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant*, 0>());

  if (EvalSuccess) {
    // We succeeded at evaluation: commit the result.
    DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
          << F->getName() << "' to " << Eval.getMutatedMemory().size()
          << " stores.\n");
    for (DenseMap<Constant*, Constant*>::const_iterator I =
           Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
         I != E; ++I)
      CommitValueTo(I->second, I->first);
    for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
           Eval.getInvariants().begin(), E = Eval.getInvariants().end();
         I != E; ++I)
      (*I)->setConstant(true);
  }

  return EvalSuccess;
}

/// OptimizeGlobalCtorsList - Simplify and evaluate the global ctors list if
/// possible.  Return true if anything changed.
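///
/// The list has the usual form (illustrative):
///   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
///     [{ i32, void ()* } { i32 65535, void ()* @ctor }]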
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
  std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
  bool MadeChange = false;
  if (Ctors.empty()) return false;

  // Loop over global ctors, optimizing them when we can.
  for (unsigned i = 0; i != Ctors.size(); ++i) {
    Function *F = Ctors[i];
    // If we found a null terminator in the middle of the list, prune off the
    // rest of the list.
    if (F == 0) {
      if (i != Ctors.size()-1) {
        Ctors.resize(i+1);
        MadeChange = true;
      }
      break;
    }

    // We cannot simplify external ctor functions.
    if (F->empty()) continue;

    // If we can evaluate the ctor at compile time, do.
    if (EvaluateStaticConstructor(F, TD, TLI)) {
      Ctors.erase(Ctors.begin()+i);
      MadeChange = true;
      --i;
      ++NumCtorsEvaluated;
      continue;
    }
  }

  if (!MadeChange) return false;

  GCL = InstallGlobalCtors(GCL, Ctors);
  return true;
}

bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
  bool Changed = false;

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    Module::alias_iterator J = I++;
    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration())
      J->setLinkage(GlobalValue::InternalLinkage);
    // If the aliasee may change at link time, nothing can be done - bail out.
    if (J->mayBeOverridden())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
    Target->removeDeadConstantUsers();
    bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();

    // Make all users of the alias use the aliasee instead.
    if (!J->use_empty()) {
      J->replaceAllUsesWith(Aliasee);
      ++NumAliasesResolved;
      Changed = true;
    }

    // If the alias is externally visible, we may still be able to simplify it.
    if (!J->hasLocalLinkage()) {
      // If the aliasee has internal linkage, give it the name and linkage
      // of the alias, and delete the alias.  This turns:
      //   define internal ... @f(...)
      //   @a = alias ... @f
      // into:
      //   define ... @a(...)
      if (!Target->hasLocalLinkage())
        continue;

      // Do not perform the transform if multiple aliases potentially target
      // the aliasee. This check also ensures that it is safe to replace the
      // section and other attributes of the aliasee with those of the alias.
      if (!hasOneUse)
        continue;

      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(J);
      Target->setLinkage(J->getLinkage());
      Target->GlobalValue::copyAttributesFrom(J);
    }

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  return Changed;
}

static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc::cxa_atexit))
    return 0;

  Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));

  if (!Fn)
    return 0;

  FunctionType *FTy = Fn->getFunctionType();

  // Checking that the function has the right return type, the right number of
  // parameters and that they all have pointer types should be enough.
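  // That is, the declaration should look like:
  //   int __cxa_atexit(void (*f)(void *), void *p, void *d);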
  if (!FTy->getReturnType()->isIntegerTy() ||
      FTy->getNumParams() != 3 ||
      !FTy->getParamType(0)->isPointerTy() ||
      !FTy->getParamType(1)->isPointerTy() ||
      !FTy->getParamType(2)->isPointerTy())
    return 0;

  return Fn;
}

/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
/// destructor and can therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
/// other side-effect free instructions.
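///
/// For example (illustrative C++), "struct S { ~S() {} };" typically lowers
/// to a destructor body that is a single 'ret', which this predicate accepts.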
static bool cxxDtorIsEmpty(const Function &Fn,
                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
  // nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  if (++Fn.begin() != Fn.end())
    return false;

  const BasicBlock &EntryBlock = Fn.getEntryBlock();
  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
       I != E; ++I) {
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      // Ignore debug intrinsics.
      if (isa<DbgInfoIntrinsic>(CI))
        continue;

      const Function *CalledFn = CI->getCalledFunction();

      if (!CalledFn)
        return false;

      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);

      // Don't treat recursive functions as empty.
      if (!NewCalledFunctions.insert(CalledFn))
        return false;

      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
        return false;
    } else if (isa<ReturnInst>(*I))
      return true; // We're done.
    else if (I->mayHaveSideEffects())
      return false; // Destructor with side effects, bail.
  }

  return false;
}

bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object, that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.

  // This pass looks for calls to __cxa_atexit that register an empty
  // destructor and removes them.
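  // For example (illustrative IR), a registration such as
  //   call i32 @__cxa_atexit(void (i8*)* @dtor, i8* @obj, i8* @__dso_handle)
  // where @dtor immediately returns is deleted outright.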
  bool Changed = false;

  for (Function::use_iterator I = CXAAtExitFn->use_begin(),
       E = CXAAtExitFn->use_end(); I != E;) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generate invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(*I++);
    if (!CI)
      continue;

    Function *DtorFn =
      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn)
      continue;

    SmallPtrSet<const Function *, 8> CalledFunctions;
    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed = true;
  }

  return Changed;
}

bool GlobalOpt::runOnModule(Module &M) {
  bool Changed = false;

  TD = getAnalysisIfAvailable<TargetData>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  // Try to find the llvm.global_ctors list.
  GlobalVariable *GlobalCtors = FindGlobalCtors(M);

  Function *CXAAtExitFn = FindCXAAtExit(M, TLI);

  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;

    // Delete functions that are trivially dead, and convert ccc functions
    // to fastcc where safe.
    LocalChange |= OptimizeFunctions(M);

    // Optimize global_ctors list.
    if (GlobalCtors)
      LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);

    // Optimize non-address-taken globals.
    LocalChange |= OptimizeGlobalVars(M);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M);

    // Try to remove trivial global destructors.
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}