GlobalOpt.cpp revision 363496
1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass transforms simple global variables that never have their address
10// taken.  When this is obviously the case, it marks read/write globals as
11// constant, deletes variables that are only stored to, etc.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/IPO/GlobalOpt.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/ConstantFolding.h"
25#include "llvm/Analysis/MemoryBuiltins.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/TargetTransformInfo.h"
28#include "llvm/BinaryFormat/Dwarf.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CallSite.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DebugInfoMetadata.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GetElementPtrTypeIterator.h"
41#include "llvm/IR/GlobalAlias.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/GlobalVariable.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/IntrinsicInst.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/Operator.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/User.h"
53#include "llvm/IR/Value.h"
54#include "llvm/IR/ValueHandle.h"
55#include "llvm/InitializePasses.h"
56#include "llvm/Pass.h"
57#include "llvm/Support/AtomicOrdering.h"
58#include "llvm/Support/Casting.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/MathExtras.h"
63#include "llvm/Support/raw_ostream.h"
64#include "llvm/Transforms/IPO.h"
65#include "llvm/Transforms/Utils/CtorUtils.h"
66#include "llvm/Transforms/Utils/Evaluator.h"
67#include "llvm/Transforms/Utils/GlobalStatus.h"
68#include "llvm/Transforms/Utils/Local.h"
69#include <cassert>
70#include <cstdint>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76#define DEBUG_TYPE "globalopt"
77
78STATISTIC(NumMarked    , "Number of globals marked constant");
79STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
80STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
81STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
82STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
83STATISTIC(NumDeleted   , "Number of globals deleted");
84STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
85STATISTIC(NumLocalized , "Number of globals localized");
86STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
87STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
88STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
89STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
90STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
91STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
92STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
93STATISTIC(NumInternalFunc, "Number of internal functions");
94STATISTIC(NumColdCC, "Number of functions marked coldcc");
95
96static cl::opt<bool>
97    EnableColdCCStressTest("enable-coldcc-stress-test",
98                           cl::desc("Enable stress test of coldcc by adding "
99                                    "calling conv to all internal functions."),
100                           cl::init(false), cl::Hidden);
101
102static cl::opt<int> ColdCCRelFreq(
103    "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
104    cl::desc(
105        "Maximum block frequency, expressed as a percentage of caller's "
106        "entry frequency, for a call site to be considered cold for enabling"
107        "coldcc"));
108
109/// Is this global variable possibly used by a leak checker as a root?  If so,
110/// we might not really want to eliminate the stores to it.
111static bool isLeakCheckerRoot(GlobalVariable *GV) {
112  // A global variable is a root if it is a pointer, or could plausibly contain
113  // a pointer.  There are two challenges; one is that we could have a struct
114// that has an inner member which is a pointer.  We recurse through the type to
115  // detect these (up to a point).  The other is that we may actually be a union
116  // of a pointer and another type, and so our LLVM type is an integer which
117  // gets converted into a pointer, or our type is an [i8 x #] with a pointer
118  // potentially contained here.
119
120  if (GV->hasPrivateLinkage())
121    return false;
122
123  SmallVector<Type *, 4> Types;
124  Types.push_back(GV->getValueType());
125
126  unsigned Limit = 20;
127  do {
128    Type *Ty = Types.pop_back_val();
129    switch (Ty->getTypeID()) {
130      default: break;
131      case Type::PointerTyID: return true;
132      case Type::ArrayTyID:
133      case Type::VectorTyID: {
134        SequentialType *STy = cast<SequentialType>(Ty);
135        Types.push_back(STy->getElementType());
136        break;
137      }
138      case Type::StructTyID: {
139        StructType *STy = cast<StructType>(Ty);
140        if (STy->isOpaque()) return true;
141        for (StructType::element_iterator I = STy->element_begin(),
142                 E = STy->element_end(); I != E; ++I) {
143          Type *InnerTy = *I;
144          if (isa<PointerType>(InnerTy)) return true;
145          if (isa<CompositeType>(InnerTy))
146            Types.push_back(InnerTy);
147        }
148        break;
149      }
150    }
151    if (--Limit == 0) return true;
152  } while (!Types.empty());
153  return false;
154}
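// Illustrative sketch (added commentary, not part of revision 363496): under
// the rules above, a pointer-typed global or an aggregate containing a pointer
// counts as a leak-checker root, while a plain scalar does not:
//   @head  = global i8* null                       ; pointer         -> root
//   @pair  = global { i32, i8* } zeroinitializer   ; holds a pointer -> root
//   @count = internal global i32 0                 ; scalar          -> not a root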
155
156/// Given a value that is stored to a global but never read, determine whether
157/// it's safe to remove the store and the chain of computation that feeds the
158/// store.
159static bool IsSafeComputationToRemove(
160    Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
161  do {
162    if (isa<Constant>(V))
163      return true;
164    if (!V->hasOneUse())
165      return false;
166    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
167        isa<GlobalValue>(V))
168      return false;
169    if (isAllocationFn(V, GetTLI))
170      return true;
171
172    Instruction *I = cast<Instruction>(V);
173    if (I->mayHaveSideEffects())
174      return false;
175    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
176      if (!GEP->hasAllConstantIndices())
177        return false;
178    } else if (I->getNumOperands() != 1) {
179      return false;
180    }
181
182    V = I->getOperand(0);
183  } while (true);
184}
185
186/// This GV is a pointer root.  Loop over all users of the global and clean up
187/// any that obviously store a value to the global that is not dynamically
188/// allocated.
189static bool
190CleanupPointerRootUsers(GlobalVariable *GV,
191                        function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
192  // A brief explanation of leak checkers.  The goal is to find bugs where
193  // pointers are forgotten, causing an accumulating growth in memory
194  // usage over time.  The common strategy for leak checkers is to whitelist the
195  // memory pointed to by globals at exit.  This is popular because it also
196  // solves another problem where the main thread of a C++ program may shut down
197  // before other threads that are still expecting to use those globals.  To
198  // handle that case, we expect the program may create a singleton and never
199  // destroy it.
200
201  bool Changed = false;
202
203  // If Dead[n].first is the only use of a malloc result, we can delete its
204  // chain of computation and the store to the global in Dead[n].second.
205  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
206
207  // Constants can't be pointers to dynamically allocated memory.
208  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
209       UI != E;) {
210    User *U = *UI++;
211    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
212      Value *V = SI->getValueOperand();
213      if (isa<Constant>(V)) {
214        Changed = true;
215        SI->eraseFromParent();
216      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
217        if (I->hasOneUse())
218          Dead.push_back(std::make_pair(I, SI));
219      }
220    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
221      if (isa<Constant>(MSI->getValue())) {
222        Changed = true;
223        MSI->eraseFromParent();
224      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
225        if (I->hasOneUse())
226          Dead.push_back(std::make_pair(I, MSI));
227      }
228    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
229      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
230      if (MemSrc && MemSrc->isConstant()) {
231        Changed = true;
232        MTI->eraseFromParent();
233      } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
234        if (I->hasOneUse())
235          Dead.push_back(std::make_pair(I, MTI));
236      }
237    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
238      if (CE->use_empty()) {
239        CE->destroyConstant();
240        Changed = true;
241      }
242    } else if (Constant *C = dyn_cast<Constant>(U)) {
243      if (isSafeToDestroyConstant(C)) {
244        C->destroyConstant();
245        // This could have invalidated UI, start over from scratch.
246        Dead.clear();
247        CleanupPointerRootUsers(GV, GetTLI);
248        return true;
249      }
250    }
251  }
252
253  for (int i = 0, e = Dead.size(); i != e; ++i) {
254    if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
255      Dead[i].second->eraseFromParent();
256      Instruction *I = Dead[i].first;
257      do {
258        if (isAllocationFn(I, GetTLI))
259          break;
260        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
261        if (!J)
262          break;
263        I->eraseFromParent();
264        I = J;
265      } while (true);
266      I->eraseFromParent();
267    }
268  }
269
270  return Changed;
271}
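// Hypothetical example of the cleanup above (added, not from the original
// file): for a leak-checker root @g whose loaded value is never used,
//   %buf = call i8* @malloc(i64 16)
//   store i8* %buf, i8** @g
// the store and the malloc feeding it are both removed, because the malloc
// result has no other use; a store of a constant such as
//   store i8* null, i8** @g
// is simply erased.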
272
273/// We just marked GV constant.  Loop over all users of the global, cleaning up
274/// the obvious ones.  This is largely just a quick scan over the use list to
275/// clean up the easy and obvious cruft.  This returns true if it made a change.
276static bool CleanupConstantGlobalUsers(
277    Value *V, Constant *Init, const DataLayout &DL,
278    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
279  bool Changed = false;
280  // Note that we need to use a weak value handle for the worklist items. When
281  // we delete a constant array, we may also be holding pointer to one of its
282  // elements (or an element of one of its elements if we're dealing with an
283  // array of arrays) in the worklist.
284  SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
285  while (!WorkList.empty()) {
286    Value *UV = WorkList.pop_back_val();
287    if (!UV)
288      continue;
289
290    User *U = cast<User>(UV);
291
292    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
293      if (Init) {
294        // Replace the load with the initializer.
295        LI->replaceAllUsesWith(Init);
296        LI->eraseFromParent();
297        Changed = true;
298      }
299    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
300      // Store must be unreachable or storing Init into the global.
301      SI->eraseFromParent();
302      Changed = true;
303    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
304      if (CE->getOpcode() == Instruction::GetElementPtr) {
305        Constant *SubInit = nullptr;
306        if (Init)
307          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
308        Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
309      } else if ((CE->getOpcode() == Instruction::BitCast &&
310                  CE->getType()->isPointerTy()) ||
311                 CE->getOpcode() == Instruction::AddrSpaceCast) {
312        // Pointer cast, delete any stores and memsets to the global.
313        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
314      }
315
316      if (CE->use_empty()) {
317        CE->destroyConstant();
318        Changed = true;
319      }
320    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
321      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
322      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
323      // and will invalidate our notion of what Init is.
324      Constant *SubInit = nullptr;
325      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
326        ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
327            ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
328        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
329          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
330
331        // If the initializer is an all-null value and we have an inbounds GEP,
332        // we already know what the result of any load from that GEP is.
333        // TODO: Handle splats.
334        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
335          SubInit = Constant::getNullValue(GEP->getResultElementType());
336      }
337      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
338
339      if (GEP->use_empty()) {
340        GEP->eraseFromParent();
341        Changed = true;
342      }
343    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
344      if (MI->getRawDest() == V) {
345        MI->eraseFromParent();
346        Changed = true;
347      }
348
349    } else if (Constant *C = dyn_cast<Constant>(U)) {
350      // If we have a chain of dead constantexprs or other things dangling from
351      // us, and if they are all dead, nuke them without remorse.
352      if (isSafeToDestroyConstant(C)) {
353        C->destroyConstant();
354        CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
355        return true;
356      }
357    }
358  }
359  return Changed;
360}
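// Illustrative example (added commentary): once a global such as
//   @flag = internal constant i32 7
// has been marked constant, the cleanup above replaces every
//   %v = load i32, i32* @flag
// with the constant 'i32 7' and deletes any store to @flag, which must be
// unreachable or redundant.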
361
362static bool isSafeSROAElementUse(Value *V);
363
364/// Return true if the specified GEP is a safe user of a derived
365/// expression from a global that we want to SROA.
366static bool isSafeSROAGEP(User *U) {
367  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
368  // don't like < 3 operand CE's, and we don't like non-constant integer
369  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
370  // value of C.
371  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
372      !cast<Constant>(U->getOperand(1))->isNullValue())
373    return false;
374
375  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
376  ++GEPI; // Skip over the pointer index.
377
378  // For all other levels we require that the indices are constant and in range.
379  // In particular, consider: A[0][i].  We cannot know that the user isn't doing
380  // invalid things like allowing i to index an out-of-range subscript that
381  // accesses A[1]. This can also happen between different members of a struct
382  // in llvm IR.
383  for (; GEPI != E; ++GEPI) {
384    if (GEPI.isStruct())
385      continue;
386
387    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
388    if (!IdxVal || (GEPI.isBoundedSequential() &&
389                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
390      return false;
391  }
392
393  return llvm::all_of(U->users(),
394                      [](User *UU) { return isSafeSROAElementUse(UU); });
395}
396
397/// Return true if the specified instruction is a safe user of a derived
398/// expression from a global that we want to SROA.
399static bool isSafeSROAElementUse(Value *V) {
400  // We might have a dead and dangling constant hanging off of here.
401  if (Constant *C = dyn_cast<Constant>(V))
402    return isSafeToDestroyConstant(C);
403
404  Instruction *I = dyn_cast<Instruction>(V);
405  if (!I) return false;
406
407  // Loads are ok.
408  if (isa<LoadInst>(I)) return true;
409
410  // Stores *to* the pointer are ok.
411  if (StoreInst *SI = dyn_cast<StoreInst>(I))
412    return SI->getOperand(0) != V;
413
414  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
415  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
416}
417
418/// Look at all uses of the global and decide whether it is safe for us to
419/// perform this transformation.
420static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
421  for (User *U : GV->users()) {
422    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
423    if (!isa<GetElementPtrInst>(U) &&
424        (!isa<ConstantExpr>(U) ||
425        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
426      return false;
427
428    // Check that the GEP and its users are safe to SRA.
429    if (!isSafeSROAGEP(U))
430      return false;
431  }
432
433  return true;
434}
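// Example of the accepted use pattern (illustrative, not from the original
// source): every user of the global must be a GEP of the form
//   getelementptr %T, %T* @GV, i32 0, i32 C, ...
// with constant, in-range indices.  A non-constant element index such as
//   getelementptr [8 x i32], [8 x i32]* @GV, i32 0, i32 %i
// makes the global unsafe to SRA.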
435
436static bool CanDoGlobalSRA(GlobalVariable *GV) {
437  Constant *Init = GV->getInitializer();
438
439  if (isa<StructType>(Init->getType())) {
440    // nothing to check
441  } else if (SequentialType *STy = dyn_cast<SequentialType>(Init->getType())) {
442    if (STy->getNumElements() > 16 && GV->hasNUsesOrMore(16))
443      return false; // It's not worth it.
444  } else
445    return false;
446
447  return GlobalUsersSafeToSRA(GV);
448}
449
450/// Copy over the debug info for a variable to its SRA replacements.
451static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
452                                 uint64_t FragmentOffsetInBits,
453                                 uint64_t FragmentSizeInBits) {
454  SmallVector<DIGlobalVariableExpression *, 1> GVs;
455  GV->getDebugInfo(GVs);
456  for (auto *GVE : GVs) {
457    DIVariable *Var = GVE->getVariable();
458    Optional<uint64_t> VarSize = Var->getSizeInBits();
459
460    DIExpression *Expr = GVE->getExpression();
461    // If the FragmentSize is smaller than the variable,
462    // emit a fragment expression.
463    // If the variable size is unknown a fragment must be
464    // emitted to be safe.
465    if (!VarSize || FragmentSizeInBits < *VarSize) {
466      if (auto E = DIExpression::createFragmentExpression(
467              Expr, FragmentOffsetInBits, FragmentSizeInBits))
468        Expr = *E;
469      else
470        return;
471    }
472    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
473    NGV->addDebugInfo(NGVE);
474  }
475}
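// For instance (hypothetical numbers): when a 64-bit variable is split into two
// 32-bit replacements, the first replacement's expression becomes
//   !DIExpression(DW_OP_LLVM_fragment, 0, 32)
// and the second's uses offset 32; a replacement that covers the whole variable
// keeps the original expression unchanged.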
476
477/// Perform scalar replacement of aggregates on the specified global variable.
478/// This opens the door for other optimizations by exposing the behavior of the
479/// program in a more fine-grained way.  We have determined that this
480/// transformation is safe already.  We return the first global variable we
481/// insert so that the caller can reprocess it.
482static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
483  // Make sure this global only has simple uses that we can SRA.
484  if (!CanDoGlobalSRA(GV))
485    return nullptr;
486
487  assert(GV->hasLocalLinkage());
488  Constant *Init = GV->getInitializer();
489  Type *Ty = Init->getType();
490
491  std::map<unsigned, GlobalVariable *> NewGlobals;
492
493  // Get the alignment of the global, either explicit or target-specific.
494  unsigned StartAlignment = GV->getAlignment();
495  if (StartAlignment == 0)
496    StartAlignment = DL.getABITypeAlignment(GV->getType());
497
498  // Loop over all users and create replacement variables for used aggregate
499  // elements.
500  for (User *GEP : GV->users()) {
501    assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() ==
502                                           Instruction::GetElementPtr) ||
503            isa<GetElementPtrInst>(GEP)) &&
504           "NonGEP CE's are not SRAable!");
505
506    // Ignore operand 1 (the first index), which has to be zero or else the
507    // program is quite broken (undefined).  Get operand 2, which is the
508    // structure or array index.
509    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
510    if (NewGlobals.count(ElementIdx) == 1)
511      continue; // we've already created a replacement variable
512    assert(NewGlobals.count(ElementIdx) == 0);
513
514    Type *ElTy = nullptr;
515    if (StructType *STy = dyn_cast<StructType>(Ty))
516      ElTy = STy->getElementType(ElementIdx);
517    else if (SequentialType *STy = dyn_cast<SequentialType>(Ty))
518      ElTy = STy->getElementType();
519    assert(ElTy);
520
521    Constant *In = Init->getAggregateElement(ElementIdx);
522    assert(In && "Couldn't get element of initializer?");
523
524    GlobalVariable *NGV = new GlobalVariable(
525        ElTy, false, GlobalVariable::InternalLinkage, In,
526        GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(),
527        GV->getType()->getAddressSpace());
528    NGV->setExternallyInitialized(GV->isExternallyInitialized());
529    NGV->copyAttributesFrom(GV);
530    NewGlobals.insert(std::make_pair(ElementIdx, NGV));
531
532    if (StructType *STy = dyn_cast<StructType>(Ty)) {
533      const StructLayout &Layout = *DL.getStructLayout(STy);
534
535      // Calculate the known alignment of the field.  If the original aggregate
536      // had 256 byte alignment for example, something might depend on that:
537      // propagate info to each field.
538      uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
539      Align NewAlign(MinAlign(StartAlignment, FieldOffset));
540      if (NewAlign >
541          Align(DL.getABITypeAlignment(STy->getElementType(ElementIdx))))
542        NGV->setAlignment(NewAlign);
543
544      // Copy over the debug info for the variable.
545      uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
546      uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
547      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size);
548    } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
549      uint64_t EltSize = DL.getTypeAllocSize(ElTy);
550      Align EltAlign(DL.getABITypeAlignment(ElTy));
551      uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
552
553      // Calculate the known alignment of the field.  If the original aggregate
554      // had 256 byte alignment for example, something might depend on that:
555      // propagate info to each field.
556      Align NewAlign(MinAlign(StartAlignment, EltSize * ElementIdx));
557      if (NewAlign > EltAlign)
558        NGV->setAlignment(NewAlign);
559      transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
560                           FragmentSizeInBits);
561    }
562  }
563
564  if (NewGlobals.empty())
565    return nullptr;
566
567  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
568  for (auto NewGlobalVar : NewGlobals)
569    Globals.push_back(NewGlobalVar.second);
570
571  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
572
573  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
574
575  // Loop over all of the uses of the global, replacing the constantexpr geps
576  // with smaller constantexpr geps or direct references.
577  while (!GV->use_empty()) {
578    User *GEP = GV->user_back();
579    assert(((isa<ConstantExpr>(GEP) &&
580             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
581            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
582
583    // Ignore operand 1 (the first index), which has to be zero or else the
584    // program is quite broken (undefined).  Get operand 2, which is the
585    // structure or array index.
586    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
587    assert(NewGlobals.count(ElementIdx) == 1);
588
589    Value *NewPtr = NewGlobals[ElementIdx];
590    Type *NewTy = NewGlobals[ElementIdx]->getValueType();
591
592    // Form a shorter GEP if needed.
593    if (GEP->getNumOperands() > 3) {
594      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
595        SmallVector<Constant*, 8> Idxs;
596        Idxs.push_back(NullInt);
597        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
598          Idxs.push_back(CE->getOperand(i));
599        NewPtr =
600            ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
601      } else {
602        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
603        SmallVector<Value*, 8> Idxs;
604        Idxs.push_back(NullInt);
605        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
606          Idxs.push_back(GEPI->getOperand(i));
607        NewPtr = GetElementPtrInst::Create(
608            NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx),
609            GEPI);
610      }
611    }
612    GEP->replaceAllUsesWith(NewPtr);
613
614    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
615      GEPI->eraseFromParent();
616    else
617      cast<ConstantExpr>(GEP)->destroyConstant();
618  }
619
620  // Delete the old global, now that it is dead.
621  Globals.erase(GV);
622  ++NumSRA;
623
624  assert(NewGlobals.size() > 0);
625  return NewGlobals.begin()->second;
626}
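// Sketch of the overall effect (added for illustration): a struct global such as
//   @S = internal global { i32, double } zeroinitializer
// whose only uses are 'gep @S, 0, 0' and 'gep @S, 0, 1' is replaced by two
// independent globals
//   @S.0 = internal global i32 0
//   @S.1 = internal global double 0.0
// and each GEP is rewritten to reference the matching replacement directly.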
627
628/// Return true if all users of the specified value will trap if the value is
629/// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
630/// reprocessing them.
631static bool AllUsesOfValueWillTrapIfNull(const Value *V,
632                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
633  for (const User *U : V->users()) {
634    if (const Instruction *I = dyn_cast<Instruction>(U)) {
635      // If null pointer is considered valid, then all uses are non-trapping.
636      // Non address-space 0 globals have already been pruned by the caller.
637      if (NullPointerIsDefined(I->getFunction()))
638        return false;
639    }
640    if (isa<LoadInst>(U)) {
641      // Will trap.
642    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
643      if (SI->getOperand(0) == V) {
644        //cerr << "NONTRAPPING USE: " << *U;
645        return false;  // Storing the value.
646      }
647    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
648      if (CI->getCalledValue() != V) {
649        //cerr << "NONTRAPPING USE: " << *U;
650        return false;  // Not calling the ptr
651      }
652    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
653      if (II->getCalledValue() != V) {
654        //cerr << "NONTRAPPING USE: " << *U;
655        return false;  // Not calling the ptr
656      }
657    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
658      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
659    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
660      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
661    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
662      // If we've already seen this phi node, ignore it, it has already been
663      // checked.
664      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
665        return false;
666    } else if (isa<ICmpInst>(U) &&
667               isa<ConstantPointerNull>(U->getOperand(1))) {
668      // Ignore icmp X, null
669    } else {
670      //cerr << "NONTRAPPING USE: " << *U;
671      return false;
672    }
673  }
674  return true;
675}
676
677/// Return true if all uses of any loads from GV will trap if the loaded value
678/// is null.  Note that this also permits comparisons of the loaded value
679/// against null, as a special case.
680static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
681  for (const User *U : GV->users())
682    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
683      SmallPtrSet<const PHINode*, 8> PHIs;
684      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
685        return false;
686    } else if (isa<StoreInst>(U)) {
687      // Ignore stores to the global.
688    } else {
689      // We don't know or understand this user, bail out.
690      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
691      return false;
692    }
693  return true;
694}
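// Illustrative case (added commentary): for
//   %p = load i32*, i32** @GV
//   %v = load i32, i32* %p
// the inner load traps if %p is null, so that use qualifies; an
//   icmp eq i32* %p, null
// is also tolerated, but storing %p elsewhere or passing it as a call argument
// would make this predicate return false.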
695
696static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
697  bool Changed = false;
698  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
699    Instruction *I = cast<Instruction>(*UI++);
700    // Uses are non-trapping if null pointer is considered valid.
701    // Non address-space 0 globals are already pruned by the caller.
702    if (NullPointerIsDefined(I->getFunction()))
703      return false;
704    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
705      LI->setOperand(0, NewV);
706      Changed = true;
707    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
708      if (SI->getOperand(1) == V) {
709        SI->setOperand(1, NewV);
710        Changed = true;
711      }
712    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
713      CallSite CS(I);
714      if (CS.getCalledValue() == V) {
715        // Calling through the pointer!  Turn into a direct call, but be careful
716        // that the pointer is not also being passed as an argument.
717        CS.setCalledFunction(NewV);
718        Changed = true;
719        bool PassedAsArg = false;
720        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
721          if (CS.getArgument(i) == V) {
722            PassedAsArg = true;
723            CS.setArgument(i, NewV);
724          }
725
726        if (PassedAsArg) {
727          // Being passed as an argument also.  Be careful to not invalidate UI!
728          UI = V->user_begin();
729        }
730      }
731    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
732      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
733                                ConstantExpr::getCast(CI->getOpcode(),
734                                                      NewV, CI->getType()));
735      if (CI->use_empty()) {
736        Changed = true;
737        CI->eraseFromParent();
738      }
739    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
740      // Should handle GEP here.
741      SmallVector<Constant*, 8> Idxs;
742      Idxs.reserve(GEPI->getNumOperands()-1);
743      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
744           i != e; ++i)
745        if (Constant *C = dyn_cast<Constant>(*i))
746          Idxs.push_back(C);
747        else
748          break;
749      if (Idxs.size() == GEPI->getNumOperands()-1)
750        Changed |= OptimizeAwayTrappingUsesOfValue(
751            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
752                                                 NewV, Idxs));
753      if (GEPI->use_empty()) {
754        Changed = true;
755        GEPI->eraseFromParent();
756      }
757    }
758  }
759
760  return Changed;
761}
762
763/// The specified global has only one non-null value stored into it.  If there
764/// are uses of the loaded value that would trap if the loaded value is
765/// dynamically null, then we know that they cannot be reachable with a null
766/// value, so we can optimize away the load.
767static bool OptimizeAwayTrappingUsesOfLoads(
768    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
769    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
770  bool Changed = false;
771
772  // Keep track of whether we are able to remove all the uses of the global
773  // other than the store that defines it.
774  bool AllNonStoreUsesGone = true;
775
776  // Replace all uses of loads with uses of the stored value.
777  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
778    User *GlobalUser = *GUI++;
779    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
780      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
781      // If we were able to delete all uses of the load, remove it as well.
782      if (LI->use_empty()) {
783        LI->eraseFromParent();
784        Changed = true;
785      } else {
786        AllNonStoreUsesGone = false;
787      }
788    } else if (isa<StoreInst>(GlobalUser)) {
789      // Ignore the store that stores "LV" to the global.
790      assert(GlobalUser->getOperand(1) == GV &&
791             "Must be storing *to* the global");
792    } else {
793      AllNonStoreUsesGone = false;
794
795      // If we get here we could have other crazy uses that are transitively
796      // loaded.
797      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
798              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
799              isa<BitCastInst>(GlobalUser) ||
800              isa<GetElementPtrInst>(GlobalUser)) &&
801             "Only expect load and stores!");
802    }
803  }
804
805  if (Changed) {
806    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
807                      << "\n");
808    ++NumGlobUses;
809  }
810
811  // If we nuked all of the loads, then none of the stores are needed either,
812  // nor is the global.
813  if (AllNonStoreUsesGone) {
814    if (isLeakCheckerRoot(GV)) {
815      Changed |= CleanupPointerRootUsers(GV, GetTLI);
816    } else {
817      Changed = true;
818      CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
819    }
820    if (GV->use_empty()) {
821      LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
822      Changed = true;
823      GV->eraseFromParent();
824      ++NumDeleted;
825    }
826  }
827  return Changed;
828}
829
830/// Walk the use list of V, constant folding all of the instructions that are
831/// foldable.
832static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
833                                TargetLibraryInfo *TLI) {
834  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
835    if (Instruction *I = dyn_cast<Instruction>(*UI++))
836      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
837        I->replaceAllUsesWith(NewC);
838
839        // Advance UI to the next non-I use to avoid invalidating it!
840        // Instructions could multiply use V.
841        while (UI != E && *UI == I)
842          ++UI;
843        if (isInstructionTriviallyDead(I, TLI))
844          I->eraseFromParent();
845      }
846}
847
848/// This function takes the specified global variable, and transforms the
849/// program as if it always contained the result of the specified malloc.
850/// Because it is always the result of the specified malloc, there is no reason
851/// to actually DO the malloc.  Instead, turn the malloc into a global, and
852/// rewrite any loads of GV as uses of the new global.
853static GlobalVariable *
854OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
855                              ConstantInt *NElements, const DataLayout &DL,
856                              TargetLibraryInfo *TLI) {
857  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
858                    << '\n');
859
860  Type *GlobalType;
861  if (NElements->getZExtValue() == 1)
862    GlobalType = AllocTy;
863  else
864    // If we have an array allocation, the global variable is of an array.
865    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
866
867  // Create the new global variable.  The contents of the malloc'd memory is
868  // undefined, so initialize with an undef value.
869  GlobalVariable *NewGV = new GlobalVariable(
870      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
871      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
872      GV->getThreadLocalMode());
873
874  // If there are bitcast users of the malloc (which is typical, usually we have
875  // a malloc + bitcast) then replace them with uses of the new global.  Update
876  // other users to use the global as well.
877  BitCastInst *TheBC = nullptr;
878  while (!CI->use_empty()) {
879    Instruction *User = cast<Instruction>(CI->user_back());
880    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
881      if (BCI->getType() == NewGV->getType()) {
882        BCI->replaceAllUsesWith(NewGV);
883        BCI->eraseFromParent();
884      } else {
885        BCI->setOperand(0, NewGV);
886      }
887    } else {
888      if (!TheBC)
889        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
890      User->replaceUsesOfWith(CI, TheBC);
891    }
892  }
893
894  Constant *RepValue = NewGV;
895  if (NewGV->getType() != GV->getValueType())
896    RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
897
898  // If there is a comparison against null, we will insert a global bool to
899  // keep track of whether the global was initialized yet or not.
900  GlobalVariable *InitBool =
901    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
902                       GlobalValue::InternalLinkage,
903                       ConstantInt::getFalse(GV->getContext()),
904                       GV->getName()+".init", GV->getThreadLocalMode());
905  bool InitBoolUsed = false;
906
907  // Loop over all uses of GV, processing them in turn.
908  while (!GV->use_empty()) {
909    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
910      // The global is initialized when the store to it occurs.
911      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
912                    None, SI->getOrdering(), SI->getSyncScopeID(), SI);
913      SI->eraseFromParent();
914      continue;
915    }
916
917    LoadInst *LI = cast<LoadInst>(GV->user_back());
918    while (!LI->use_empty()) {
919      Use &LoadUse = *LI->use_begin();
920      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
921      if (!ICI) {
922        LoadUse = RepValue;
923        continue;
924      }
925
926      // Replace the cmp X, 0 with a use of the bool value.
927      // Sink the load to where the compare was, if atomic rules allow us to.
928      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
929                               InitBool->getName() + ".val", false, None,
930                               LI->getOrdering(), LI->getSyncScopeID(),
931                               LI->isUnordered() ? (Instruction *)ICI : LI);
932      InitBoolUsed = true;
933      switch (ICI->getPredicate()) {
934      default: llvm_unreachable("Unknown ICmp Predicate!");
935      case ICmpInst::ICMP_ULT:
936      case ICmpInst::ICMP_SLT:   // X < null -> always false
937        LV = ConstantInt::getFalse(GV->getContext());
938        break;
939      case ICmpInst::ICMP_ULE:
940      case ICmpInst::ICMP_SLE:
941      case ICmpInst::ICMP_EQ:
942        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
943        break;
944      case ICmpInst::ICMP_NE:
945      case ICmpInst::ICMP_UGE:
946      case ICmpInst::ICMP_SGE:
947      case ICmpInst::ICMP_UGT:
948      case ICmpInst::ICMP_SGT:
949        break;  // no change.
950      }
951      ICI->replaceAllUsesWith(LV);
952      ICI->eraseFromParent();
953    }
954    LI->eraseFromParent();
955  }
956
957  // If the initialization boolean was used, insert it, otherwise delete it.
958  if (!InitBoolUsed) {
959    while (!InitBool->use_empty())  // Delete initializations
960      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
961    delete InitBool;
962  } else
963    GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
964
965  // Now the GV is dead, nuke it and the malloc..
966  GV->eraseFromParent();
967  CI->eraseFromParent();
968
969  // To further other optimizations, loop over all users of NewGV and try to
970  // constant prop them.  This will promote GEP instructions with constant
971  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
972  ConstantPropUsersOf(NewGV, DL, TLI);
973  if (RepValue != NewGV)
974    ConstantPropUsersOf(RepValue, DL, TLI);
975
976  return NewGV;
977}
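// Rough before/after sketch (illustrative only): for a pattern like
//   @p = internal global i32* null
//   %m = call i8* @malloc(i64 4)
//   %c = bitcast i8* %m to i32*
//   store i32* %c, i32** @p
// the malloc is replaced by a new global '@p.body = internal global i32 undef',
// and if the program compares the loaded pointer against null, a boolean
// '@p.init' is introduced to record whether the store has executed yet.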
978
979/// Scan the use-list of V checking to make sure that there are no complex uses
980/// of V.  We permit simple things like dereferencing the pointer, but not
981/// storing through the address, unless it is to the specified global.
982static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
983                                                      const GlobalVariable *GV,
984                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
985  for (const User *U : V->users()) {
986    const Instruction *Inst = cast<Instruction>(U);
987
988    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
989      continue; // Fine, ignore.
990    }
991
992    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
993      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
994        return false;  // Storing the pointer itself... bad.
995      continue; // Otherwise, storing through it, or storing into GV... fine.
996    }
997
998    // Must index into the array and into the struct.
999    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
1000      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
1001        return false;
1002      continue;
1003    }
1004
1005    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
1006      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
1007      // cycles.
1008      if (PHIs.insert(PN).second)
1009        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
1010          return false;
1011      continue;
1012    }
1013
1014    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
1015      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
1016        return false;
1017      continue;
1018    }
1019
1020    return false;
1021  }
1022  return true;
1023}
1024
1025/// The Alloc pointer is stored into GV somewhere.  Transform all uses of the
1026/// allocation into loads from the global and uses of the resultant pointer.
1027/// Further, delete the store into GV.  This assumes that these values pass the
1028/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
1029static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
1030                                          GlobalVariable *GV) {
1031  while (!Alloc->use_empty()) {
1032    Instruction *U = cast<Instruction>(*Alloc->user_begin());
1033    Instruction *InsertPt = U;
1034    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1035      // If this is the store of the allocation into the global, remove it.
1036      if (SI->getOperand(1) == GV) {
1037        SI->eraseFromParent();
1038        continue;
1039      }
1040    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
1041      // Insert the load in the corresponding predecessor, not right before the
1042      // PHI.
1043      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
1044    } else if (isa<BitCastInst>(U)) {
1045      // Must be bitcast between the malloc and store to initialize the global.
1046      ReplaceUsesOfMallocWithGlobal(U, GV);
1047      U->eraseFromParent();
1048      continue;
1049    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
1050      // If this is a "GEP bitcast" and the user is a store to the global, then
1051      // just process it as a bitcast.
1052      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
1053        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
1054          if (SI->getOperand(1) == GV) {
1055            // Must be bitcast GEP between the malloc and store to initialize
1056            // the global.
1057            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
1058            GEPI->eraseFromParent();
1059            continue;
1060          }
1061    }
1062
1063    // Insert a load from the global, and use it instead of the malloc.
1064    Value *NL =
1065        new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
1066    U->replaceUsesOfWith(Alloc, NL);
1067  }
1068}
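// Small example of this rewrite (added, hypothetical IR): given
//   %m = call i8* @malloc(...)
//   store i8* %m, i8** @GV
//   %v = load i8, i8* %m
// the store is erased and the remaining use of %m is rewritten to go through
// the global:
//   %GV.val = load i8*, i8** @GV
//   %v = load i8, i8* %GV.val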
1069
1070/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
1071/// perform heap SRA on.  This permits GEP's that index through the array and
1072/// struct field, icmps of null, and PHIs.
1073static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
1074                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
1075                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
1076  // We permit two users of the load: setcc comparing against the null
1077  // pointer, and a getelementptr of a specific form.
1078  for (const User *U : V->users()) {
1079    const Instruction *UI = cast<Instruction>(U);
1080
1081    // Comparison against null is ok.
1082    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
1083      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
1084        return false;
1085      continue;
1086    }
1087
1088    // getelementptr is also ok, but only a simple form.
1089    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
1090      // Must index into the array and into the struct.
1091      if (GEPI->getNumOperands() < 3)
1092        return false;
1093
1094      // Otherwise the GEP is ok.
1095      continue;
1096    }
1097
1098    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1099      if (!LoadUsingPHIsPerLoad.insert(PN).second)
1100        // This means some phi nodes are dependent on each other.
1101        // Avoid infinite looping!
1102        return false;
1103      if (!LoadUsingPHIs.insert(PN).second)
1104        // If we have already analyzed this PHI, then it is safe.
1105        continue;
1106
1107      // Make sure all uses of the PHI are simple enough to transform.
1108      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
1109                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
1110        return false;
1111
1112      continue;
1113    }
1114
1115    // Otherwise we don't know what this is, not ok.
1116    return false;
1117  }
1118
1119  return true;
1120}
1121
1122/// If all users of values loaded from GV are simple enough to perform HeapSRA,
1123/// return true.
1124static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
1125                                                    Instruction *StoredVal) {
1126  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
1127  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
1128  for (const User *U : GV->users())
1129    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
1130      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
1131                                          LoadUsingPHIsPerLoad))
1132        return false;
1133      LoadUsingPHIsPerLoad.clear();
1134    }
1135
1136  // If we reach here, we know that all uses of the loads and transitive uses
1137  // (through PHI nodes) are simple enough to transform.  However, we don't know
1138  // that all inputs to the PHI nodes are in the same equivalence sets.
1139  // Check to verify that all operands of the PHIs are either PHIs that can be
1140  // transformed, loads from GV, or the stored malloc value itself.
1141  for (const PHINode *PN : LoadUsingPHIs) {
1142    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
1143      Value *InVal = PN->getIncomingValue(op);
1144
1145      // PHI of the stored value itself is ok.
1146      if (InVal == StoredVal) continue;
1147
1148      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
1149        // One of the PHIs in our set is (optimistically) ok.
1150        if (LoadUsingPHIs.count(InPN))
1151          continue;
1152        return false;
1153      }
1154
1155      // Load from GV is ok.
1156      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
1157        if (LI->getOperand(0) == GV)
1158          continue;
1159
1160      // UNDEF? NULL?
1161
1162      // Anything else is rejected.
1163      return false;
1164    }
1165  }
1166
1167  return true;
1168}
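// Illustrative rejection case (added commentary): a PHI such as
//   %p = phi %pair* [ %loaded_from_gv, %bb1 ], [ %some_other_ptr, %bb2 ]
// fails the check above, because one incoming value is neither the stored
// malloc, another load from GV, nor a PHI already known to be transformable.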
1169
1170static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
1171              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1172                   std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1173  std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];
1174
1175  if (FieldNo >= FieldVals.size())
1176    FieldVals.resize(FieldNo+1);
1177
1178  // If we already have this value, just reuse the previously scalarized
1179  // version.
1180  if (Value *FieldVal = FieldVals[FieldNo])
1181    return FieldVal;
1182
1183  // Depending on what instruction this is, we have several cases.
1184  Value *Result;
1185  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
1186    // This is a scalarized version of the load from the global.  Just create
1187    // a new Load of the scalarized global.
1188    Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
1189                                InsertedScalarizedValues, PHIsToRewrite);
1190    Result = new LoadInst(V->getType()->getPointerElementType(), V,
1191                          LI->getName() + ".f" + Twine(FieldNo), LI);
1192  } else {
1193    PHINode *PN = cast<PHINode>(V);
1194    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
1195    // field.
1196
1197    PointerType *PTy = cast<PointerType>(PN->getType());
1198    StructType *ST = cast<StructType>(PTy->getElementType());
1199
1200    unsigned AS = PTy->getAddressSpace();
1201    PHINode *NewPN =
1202      PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
1203                     PN->getNumIncomingValues(),
1204                     PN->getName()+".f"+Twine(FieldNo), PN);
1205    Result = NewPN;
1206    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
1207  }
1208
1209  return FieldVals[FieldNo] = Result;
1210}
1211
1212/// Given a load instruction and a value derived from the load, rewrite the
1213/// derived value to use the HeapSRoA'd load.
1214static void RewriteHeapSROALoadUser(Instruction *LoadUser,
1215              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1216                   std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1217  // If this is a comparison against null, handle it.
1218  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
1219    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
1220    // If we have a setcc of the loaded pointer, we can use a setcc of any
1221    // field.
1222    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
1223                                   InsertedScalarizedValues, PHIsToRewrite);
1224
1225    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
1226                              Constant::getNullValue(NPtr->getType()),
1227                              SCI->getName());
1228    SCI->replaceAllUsesWith(New);
1229    SCI->eraseFromParent();
1230    return;
1231  }
1232
1233  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
1234  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
1235    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
1236           && "Unexpected GEPI!");
1237
1238    // Load the pointer for this field.
1239    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
1240    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
1241                                     InsertedScalarizedValues, PHIsToRewrite);
1242
1243    // Create the new GEP idx vector.
1244    SmallVector<Value*, 8> GEPIdx;
1245    GEPIdx.push_back(GEPI->getOperand(1));
1246    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
1247
1248    Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
1249                                             GEPI->getName(), GEPI);
1250    GEPI->replaceAllUsesWith(NGEPI);
1251    GEPI->eraseFromParent();
1252    return;
1253  }
1254
1255  // Recursively transform the users of PHI nodes.  This will lazily create the
1256  // PHIs that are needed for individual elements.  Keep track of what PHIs we
1257  // see in InsertedScalarizedValues so that we don't get infinite loops (very
1258  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
1259  // already been seen first by another load, so its uses have already been
1260  // processed.
1261  PHINode *PN = cast<PHINode>(LoadUser);
1262  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
1263                                              std::vector<Value *>())).second)
1264    return;
1265
1266  // If this is the first time we've seen this PHI, recursively process all
1267  // users.
1268  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
1269    Instruction *User = cast<Instruction>(*UI++);
1270    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1271  }
1272}
1273
1274/// We are performing Heap SRoA on a global.  Ptr is a value loaded from the
1275/// global.  Eliminate all uses of Ptr, making them use FieldGlobals instead.
1276/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
1277static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
1278              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1279                  std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) {
1280  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
1281    Instruction *User = cast<Instruction>(*UI++);
1282    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1283  }
1284
1285  if (Load->use_empty()) {
1286    Load->eraseFromParent();
1287    InsertedScalarizedValues.erase(Load);
1288  }
1289}
1290
1291/// CI is an allocation of an array of structures.  Break it up into multiple
1292/// allocations of arrays of the fields.
1293static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1294                                            Value *NElems, const DataLayout &DL,
1295                                            const TargetLibraryInfo *TLI) {
1296  LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI
1297                    << '\n');
1298  Type *MAT = getMallocAllocatedType(CI, TLI);
1299  StructType *STy = cast<StructType>(MAT);
1300
1301  // There is guaranteed to be at least one use of the malloc (storing
1302  // it into GV).  If there are other uses, change them to be uses of
1303  // the global to simplify later code.  This also deletes the store
1304  // into GV.
1305  ReplaceUsesOfMallocWithGlobal(CI, GV);
1306
1307  // Okay, at this point, there are no users of the malloc.  Insert N
1308  // new mallocs at the same place as CI, and N globals.
1309  std::vector<Value *> FieldGlobals;
1310  std::vector<Value *> FieldMallocs;
1311
1312  SmallVector<OperandBundleDef, 1> OpBundles;
1313  CI->getOperandBundlesAsDefs(OpBundles);
1314
1315  unsigned AS = GV->getType()->getPointerAddressSpace();
1316  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
1317    Type *FieldTy = STy->getElementType(FieldNo);
1318    PointerType *PFieldTy = PointerType::get(FieldTy, AS);
1319
1320    GlobalVariable *NGV = new GlobalVariable(
1321        *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
1322        Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
1323        nullptr, GV->getThreadLocalMode());
1324    NGV->copyAttributesFrom(GV);
1325    FieldGlobals.push_back(NGV);
1326
1327    unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
1328    if (StructType *ST = dyn_cast<StructType>(FieldTy))
1329      TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
1330    Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1331    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1332                                        ConstantInt::get(IntPtrTy, TypeSize),
1333                                        NElems, OpBundles, nullptr,
1334                                        CI->getName() + ".f" + Twine(FieldNo));
1335    FieldMallocs.push_back(NMI);
1336    new StoreInst(NMI, NGV, CI);
1337  }
1338
1339  // The tricky aspect of this transformation is handling the case when malloc
1340  // fails.  In the original code, malloc failing would set the result pointer
1341  // of malloc to null.  In this case, some mallocs could succeed and others
1342  // could fail.  As such, we emit code that looks like this:
1343  //    F0 = malloc(field0)
1344  //    F1 = malloc(field1)
1345  //    F2 = malloc(field2)
1346  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
1347  //      if (F0) { free(F0); F0 = 0; }
1348  //      if (F1) { free(F1); F1 = 0; }
1349  //      if (F2) { free(F2); F2 = 0; }
1350  //    }
1351  // The malloc can also fail if its argument is too large.
1352  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
1353  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
1354                                  ConstantZero, "isneg");
1355  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1356    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1357                             Constant::getNullValue(FieldMallocs[i]->getType()),
1358                               "isnull");
1359    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
1360  }
1361
1362  // Split the basic block at the old malloc.
1363  BasicBlock *OrigBB = CI->getParent();
1364  BasicBlock *ContBB =
1365      OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
1366
1367  // Create the block to check the first condition.  Put all these blocks at the
1368  // end of the function as they are unlikely to be executed.
1369  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
1370                                                "malloc_ret_null",
1371                                                OrigBB->getParent());
1372
1373  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
1374  // branch on RunningOr.
1375  OrigBB->getTerminator()->eraseFromParent();
1376  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
1377
1378  // Within the NullPtrBlock, we need to emit a comparison and branch for each
1379  // pointer, because some may be null while others are not.
1380  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1381    Value *GVVal =
1382        new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
1383                     FieldGlobals[i], "tmp", NullPtrBlock);
1384    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
1385                              Constant::getNullValue(GVVal->getType()));
1386    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
1387                                               OrigBB->getParent());
1388    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
1389                                               OrigBB->getParent());
1390    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
1391                                         Cmp, NullPtrBlock);
1392
1393    // Fill in FreeBlock.
1394    CallInst::CreateFree(GVVal, OpBundles, BI);
1395    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
1396                  FreeBlock);
1397    BranchInst::Create(NextBlock, FreeBlock);
1398
1399    NullPtrBlock = NextBlock;
1400  }
1401
1402  BranchInst::Create(ContBB, NullPtrBlock);
1403
1404  // CI is no longer needed, remove it.
1405  CI->eraseFromParent();
1406
1407  // As we process loads, if we can't immediately update all uses of the load,
1408  // keep track of what scalarized loads are inserted for a given load.
1409  DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
1410  InsertedScalarizedValues[GV] = FieldGlobals;
1411
1412  std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;
1413
1414  // Okay, the malloc site is completely handled.  All of the uses of GV are now
1415  // loads, and all uses of those loads are simple.  Rewrite them to use loads
1416  // of the per-field globals instead.
1417  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
1418    Instruction *User = cast<Instruction>(*UI++);
1419
1420    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1421      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
1422      continue;
1423    }
1424
1425    // Must be a store of null.
1426    StoreInst *SI = cast<StoreInst>(User);
1427    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
1428           "Unexpected heap-sra user!");
1429
1430    // Insert a store of null into each global.
1431    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1432      Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
1433      Constant *Null = Constant::getNullValue(ValTy);
1434      new StoreInst(Null, FieldGlobals[i], SI);
1435    }
1436    // Erase the original store.
1437    SI->eraseFromParent();
1438  }
1439
1440  // While we have PHIs that are interesting to rewrite, do it.
1441  while (!PHIsToRewrite.empty()) {
1442    PHINode *PN = PHIsToRewrite.back().first;
1443    unsigned FieldNo = PHIsToRewrite.back().second;
1444    PHIsToRewrite.pop_back();
1445    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
1446    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
1447
1448    // Add all the incoming values.  This can materialize more phis.
1449    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1450      Value *InVal = PN->getIncomingValue(i);
1451      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
1452                               PHIsToRewrite);
1453      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
1454    }
1455  }
1456
1457  // Drop all inter-phi links and any loads that made it this far.
1458  for (DenseMap<Value *, std::vector<Value *>>::iterator
1459       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1460       I != E; ++I) {
1461    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1462      PN->dropAllReferences();
1463    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1464      LI->dropAllReferences();
1465  }
1466
1467  // Delete all the phis and loads now that inter-references are dead.
1468  for (DenseMap<Value *, std::vector<Value *>>::iterator
1469       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1470       I != E; ++I) {
1471    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1472      PN->eraseFromParent();
1473    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1474      LI->eraseFromParent();
1475  }
1476
1477  // The old global is now dead, remove it.
1478  GV->eraseFromParent();
1479
1480  ++NumHeapSRA;
1481  return cast<GlobalVariable>(FieldGlobals[0]);
1482}
1483
1484/// This function is called when we see a pointer global variable with a single
1485/// value stored into it that is a malloc or a cast of a malloc.
1486static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
1487                                               Type *AllocTy,
1488                                               AtomicOrdering Ordering,
1489                                               const DataLayout &DL,
1490                                               TargetLibraryInfo *TLI) {
1491  // If this is a malloc of an abstract type, don't touch it.
1492  if (!AllocTy->isSized())
1493    return false;
1494
1495  // We can't optimize this global unless all uses of it are *known* to be
1496  // of the malloc value, not of the null initializer value (consider a use
1497  // that compares the global's value against zero to see if the malloc has
1498  // been reached).  To do this, we check to see if all uses of the global
1499  // would trap if the global were null: this proves that they must all
1500  // happen after the malloc.
1501  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
1502    return false;
1503
1504  // We can't optimize this if the malloc itself is used in a complex way,
1505  // for example, being stored into multiple globals.  This allows the
1506  // malloc to be stored into the specified global, loaded, icmp'd, and
1507  // GEP'd.  These are all uses we know how to rewrite to go through the
1508  // global instead.
1509  SmallPtrSet<const PHINode*, 8> PHIs;
1510  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
1511    return false;
1512
1513  // If we have a global that is only initialized with a fixed size malloc,
1514  // transform the program to use global memory instead of malloc'd memory.
1515  // This eliminates dynamic allocation, avoids an indirection accessing the
1516  // data, and exposes the resultant global to further GlobalOpt.
1517  // We cannot optimize the malloc if we cannot determine malloc array size.
1518  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
1519  if (!NElems)
1520    return false;
1521
1522  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
1523    // Restrict this transformation to only working on small allocations
1524    // (2048 bytes currently), as we don't want to introduce a 16M global or
1525    // something.
1526    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
1527      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
1528      return true;
1529    }
1530
1531  // If the allocation is an array of structures, consider transforming this
1532  // into multiple malloc'd arrays, one for each field.  This is basically
1533  // SRoA for malloc'd memory.
1534
1535  if (Ordering != AtomicOrdering::NotAtomic)
1536    return false;
1537
1538  // If this is an allocation of a fixed size array of structs, analyze it as a
1539  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
1540  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1541    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1542      AllocTy = AT->getElementType();
1543
1544  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1545  if (!AllocSTy)
1546    return false;
1547
1548  // If the structure has an unreasonable number of fields, leave it
1549  // alone.
1550  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1551      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1552
1553    // If this is a fixed size array, transform the Malloc to be an alloc of
1554    // structs.  malloc [100 x struct],1 -> malloc struct, 100
1555    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1556      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1557      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
1558      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1559      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1560      SmallVector<OperandBundleDef, 1> OpBundles;
1561      CI->getOperandBundlesAsDefs(OpBundles);
1562      Instruction *Malloc =
1563          CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
1564                                 OpBundles, nullptr, CI->getName());
1565      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1566      CI->replaceAllUsesWith(Cast);
1567      CI->eraseFromParent();
1568      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1569        CI = cast<CallInst>(BCI->getOperand(0));
1570      else
1571        CI = cast<CallInst>(Malloc);
1572    }
1573
1574    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
1575                         TLI);
1576    return true;
1577  }
1578
1579  return false;
1580}
1581
1582// Try to optimize globals based on the knowledge that only one value (besides
1583// its initializer) is ever stored to the global.
1584static bool
1585optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1586                         AtomicOrdering Ordering, const DataLayout &DL,
1587                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
1588  // Ignore no-op GEPs and bitcasts.
1589  StoredOnceVal = StoredOnceVal->stripPointerCasts();
1590
1591  // If we are dealing with a pointer global that is initialized to null and
1592  // only has one (non-null) value stored into it, then we can optimize any
1593  // users of the loaded value (often calls and loads) that would trap if the
1594  // value was null.
1595  if (GV->getInitializer()->getType()->isPointerTy() &&
1596      GV->getInitializer()->isNullValue() &&
1597      !NullPointerIsDefined(
1598          nullptr /* F */,
1599          GV->getInitializer()->getType()->getPointerAddressSpace())) {
1600    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1601      if (GV->getInitializer()->getType() != SOVC->getType())
1602        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1603
1604      // Optimize away any trapping uses of the loaded value.
1605      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
1606        return true;
1607    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) {
1608      auto *TLI = &GetTLI(*CI->getFunction());
1609      Type *MallocType = getMallocAllocatedType(CI, TLI);
1610      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
1611                                                           Ordering, DL, TLI))
1612        return true;
1613    }
1614  }
1615
1616  return false;
1617}
1618
1619/// At this point, we have learned that the only two values ever stored into GV
1620/// are its initializer and OtherVal.  See if we can shrink the global into a
1621/// boolean and select between the two values whenever it is used.  This exposes
1622/// the values to other scalar optimizations.
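/// For illustration (hypothetical names; a sketch of the transformation):
///   @state = internal global i32 10        ; only 10 and 20 are ever stored
/// becomes
///   @state.b = internal global i1 false
/// where stores are rewritten to store i1 false (for 10) or true (for 20),
/// and each load becomes "load i1" followed by "select i1 %b, i32 20, i32 10".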
1623static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1624  Type *GVElType = GV->getValueType();
1625
1626  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
1627  // an FP value, pointer or vector, don't do this optimization because a select
1628  // between them is very expensive and unlikely to lead to later
1629  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
1630  // where v1 and v2 both require constant pool loads, a big loss.
1631  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1632      GVElType->isFloatingPointTy() ||
1633      GVElType->isPointerTy() || GVElType->isVectorTy())
1634    return false;
1635
1636  // Walk the use list of the global seeing if all the uses are load or store.
1637  // If there is anything else, bail out.
1638  for (User *U : GV->users())
1639    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1640      return false;
1641
1642  LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");
1643
1644  // Create the new global, initializing it to false.
1645  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1646                                             false,
1647                                             GlobalValue::InternalLinkage,
1648                                        ConstantInt::getFalse(GV->getContext()),
1649                                             GV->getName()+".b",
1650                                             GV->getThreadLocalMode(),
1651                                             GV->getType()->getAddressSpace());
1652  NewGV->copyAttributesFrom(GV);
1653  GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
1654
1655  Constant *InitVal = GV->getInitializer();
1656  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1657         "No reason to shrink to bool!");
1658
1659  SmallVector<DIGlobalVariableExpression *, 1> GVs;
1660  GV->getDebugInfo(GVs);
1661
1662  // If initialized to zero and storing one into the global, we can use a cast
1663  // instead of a select to synthesize the desired value.
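  // For example (illustrative): when the initializer is 0 and the stored value
  // is 1, a load can be rewritten as "zext i1 %b to i32", which yields exactly
  // 0 or 1 without needing a select.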
1664  bool IsOneZero = false;
1665  bool EmitOneOrZero = true;
1666  auto *CI = dyn_cast<ConstantInt>(OtherVal);
1667  if (CI && CI->getValue().getActiveBits() <= 64) {
1668    IsOneZero = InitVal->isNullValue() && CI->isOne();
1669
1670    auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
1671    if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
1672      uint64_t ValInit = CIInit->getZExtValue();
1673      uint64_t ValOther = CI->getZExtValue();
1674      uint64_t ValMinus = ValOther - ValInit;
1675
1676      for(auto *GVe : GVs){
1677        DIGlobalVariable *DGV = GVe->getVariable();
1678        DIExpression *E = GVe->getExpression();
1679        const DataLayout &DL = GV->getParent()->getDataLayout();
1680        unsigned SizeInOctets =
1681          DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;
1682
1683        // It is expected that the address of the optimized global variable is
1684        // on top of the expression stack. After the optimization, the value of
1685        // that variable will be either 0 (the initial value) or 1 (the other
1686        // value). The following expression returns a constant integer depending
1687        // on the value at the global object's address:
1688        // val * (ValOther - ValInit) + ValInit:
1689        // DW_OP_deref DW_OP_constu <ValMinus>
1690        // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
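        // As a worked example (illustrative values only): with ValInit = 10 and
        // ValOther = 20, ValMinus = 10, so the expression evaluates to
        // 0 * 10 + 10 = 10 when the i1 global holds 0, and 1 * 10 + 10 = 20
        // when it holds 1, recovering the original values for the debugger.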
1691        SmallVector<uint64_t, 12> Ops = {
1692            dwarf::DW_OP_deref_size, SizeInOctets,
1693            dwarf::DW_OP_constu, ValMinus,
1694            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
1695            dwarf::DW_OP_plus};
1696        bool WithStackValue = true;
1697        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
1698        DIGlobalVariableExpression *DGVE =
1699          DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
1700        NewGV->addDebugInfo(DGVE);
1701     }
1702     EmitOneOrZero = false;
1703    }
1704  }
1705
1706  if (EmitOneOrZero) {
1707     // FIXME: This will only emit the variable's address for the debugger;
1708     // the value written there will only ever be 0 or 1.
1709     for(auto *GV : GVs)
1710       NewGV->addDebugInfo(GV);
1711   }
1712
1713  while (!GV->use_empty()) {
1714    Instruction *UI = cast<Instruction>(GV->user_back());
1715    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1716      // Change the store into a boolean store.
1717      bool StoringOther = SI->getOperand(0) == OtherVal;
1718      // Only do this if we weren't storing a loaded value.
1719      Value *StoreVal;
1720      if (StoringOther || SI->getOperand(0) == InitVal) {
1721        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1722                                    StoringOther);
1723      } else {
1724        // Otherwise, we are storing a previously loaded copy.  To do this,
1725        // change the copy from copying the original value to just copying the
1726        // bool.
1727        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1728
1729        // If we've already replaced the input, StoredVal will be a cast or
1730        // select instruction.  If not, it will be a load of the original
1731        // global.
1732        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1733          assert(LI->getOperand(0) == GV && "Not a copy!");
1734          // Insert a new load, to preserve the saved value.
1735          StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
1736                                  LI->getName() + ".b", false, None,
1737                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
1738        } else {
1739          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1740                 "This is not a form that we understand!");
1741          StoreVal = StoredVal->getOperand(0);
1742          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1743        }
1744      }
1745      StoreInst *NSI =
1746          new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(),
1747                        SI->getSyncScopeID(), SI);
1748      NSI->setDebugLoc(SI->getDebugLoc());
1749    } else {
1750      // Change the load into a load of bool then a select.
1751      LoadInst *LI = cast<LoadInst>(UI);
1752      LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
1753                                   LI->getName() + ".b", false, None,
1754                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
1755      Instruction *NSI;
1756      if (IsOneZero)
1757        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1758      else
1759        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1760      NSI->takeName(LI);
1761      // Since LI is split into two instructions, NLI and NSI both inherit the
1762      // same DebugLoc.
1763      NLI->setDebugLoc(LI->getDebugLoc());
1764      NSI->setDebugLoc(LI->getDebugLoc());
1765      LI->replaceAllUsesWith(NSI);
1766    }
1767    UI->eraseFromParent();
1768  }
1769
1770  // Retain the name of the old global variable. People who are debugging their
1771  // programs may expect these variables to be named the same.
1772  NewGV->takeName(GV);
1773  GV->eraseFromParent();
1774  return true;
1775}
1776
1777static bool deleteIfDead(
1778    GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
1779  GV.removeDeadConstantUsers();
1780
1781  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
1782    return false;
1783
1784  if (const Comdat *C = GV.getComdat())
1785    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
1786      return false;
1787
1788  bool Dead;
1789  if (auto *F = dyn_cast<Function>(&GV))
1790    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
1791  else
1792    Dead = GV.use_empty();
1793  if (!Dead)
1794    return false;
1795
1796  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
1797  GV.eraseFromParent();
1798  ++NumDeleted;
1799  return true;
1800}
1801
1802static bool isPointerValueDeadOnEntryToFunction(
1803    const Function *F, GlobalValue *GV,
1804    function_ref<DominatorTree &(Function &)> LookupDomTree) {
1805  // Find all uses of GV. We expect them all to be in F, and if we can't
1806  // identify any of the uses we bail out.
1807  //
1808  // On each of these uses, identify if the memory that GV points to is
1809  // used/required/live at the start of the function. If it is not, for example
1810  // if the first thing the function does is store to the GV, the GV can
1811  // possibly be demoted.
1812  //
1813  // We don't do an exhaustive search for memory operations - simply look
1814  // through bitcasts as they're quite common and benign.
1815  const DataLayout &DL = GV->getParent()->getDataLayout();
1816  SmallVector<LoadInst *, 4> Loads;
1817  SmallVector<StoreInst *, 4> Stores;
1818  for (auto *U : GV->users()) {
1819    if (Operator::getOpcode(U) == Instruction::BitCast) {
1820      for (auto *UU : U->users()) {
1821        if (auto *LI = dyn_cast<LoadInst>(UU))
1822          Loads.push_back(LI);
1823        else if (auto *SI = dyn_cast<StoreInst>(UU))
1824          Stores.push_back(SI);
1825        else
1826          return false;
1827      }
1828      continue;
1829    }
1830
1831    Instruction *I = dyn_cast<Instruction>(U);
1832    if (!I)
1833      return false;
1834    assert(I->getParent()->getParent() == F);
1835
1836    if (auto *LI = dyn_cast<LoadInst>(I))
1837      Loads.push_back(LI);
1838    else if (auto *SI = dyn_cast<StoreInst>(I))
1839      Stores.push_back(SI);
1840    else
1841      return false;
1842  }
1843
1844  // We have identified all uses of GV into loads and stores. Now check if all
1845  // of them are known not to depend on the value of the global at the function
1846  // entry point. We do this by ensuring that every load is dominated by at
1847  // least one store.
1848  auto &DT = LookupDomTree(*const_cast<Function *>(F));
1849
1850  // The below check is quadratic. Check we're not going to do too many tests.
1851  // FIXME: Even though this will always have worst-case quadratic time, we
1852  // could put effort into minimizing the average time by putting stores that
1853  // have been shown to dominate at least one load at the beginning of the
1854  // Stores array, making subsequent dominance checks more likely to succeed
1855  // early.
1856  //
1857  // The threshold here is fairly large because global->local demotion is a
1858  // very powerful optimization should it fire.
1859  const unsigned Threshold = 100;
1860  if (Loads.size() * Stores.size() > Threshold)
1861    return false;
1862
1863  for (auto *L : Loads) {
1864    auto *LTy = L->getType();
1865    if (none_of(Stores, [&](const StoreInst *S) {
1866          auto *STy = S->getValueOperand()->getType();
1867          // The load is only dominated by the store if DomTree says so
1868          // and the number of bits loaded in L is less than or equal to
1869          // the number of bits stored in S.
1870          return DT.dominates(S, L) &&
1871                 DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
1872        }))
1873      return false;
1874  }
1875  // All loads have known dependences inside F, so the global can be localized.
1876  return true;
1877}
1878
1879/// C may have non-instruction users. Can all of those users be turned into
1880/// instructions?
1881static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
1882  // We don't do this exhaustively. The most common pattern that we really need
1883  // to care about is a constant GEP or constant bitcast - so we just look
1884  // through a single level of ConstantExpr.
1885  //
1886  // The set of constants that this function returns true for must be able to be
1887  // handled by makeAllConstantUsesInstructions.
1888  for (auto *U : C->users()) {
1889    if (isa<Instruction>(U))
1890      continue;
1891    if (!isa<ConstantExpr>(U))
1892      // Non instruction, non-constantexpr user; cannot convert this.
1893      return false;
1894    for (auto *UU : U->users())
1895      if (!isa<Instruction>(UU))
1896        // A constantexpr used by another constant. We don't try and recurse any
1897        // further but just bail out at this point.
1898        return false;
1899  }
1900
1901  return true;
1902}
1903
1904/// C may have non-instruction users, and
1905/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1906/// non-instruction users to instructions.
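/// For illustration (a sketch; the hypothetical %T and @G are for example
/// purposes only): a constant-expression user such as
///   %v = load i32, i32* getelementptr inbounds (%T, %T* @G, i64 0, i32 1)
/// is rewritten so the constant GEP is materialized as a getelementptr
/// instruction immediately before the load, and the load uses that
/// instruction instead.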
1907static void makeAllConstantUsesInstructions(Constant *C) {
1908  SmallVector<ConstantExpr*,4> Users;
1909  for (auto *U : C->users()) {
1910    if (isa<ConstantExpr>(U))
1911      Users.push_back(cast<ConstantExpr>(U));
1912    else
1913      // We should never get here; allNonInstructionUsersCanBeMadeInstructions
1914      // should not have returned true for C.
1915      assert(
1916          isa<Instruction>(U) &&
1917          "Can't transform non-constantexpr non-instruction to instruction!");
1918  }
1919
1920  SmallVector<Value*,4> UUsers;
1921  for (auto *U : Users) {
1922    UUsers.clear();
1923    for (auto *UU : U->users())
1924      UUsers.push_back(UU);
1925    for (auto *UU : UUsers) {
1926      Instruction *UI = cast<Instruction>(UU);
1927      Instruction *NewU = U->getAsInstruction();
1928      NewU->insertBefore(UI);
1929      UI->replaceUsesOfWith(U, NewU);
1930    }
1931    // We've replaced all the uses, so destroy the constant. (destroyConstant
1932    // will update value handles and metadata.)
1933    U->destroyConstant();
1934  }
1935}
1936
1937/// Analyze the specified global variable and optimize
1938/// it if possible.  If we make a change, return true.
1939static bool
1940processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1941                      function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1942                      function_ref<DominatorTree &(Function &)> LookupDomTree) {
1943  auto &DL = GV->getParent()->getDataLayout();
1944  // If this is a first class global and has only one accessing function and
1945  // this function is non-recursive, we replace the global with a local alloca
1946  // in this function.
1947  //
1948  // NOTE: It doesn't make sense to promote non-single-value types since we
1949  // are just replacing static memory with stack memory.
1950  //
1951  // If the global is in a different address space, don't bring it to the stack.
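  // For illustration (hypothetical): an internal global @counter = i32 0 that
  // is only accessed from a single non-recursive function becomes
  //   %counter = alloca i32
  //   store i32 0, i32* %counter
  // at that function's entry, and all former uses of @counter are rewritten to
  // use the alloca.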
1952  if (!GS.HasMultipleAccessingFunctions &&
1953      GS.AccessingFunction &&
1954      GV->getValueType()->isSingleValueType() &&
1955      GV->getType()->getAddressSpace() == 0 &&
1956      !GV->isExternallyInitialized() &&
1957      allNonInstructionUsersCanBeMadeInstructions(GV) &&
1958      GS.AccessingFunction->doesNotRecurse() &&
1959      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1960                                          LookupDomTree)) {
1961    const DataLayout &DL = GV->getParent()->getDataLayout();
1962
1963    LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1964    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1965                                                   ->getEntryBlock().begin());
1966    Type *ElemTy = GV->getValueType();
1967    // FIXME: Pass Global's alignment when globals have alignment
1968    AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1969                                        GV->getName(), &FirstI);
1970    if (!isa<UndefValue>(GV->getInitializer()))
1971      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1972
1973    makeAllConstantUsesInstructions(GV);
1974
1975    GV->replaceAllUsesWith(Alloca);
1976    GV->eraseFromParent();
1977    ++NumLocalized;
1978    return true;
1979  }
1980
1981  // If the global is never loaded (but may be stored to), it is dead.
1982  // Delete it now.
1983  if (!GS.IsLoaded) {
1984    LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1985
1986    bool Changed;
1987    if (isLeakCheckerRoot(GV)) {
1988      // Delete any constant stores to the global.
1989      Changed = CleanupPointerRootUsers(GV, GetTLI);
1990    } else {
1991      // Delete any stores we can find to the global.  We may not be able to
1992      // make it completely dead though.
1993      Changed =
1994          CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
1995    }
1996
1997    // If the global is dead now, delete it.
1998    if (GV->use_empty()) {
1999      GV->eraseFromParent();
2000      ++NumDeleted;
2001      Changed = true;
2002    }
2003    return Changed;
2004
2005  }
2006  if (GS.StoredType <= GlobalStatus::InitializerStored) {
2007    LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
2008
2009    // Don't actually mark a global constant if it's atomic because atomic loads
2010    // are implemented by a trivial cmpxchg in some edge-cases and that usually
2011    // requires write access to the variable even if it's not actually changed.
2012    if (GS.Ordering == AtomicOrdering::NotAtomic)
2013      GV->setConstant(true);
2014
2015    // Clean up any obviously simplifiable users now.
2016    CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
2017
2018    // If the global is dead now, just nuke it.
2019    if (GV->use_empty()) {
2020      LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
2021                        << "all users and delete global!\n");
2022      GV->eraseFromParent();
2023      ++NumDeleted;
2024      return true;
2025    }
2026
2027    // Fall through to the next check; see if we can optimize further.
2028    ++NumMarked;
2029  }
2030  if (!GV->getInitializer()->getType()->isSingleValueType()) {
2031    const DataLayout &DL = GV->getParent()->getDataLayout();
2032    if (SRAGlobal(GV, DL))
2033      return true;
2034  }
2035  if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
2036    // If the initial value for the global was an undef value, and if only
2037    // one other value was stored into it, we can just change the
2038    // initializer to be the stored value, then delete all stores to the
2039    // global.  This allows us to mark it constant.
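    // For example (illustrative): @x = internal global i32 undef with a single
    // "store i32 42, i32* @x" becomes @x = internal global i32 42; the store
    // is then deleted as an obviously dead write of the initializer.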
2040    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
2041      if (isa<UndefValue>(GV->getInitializer())) {
2042        // Change the initial value here.
2043        GV->setInitializer(SOVConstant);
2044
2045        // Clean up any obviously simplifiable users now.
2046        CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
2047
2048        if (GV->use_empty()) {
2049          LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
2050                            << "simplify all users and delete global!\n");
2051          GV->eraseFromParent();
2052          ++NumDeleted;
2053        }
2054        ++NumSubstitute;
2055        return true;
2056      }
2057
2058    // Try to optimize globals based on the knowledge that only one value
2059    // (besides its initializer) is ever stored to the global.
2060    if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
2061                                 GetTLI))
2062      return true;
2063
2064    // Otherwise, if the global was not a boolean, we can shrink it to be a
2065    // boolean.
2066    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
2067      if (GS.Ordering == AtomicOrdering::NotAtomic) {
2068        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
2069          ++NumShrunkToBool;
2070          return true;
2071        }
2072      }
2073    }
2074  }
2075
2076  return false;
2077}
2078
2079/// Analyze the specified global variable and optimize it if possible.  If we
2080/// make a change, return true.
2081static bool
2082processGlobal(GlobalValue &GV,
2083              function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2084              function_ref<DominatorTree &(Function &)> LookupDomTree) {
2085  if (GV.getName().startswith("llvm."))
2086    return false;
2087
2088  GlobalStatus GS;
2089
2090  if (GlobalStatus::analyzeGlobal(&GV, GS))
2091    return false;
2092
2093  bool Changed = false;
2094  if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
2095    auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
2096                                               : GlobalValue::UnnamedAddr::Local;
2097    if (NewUnnamedAddr != GV.getUnnamedAddr()) {
2098      GV.setUnnamedAddr(NewUnnamedAddr);
2099      NumUnnamed++;
2100      Changed = true;
2101    }
2102  }
2103
2104  // Do more involved optimizations if the global is internal.
2105  if (!GV.hasLocalLinkage())
2106    return Changed;
2107
2108  auto *GVar = dyn_cast<GlobalVariable>(&GV);
2109  if (!GVar)
2110    return Changed;
2111
2112  if (GVar->isConstant() || !GVar->hasInitializer())
2113    return Changed;
2114
2115  return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
2116}
2117
2118/// Walk all of the direct calls of the specified function, changing them to
2119/// FastCC.
2120static void ChangeCalleesToFastCall(Function *F) {
2121  for (User *U : F->users()) {
2122    if (isa<BlockAddress>(U))
2123      continue;
2124    CallSite CS(cast<Instruction>(U));
2125    CS.setCallingConv(CallingConv::Fast);
2126  }
2127}
2128
2129static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
2130                               Attribute::AttrKind A) {
2131  unsigned AttrIndex;
2132  if (Attrs.hasAttrSomewhere(A, &AttrIndex))
2133    return Attrs.removeAttribute(C, AttrIndex, A);
2134  return Attrs;
2135}
2136
2137static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
2138  F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
2139  for (User *U : F->users()) {
2140    if (isa<BlockAddress>(U))
2141      continue;
2142    CallSite CS(cast<Instruction>(U));
2143    CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
2144  }
2145}
2146
2147/// Return true if this is a calling convention that we'd like to change.  The
2148/// idea here is that we don't want to mess with the convention if the user
2149/// explicitly requested something with performance implications like coldcc,
2150/// GHC, or anyregcc.
2151static bool hasChangeableCC(Function *F) {
2152  CallingConv::ID CC = F->getCallingConv();
2153
2154  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
2155  if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2156    return false;
2157
2158  // FIXME: Change CC for the whole chain of musttail calls when possible.
2159  //
2160  // Can't change the CC of a function that either has musttail calls or is
2161  // itself a musttail callee.
2162  for (User *U : F->users()) {
2163    if (isa<BlockAddress>(U))
2164      continue;
2165    CallInst* CI = dyn_cast<CallInst>(U);
2166    if (!CI)
2167      continue;
2168
2169    if (CI->isMustTailCall())
2170      return false;
2171  }
2172
2173  for (BasicBlock &BB : *F)
2174    if (BB.getTerminatingMustTailCall())
2175      return false;
2176
2177  return true;
2178}
2179
2180/// Return true if the block containing the call site has a BlockFrequency of
2181/// less than ColdCCRelFreq% of the entry block.
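/// For example (illustrative numbers): if ColdCCRelFreq were 2, a call site
/// whose block frequency is below 2% of its caller's entry-block frequency
/// would be treated as cold.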
2182static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
2183  const BranchProbability ColdProb(ColdCCRelFreq, 100);
2184  auto CallSiteBB = CS.getInstruction()->getParent();
2185  auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
2186  auto CallerEntryFreq =
2187      CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
2188  return CallSiteFreq < CallerEntryFreq * ColdProb;
2189}
2190
2191// This function checks if the input function F is cold at all call sites. It
2192// also looks at each call site's containing function, returning false if the
2193// caller function contains other non-cold calls. The input vector AllCallsCold
2194// contains a list of functions that only have call sites in cold blocks.
2195static bool
2196isValidCandidateForColdCC(Function &F,
2197                          function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2198                          const std::vector<Function *> &AllCallsCold) {
2199
2200  if (F.user_empty())
2201    return false;
2202
2203  for (User *U : F.users()) {
2204    if (isa<BlockAddress>(U))
2205      continue;
2206
2207    CallSite CS(cast<Instruction>(U));
2208    Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
2209    BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
2210    if (!isColdCallSite(CS, CallerBFI))
2211      return false;
2212    auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
2213    if (It == AllCallsCold.end())
2214      return false;
2215  }
2216  return true;
2217}
2218
2219static void changeCallSitesToColdCC(Function *F) {
2220  for (User *U : F->users()) {
2221    if (isa<BlockAddress>(U))
2222      continue;
2223    CallSite CS(cast<Instruction>(U));
2224    CS.setCallingConv(CallingConv::Cold);
2225  }
2226}
2227
2228// This function iterates over all the call instructions in the input Function
2229// and checks that all call sites are in cold blocks and are allowed to use the
2230// coldcc calling convention.
2231static bool
2232hasOnlyColdCalls(Function &F,
2233                 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2234  for (BasicBlock &BB : F) {
2235    for (Instruction &I : BB) {
2236      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2237        CallSite CS(cast<Instruction>(CI));
2238        // Skip over inline asm instructions since they aren't function calls.
2239        if (CI->isInlineAsm())
2240          continue;
2241        Function *CalledFn = CI->getCalledFunction();
2242        if (!CalledFn)
2243          return false;
2244        if (!CalledFn->hasLocalLinkage())
2245          return false;
2246        // Skip over intrinsics since they won't remain as function calls.
2247        if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
2248          continue;
2249        // Check if it's valid to use coldcc calling convention.
2250        if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
2251            CalledFn->hasAddressTaken())
2252          return false;
2253        BlockFrequencyInfo &CallerBFI = GetBFI(F);
2254        if (!isColdCallSite(CS, CallerBFI))
2255          return false;
2256      }
2257    }
2258  }
2259  return true;
2260}
2261
2262static bool
2263OptimizeFunctions(Module &M,
2264                  function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2265                  function_ref<TargetTransformInfo &(Function &)> GetTTI,
2266                  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2267                  function_ref<DominatorTree &(Function &)> LookupDomTree,
2268                  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2269
2270  bool Changed = false;
2271
2272  std::vector<Function *> AllCallsCold;
2273  for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
2274    Function *F = &*FI++;
2275    if (hasOnlyColdCalls(*F, GetBFI))
2276      AllCallsCold.push_back(F);
2277  }
2278
2279  // Optimize functions.
2280  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
2281    Function *F = &*FI++;
2282
2283    // Don't perform global opt pass on naked functions; we don't want fast
2284    // calling conventions for naked functions.
2285    if (F->hasFnAttribute(Attribute::Naked))
2286      continue;
2287
2288    // Functions without names cannot be referenced outside this module.
2289    if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
2290      F->setLinkage(GlobalValue::InternalLinkage);
2291
2292    if (deleteIfDead(*F, NotDiscardableComdats)) {
2293      Changed = true;
2294      continue;
2295    }
2296
2297    // LLVM's definition of dominance allows instructions that are cyclic
2298    // in unreachable blocks, e.g.:
2299    // %pat = select i1 %condition, @global, i16* %pat
2300    // because any instruction dominates an instruction in a block that's
2301    // not reachable from entry.
2302    // So, remove unreachable blocks from the function, because a) there's
2303    // no point in analyzing them and b) GlobalOpt should otherwise grow
2304    // some more complicated logic to break these cycles.
2305    // Removing unreachable blocks might invalidate the dominator so we
2306    // recalculate it.
2307    if (!F->isDeclaration()) {
2308      if (removeUnreachableBlocks(*F)) {
2309        auto &DT = LookupDomTree(*F);
2310        DT.recalculate(*F);
2311        Changed = true;
2312      }
2313    }
2314
2315    Changed |= processGlobal(*F, GetTLI, LookupDomTree);
2316
2317    if (!F->hasLocalLinkage())
2318      continue;
2319
2320    // If we have an inalloca parameter that we can safely remove the
2321    // inalloca attribute from, do so. This unlocks optimizations that
2322    // wouldn't be safe in the presence of inalloca.
2323    // FIXME: We should also hoist allocas affected by this to the entry
2324    // block if possible.
2325    if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
2326        !F->hasAddressTaken()) {
2327      RemoveAttribute(F, Attribute::InAlloca);
2328      Changed = true;
2329    }
2330
2331    if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
2332      NumInternalFunc++;
2333      TargetTransformInfo &TTI = GetTTI(*F);
2334      // Change the calling convention to coldcc if either stress testing is
2335      // enabled or the target would like to use coldcc on functions which are
2336      // cold at all call sites and the callers contain no other non coldcc
2337      // calls.
2338      if (EnableColdCCStressTest ||
2339          (TTI.useColdCCForColdCall(*F) &&
2340           isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
2341        F->setCallingConv(CallingConv::Cold);
2342        changeCallSitesToColdCC(F);
2343        Changed = true;
2344        NumColdCC++;
2345      }
2346    }
2347
2348    if (hasChangeableCC(F) && !F->isVarArg() &&
2349        !F->hasAddressTaken()) {
2350      // If this function has a calling convention worth changing, is not a
2351      // varargs function, and is only called directly, promote it to use the
2352      // Fast calling convention.
2353      F->setCallingConv(CallingConv::Fast);
2354      ChangeCalleesToFastCall(F);
2355      ++NumFastCallFns;
2356      Changed = true;
2357    }
2358
2359    if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2360        !F->hasAddressTaken()) {
2361      // The function is not used by a trampoline intrinsic, so it is safe
2362      // to remove the 'nest' attribute.
2363      RemoveAttribute(F, Attribute::Nest);
2364      ++NumNestRemoved;
2365      Changed = true;
2366    }
2367  }
2368  return Changed;
2369}
2370
2371static bool
2372OptimizeGlobalVars(Module &M,
2373                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2374                   function_ref<DominatorTree &(Function &)> LookupDomTree,
2375                   SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2376  bool Changed = false;
2377
2378  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
2379       GVI != E; ) {
2380    GlobalVariable *GV = &*GVI++;
2381    // Global variables without names cannot be referenced outside this module.
2382    if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
2383      GV->setLinkage(GlobalValue::InternalLinkage);
2384    // Simplify the initializer.
2385    if (GV->hasInitializer())
2386      if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
2387        auto &DL = M.getDataLayout();
2388        // TLI is not used in the case of a Constant, so use default nullptr
2389        // for that optional parameter, since we don't have a Function to
2390        // provide GetTLI anyway.
2391        Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2392        if (New && New != C)
2393          GV->setInitializer(New);
2394      }
2395
2396    if (deleteIfDead(*GV, NotDiscardableComdats)) {
2397      Changed = true;
2398      continue;
2399    }
2400
2401    Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
2402  }
2403  return Changed;
2404}
2405
2406/// Evaluate a piece of a constantexpr store into a global initializer.  This
2407/// returns 'Init' modified to reflect 'Val' stored into it.  At this point, the
2408/// GEP operands of Addr [0, OpNo) have been stepped into.
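/// For illustration (hypothetical types): storing i32 7 through
///   getelementptr ({ i32, [2 x i32] }, { i32, [2 x i32] }* @G, i64 0, i32 1, i64 0)
/// rebuilds @G's initializer with element [1][0] replaced by 7, leaving every
/// other element of the aggregate unchanged.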
2409static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2410                                   ConstantExpr *Addr, unsigned OpNo) {
2411  // Base case of the recursion.
2412  if (OpNo == Addr->getNumOperands()) {
2413    assert(Val->getType() == Init->getType() && "Type mismatch!");
2414    return Val;
2415  }
2416
2417  SmallVector<Constant*, 32> Elts;
2418  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2419    // Break up the constant into its elements.
2420    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2421      Elts.push_back(Init->getAggregateElement(i));
2422
2423    // Replace the element that we are supposed to.
2424    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2425    unsigned Idx = CU->getZExtValue();
2426    assert(Idx < STy->getNumElements() && "Struct index out of range!");
2427    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2428
2429    // Return the modified struct.
2430    return ConstantStruct::get(STy, Elts);
2431  }
2432
2433  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2434  SequentialType *InitTy = cast<SequentialType>(Init->getType());
2435  uint64_t NumElts = InitTy->getNumElements();
2436
2437  // Break up the array into elements.
2438  for (uint64_t i = 0, e = NumElts; i != e; ++i)
2439    Elts.push_back(Init->getAggregateElement(i));
2440
2441  assert(CI->getZExtValue() < NumElts);
2442  Elts[CI->getZExtValue()] =
2443    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2444
2445  if (Init->getType()->isArrayTy())
2446    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2447  return ConstantVector::get(Elts);
2448}
2449
2450/// We have decided that Addr (which satisfies the predicate
2451/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
2452static void CommitValueTo(Constant *Val, Constant *Addr) {
2453  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2454    assert(GV->hasInitializer());
2455    GV->setInitializer(Val);
2456    return;
2457  }
2458
2459  ConstantExpr *CE = cast<ConstantExpr>(Addr);
2460  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2461  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2462}
2463
2464/// Given a map of address -> value, where addresses are expected to be some form
2465/// of either a global or a constant GEP, set the initializer for the address to
2466/// be the value. This performs mostly the same function as CommitValueTo()
2467/// and EvaluateStoreInto() but is optimized to be more efficient for the common
2468/// case where the set of addresses are GEPs sharing the same underlying global,
2469/// processing the GEPs in batches rather than individually.
2470///
2471/// To give an example, consider the following C++ code adapted from the clang
2472/// regression tests:
2473/// struct S {
2474///  int n = 10;
2475///  int m = 2 * n;
2476///  S(int a) : n(a) {}
2477/// };
2478///
2479/// template<typename T>
2480/// struct U {
2481///  T *r = &q;
2482///  T q = 42;
2483///  U *p = this;
2484/// };
2485///
2486/// U<S> e;
2487///
2488/// The global static constructor for 'e' will need to initialize 'r' and 'p' of
2489/// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm'
2490/// members. This batch algorithm will simply use the general CommitValueTo()
2491/// method to handle the complex nested initialization of the inner S struct 'q',
2492/// before processing the outermost members in a single batch. Using
2493/// CommitValueTo() to handle each member of the outer struct is inefficient when
2494/// the struct/array is very large, as we end up creating and destroying constant
2495/// arrays for each initialization.
2496/// For the above case, we expect the following IR to be generated:
2497///
2498/// %struct.U = type { %struct.S*, %struct.S, %struct.U* }
2499/// %struct.S = type { i32, i32 }
2500/// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e,
2501///                                                  i64 0, i32 1),
2502///                         %struct.S { i32 42, i32 84 }, %struct.U* @e }
2503/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex
2504/// constant expression, while the other two elements of @e are "simple".
2505static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
2506  SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs;
2507  SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs;
2508  SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs;
2509  SimpleCEs.reserve(Mem.size());
2510
2511  for (const auto &I : Mem) {
2512    if (auto *GV = dyn_cast<GlobalVariable>(I.first)) {
2513      GVs.push_back(std::make_pair(GV, I.second));
2514    } else {
2515      ConstantExpr *GEP = cast<ConstantExpr>(I.first);
2516      // We don't handle the deeply recursive case using the batch method.
2517      if (GEP->getNumOperands() > 3)
2518        ComplexCEs.push_back(std::make_pair(GEP, I.second));
2519      else
2520        SimpleCEs.push_back(std::make_pair(GEP, I.second));
2521    }
2522  }
2523
2524  // The algorithm below doesn't handle cases like nested structs, so use the
2525  // slower fully general method if we have to.
2526  for (auto ComplexCE : ComplexCEs)
2527    CommitValueTo(ComplexCE.second, ComplexCE.first);
2528
2529  for (auto GVPair : GVs) {
2530    assert(GVPair.first->hasInitializer());
2531    GVPair.first->setInitializer(GVPair.second);
2532  }
2533
2534  if (SimpleCEs.empty())
2535    return;
2536
2537  // We cache a single global's initializer elements in the case where the
2538  // subsequent address/val pair uses the same one. This avoids throwing away and
2539  // rebuilding the constant struct/vector/array just because one element is
2540  // modified at a time.
2541  SmallVector<Constant *, 32> Elts;
2542  Elts.reserve(SimpleCEs.size());
2543  GlobalVariable *CurrentGV = nullptr;
2544
2545  auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) {
2546    Constant *Init = GV->getInitializer();
2547    Type *Ty = Init->getType();
2548    if (Update) {
2549      if (CurrentGV) {
2550        assert(CurrentGV && "Expected a GV to commit to!");
2551        Type *CurrentInitTy = CurrentGV->getInitializer()->getType();
2552        // We have a valid cache that needs to be committed.
2553        if (StructType *STy = dyn_cast<StructType>(CurrentInitTy))
2554          CurrentGV->setInitializer(ConstantStruct::get(STy, Elts));
2555        else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy))
2556          CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts));
2557        else
2558          CurrentGV->setInitializer(ConstantVector::get(Elts));
2559      }
2560      if (CurrentGV == GV)
2561        return;
2562      // Need to clear and set up cache for new initializer.
2563      CurrentGV = GV;
2564      Elts.clear();
2565      unsigned NumElts;
2566      if (auto *STy = dyn_cast<StructType>(Ty))
2567        NumElts = STy->getNumElements();
2568      else
2569        NumElts = cast<SequentialType>(Ty)->getNumElements();
2570      for (unsigned i = 0, e = NumElts; i != e; ++i)
2571        Elts.push_back(Init->getAggregateElement(i));
2572    }
2573  };
2574
2575  for (auto CEPair : SimpleCEs) {
2576    ConstantExpr *GEP = CEPair.first;
2577    Constant *Val = CEPair.second;
2578
2579    GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0));
2580    commitAndSetupCache(GV, GV != CurrentGV);
2581    ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2));
2582    Elts[CI->getZExtValue()] = Val;
2583  }
2584  // The last initializer in the list needs to be committed, others
2585  // will be committed on a new initializer being processed.
2586  commitAndSetupCache(CurrentGV, true);
2587}
2588
2589/// Evaluate static constructors in the function, if we can.  Return true if we
2590/// can, false otherwise.
2591static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2592                                      TargetLibraryInfo *TLI) {
2593  // Call the function.
2594  Evaluator Eval(DL, TLI);
2595  Constant *RetValDummy;
2596  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2597                                           SmallVector<Constant*, 0>());
2598
2599  if (EvalSuccess) {
2600    ++NumCtorsEvaluated;
2601
2602    // We succeeded at evaluation: commit the result.
2603    LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2604                      << F->getName() << "' to "
2605                      << Eval.getMutatedMemory().size() << " stores.\n");
2606    BatchCommitValueTo(Eval.getMutatedMemory());
2607    for (GlobalVariable *GV : Eval.getInvariants())
2608      GV->setConstant(true);
2609  }
2610
2611  return EvalSuccess;
2612}
2613
2614static int compareNames(Constant *const *A, Constant *const *B) {
2615  Value *AStripped = (*A)->stripPointerCasts();
2616  Value *BStripped = (*B)->stripPointerCasts();
2617  return AStripped->getName().compare(BStripped->getName());
2618}
2619
2620static void setUsedInitializer(GlobalVariable &V,
2621                               const SmallPtrSetImpl<GlobalValue *> &Init) {
2622  if (Init.empty()) {
2623    V.eraseFromParent();
2624    return;
2625  }
2626
2627  // Type of pointer to the array of pointers.
2628  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
2629
2630  SmallVector<Constant *, 8> UsedArray;
2631  for (GlobalValue *GV : Init) {
2632    Constant *Cast
2633      = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
2634    UsedArray.push_back(Cast);
2635  }
2636  // Sort to get deterministic order.
2637  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2638  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2639
2640  Module *M = V.getParent();
2641  V.removeFromParent();
2642  GlobalVariable *NV =
2643      new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2644                         ConstantArray::get(ATy, UsedArray), "");
2645  NV->takeName(&V);
2646  NV->setSection("llvm.metadata");
2647  delete &V;
2648}
2649
2650namespace {
2651
2652/// An easy to access representation of llvm.used and llvm.compiler.used.
2653class LLVMUsed {
2654  SmallPtrSet<GlobalValue *, 8> Used;
2655  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
2656  GlobalVariable *UsedV;
2657  GlobalVariable *CompilerUsedV;
2658
2659public:
2660  LLVMUsed(Module &M) {
2661    UsedV = collectUsedGlobalVariables(M, Used, false);
2662    CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
2663  }
2664
2665  using iterator = SmallPtrSet<GlobalValue *, 8>::iterator;
2666  using used_iterator_range = iterator_range<iterator>;
2667
2668  iterator usedBegin() { return Used.begin(); }
2669  iterator usedEnd() { return Used.end(); }
2670
2671  used_iterator_range used() {
2672    return used_iterator_range(usedBegin(), usedEnd());
2673  }
2674
2675  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2676  iterator compilerUsedEnd() { return CompilerUsed.end(); }
2677
2678  used_iterator_range compilerUsed() {
2679    return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2680  }
2681
2682  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2683
2684  bool compilerUsedCount(GlobalValue *GV) const {
2685    return CompilerUsed.count(GV);
2686  }
2687
2688  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2689  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2690  bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2691
2692  bool compilerUsedInsert(GlobalValue *GV) {
2693    return CompilerUsed.insert(GV).second;
2694  }
2695
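  /// Rewrite llvm.used and llvm.compiler.used so their initializers match the
  /// in-memory sets; a set that became empty deletes its variable entirely.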
2696  void syncVariablesAndSets() {
2697    if (UsedV)
2698      setUsedInitializer(*UsedV, Used);
2699    if (CompilerUsedV)
2700      setUsedInitializer(*CompilerUsedV, CompilerUsed);
2701  }
2702};
2703
2704} // end anonymous namespace
2705
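/// Return true if the alias has at least one use that does not come from the
/// llvm.used or llvm.compiler.used lists.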
2706static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2707  if (GA.use_empty()) // No use at all.
2708    return false;
2709
2710  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2711         "We should have removed the duplicated "
2712         "element from llvm.compiler.used");
2713  if (!GA.hasOneUse())
2714    // Strictly more than one use, so at least one use is not accounted for
2715    // by llvm.used or llvm.compiler.used.
2716    return true;
2717
2718  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2719  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2720}
2721
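/// Return true if V still has more than one use after discounting a possible
/// use from llvm.used or llvm.compiler.used.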
2722static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
2723                                               const LLVMUsed &U) {
2724  unsigned N = 2;
2725  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2726         "We should have removed the duplicated "
2727         "element from llvm.compiler.used");
2728  if (U.usedCount(&V) || U.compilerUsedCount(&V))
2729    ++N;
2730  return V.hasNUsesOrMore(N);
2731}
2732
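/// Return true if the alias may be referenced in ways this pass cannot see:
/// it is not local to the module, or it is kept alive by llvm.used or
/// llvm.compiler.used.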
2733static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2734  if (!GA.hasLocalLinkage())
2735    return true;
2736
2737  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2738}
2739
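/// Determine whether the uses of the alias GA can be redirected to its
/// aliasee.  RenameTarget is set when the internal aliasee should also take
/// over the alias's name and attributes; the caller performs that rename.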
2740static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2741                             bool &RenameTarget) {
2742  RenameTarget = false;
2743  bool Ret = false;
2744  if (hasUseOtherThanLLVMUsed(GA, U))
2745    Ret = true;
2746
2747  // If the alias is externally visible, we may still be able to simplify it.
2748  if (!mayHaveOtherReferences(GA, U))
2749    return Ret;
2750
2751  // If the aliasee has internal linkage, give it the name and linkage
2752  // of the alias, and delete the alias.  This turns:
2753  //   define internal ... @f(...)
2754  //   @a = alias ... @f
2755  // into:
2756  //   define ... @a(...)
2757  Constant *Aliasee = GA.getAliasee();
2758  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2759  if (!Target->hasLocalLinkage())
2760    return Ret;
2761
2762  // Do not perform the transform if multiple aliases potentially target the
2763  // aliasee. This check also ensures that it is safe to replace the section
2764  // and other attributes of the aliasee with those of the alias.
2765  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2766    return Ret;
2767
2768  RenameTarget = true;
2769  return true;
2770}
2771
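/// Replace aliases with their aliasees wherever doing so is unobservable,
/// deleting aliases that end up with no remaining purpose.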
2772static bool
2773OptimizeGlobalAliases(Module &M,
2774                      SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2775  bool Changed = false;
2776  LLVMUsed Used(M);
2777
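  // A value listed in llvm.used does not also need to appear in
  // llvm.compiler.used; drop such duplicates so each value is in at most one
  // of the two sets (the helpers above assert on this).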
2778  for (GlobalValue *GV : Used.used())
2779    Used.compilerUsedErase(GV);
2780
2781  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2782       I != E;) {
2783    GlobalAlias *J = &*I++;
2784
2785    // Aliases without names cannot be referenced outside this module.
2786    if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
2787      J->setLinkage(GlobalValue::InternalLinkage);
2788
2789    if (deleteIfDead(*J, NotDiscardableComdats)) {
2790      Changed = true;
2791      continue;
2792    }
2793
2794    // If the alias can change at link time, nothing can be done - bail out.
2795    if (J->isInterposable())
2796      continue;
2797
2798    Constant *Aliasee = J->getAliasee();
2799    GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2800    // The alias can only be replaced if the aliasee strips down to a plain
2801    // global value; anything more complex is skipped for now.
2802    // TODO: Try to handle non-zero GEPs of local aliasees.
2803    if (!Target)
2804      continue;
2805    Target->removeDeadConstantUsers();
2806
2807    // Make all users of the alias use the aliasee instead.
2808    bool RenameTarget;
2809    if (!hasUsesToReplace(*J, Used, RenameTarget))
2810      continue;
2811
2812    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
2813    ++NumAliasesResolved;
2814    Changed = true;
2815
2816    if (RenameTarget) {
2817      // Give the aliasee the name, linkage and other attributes of the alias.
2818      Target->takeName(&*J);
2819      Target->setLinkage(J->getLinkage());
2820      Target->setDSOLocal(J->isDSOLocal());
2821      Target->setVisibility(J->getVisibility());
2822      Target->setDLLStorageClass(J->getDLLStorageClass());
2823
2824      if (Used.usedErase(&*J))
2825        Used.usedInsert(Target);
2826
2827      if (Used.compilerUsedErase(&*J))
2828        Used.compilerUsedInsert(Target);
2829    } else if (mayHaveOtherReferences(*J, Used))
2830      continue;
2831
2832    // Delete the alias.
2833    M.getAliasList().erase(J);
2834    ++NumAliasesRemoved;
2835    Changed = true;
2836  }
2837
2838  Used.syncVariablesAndSets();
2839
2840  return Changed;
2841}
2842
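/// Locate the module's __cxa_atexit function, provided TargetLibraryInfo
/// knows it is available and the declaration matches the expected prototype.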
2843static Function *
2844FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
2845  // Hack to get a default TLI before we have an actual Function to query.
2846  auto FuncIter = M.begin();
2847  if (FuncIter == M.end())
2848    return nullptr;
2849  auto *TLI = &GetTLI(*FuncIter);
2850
2851  LibFunc F = LibFunc_cxa_atexit;
2852  if (!TLI->has(F))
2853    return nullptr;
2854
2855  Function *Fn = M.getFunction(TLI->getName(F));
2856  if (!Fn)
2857    return nullptr;
2858
2859  // Now get the actual TLI for Fn.
2860  TLI = &GetTLI(*Fn);
2861
2862  // Make sure that the function has the correct prototype.
2863  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2864    return nullptr;
2865
2866  return Fn;
2867}
2868
2869/// Returns whether the given function is an empty C++ destructor and can
2870/// therefore be eliminated.
2871/// Note that we assume other optimization passes have already simplified the
2872/// code, so we only look for a 'ret' at the start of the entry block.
2873static bool cxxDtorIsEmpty(const Function &Fn) {
2874  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2875  // nounwind, but that doesn't seem worth doing.
2876  if (Fn.isDeclaration())
2877    return false;
2878
2879  for (auto &I : Fn.getEntryBlock()) {
2880    if (isa<DbgInfoIntrinsic>(I))
2881      continue;
2882    if (isa<ReturnInst>(I))
2883      return true;
2884    break;
2885  }
2886  return false;
2887}
2888
2889static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2890  /// Itanium C++ ABI p3.3.5:
2891  ///
2892  ///   After constructing a global (or local static) object, that will require
2893  ///   destruction on exit, a termination function is registered as follows:
2894  ///
2895  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2896  ///
2897  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2898  ///   call f(p) when DSO d is unloaded, before all such termination calls
2899  ///   registered before this one. It returns zero if registration is
2900  ///   successful, nonzero on failure.
2901
2902  // Look for calls to __cxa_atexit that register a destructor which is
2903  // trivially empty, and remove them.
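  //
  // For illustration only (the function and global names here are made up),
  // a registration of an empty destructor such as
  //   %r = call i32 @__cxa_atexit(void (i8*)* @dtor, i8* @obj, i8* @__dso_handle)
  // is deleted outright, with any uses of %r replaced by 0.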
2904  bool Changed = false;
2905
2906  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
2907       I != E;) {
2908    // We're only interested in calls. Theoretically, we could handle invoke
2909    // instructions as well, but neither llvm-gcc nor clang generate invokes
2910    // to __cxa_atexit.
2911    CallInst *CI = dyn_cast<CallInst>(*I++);
2912    if (!CI)
2913      continue;
2914
2915    Function *DtorFn =
2916      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2917    if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
2918      continue;
2919
2920    // Just remove the call.
2921    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2922    CI->eraseFromParent();
2923
2924    ++NumCXXDtorsRemoved;
2925
2926    Changed = true;
2927  }
2928
2929  return Changed;
2930}
2931
2932static bool optimizeGlobalsInModule(
2933    Module &M, const DataLayout &DL,
2934    function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2935    function_ref<TargetTransformInfo &(Function &)> GetTTI,
2936    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2937    function_ref<DominatorTree &(Function &)> LookupDomTree) {
2938  SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2939  bool Changed = false;
2940  bool LocalChange = true;
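  // Iterate the sub-passes to a fixed point: each one can expose new
  // opportunities for the others (for example, deleting a dead function may
  // leave a global unreferenced), so repeat until a full round changes
  // nothing.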
2941  while (LocalChange) {
2942    LocalChange = false;
2943
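    // Recompute which comdats must be kept this round: a comdat survives if
    // any of its members (variable, function or alias) is still referenced or
    // cannot be discarded.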
2944    NotDiscardableComdats.clear();
2945    for (const GlobalVariable &GV : M.globals())
2946      if (const Comdat *C = GV.getComdat())
2947        if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2948          NotDiscardableComdats.insert(C);
2949    for (Function &F : M)
2950      if (const Comdat *C = F.getComdat())
2951        if (!F.isDefTriviallyDead())
2952          NotDiscardableComdats.insert(C);
2953    for (GlobalAlias &GA : M.aliases())
2954      if (const Comdat *C = GA.getComdat())
2955        if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2956          NotDiscardableComdats.insert(C);
2957
2958    // Delete trivially dead functions and switch eligible ones from ccc to fastcc.
2959    LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
2960                                     NotDiscardableComdats);
2961
2962    // Optimize global_ctors list.
2963    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
2964      return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
2965    });
2966
2967    // Optimize non-address-taken globals.
2968    LocalChange |=
2969        OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
2970
2971    // Resolve aliases, when possible.
2972    LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2973
2974    // Try to remove trivial global destructors if they are not removed
2975    // already.
2976    Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
2977    if (CXAAtExitFn)
2978      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2979
2980    Changed |= LocalChange;
2981  }
2982
2983  // TODO: Move all global ctor functions to the end of the module for better
2984  // code layout.
2985
2986  return Changed;
2987}
2988
2989PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2990  auto &DL = M.getDataLayout();
2991  auto &FAM =
2992      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2993  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
2994    return FAM.getResult<DominatorTreeAnalysis>(F);
2995  };
2996  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2997    return FAM.getResult<TargetLibraryAnalysis>(F);
2998  };
2999  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
3000    return FAM.getResult<TargetIRAnalysis>(F);
3001  };
3002
3003  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
3004    return FAM.getResult<BlockFrequencyAnalysis>(F);
3005  };
3006
3007  if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
3008    return PreservedAnalyses::all();
3009  return PreservedAnalyses::none();
3010}
3011
3012namespace {
3013
3014struct GlobalOptLegacyPass : public ModulePass {
3015  static char ID; // Pass identification, replacement for typeid
3016
3017  GlobalOptLegacyPass() : ModulePass(ID) {
3018    initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
3019  }
3020
3021  bool runOnModule(Module &M) override {
3022    if (skipModule(M))
3023      return false;
3024
3025    auto &DL = M.getDataLayout();
3026    auto LookupDomTree = [this](Function &F) -> DominatorTree & {
3027      return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
3028    };
3029    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
3030      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
3031    };
3032    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
3033      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3034    };
3035
3036    auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
3037      return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
3038    };
3039
3040    return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
3041                                   LookupDomTree);
3042  }
3043
3044  void getAnalysisUsage(AnalysisUsage &AU) const override {
3045    AU.addRequired<TargetLibraryInfoWrapperPass>();
3046    AU.addRequired<TargetTransformInfoWrapperPass>();
3047    AU.addRequired<DominatorTreeWrapperPass>();
3048    AU.addRequired<BlockFrequencyInfoWrapperPass>();
3049  }
3050};
3051
3052} // end anonymous namespace
3053
3054char GlobalOptLegacyPass::ID = 0;
3055
3056INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
3057                      "Global Variable Optimizer", false, false)
3058INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3059INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
3060INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
3061INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3062INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
3063                    "Global Variable Optimizer", false, false)
3064
3065ModulePass *llvm::createGlobalOptimizerPass() {
3066  return new GlobalOptLegacyPass();
3067}
3068