1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass transforms simple global variables that never have their address
10// taken.  If obviously true, it marks read/write globals as constant, deletes
11// variables only stored to, etc.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/IPO/GlobalOpt.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/ConstantFolding.h"
25#include "llvm/Analysis/MemoryBuiltins.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/TargetTransformInfo.h"
28#include "llvm/BinaryFormat/Dwarf.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CallingConv.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DebugInfoMetadata.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/GetElementPtrTypeIterator.h"
40#include "llvm/IR/GlobalAlias.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/IR/GlobalVariable.h"
43#include "llvm/IR/IRBuilder.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/IntrinsicInst.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/Operator.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/User.h"
53#include "llvm/IR/Value.h"
54#include "llvm/IR/ValueHandle.h"
55#include "llvm/InitializePasses.h"
56#include "llvm/Pass.h"
57#include "llvm/Support/AtomicOrdering.h"
58#include "llvm/Support/Casting.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/MathExtras.h"
63#include "llvm/Support/raw_ostream.h"
64#include "llvm/Transforms/IPO.h"
65#include "llvm/Transforms/Utils/CtorUtils.h"
66#include "llvm/Transforms/Utils/Evaluator.h"
67#include "llvm/Transforms/Utils/GlobalStatus.h"
68#include "llvm/Transforms/Utils/Local.h"
69#include <cassert>
70#include <cstdint>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76#define DEBUG_TYPE "globalopt"
77
78STATISTIC(NumMarked    , "Number of globals marked constant");
79STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
80STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
81STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
82STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
83STATISTIC(NumDeleted   , "Number of globals deleted");
84STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
85STATISTIC(NumLocalized , "Number of globals localized");
86STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
87STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
88STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
89STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
90STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
91STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
92STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
93STATISTIC(NumInternalFunc, "Number of internal functions");
94STATISTIC(NumColdCC, "Number of functions marked coldcc");
95
96static cl::opt<bool>
97    EnableColdCCStressTest("enable-coldcc-stress-test",
98                           cl::desc("Enable stress test of coldcc by adding "
99                                    "calling conv to all internal functions."),
100                           cl::init(false), cl::Hidden);
101
102static cl::opt<int> ColdCCRelFreq(
103    "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
104    cl::desc(
105        "Maximum block frequency, expressed as a percentage of caller's "
        "entry frequency, for a call site to be considered cold for enabling "
        "coldcc"));
108
109/// Is this global variable possibly used by a leak checker as a root?  If so,
110/// we might not really want to eliminate the stores to it.
111static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [# x i8] array with
  // a pointer potentially contained here.
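  //
  // For illustration (a hypothetical case, not from any particular test): a
  // union such as
  //
  //   union { void *Ptr; long Bits; };
  //
  // may be lowered to an i64 global, yet a store of a pointer into it is what
  // keeps the pointed-to allocation reachable as far as a leak checker is
  // concerned.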
119
120  if (GV->hasPrivateLinkage())
121    return false;
122
123  SmallVector<Type *, 4> Types;
124  Types.push_back(GV->getValueType());
125
126  unsigned Limit = 20;
127  do {
128    Type *Ty = Types.pop_back_val();
129    switch (Ty->getTypeID()) {
130      default: break;
131      case Type::PointerTyID:
132        return true;
133      case Type::FixedVectorTyID:
134      case Type::ScalableVectorTyID:
135        if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
136          return true;
137        break;
138      case Type::ArrayTyID:
139        Types.push_back(cast<ArrayType>(Ty)->getElementType());
140        break;
141      case Type::StructTyID: {
142        StructType *STy = cast<StructType>(Ty);
143        if (STy->isOpaque()) return true;
144        for (StructType::element_iterator I = STy->element_begin(),
145                 E = STy->element_end(); I != E; ++I) {
146          Type *InnerTy = *I;
147          if (isa<PointerType>(InnerTy)) return true;
148          if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
149              isa<VectorType>(InnerTy))
150            Types.push_back(InnerTy);
151        }
152        break;
153      }
154    }
155    if (--Limit == 0) return true;
156  } while (!Types.empty());
157  return false;
158}
159
160/// Given a value that is stored to a global but never read, determine whether
161/// it's safe to remove the store and the chain of computation that feeds the
162/// store.
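///
/// For example (illustrative IR; assumes @malloc is recognized by TLI as an
/// allocation function):
///
///   %buf = call i8* @malloc(i64 64)
///   %p   = getelementptr i8, i8* %buf, i64 8
///   store i8* %p, i8** @g      ; @g is never read
///
/// Every value in the %p/%buf chain has a single use and the chain ends in an
/// allocation, so the store and the whole chain can be removed.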
163static bool IsSafeComputationToRemove(
164    Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
165  do {
166    if (isa<Constant>(V))
167      return true;
168    if (!V->hasOneUse())
169      return false;
170    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
171        isa<GlobalValue>(V))
172      return false;
173    if (isAllocationFn(V, GetTLI))
174      return true;
175
176    Instruction *I = cast<Instruction>(V);
177    if (I->mayHaveSideEffects())
178      return false;
179    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
180      if (!GEP->hasAllConstantIndices())
181        return false;
182    } else if (I->getNumOperands() != 1) {
183      return false;
184    }
185
186    V = I->getOperand(0);
187  } while (true);
188}
189
190/// This GV is a pointer root.  Loop over all users of the global and clean up
191/// any that obviously don't assign the global a value that isn't dynamically
192/// allocated.
193static bool
194CleanupPointerRootUsers(GlobalVariable *GV,
195                        function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
196  // A brief explanation of leak checkers.  The goal is to find bugs where
197  // pointers are forgotten, causing an accumulating growth in memory
198  // usage over time.  The common strategy for leak checkers is to explicitly
199  // allow the memory pointed to by globals at exit.  This is popular because it
200  // also solves another problem where the main thread of a C++ program may shut
201  // down before other threads that are still expecting to use those globals. To
202  // handle that case, we expect the program may create a singleton and never
203  // destroy it.
204
205  bool Changed = false;
206
207  // If Dead[n].first is the only use of a malloc result, we can delete its
208  // chain of computation and the store to the global in Dead[n].second.
209  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
210
211  // Constants can't be pointers to dynamically allocated memory.
212  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
213       UI != E;) {
214    User *U = *UI++;
215    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
216      Value *V = SI->getValueOperand();
217      if (isa<Constant>(V)) {
218        Changed = true;
219        SI->eraseFromParent();
220      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
221        if (I->hasOneUse())
222          Dead.push_back(std::make_pair(I, SI));
223      }
224    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
225      if (isa<Constant>(MSI->getValue())) {
226        Changed = true;
227        MSI->eraseFromParent();
228      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
229        if (I->hasOneUse())
230          Dead.push_back(std::make_pair(I, MSI));
231      }
232    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
233      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
234      if (MemSrc && MemSrc->isConstant()) {
235        Changed = true;
236        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
238        if (I->hasOneUse())
239          Dead.push_back(std::make_pair(I, MTI));
240      }
241    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
242      if (CE->use_empty()) {
243        CE->destroyConstant();
244        Changed = true;
245      }
246    } else if (Constant *C = dyn_cast<Constant>(U)) {
247      if (isSafeToDestroyConstant(C)) {
248        C->destroyConstant();
249        // This could have invalidated UI, start over from scratch.
250        Dead.clear();
251        CleanupPointerRootUsers(GV, GetTLI);
252        return true;
253      }
254    }
255  }
256
257  for (int i = 0, e = Dead.size(); i != e; ++i) {
258    if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
259      Dead[i].second->eraseFromParent();
260      Instruction *I = Dead[i].first;
261      do {
262        if (isAllocationFn(I, GetTLI))
263          break;
264        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
265        if (!J)
266          break;
267        I->eraseFromParent();
268        I = J;
269      } while (true);
270      I->eraseFromParent();
271    }
272  }
273
274  return Changed;
275}
276
277/// We just marked GV constant.  Loop over all users of the global, cleaning up
278/// the obvious ones.  This is largely just a quick scan over the use list to
279/// clean up the easy and obvious cruft.  This returns true if it made a change.
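///
/// For example (illustrative IR), once @g is known to always hold 42:
///
///   @g = internal constant i32 42
///   %v = load i32, i32* @g     ; replaced with the constant 42
///   store i32 7, i32* @g       ; must be unreachable, so it is deleted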
280static bool CleanupConstantGlobalUsers(
281    Value *V, Constant *Init, const DataLayout &DL,
282    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
283  bool Changed = false;
284  // Note that we need to use a weak value handle for the worklist items. When
285  // we delete a constant array, we may also be holding pointer to one of its
286  // elements (or an element of one of its elements if we're dealing with an
287  // array of arrays) in the worklist.
288  SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
289  while (!WorkList.empty()) {
290    Value *UV = WorkList.pop_back_val();
291    if (!UV)
292      continue;
293
294    User *U = cast<User>(UV);
295
296    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
297      if (Init) {
298        // Replace the load with the initializer.
299        LI->replaceAllUsesWith(Init);
300        LI->eraseFromParent();
301        Changed = true;
302      }
303    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
304      // Store must be unreachable or storing Init into the global.
305      SI->eraseFromParent();
306      Changed = true;
307    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
308      if (CE->getOpcode() == Instruction::GetElementPtr) {
309        Constant *SubInit = nullptr;
310        if (Init)
311          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
312        Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
313      } else if ((CE->getOpcode() == Instruction::BitCast &&
314                  CE->getType()->isPointerTy()) ||
315                 CE->getOpcode() == Instruction::AddrSpaceCast) {
316        // Pointer cast, delete any stores and memsets to the global.
317        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
318      }
319
320      if (CE->use_empty()) {
321        CE->destroyConstant();
322        Changed = true;
323      }
324    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
325      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
326      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
327      // and will invalidate our notion of what Init is.
328      Constant *SubInit = nullptr;
329      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
330        ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
331            ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
332        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
333          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
334
335        // If the initializer is an all-null value and we have an inbounds GEP,
336        // we already know what the result of any load from that GEP is.
337        // TODO: Handle splats.
338        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
339          SubInit = Constant::getNullValue(GEP->getResultElementType());
340      }
341      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
342
343      if (GEP->use_empty()) {
344        GEP->eraseFromParent();
345        Changed = true;
346      }
347    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
348      if (MI->getRawDest() == V) {
349        MI->eraseFromParent();
350        Changed = true;
351      }
352
353    } else if (Constant *C = dyn_cast<Constant>(U)) {
354      // If we have a chain of dead constantexprs or other things dangling from
355      // us, and if they are all dead, nuke them without remorse.
356      if (isSafeToDestroyConstant(C)) {
357        C->destroyConstant();
358        CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
359        return true;
360      }
361    }
362  }
363  return Changed;
364}
365
366static bool isSafeSROAElementUse(Value *V);
367
368/// Return true if the specified GEP is a safe user of a derived
369/// expression from a global that we want to SROA.
370static bool isSafeSROAGEP(User *U) {
371  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
372  // don't like < 3 operand CE's, and we don't like non-constant integer
373  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
374  // value of C.
375  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
376      !cast<Constant>(U->getOperand(1))->isNullValue())
377    return false;
378
379  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
380  ++GEPI; // Skip over the pointer index.
381
  // For all other levels we require that the indices are constant and in
  // range.
383  // In particular, consider: A[0][i].  We cannot know that the user isn't doing
384  // invalid things like allowing i to index an out-of-range subscript that
385  // accesses A[1]. This can also happen between different members of a struct
386  // in llvm IR.
387  for (; GEPI != E; ++GEPI) {
388    if (GEPI.isStruct())
389      continue;
390
391    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
392    if (!IdxVal || (GEPI.isBoundedSequential() &&
393                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
394      return false;
395  }
396
397  return llvm::all_of(U->users(),
398                      [](User *UU) { return isSafeSROAElementUse(UU); });
399}
400
401/// Return true if the specified instruction is a safe user of a derived
402/// expression from a global that we want to SROA.
403static bool isSafeSROAElementUse(Value *V) {
404  // We might have a dead and dangling constant hanging off of here.
405  if (Constant *C = dyn_cast<Constant>(V))
406    return isSafeToDestroyConstant(C);
407
408  Instruction *I = dyn_cast<Instruction>(V);
409  if (!I) return false;
410
411  // Loads are ok.
412  if (isa<LoadInst>(I)) return true;
413
414  // Stores *to* the pointer are ok.
415  if (StoreInst *SI = dyn_cast<StoreInst>(I))
416    return SI->getOperand(0) != V;
417
418  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
419  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
420}
421
422/// Look at all uses of the global and decide whether it is safe for us to
423/// perform this transformation.
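///
/// For instance (illustrative IR), every use must be a GEP of the form
///
///   getelementptr %T, %T* @g, i32 0, i32 <const>, ...
///
/// whose trailing constant indices are in range, and which is itself used
/// only by loads, stores through the pointer, and further GEPs of this shape.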
424static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
425  for (User *U : GV->users()) {
426    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
427    if (!isa<GetElementPtrInst>(U) &&
428        (!isa<ConstantExpr>(U) ||
429        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
430      return false;
431
    // Check that the GEP and its users are safe to SRA.
433    if (!isSafeSROAGEP(U))
434      return false;
435  }
436
437  return true;
438}
439
440static bool IsSRASequential(Type *T) {
441  return isa<ArrayType>(T) || isa<VectorType>(T);
442}
443static uint64_t GetSRASequentialNumElements(Type *T) {
444  if (ArrayType *AT = dyn_cast<ArrayType>(T))
445    return AT->getNumElements();
446  return cast<FixedVectorType>(T)->getNumElements();
447}
448static Type *GetSRASequentialElementType(Type *T) {
449  if (ArrayType *AT = dyn_cast<ArrayType>(T))
450    return AT->getElementType();
451  return cast<VectorType>(T)->getElementType();
452}
453static bool CanDoGlobalSRA(GlobalVariable *GV) {
454  Constant *Init = GV->getInitializer();
455
456  if (isa<StructType>(Init->getType())) {
457    // nothing to check
458  } else if (IsSRASequential(Init->getType())) {
459    if (GetSRASequentialNumElements(Init->getType()) > 16 &&
460        GV->hasNUsesOrMore(16))
461      return false; // It's not worth it.
462  } else
463    return false;
464
465  return GlobalUsersSafeToSRA(GV);
466}
467
468/// Copy over the debug info for a variable to its SRA replacements.
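///
/// When a replacement covers only part of the original variable, the copied
/// expression is wrapped in a fragment, e.g. (illustrative) the second i32 of
/// a { i32, i32 } global gets
///
///   !DIExpression(DW_OP_LLVM_fragment, 32, 32)   ; offset 32 bits, size 32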
469static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
470                                 uint64_t FragmentOffsetInBits,
471                                 uint64_t FragmentSizeInBits,
472                                 uint64_t VarSize) {
473  SmallVector<DIGlobalVariableExpression *, 1> GVs;
474  GV->getDebugInfo(GVs);
475  for (auto *GVE : GVs) {
476    DIVariable *Var = GVE->getVariable();
477    DIExpression *Expr = GVE->getExpression();
478    // If the FragmentSize is smaller than the variable,
479    // emit a fragment expression.
480    if (FragmentSizeInBits < VarSize) {
481      if (auto E = DIExpression::createFragmentExpression(
482              Expr, FragmentOffsetInBits, FragmentSizeInBits))
483        Expr = *E;
484      else
485        return;
486    }
487    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
488    NGV->addDebugInfo(NGVE);
489  }
490}
491
492/// Perform scalar replacement of aggregates on the specified global variable.
493/// This opens the door for other optimizations by exposing the behavior of the
494/// program in a more fine-grained way.  We have determined that this
495/// transformation is safe already.  We return the first global variable we
496/// insert so that the caller can reprocess it.
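///
/// A minimal before/after sketch (illustrative IR):
///
///   @g = internal global { i32, double } zeroinitializer
///
/// becomes, roughly,
///
///   @g.0 = internal global i32 0
///   @g.1 = internal global double 0.0
///
/// and each 'getelementptr ..., @g, 0, N, ...' is rewritten against @g.N.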
497static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
498  // Make sure this global only has simple uses that we can SRA.
499  if (!CanDoGlobalSRA(GV))
500    return nullptr;
501
502  assert(GV->hasLocalLinkage());
503  Constant *Init = GV->getInitializer();
504  Type *Ty = Init->getType();
505  uint64_t VarSize = DL.getTypeSizeInBits(Ty);
506
507  std::map<unsigned, GlobalVariable *> NewGlobals;
508
509  // Get the alignment of the global, either explicit or target-specific.
510  Align StartAlignment =
511      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType());
512
513  // Loop over all users and create replacement variables for used aggregate
514  // elements.
515  for (User *GEP : GV->users()) {
516    assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() ==
517                                           Instruction::GetElementPtr) ||
518            isa<GetElementPtrInst>(GEP)) &&
519           "NonGEP CE's are not SRAable!");
520
    // Ignore the first index operand, which has to be zero or else the
    // program is quite broken (undefined).  Get the second operand, which is
    // the structure or array index.
524    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
525    if (NewGlobals.count(ElementIdx) == 1)
      continue; // We've already created a replacement variable.
527    assert(NewGlobals.count(ElementIdx) == 0);
528
529    Type *ElTy = nullptr;
530    if (StructType *STy = dyn_cast<StructType>(Ty))
531      ElTy = STy->getElementType(ElementIdx);
532    else
533      ElTy = GetSRASequentialElementType(Ty);
534    assert(ElTy);
535
536    Constant *In = Init->getAggregateElement(ElementIdx);
537    assert(In && "Couldn't get element of initializer?");
538
539    GlobalVariable *NGV = new GlobalVariable(
540        ElTy, false, GlobalVariable::InternalLinkage, In,
541        GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(),
542        GV->getType()->getAddressSpace());
543    NGV->setExternallyInitialized(GV->isExternallyInitialized());
544    NGV->copyAttributesFrom(GV);
545    NewGlobals.insert(std::make_pair(ElementIdx, NGV));
546
547    if (StructType *STy = dyn_cast<StructType>(Ty)) {
548      const StructLayout &Layout = *DL.getStructLayout(STy);
549
550      // Calculate the known alignment of the field.  If the original aggregate
551      // had 256 byte alignment for example, something might depend on that:
552      // propagate info to each field.
553      uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
554      Align NewAlign = commonAlignment(StartAlignment, FieldOffset);
555      if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx)))
556        NGV->setAlignment(NewAlign);
557
558      // Copy over the debug info for the variable.
559      uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
560      uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
561      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
562    } else {
563      uint64_t EltSize = DL.getTypeAllocSize(ElTy);
564      Align EltAlign = DL.getABITypeAlign(ElTy);
565      uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
566
567      // Calculate the known alignment of the field.  If the original aggregate
568      // had 256 byte alignment for example, something might depend on that:
569      // propagate info to each field.
570      Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx);
571      if (NewAlign > EltAlign)
572        NGV->setAlignment(NewAlign);
573      transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
574                           FragmentSizeInBits, VarSize);
575    }
576  }
577
578  if (NewGlobals.empty())
579    return nullptr;
580
581  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
582  for (auto NewGlobalVar : NewGlobals)
583    Globals.push_back(NewGlobalVar.second);
584
585  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
586
  Constant *NullInt =
      Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
588
  // Loop over all of the uses of the global, replacing the constantexpr geps
  // with smaller constantexpr geps or direct references.
591  while (!GV->use_empty()) {
592    User *GEP = GV->user_back();
593    assert(((isa<ConstantExpr>(GEP) &&
594             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
595            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
596
    // Ignore the first index operand, which has to be zero or else the
    // program is quite broken (undefined).  Get the second operand, which is
    // the structure or array index.
600    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
601    assert(NewGlobals.count(ElementIdx) == 1);
602
603    Value *NewPtr = NewGlobals[ElementIdx];
604    Type *NewTy = NewGlobals[ElementIdx]->getValueType();
605
606    // Form a shorter GEP if needed.
607    if (GEP->getNumOperands() > 3) {
608      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
609        SmallVector<Constant*, 8> Idxs;
610        Idxs.push_back(NullInt);
611        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
612          Idxs.push_back(CE->getOperand(i));
613        NewPtr =
614            ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
615      } else {
616        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
617        SmallVector<Value*, 8> Idxs;
618        Idxs.push_back(NullInt);
619        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
620          Idxs.push_back(GEPI->getOperand(i));
621        NewPtr = GetElementPtrInst::Create(
622            NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx),
623            GEPI);
624      }
625    }
626    GEP->replaceAllUsesWith(NewPtr);
627
628    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
629      GEPI->eraseFromParent();
630    else
631      cast<ConstantExpr>(GEP)->destroyConstant();
632  }
633
634  // Delete the old global, now that it is dead.
635  Globals.erase(GV);
636  ++NumSRA;
637
638  assert(NewGlobals.size() > 0);
639  return NewGlobals.begin()->second;
640}
641
642/// Return true if all users of the specified value will trap if the value is
643/// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
644/// reprocessing them.
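///
/// For example (illustrative IR), for a pointer %p derived from the global:
///
///   %x = load i32, i32* %p           ; traps if %p is null
///   store i32 0, i32* %p             ; traps if %p is null
///   store i32* %p, i32** @other      ; does NOT trap, so this returns false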
645static bool AllUsesOfValueWillTrapIfNull(const Value *V,
646                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
647  for (const User *U : V->users()) {
648    if (const Instruction *I = dyn_cast<Instruction>(U)) {
649      // If null pointer is considered valid, then all uses are non-trapping.
650      // Non address-space 0 globals have already been pruned by the caller.
651      if (NullPointerIsDefined(I->getFunction()))
652        return false;
653    }
654    if (isa<LoadInst>(U)) {
655      // Will trap.
656    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
657      if (SI->getOperand(0) == V) {
658        //cerr << "NONTRAPPING USE: " << *U;
659        return false;  // Storing the value.
660      }
661    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
662      if (CI->getCalledOperand() != V) {
663        //cerr << "NONTRAPPING USE: " << *U;
664        return false;  // Not calling the ptr
665      }
666    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
667      if (II->getCalledOperand() != V) {
668        //cerr << "NONTRAPPING USE: " << *U;
669        return false;  // Not calling the ptr
670      }
671    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
672      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
673    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
674      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
675    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
676      // If we've already seen this phi node, ignore it, it has already been
677      // checked.
678      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
679        return false;
680    } else {
681      //cerr << "NONTRAPPING USE: " << *U;
682      return false;
683    }
684  }
685  return true;
686}
687
688/// Return true if all uses of any loads from GV will trap if the loaded value
689/// is null.  Note that this also permits comparisons of the loaded value
690/// against null, as a special case.
691static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
692  for (const User *U : GV->users())
693    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
694      SmallPtrSet<const PHINode*, 8> PHIs;
695      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
696        return false;
697    } else if (isa<StoreInst>(U)) {
698      // Ignore stores to the global.
699    } else {
700      // We don't know or understand this user, bail out.
701      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
702      return false;
703    }
704  return true;
705}
706
707static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
708  bool Changed = false;
709  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
710    Instruction *I = cast<Instruction>(*UI++);
711    // Uses are non-trapping if null pointer is considered valid.
712    // Non address-space 0 globals are already pruned by the caller.
713    if (NullPointerIsDefined(I->getFunction()))
714      return false;
715    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
716      LI->setOperand(0, NewV);
717      Changed = true;
718    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
719      if (SI->getOperand(1) == V) {
720        SI->setOperand(1, NewV);
721        Changed = true;
722      }
723    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
724      CallBase *CB = cast<CallBase>(I);
725      if (CB->getCalledOperand() == V) {
726        // Calling through the pointer!  Turn into a direct call, but be careful
727        // that the pointer is not also being passed as an argument.
728        CB->setCalledOperand(NewV);
729        Changed = true;
730        bool PassedAsArg = false;
731        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
732          if (CB->getArgOperand(i) == V) {
733            PassedAsArg = true;
734            CB->setArgOperand(i, NewV);
735          }
736
737        if (PassedAsArg) {
738          // Being passed as an argument also.  Be careful to not invalidate UI!
739          UI = V->user_begin();
740        }
741      }
742    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
743      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
744                                ConstantExpr::getCast(CI->getOpcode(),
745                                                      NewV, CI->getType()));
746      if (CI->use_empty()) {
747        Changed = true;
748        CI->eraseFromParent();
749      }
750    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
751      // Should handle GEP here.
752      SmallVector<Constant*, 8> Idxs;
753      Idxs.reserve(GEPI->getNumOperands()-1);
754      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
755           i != e; ++i)
756        if (Constant *C = dyn_cast<Constant>(*i))
757          Idxs.push_back(C);
758        else
759          break;
760      if (Idxs.size() == GEPI->getNumOperands()-1)
761        Changed |= OptimizeAwayTrappingUsesOfValue(
762            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
763                                                 NewV, Idxs));
764      if (GEPI->use_empty()) {
765        Changed = true;
766        GEPI->eraseFromParent();
767      }
768    }
769  }
770
771  return Changed;
772}
773
774/// The specified global has only one non-null value stored into it.  If there
775/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so it is safe to optimize away the load.
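///
/// For example (illustrative IR), if %obj is the only value ever stored to
/// @g:
///
///   store i32* %obj, i32** @g
///   ...
///   %p = load i32*, i32** @g
///   %v = load i32, i32* %p           ; would trap if %p were null
///
/// then uses of %p such as this load can be rewritten to use %obj directly.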
778static bool OptimizeAwayTrappingUsesOfLoads(
779    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
780    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
781  bool Changed = false;
782
783  // Keep track of whether we are able to remove all the uses of the global
784  // other than the store that defines it.
785  bool AllNonStoreUsesGone = true;
786
  // Replace all uses of loads with uses of the stored value.
788  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
789    User *GlobalUser = *GUI++;
790    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
791      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
792      // If we were able to delete all uses of the loads
793      if (LI->use_empty()) {
794        LI->eraseFromParent();
795        Changed = true;
796      } else {
797        AllNonStoreUsesGone = false;
798      }
799    } else if (isa<StoreInst>(GlobalUser)) {
800      // Ignore the store that stores "LV" to the global.
801      assert(GlobalUser->getOperand(1) == GV &&
802             "Must be storing *to* the global");
803    } else {
804      AllNonStoreUsesGone = false;
805
806      // If we get here we could have other crazy uses that are transitively
807      // loaded.
808      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
809              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
810              isa<BitCastInst>(GlobalUser) ||
811              isa<GetElementPtrInst>(GlobalUser)) &&
             "Only expect loads and stores!");
813    }
814  }
815
816  if (Changed) {
817    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
818                      << "\n");
819    ++NumGlobUses;
820  }
821
822  // If we nuked all of the loads, then none of the stores are needed either,
823  // nor is the global.
824  if (AllNonStoreUsesGone) {
825    if (isLeakCheckerRoot(GV)) {
826      Changed |= CleanupPointerRootUsers(GV, GetTLI);
827    } else {
828      Changed = true;
829      CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
830    }
831    if (GV->use_empty()) {
832      LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
833      Changed = true;
834      GV->eraseFromParent();
835      ++NumDeleted;
836    }
837  }
838  return Changed;
839}
840
841/// Walk the use list of V, constant folding all of the instructions that are
842/// foldable.
843static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
844                                TargetLibraryInfo *TLI) {
845  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
846    if (Instruction *I = dyn_cast<Instruction>(*UI++))
847      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
848        I->replaceAllUsesWith(NewC);
849
850        // Advance UI to the next non-I use to avoid invalidating it!
851        // Instructions could multiply use V.
852        while (UI != E && *UI == I)
853          ++UI;
854        if (isInstructionTriviallyDead(I, TLI))
855          I->eraseFromParent();
856      }
857}
858
859/// This function takes the specified global variable, and transforms the
860/// program as if it always contained the result of the specified malloc.
861/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc.  Instead, turn the malloc into a global, and
/// rewrite any loads of GV as uses of the new global.
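///
/// A rough before/after sketch (illustrative IR, for a 100-element case):
///
///   @g = internal global i32* null
///   %m = call i8* @malloc(i64 400)
///   %p = bitcast i8* %m to i32*
///   store i32* %p, i32** @g
///
/// becomes
///
///   @g.body = internal global [100 x i32] undef
///   @g.init = internal global i1 false   ; tracks whether the store happened
///
/// where loads of @g use @g.body, and null-comparisons of the loaded pointer
/// are rewritten in terms of @g.init.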
864static GlobalVariable *
865OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
866                              ConstantInt *NElements, const DataLayout &DL,
867                              TargetLibraryInfo *TLI) {
868  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
869                    << '\n');
870
871  Type *GlobalType;
872  if (NElements->getZExtValue() == 1)
873    GlobalType = AllocTy;
874  else
    // If we have an array allocation, the global variable holds an array.
876    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
877
  // Create the new global variable.  The contents of the malloc'd memory are
  // undefined, so initialize with an undef value.
880  GlobalVariable *NewGV = new GlobalVariable(
881      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
882      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
883      GV->getThreadLocalMode());
884
885  // If there are bitcast users of the malloc (which is typical, usually we have
886  // a malloc + bitcast) then replace them with uses of the new global.  Update
887  // other users to use the global as well.
888  BitCastInst *TheBC = nullptr;
889  while (!CI->use_empty()) {
890    Instruction *User = cast<Instruction>(CI->user_back());
891    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
892      if (BCI->getType() == NewGV->getType()) {
893        BCI->replaceAllUsesWith(NewGV);
894        BCI->eraseFromParent();
895      } else {
896        BCI->setOperand(0, NewGV);
897      }
898    } else {
899      if (!TheBC)
900        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
901      User->replaceUsesOfWith(CI, TheBC);
902    }
903  }
904
905  Constant *RepValue = NewGV;
906  if (NewGV->getType() != GV->getValueType())
907    RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
908
909  // If there is a comparison against null, we will insert a global bool to
910  // keep track of whether the global was initialized yet or not.
911  GlobalVariable *InitBool =
912    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
913                       GlobalValue::InternalLinkage,
914                       ConstantInt::getFalse(GV->getContext()),
915                       GV->getName()+".init", GV->getThreadLocalMode());
916  bool InitBoolUsed = false;
917
918  // Loop over all uses of GV, processing them in turn.
919  while (!GV->use_empty()) {
920    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
921      // The global is initialized when the store to it occurs.
922      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
923                    Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI);
924      SI->eraseFromParent();
925      continue;
926    }
927
928    LoadInst *LI = cast<LoadInst>(GV->user_back());
929    while (!LI->use_empty()) {
930      Use &LoadUse = *LI->use_begin();
931      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
932      if (!ICI) {
933        LoadUse = RepValue;
934        continue;
935      }
936
937      // Replace the cmp X, 0 with a use of the bool value.
938      // Sink the load to where the compare was, if atomic rules allow us to.
939      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
940                               InitBool->getName() + ".val", false, Align(1),
941                               LI->getOrdering(), LI->getSyncScopeID(),
942                               LI->isUnordered() ? (Instruction *)ICI : LI);
943      InitBoolUsed = true;
944      switch (ICI->getPredicate()) {
945      default: llvm_unreachable("Unknown ICmp Predicate!");
946      case ICmpInst::ICMP_ULT:
947      case ICmpInst::ICMP_SLT:   // X < null -> always false
948        LV = ConstantInt::getFalse(GV->getContext());
949        break;
950      case ICmpInst::ICMP_ULE:
951      case ICmpInst::ICMP_SLE:
952      case ICmpInst::ICMP_EQ:
953        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
954        break;
955      case ICmpInst::ICMP_NE:
956      case ICmpInst::ICMP_UGE:
957      case ICmpInst::ICMP_SGE:
958      case ICmpInst::ICMP_UGT:
959      case ICmpInst::ICMP_SGT:
960        break;  // no change.
961      }
962      ICI->replaceAllUsesWith(LV);
963      ICI->eraseFromParent();
964    }
965    LI->eraseFromParent();
966  }
967
968  // If the initialization boolean was used, insert it, otherwise delete it.
969  if (!InitBoolUsed) {
970    while (!InitBool->use_empty())  // Delete initializations
971      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
972    delete InitBool;
973  } else
974    GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
975
  // Now the GV is dead, nuke it and the malloc.
977  GV->eraseFromParent();
978  CI->eraseFromParent();
979
980  // To further other optimizations, loop over all users of NewGV and try to
981  // constant prop them.  This will promote GEP instructions with constant
982  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
983  ConstantPropUsersOf(NewGV, DL, TLI);
984  if (RepValue != NewGV)
985    ConstantPropUsersOf(RepValue, DL, TLI);
986
987  return NewGV;
988}
989
990/// Scan the use-list of V checking to make sure that there are no complex uses
991/// of V.  We permit simple things like dereferencing the pointer, but not
992/// storing through the address, unless it is to the specified global.
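///
/// For example (illustrative IR), with @g as the one permitted global:
///
///   store i8* %v, i8** @g        ; ok, storing into GV
///   %c = load i8, i8* %v         ; ok, only dereferences the pointer
///   store i8* %v, i8** @other    ; not ok, the pointer itself escapes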
993static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
994                                                      const GlobalVariable *GV,
995                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
996  for (const User *U : V->users()) {
997    const Instruction *Inst = cast<Instruction>(U);
998
999    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
1000      continue; // Fine, ignore.
1001    }
1002
1003    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1004      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
1005        return false;  // Storing the pointer itself... bad.
1006      continue; // Otherwise, storing through it, or storing into GV... fine.
1007    }
1008
1009    // Must index into the array and into the struct.
1010    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
1011      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
1012        return false;
1013      continue;
1014    }
1015
1016    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
1017      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
1018      // cycles.
1019      if (PHIs.insert(PN).second)
1020        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
1021          return false;
1022      continue;
1023    }
1024
1025    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
1026      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
1027        return false;
1028      continue;
1029    }
1030
1031    return false;
1032  }
1033  return true;
1034}
1035
1036/// The Alloc pointer is stored into GV somewhere.  Transform all uses of the
1037/// allocation into loads from the global and uses of the resultant pointer.
/// Further, delete the store into GV.  This assumes that these values pass the
1039/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
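///
/// Sketch (illustrative IR), with @g holding the allocation:
///
///   %m = call i8* @malloc(i64 8)
///   store i8* %m, i8** @g            ; deleted
///   %x = load i8, i8* %m             ; %m replaced by 'load i8*, i8** @g'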
1040static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
1041                                          GlobalVariable *GV) {
1042  while (!Alloc->use_empty()) {
1043    Instruction *U = cast<Instruction>(*Alloc->user_begin());
1044    Instruction *InsertPt = U;
1045    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1046      // If this is the store of the allocation into the global, remove it.
1047      if (SI->getOperand(1) == GV) {
1048        SI->eraseFromParent();
1049        continue;
1050      }
1051    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
1052      // Insert the load in the corresponding predecessor, not right before the
1053      // PHI.
1054      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
1055    } else if (isa<BitCastInst>(U)) {
1056      // Must be bitcast between the malloc and store to initialize the global.
1057      ReplaceUsesOfMallocWithGlobal(U, GV);
1058      U->eraseFromParent();
1059      continue;
1060    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
1061      // If this is a "GEP bitcast" and the user is a store to the global, then
1062      // just process it as a bitcast.
1063      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
1064        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
1065          if (SI->getOperand(1) == GV) {
1066            // Must be bitcast GEP between the malloc and store to initialize
1067            // the global.
1068            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
1069            GEPI->eraseFromParent();
1070            continue;
1071          }
1072    }
1073
1074    // Insert a load from the global, and use it instead of the malloc.
1075    Value *NL =
1076        new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
1077    U->replaceUsesOfWith(Alloc, NL);
1078  }
1079}
1080
1081/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
1082/// perform heap SRA on.  This permits GEP's that index through the array and
1083/// struct field, icmps of null, and PHIs.
1084static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
1085                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
1086                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
1087  // We permit two users of the load: setcc comparing against the null
1088  // pointer, and a getelementptr of a specific form.
1089  for (const User *U : V->users()) {
1090    const Instruction *UI = cast<Instruction>(U);
1091
1092    // Comparison against null is ok.
1093    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
1094      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
1095        return false;
1096      continue;
1097    }
1098
1099    // getelementptr is also ok, but only a simple form.
1100    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
1101      // Must index into the array and into the struct.
1102      if (GEPI->getNumOperands() < 3)
1103        return false;
1104
1105      // Otherwise the GEP is ok.
1106      continue;
1107    }
1108
1109    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1110      if (!LoadUsingPHIsPerLoad.insert(PN).second)
1111        // This means some phi nodes are dependent on each other.
1112        // Avoid infinite looping!
1113        return false;
1114      if (!LoadUsingPHIs.insert(PN).second)
1115        // If we have already analyzed this PHI, then it is safe.
1116        continue;
1117
1118      // Make sure all uses of the PHI are simple enough to transform.
1119      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
1120                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
1121        return false;
1122
1123      continue;
1124    }
1125
1126    // Otherwise we don't know what this is, not ok.
1127    return false;
1128  }
1129
1130  return true;
1131}
1132
1133/// If all users of values loaded from GV are simple enough to perform HeapSRA,
1134/// return true.
1135static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
1136                                                    Instruction *StoredVal) {
1137  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
1138  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
1139  for (const User *U : GV->users())
1140    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
1141      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
1142                                          LoadUsingPHIsPerLoad))
1143        return false;
1144      LoadUsingPHIsPerLoad.clear();
1145    }
1146
1147  // If we reach here, we know that all uses of the loads and transitive uses
1148  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or StoredVal itself.
1152  for (const PHINode *PN : LoadUsingPHIs) {
1153    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
1154      Value *InVal = PN->getIncomingValue(op);
1155
1156      // PHI of the stored value itself is ok.
1157      if (InVal == StoredVal) continue;
1158
1159      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
1160        // One of the PHIs in our set is (optimistically) ok.
1161        if (LoadUsingPHIs.count(InPN))
1162          continue;
1163        return false;
1164      }
1165
1166      // Load from GV is ok.
1167      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
1168        if (LI->getOperand(0) == GV)
1169          continue;
1170
1171      // UNDEF? NULL?
1172
1173      // Anything else is rejected.
1174      return false;
1175    }
1176  }
1177
1178  return true;
1179}
1180
1181static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
1182              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1183                   std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1184  std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];
1185
1186  if (FieldNo >= FieldVals.size())
1187    FieldVals.resize(FieldNo+1);
1188
1189  // If we already have this value, just reuse the previously scalarized
1190  // version.
1191  if (Value *FieldVal = FieldVals[FieldNo])
1192    return FieldVal;
1193
1194  // Depending on what instruction this is, we have several cases.
1195  Value *Result;
1196  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
1197    // This is a scalarized version of the load from the global.  Just create
1198    // a new Load of the scalarized global.
1199    Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
1200                                InsertedScalarizedValues, PHIsToRewrite);
1201    Result = new LoadInst(V->getType()->getPointerElementType(), V,
1202                          LI->getName() + ".f" + Twine(FieldNo), LI);
1203  } else {
1204    PHINode *PN = cast<PHINode>(V);
1205    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
1206    // field.
1207
1208    PointerType *PTy = cast<PointerType>(PN->getType());
1209    StructType *ST = cast<StructType>(PTy->getElementType());
1210
1211    unsigned AS = PTy->getAddressSpace();
1212    PHINode *NewPN =
1213      PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
1214                     PN->getNumIncomingValues(),
1215                     PN->getName()+".f"+Twine(FieldNo), PN);
1216    Result = NewPN;
1217    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
1218  }
1219
1220  return FieldVals[FieldNo] = Result;
1221}
1222
1223/// Given a load instruction and a value derived from the load, rewrite the
1224/// derived value to use the HeapSRoA'd load.
1225static void RewriteHeapSROALoadUser(Instruction *LoadUser,
1226              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1227                   std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1228  // If this is a comparison against null, handle it.
1229  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
1230    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
1231    // If we have a setcc of the loaded pointer, we can use a setcc of any
1232    // field.
1233    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
1234                                   InsertedScalarizedValues, PHIsToRewrite);
1235
1236    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
1237                              Constant::getNullValue(NPtr->getType()),
1238                              SCI->getName());
1239    SCI->replaceAllUsesWith(New);
1240    SCI->eraseFromParent();
1241    return;
1242  }
1243
1244  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
1245  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
1246    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
1247           && "Unexpected GEPI!");
1248
1249    // Load the pointer for this field.
1250    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
1251    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
1252                                     InsertedScalarizedValues, PHIsToRewrite);
1253
1254    // Create the new GEP idx vector.
1255    SmallVector<Value*, 8> GEPIdx;
1256    GEPIdx.push_back(GEPI->getOperand(1));
1257    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
1258
1259    Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
1260                                             GEPI->getName(), GEPI);
1261    GEPI->replaceAllUsesWith(NGEPI);
1262    GEPI->eraseFromParent();
1263    return;
1264  }
1265
1266  // Recursively transform the users of PHI nodes.  This will lazily create the
1267  // PHIs that are needed for individual elements.  Keep track of what PHIs we
1268  // see in InsertedScalarizedValues so that we don't get infinite loops (very
1269  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
1270  // already been seen first by another load, so its uses have already been
1271  // processed.
1272  PHINode *PN = cast<PHINode>(LoadUser);
1273  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
1274                                              std::vector<Value *>())).second)
1275    return;
1276
1277  // If this is the first time we've seen this PHI, recursively process all
1278  // users.
1279  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
1280    Instruction *User = cast<Instruction>(*UI++);
1281    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1282  }
1283}
1284
1285/// We are performing Heap SRoA on a global.  Ptr is a value loaded from the
1286/// global.  Eliminate all uses of Ptr, making them use FieldGlobals instead.
1287/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
1288static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
1289              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1290                  std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) {
1291  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
1292    Instruction *User = cast<Instruction>(*UI++);
1293    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1294  }
1295
1296  if (Load->use_empty()) {
1297    Load->eraseFromParent();
1298    InsertedScalarizedValues.erase(Load);
1299  }
1300}
1301
1302/// CI is an allocation of an array of structures.  Break it up into multiple
1303/// allocations of arrays of the fields.
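///
/// Sketch (illustrative IR), for an array of { i32, i8* } pairs:
///
///   @g = internal global %pair* null   ; initialized from one malloc
///
/// becomes one global and one malloc per field:
///
///   @g.f0 = internal global i32* null  ; array of first fields
///   @g.f1 = internal global i8** null  ; array of second fields
///
/// and 'getelementptr %p, i, 0' / 'getelementptr %p, i, 1' uses are rewritten
/// against the corresponding field array.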
1304static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1305                                            Value *NElems, const DataLayout &DL,
1306                                            const TargetLibraryInfo *TLI) {
1307  LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI
1308                    << '\n');
1309  Type *MAT = getMallocAllocatedType(CI, TLI);
1310  StructType *STy = cast<StructType>(MAT);
1311
1312  // There is guaranteed to be at least one use of the malloc (storing
1313  // it into GV).  If there are other uses, change them to be uses of
1314  // the global to simplify later code.  This also deletes the store
1315  // into GV.
1316  ReplaceUsesOfMallocWithGlobal(CI, GV);
1317
1318  // Okay, at this point, there are no users of the malloc.  Insert N
1319  // new mallocs at the same place as CI, and N globals.
1320  std::vector<Value *> FieldGlobals;
1321  std::vector<Value *> FieldMallocs;
1322
1323  SmallVector<OperandBundleDef, 1> OpBundles;
1324  CI->getOperandBundlesAsDefs(OpBundles);
1325
1326  unsigned AS = GV->getType()->getPointerAddressSpace();
1327  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
1328    Type *FieldTy = STy->getElementType(FieldNo);
1329    PointerType *PFieldTy = PointerType::get(FieldTy, AS);
1330
1331    GlobalVariable *NGV = new GlobalVariable(
1332        *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
1333        Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
1334        nullptr, GV->getThreadLocalMode());
1335    NGV->copyAttributesFrom(GV);
1336    FieldGlobals.push_back(NGV);
1337
1338    unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
1339    if (StructType *ST = dyn_cast<StructType>(FieldTy))
1340      TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
1341    Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1342    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1343                                        ConstantInt::get(IntPtrTy, TypeSize),
1344                                        NElems, OpBundles, nullptr,
1345                                        CI->getName() + ".f" + Twine(FieldNo));
1346    FieldMallocs.push_back(NMI);
1347    new StoreInst(NMI, NGV, CI);
1348  }
1349
1350  // The tricky aspect of this transformation is handling the case when malloc
1351  // fails.  In the original code, malloc failing would set the result pointer
1352  // of malloc to null.  In this case, some mallocs could succeed and others
1353  // could fail.  As such, we emit code that looks like this:
1354  //    F0 = malloc(field0)
1355  //    F1 = malloc(field1)
1356  //    F2 = malloc(field2)
1357  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
1358  //      if (F0) { free(F0); F0 = 0; }
1359  //      if (F1) { free(F1); F1 = 0; }
1360  //      if (F2) { free(F2); F2 = 0; }
1361  //    }
1362  // The malloc can also fail if its argument is too large.
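  // Start the running failure condition with a check that the original size
  // argument is not negative, i.e. that the requested allocation size has not
  // overflowed.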
1363  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
1364  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
1365                                  ConstantZero, "isneg");
1366  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1367    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1368                             Constant::getNullValue(FieldMallocs[i]->getType()),
1369                               "isnull");
1370    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
1371  }
1372
1373  // Split the basic block at the old malloc.
1374  BasicBlock *OrigBB = CI->getParent();
1375  BasicBlock *ContBB =
1376      OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
1377
1378  // Create the block to check the first condition.  Put all these blocks at the
1379  // end of the function as they are unlikely to be executed.
1380  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
1381                                                "malloc_ret_null",
1382                                                OrigBB->getParent());
1383
1384  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
1385  // branch on RunningOr.
1386  OrigBB->getTerminator()->eraseFromParent();
1387  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
1388
1389  // Within the NullPtrBlock, we need to emit a comparison and branch for each
1390  // pointer, because some may be null while others are not.
1391  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1392    Value *GVVal =
1393        new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
1394                     FieldGlobals[i], "tmp", NullPtrBlock);
1395    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
1396                              Constant::getNullValue(GVVal->getType()));
1397    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
1398                                               OrigBB->getParent());
1399    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
1400                                               OrigBB->getParent());
1401    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
1402                                         Cmp, NullPtrBlock);
1403
1404    // Fill in FreeBlock.
1405    CallInst::CreateFree(GVVal, OpBundles, BI);
1406    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
1407                  FreeBlock);
1408    BranchInst::Create(NextBlock, FreeBlock);
1409
1410    NullPtrBlock = NextBlock;
1411  }
1412
1413  BranchInst::Create(ContBB, NullPtrBlock);
1414
1415  // CI is no longer needed, remove it.
1416  CI->eraseFromParent();
1417
1418  // As we process loads, if we can't immediately update all uses of the load,
1419  // keep track of what scalarized loads are inserted for a given load.
1420  DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
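  // Seed the map so that lookups of GV itself yield the per-field globals.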
1421  InsertedScalarizedValues[GV] = FieldGlobals;
1422
1423  std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;
1424
1425  // Okay, the malloc site is completely handled.  All of the uses of GV are now
1426  // loads, and all uses of those loads are simple.  Rewrite them to use loads
1427  // of the per-field globals instead.
1428  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
1429    Instruction *User = cast<Instruction>(*UI++);
1430
1431    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1432      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
1433      continue;
1434    }
1435
1436    // Must be a store of null.
1437    StoreInst *SI = cast<StoreInst>(User);
1438    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
1439           "Unexpected heap-sra user!");
1440
1441    // Insert a store of null into each global.
1442    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1443      Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
1444      Constant *Null = Constant::getNullValue(ValTy);
1445      new StoreInst(Null, FieldGlobals[i], SI);
1446    }
1447    // Erase the original store.
1448    SI->eraseFromParent();
1449  }
1450
1451  // While we have PHIs that are interesting to rewrite, do it.
1452  while (!PHIsToRewrite.empty()) {
1453    PHINode *PN = PHIsToRewrite.back().first;
1454    unsigned FieldNo = PHIsToRewrite.back().second;
1455    PHIsToRewrite.pop_back();
1456    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
1457    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
1458
1459    // Add all the incoming values.  This can materialize more phis.
1460    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1461      Value *InVal = PN->getIncomingValue(i);
1462      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
1463                               PHIsToRewrite);
1464      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
1465    }
1466  }
1467
1468  // Drop all inter-phi links and any loads that made it this far.
1469  for (DenseMap<Value *, std::vector<Value *>>::iterator
1470       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1471       I != E; ++I) {
1472    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1473      PN->dropAllReferences();
1474    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1475      LI->dropAllReferences();
1476  }
1477
1478  // Delete all the phis and loads now that inter-references are dead.
1479  for (DenseMap<Value *, std::vector<Value *>>::iterator
1480       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1481       I != E; ++I) {
1482    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1483      PN->eraseFromParent();
1484    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1485      LI->eraseFromParent();
1486  }
1487
1488  // The old global is now dead, remove it.
1489  GV->eraseFromParent();
1490
1491  ++NumHeapSRA;
1492  return cast<GlobalVariable>(FieldGlobals[0]);
1493}
1494
1495/// This function is called when we see a pointer global variable with a single
1496/// value stored into it, where that value is a malloc or a cast of a malloc.
1497static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
1498                                               Type *AllocTy,
1499                                               AtomicOrdering Ordering,
1500                                               const DataLayout &DL,
1501                                               TargetLibraryInfo *TLI) {
1502  // If this is a malloc of an abstract type, don't touch it.
1503  if (!AllocTy->isSized())
1504    return false;
1505
1506  // We can't optimize this global unless all uses of it are *known* to be
1507  // of the malloc value, not of the null initializer value (consider a use
1508  // that compares the global's value against zero to see if the malloc has
1509  // been reached).  To do this, we check to see if all uses of the global
1510  // would trap if the global were null: this proves that they must all
1511  // happen after the malloc.
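  // For example (illustrative), dereferencing or calling the loaded pointer
  // would trap if the global still held its null initializer, so such uses can
  // only execute after the malloc'd pointer has been stored.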
1512  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
1513    return false;
1514
1515  // We can't optimize this if the malloc itself is used in a complex way,
1516  // for example, being stored into multiple globals.  This check allows the
1517  // malloc to be stored into the specified global, loaded, icmp'd, and
1518  // GEP'd, since those are all uses that we can rewrite to use the global
1519  // instead.
1520  SmallPtrSet<const PHINode*, 8> PHIs;
1521  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
1522    return false;
1523
1524  // If we have a global that is only initialized with a fixed size malloc,
1525  // transform the program to use global memory instead of malloc'd memory.
1526  // This eliminates dynamic allocation, avoids an indirection accessing the
1527  // data, and exposes the resultant global to further GlobalOpt.
1528  // We cannot optimize the malloc if we cannot determine malloc array size.
1529  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
1530  if (!NElems)
1531    return false;
1532
1533  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
1534    // Restrict this transformation to only working on small allocations
1535    // (2048 bytes currently), as we don't want to introduce a 16M global or
1536    // something.
1537    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
1538      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
1539      return true;
1540    }
1541
1542  // If the allocation is an array of structures, consider transforming this
1543  // into multiple malloc'd arrays, one for each field.  This is basically
1544  // SRoA for malloc'd memory.
1545
1546  if (Ordering != AtomicOrdering::NotAtomic)
1547    return false;
1548
1549  // If this is an allocation of a fixed size array of structs, analyze as a
1550  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
1551  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1552    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1553      AllocTy = AT->getElementType();
1554
1555  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1556  if (!AllocSTy)
1557    return false;
1558
1559  // If the structure has an unreasonable number of fields, leave it
1560  // alone.
1561  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1562      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1563
1564    // If this is a fixed size array, transform the malloc into a malloc of
1565    // structs.  malloc [100 x struct],1 -> malloc struct, 100
1566    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1567      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
1568      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
1569      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1570      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1571      SmallVector<OperandBundleDef, 1> OpBundles;
1572      CI->getOperandBundlesAsDefs(OpBundles);
1573      Instruction *Malloc =
1574          CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
1575                                 OpBundles, nullptr, CI->getName());
1576      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1577      CI->replaceAllUsesWith(Cast);
1578      CI->eraseFromParent();
1579      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1580        CI = cast<CallInst>(BCI->getOperand(0));
1581      else
1582        CI = cast<CallInst>(Malloc);
1583    }
1584
1585    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
1586                         TLI);
1587    return true;
1588  }
1589
1590  return false;
1591}
1592
1593// Try to optimize globals based on the knowledge that only one value (besides
1594// its initializer) is ever stored to the global.
1595static bool
1596optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1597                         AtomicOrdering Ordering, const DataLayout &DL,
1598                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
1599  // Ignore no-op GEPs and bitcasts.
1600  StoredOnceVal = StoredOnceVal->stripPointerCasts();
1601
1602  // If we are dealing with a pointer global that is initialized to null and
1603  // only has one (non-null) value stored into it, then we can optimize any
1604  // users of the loaded value (often calls and loads) that would trap if the
1605  // value was null.
1606  if (GV->getInitializer()->getType()->isPointerTy() &&
1607      GV->getInitializer()->isNullValue() &&
1608      !NullPointerIsDefined(
1609          nullptr /* F */,
1610          GV->getInitializer()->getType()->getPointerAddressSpace())) {
1611    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1612      if (GV->getInitializer()->getType() != SOVC->getType())
1613        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1614
1615      // Optimize away any trapping uses of the loaded value.
1616      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
1617        return true;
1618    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) {
1619      auto *TLI = &GetTLI(*CI->getFunction());
1620      Type *MallocType = getMallocAllocatedType(CI, TLI);
1621      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
1622                                                           Ordering, DL, TLI))
1623        return true;
1624    }
1625  }
1626
1627  return false;
1628}
1629
1630/// At this point, we have learned that the only two values ever stored into GV
1631/// are its initializer and OtherVal.  See if we can shrink the global into a
1632/// boolean and select between the two values whenever it is used.  This exposes
1633/// the values to other scalar optimizations.
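///
/// Illustrative sketch (hypothetical IR): an i32 global initialized to 0 whose
/// only other stored value is 42 becomes an i1 global; stores become stores of
/// the boolean, and each load becomes a load of the boolean followed by
/// "select i1 %b, i32 42, i32 0" (or a zext when the two values are 0 and 1).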
1634static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1635  Type *GVElType = GV->getValueType();
1636
1637  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
1638  // an FP value, pointer or vector, don't do this optimization because a select
1639  // between them is very expensive and unlikely to lead to later
1640  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
1641  // where v1 and v2 both require constant pool loads, a big loss.
1642  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1643      GVElType->isFloatingPointTy() ||
1644      GVElType->isPointerTy() || GVElType->isVectorTy())
1645    return false;
1646
1647  // Walk the use list of the global seeing if all the uses are loads or stores.
1648  // If there is anything else, bail out.
1649  for (User *U : GV->users())
1650    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1651      return false;
1652
1653  LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");
1654
1655  // Create the new global, initializing it to false.
1656  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1657                                             false,
1658                                             GlobalValue::InternalLinkage,
1659                                        ConstantInt::getFalse(GV->getContext()),
1660                                             GV->getName()+".b",
1661                                             GV->getThreadLocalMode(),
1662                                             GV->getType()->getAddressSpace());
1663  NewGV->copyAttributesFrom(GV);
1664  GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
1665
1666  Constant *InitVal = GV->getInitializer();
1667  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1668         "No reason to shrink to bool!");
1669
1670  SmallVector<DIGlobalVariableExpression *, 1> GVs;
1671  GV->getDebugInfo(GVs);
1672
1673  // If initialized to zero and storing one into the global, we can use a cast
1674  // instead of a select to synthesize the desired value.
1675  bool IsOneZero = false;
1676  bool EmitOneOrZero = true;
1677  auto *CI = dyn_cast<ConstantInt>(OtherVal);
1678  if (CI && CI->getValue().getActiveBits() <= 64) {
1679    IsOneZero = InitVal->isNullValue() && CI->isOne();
1680
1681    auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
1682    if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
1683      uint64_t ValInit = CIInit->getZExtValue();
1684      uint64_t ValOther = CI->getZExtValue();
1685      uint64_t ValMinus = ValOther - ValInit;
1686
1687      for (auto *GVe : GVs) {
1688        DIGlobalVariable *DGV = GVe->getVariable();
1689        DIExpression *E = GVe->getExpression();
1690        const DataLayout &DL = GV->getParent()->getDataLayout();
1691        unsigned SizeInOctets =
1692          DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;
1693
1694        // The address of the optimized global variable is expected to be on
1695        // top of the DWARF expression stack. After the optimization, the
1696        // value of that variable will be either 0 (the initial value) or 1
1697        // (the other value). The following expression reconstructs the
1698        // original integer value from the value at the global's address:
1699        // val * (ValOther - ValInit) + ValInit:
1700        // DW_OP_deref DW_OP_constu <ValMinus>
1701        // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
1702        SmallVector<uint64_t, 12> Ops = {
1703            dwarf::DW_OP_deref_size, SizeInOctets,
1704            dwarf::DW_OP_constu, ValMinus,
1705            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
1706            dwarf::DW_OP_plus};
1707        bool WithStackValue = true;
1708        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
1709        DIGlobalVariableExpression *DGVE =
1710          DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
1711        NewGV->addDebugInfo(DGVE);
1712      }
1713      EmitOneOrZero = false;
1714    }
1715  }
1716
1717  if (EmitOneOrZero) {
1718    // FIXME: This only emits the address of the global for the debugger;
1719    // the value written there will only ever be 0 or 1.
1720    for (auto *GV : GVs)
1721      NewGV->addDebugInfo(GV);
1722  }
1723
1724  while (!GV->use_empty()) {
1725    Instruction *UI = cast<Instruction>(GV->user_back());
1726    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1727      // Change the store into a boolean store.
1728      bool StoringOther = SI->getOperand(0) == OtherVal;
1729      // Only do this if we weren't storing a loaded value.
1730      Value *StoreVal;
1731      if (StoringOther || SI->getOperand(0) == InitVal) {
1732        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1733                                    StoringOther);
1734      } else {
1735        // Otherwise, we are storing a previously loaded copy.  To do this,
1736        // change the copy from copying the original value to just copying the
1737        // bool.
1738        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1739
1740        // If we've already replaced the input, StoredVal will be a cast or
1741        // select instruction.  If not, it will be a load of the original
1742        // global.
1743        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1744          assert(LI->getOperand(0) == GV && "Not a copy!");
1745          // Insert a new load, to preserve the saved value.
1746          StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
1747                                  LI->getName() + ".b", false, Align(1),
1748                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
1749        } else {
1750          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1751                 "This is not a form that we understand!");
1752          StoreVal = StoredVal->getOperand(0);
1753          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1754        }
1755      }
1756      StoreInst *NSI =
1757          new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
1758                        SI->getSyncScopeID(), SI);
1759      NSI->setDebugLoc(SI->getDebugLoc());
1760    } else {
1761      // Change the load into a load of bool then a select.
1762      LoadInst *LI = cast<LoadInst>(UI);
1763      LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
1764                                   LI->getName() + ".b", false, Align(1),
1765                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
1766      Instruction *NSI;
1767      if (IsOneZero)
1768        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1769      else
1770        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1771      NSI->takeName(LI);
1772      // Since LI is split into two instructions, NLI and NSI both inherit the
1773      // same DebugLoc
1774      NLI->setDebugLoc(LI->getDebugLoc());
1775      NSI->setDebugLoc(LI->getDebugLoc());
1776      LI->replaceAllUsesWith(NSI);
1777    }
1778    UI->eraseFromParent();
1779  }
1780
1781  // Retain the name of the old global variable. People who are debugging their
1782  // programs may expect these variables to be named the same.
1783  NewGV->takeName(GV);
1784  GV->eraseFromParent();
1785  return true;
1786}
1787
1788static bool deleteIfDead(
1789    GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
1790  GV.removeDeadConstantUsers();
1791
1792  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
1793    return false;
1794
1795  if (const Comdat *C = GV.getComdat())
1796    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
1797      return false;
1798
1799  bool Dead;
1800  if (auto *F = dyn_cast<Function>(&GV))
1801    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
1802  else
1803    Dead = GV.use_empty();
1804  if (!Dead)
1805    return false;
1806
1807  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
1808  GV.eraseFromParent();
1809  ++NumDeleted;
1810  return true;
1811}
1812
1813static bool isPointerValueDeadOnEntryToFunction(
1814    const Function *F, GlobalValue *GV,
1815    function_ref<DominatorTree &(Function &)> LookupDomTree) {
1816  // Find all uses of GV. We expect them all to be in F, and we bail out if
1817  // we encounter a use we cannot identify.
1818  //
1819  // On each of these uses, identify if the memory that GV points to is
1820  // used/required/live at the start of the function. If it is not, for example
1821  // if the first thing the function does is store to the GV, the GV can
1822  // possibly be demoted.
1823  //
1824  // We don't do an exhaustive search for memory operations - simply look
1825  // through bitcasts as they're quite common and benign.
1826  const DataLayout &DL = GV->getParent()->getDataLayout();
1827  SmallVector<LoadInst *, 4> Loads;
1828  SmallVector<StoreInst *, 4> Stores;
1829  for (auto *U : GV->users()) {
1830    if (Operator::getOpcode(U) == Instruction::BitCast) {
1831      for (auto *UU : U->users()) {
1832        if (auto *LI = dyn_cast<LoadInst>(UU))
1833          Loads.push_back(LI);
1834        else if (auto *SI = dyn_cast<StoreInst>(UU))
1835          Stores.push_back(SI);
1836        else
1837          return false;
1838      }
1839      continue;
1840    }
1841
1842    Instruction *I = dyn_cast<Instruction>(U);
1843    if (!I)
1844      return false;
1845    assert(I->getParent()->getParent() == F);
1846
1847    if (auto *LI = dyn_cast<LoadInst>(I))
1848      Loads.push_back(LI);
1849    else if (auto *SI = dyn_cast<StoreInst>(I))
1850      Stores.push_back(SI);
1851    else
1852      return false;
1853  }
1854
1855  // We have identified all uses of GV into loads and stores. Now check if all
1856  // of them are known not to depend on the value of the global at the function
1857  // entry point. We do this by ensuring that every load is dominated by at
1858  // least one store.
1859  auto &DT = LookupDomTree(*const_cast<Function *>(F));
1860
1861  // The below check is quadratic. Check we're not going to do too many tests.
1862  // FIXME: Even though this will always have worst-case quadratic time, we
1863  // could put effort into minimizing the average time by putting stores that
1864  // have been shown to dominate at least one load at the beginning of the
1865  // Stores array, making subsequent dominance checks more likely to succeed
1866  // early.
1867  //
1868  // The threshold here is fairly large because global->local demotion is a
1869  // very powerful optimization should it fire.
1870  const unsigned Threshold = 100;
1871  if (Loads.size() * Stores.size() > Threshold)
1872    return false;
1873
1874  for (auto *L : Loads) {
1875    auto *LTy = L->getType();
1876    if (none_of(Stores, [&](const StoreInst *S) {
1877          auto *STy = S->getValueOperand()->getType();
1878          // The load is only dominated by the store if DomTree says so
1879          // and the number of bits loaded in L is less than or equal to
1880          // the number of bits stored in S.
1881          return DT.dominates(S, L) &&
1882                 DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
1883        }))
1884      return false;
1885  }
1886  // All loads have known dependences inside F, so the global can be localized.
1887  return true;
1888}
1889
1890/// C may have non-instruction users. Can all of those users be turned into
1891/// instructions?
1892static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
1893  // We don't do this exhaustively. The most common pattern that we really need
1894  // to care about is a constant GEP or constant bitcast - so just looking
1895  // through one single ConstantExpr.
1896  //
1897  // The set of constants that this function returns true for must be able to be
1898  // handled by makeAllConstantUsesInstructions.
1899  for (auto *U : C->users()) {
1900    if (isa<Instruction>(U))
1901      continue;
1902    if (!isa<ConstantExpr>(U))
1903      // Non-instruction, non-ConstantExpr user; cannot convert this.
1904      return false;
1905    for (auto *UU : U->users())
1906      if (!isa<Instruction>(UU))
1907        // A constantexpr used by another constant. We don't try to recurse any
1908        // further; just bail out at this point.
1909        return false;
1910  }
1911
1912  return true;
1913}
1914
1915/// C may have non-instruction users, and
1916/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1917/// non-instruction users to instructions.
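///
/// For example (illustrative), an instruction operand that is the constant
/// expression "getelementptr (@G, 0, 1)" is replaced by an equivalent GEP
/// instruction inserted immediately before the user.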
1918static void makeAllConstantUsesInstructions(Constant *C) {
1919  SmallVector<ConstantExpr*,4> Users;
1920  for (auto *U : C->users()) {
1921    if (isa<ConstantExpr>(U))
1922      Users.push_back(cast<ConstantExpr>(U));
1923    else
1924      // Any non-ConstantExpr user must be an instruction; otherwise
1925      // allNonInstructionUsersCanBeMadeInstructions should not have returned true.
1926      assert(
1927          isa<Instruction>(U) &&
1928          "Can't transform non-constantexpr non-instruction to instruction!");
1929  }
1930
1931  SmallVector<Value*,4> UUsers;
1932  for (auto *U : Users) {
1933    UUsers.clear();
1934    for (auto *UU : U->users())
1935      UUsers.push_back(UU);
1936    for (auto *UU : UUsers) {
1937      Instruction *UI = cast<Instruction>(UU);
1938      Instruction *NewU = U->getAsInstruction();
1939      NewU->insertBefore(UI);
1940      UI->replaceUsesOfWith(U, NewU);
1941    }
1942    // We've replaced all the uses, so destroy the constant. (destroyConstant
1943    // will update value handles and metadata.)
1944    U->destroyConstant();
1945  }
1946}
1947
1948/// Analyze the specified global variable and optimize
1949/// it if possible.  If we make a change, return true.
1950static bool
1951processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1952                      function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1953                      function_ref<DominatorTree &(Function &)> LookupDomTree) {
1954  auto &DL = GV->getParent()->getDataLayout();
1955  // If this is a first class global and has only one accessing function and
1956  // this function is non-recursive, we replace the global with a local alloca
1957  // in this function.
1958  //
1959  // NOTE: It doesn't make sense to promote non-single-value types since we
1960  // are just replacing static memory with stack memory.
1961  //
1962  // If the global is in different address space, don't bring it to stack.
1963  if (!GS.HasMultipleAccessingFunctions &&
1964      GS.AccessingFunction &&
1965      GV->getValueType()->isSingleValueType() &&
1966      GV->getType()->getAddressSpace() == 0 &&
1967      !GV->isExternallyInitialized() &&
1968      allNonInstructionUsersCanBeMadeInstructions(GV) &&
1969      GS.AccessingFunction->doesNotRecurse() &&
1970      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1971                                          LookupDomTree)) {
1972    const DataLayout &DL = GV->getParent()->getDataLayout();
1973
1974    LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1975    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1976                                                   ->getEntryBlock().begin());
1977    Type *ElemTy = GV->getValueType();
1978    // FIXME: Pass Global's alignment when globals have alignment
1979    AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1980                                        GV->getName(), &FirstI);
1981    if (!isa<UndefValue>(GV->getInitializer()))
1982      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1983
1984    makeAllConstantUsesInstructions(GV);
1985
1986    GV->replaceAllUsesWith(Alloca);
1987    GV->eraseFromParent();
1988    ++NumLocalized;
1989    return true;
1990  }
1991
1992  // If the global is never loaded (but may be stored to), it is dead.
1993  // Delete it now.
1994  if (!GS.IsLoaded) {
1995    LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1996
1997    bool Changed;
1998    if (isLeakCheckerRoot(GV)) {
1999      // Delete any constant stores to the global.
2000      Changed = CleanupPointerRootUsers(GV, GetTLI);
2001    } else {
2002      // Delete any stores we can find to the global.  We may not be able to
2003      // make it completely dead though.
2004      Changed =
2005          CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
2006    }
2007
2008    // If the global is dead now, delete it.
2009    if (GV->use_empty()) {
2010      GV->eraseFromParent();
2011      ++NumDeleted;
2012      Changed = true;
2013    }
2014    return Changed;
2015
2016  }
2017  if (GS.StoredType <= GlobalStatus::InitializerStored) {
2018    LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
2019
2020    // Don't actually mark a global constant if it's atomic because atomic loads
2021    // are implemented by a trivial cmpxchg in some edge-cases and that usually
2022    // requires write access to the variable even if it's not actually changed.
2023    if (GS.Ordering == AtomicOrdering::NotAtomic)
2024      GV->setConstant(true);
2025
2026    // Clean up any obviously simplifiable users now.
2027    CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
2028
2029    // If the global is dead now, just nuke it.
2030    if (GV->use_empty()) {
2031      LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
2032                        << "all users and delete global!\n");
2033      GV->eraseFromParent();
2034      ++NumDeleted;
2035      return true;
2036    }
2037
2038    // Fall through to the next check; see if we can optimize further.
2039    ++NumMarked;
2040  }
2041  if (!GV->getInitializer()->getType()->isSingleValueType()) {
2042    const DataLayout &DL = GV->getParent()->getDataLayout();
2043    if (SRAGlobal(GV, DL))
2044      return true;
2045  }
2046  if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
2047    // If the initial value for the global was an undef value, and if only
2048    // one other value was stored into it, we can just change the
2049    // initializer to be the stored value, then delete all stores to the
2050    // global.  This allows us to mark it constant.
2051    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
2052      if (isa<UndefValue>(GV->getInitializer())) {
2053        // Change the initial value here.
2054        GV->setInitializer(SOVConstant);
2055
2056        // Clean up any obviously simplifiable users now.
2057        CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
2058
2059        if (GV->use_empty()) {
2060          LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
2061                            << "simplify all users and delete global!\n");
2062          GV->eraseFromParent();
2063          ++NumDeleted;
2064        }
2065        ++NumSubstitute;
2066        return true;
2067      }
2068
2069    // Try to optimize globals based on the knowledge that only one value
2070    // (besides its initializer) is ever stored to the global.
2071    if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
2072                                 GetTLI))
2073      return true;
2074
2075    // Otherwise, if the global was not a boolean, we can shrink it to be a
2076    // boolean.
2077    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
2078      if (GS.Ordering == AtomicOrdering::NotAtomic) {
2079        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
2080          ++NumShrunkToBool;
2081          return true;
2082        }
2083      }
2084    }
2085  }
2086
2087  return false;
2088}
2089
2090/// Analyze the specified global variable and optimize it if possible.  If we
2091/// make a change, return true.
2092static bool
2093processGlobal(GlobalValue &GV,
2094              function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2095              function_ref<DominatorTree &(Function &)> LookupDomTree) {
2096  if (GV.getName().startswith("llvm."))
2097    return false;
2098
2099  GlobalStatus GS;
2100
2101  if (GlobalStatus::analyzeGlobal(&GV, GS))
2102    return false;
2103
2104  bool Changed = false;
2105  if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
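    // The address is never compared within this module.  If the global also has
    // local linkage, nothing outside the module can observe its address either,
    // so it can be marked unnamed_addr; otherwise only local_unnamed_addr.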
2106    auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
2107                                               : GlobalValue::UnnamedAddr::Local;
2108    if (NewUnnamedAddr != GV.getUnnamedAddr()) {
2109      GV.setUnnamedAddr(NewUnnamedAddr);
2110      NumUnnamed++;
2111      Changed = true;
2112    }
2113  }
2114
2115  // Do more involved optimizations if the global is internal.
2116  if (!GV.hasLocalLinkage())
2117    return Changed;
2118
2119  auto *GVar = dyn_cast<GlobalVariable>(&GV);
2120  if (!GVar)
2121    return Changed;
2122
2123  if (GVar->isConstant() || !GVar->hasInitializer())
2124    return Changed;
2125
2126  return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
2127}
2128
2129/// Walk all of the direct calls of the specified function, changing them to
2130/// FastCC.
2131static void ChangeCalleesToFastCall(Function *F) {
2132  for (User *U : F->users()) {
2133    if (isa<BlockAddress>(U))
2134      continue;
2135    cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
2136  }
2137}
2138
2139static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
2140                               Attribute::AttrKind A) {
2141  unsigned AttrIndex;
2142  if (Attrs.hasAttrSomewhere(A, &AttrIndex))
2143    return Attrs.removeAttribute(C, AttrIndex, A);
2144  return Attrs;
2145}
2146
2147static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
2148  F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
2149  for (User *U : F->users()) {
2150    if (isa<BlockAddress>(U))
2151      continue;
2152    CallBase *CB = cast<CallBase>(U);
2153    CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
2154  }
2155}
2156
2157/// Return true if this is a calling convention that we'd like to change.  The
2158/// idea here is that we don't want to mess with the convention if the user
2159/// explicitly requested something with performance implications like coldcc,
2160/// GHC, or anyregcc.
2161static bool hasChangeableCC(Function *F) {
2162  CallingConv::ID CC = F->getCallingConv();
2163
2164  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
2165  if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2166    return false;
2167
2168  // FIXME: Change CC for the whole chain of musttail calls when possible.
2169  //
2170  // Can't change CC of the function that either has musttail calls, or is a
2171  // musttail callee itself
2172  for (User *U : F->users()) {
2173    if (isa<BlockAddress>(U))
2174      continue;
2175    CallInst* CI = dyn_cast<CallInst>(U);
2176    if (!CI)
2177      continue;
2178
2179    if (CI->isMustTailCall())
2180      return false;
2181  }
2182
2183  for (BasicBlock &BB : *F)
2184    if (BB.getTerminatingMustTailCall())
2185      return false;
2186
2187  return true;
2188}
2189
2190/// Return true if the block containing the call site has a BlockFrequency of
2191/// less than ColdCCRelFreq% of the entry block.
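///
/// For example, if ColdCCRelFreq is 2, a call site is considered cold when its
/// block frequency is below 2% of the caller's entry block frequency.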
2192static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
2193  const BranchProbability ColdProb(ColdCCRelFreq, 100);
2194  auto *CallSiteBB = CB.getParent();
2195  auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
2196  auto CallerEntryFreq =
2197      CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
2198  return CallSiteFreq < CallerEntryFreq * ColdProb;
2199}
2200
2201// This function checks if the input function F is cold at all call sites. It
2202// also looks at each call site's containing function, returning false if the
2203// caller function contains other non-cold calls. The input vector AllCallsCold
2204// contains a list of functions that only have call sites in cold blocks.
2205static bool
2206isValidCandidateForColdCC(Function &F,
2207                          function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2208                          const std::vector<Function *> &AllCallsCold) {
2209
2210  if (F.user_empty())
2211    return false;
2212
2213  for (User *U : F.users()) {
2214    if (isa<BlockAddress>(U))
2215      continue;
2216
2217    CallBase &CB = cast<CallBase>(*U);
2218    Function *CallerFunc = CB.getParent()->getParent();
2219    BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
2220    if (!isColdCallSite(CB, CallerBFI))
2221      return false;
2222    auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
2223    if (It == AllCallsCold.end())
2224      return false;
2225  }
2226  return true;
2227}
2228
2229static void changeCallSitesToColdCC(Function *F) {
2230  for (User *U : F->users()) {
2231    if (isa<BlockAddress>(U))
2232      continue;
2233    cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
2234  }
2235}
2236
2237// This function iterates over all the call instructions in the input Function
2238// and checks that all call sites are in cold blocks and are allowed to use the
2239// coldcc calling convention.
2240static bool
2241hasOnlyColdCalls(Function &F,
2242                 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2243  for (BasicBlock &BB : F) {
2244    for (Instruction &I : BB) {
2245      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2246        // Skip over inline asm instructions since they aren't function calls.
2247        if (CI->isInlineAsm())
2248          continue;
2249        Function *CalledFn = CI->getCalledFunction();
2250        if (!CalledFn)
2251          return false;
2252        if (!CalledFn->hasLocalLinkage())
2253          return false;
2254        // Skip over intrinsics since they won't remain as function calls.
2255        if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
2256          continue;
2257        // Check if it's valid to use coldcc calling convention.
2258        if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
2259            CalledFn->hasAddressTaken())
2260          return false;
2261        BlockFrequencyInfo &CallerBFI = GetBFI(F);
2262        if (!isColdCallSite(*CI, CallerBFI))
2263          return false;
2264      }
2265    }
2266  }
2267  return true;
2268}
2269
2270static bool hasMustTailCallers(Function *F) {
2271  for (User *U : F->users()) {
2272    CallBase *CB = dyn_cast<CallBase>(U);
2273    if (!CB) {
2274      assert(isa<BlockAddress>(U) &&
2275             "Expected either CallBase or BlockAddress");
2276      continue;
2277    }
2278    if (CB->isMustTailCall())
2279      return true;
2280  }
2281  return false;
2282}
2283
2284static bool hasInvokeCallers(Function *F) {
2285  for (User *U : F->users())
2286    if (isa<InvokeInst>(U))
2287      return true;
2288  return false;
2289}
2290
2291static void RemovePreallocated(Function *F) {
2292  RemoveAttribute(F, Attribute::Preallocated);
2293
2294  auto *M = F->getParent();
2295
2296  IRBuilder<> Builder(M->getContext());
2297
2298  // Cannot modify users() while iterating over it, so make a copy.
2299  SmallVector<User *, 4> PreallocatedCalls(F->users());
2300  for (User *U : PreallocatedCalls) {
2301    CallBase *CB = dyn_cast<CallBase>(U);
2302    if (!CB)
2303      continue;
2304
2305    assert(
2306        !CB->isMustTailCall() &&
2307        "Shouldn't call RemovePreallocated() on a musttail preallocated call");
2308    // Create copy of call without "preallocated" operand bundle.
2309    SmallVector<OperandBundleDef, 1> OpBundles;
2310    CB->getOperandBundlesAsDefs(OpBundles);
2311    CallBase *PreallocatedSetup = nullptr;
2312    for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
2313      if (It->getTag() == "preallocated") {
2314        PreallocatedSetup = cast<CallBase>(*It->input_begin());
2315        OpBundles.erase(It);
2316        break;
2317      }
2318    }
2319    assert(PreallocatedSetup && "Did not find preallocated bundle");
2320    uint64_t ArgCount =
2321        cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
2322
2323    assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
2324           "Unknown indirect call type");
2325    CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
2326    CB->replaceAllUsesWith(NewCB);
2327    NewCB->takeName(CB);
2328    CB->eraseFromParent();
2329
2330    Builder.SetInsertPoint(PreallocatedSetup);
2331    auto *StackSave =
2332        Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
2333
2334    Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
2335    Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
2336                       StackSave);
2337
2338    // Replace @llvm.call.preallocated.arg() with alloca.
2339    // Cannot modify users() while iterating over it, so make a copy.
2340    // @llvm.call.preallocated.arg() can be called with the same index multiple
2341    // times. So for each @llvm.call.preallocated.arg(), we see if we have
2342    // already created a Value* for the index, and if not, create an alloca and
2343    // bitcast right after the @llvm.call.preallocated.setup() so that it
2344    // dominates all uses.
2345    SmallVector<Value *, 2> ArgAllocas(ArgCount);
2346    SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
2347    for (auto *User : PreallocatedArgs) {
2348      auto *UseCall = cast<CallBase>(User);
2349      assert(UseCall->getCalledFunction()->getIntrinsicID() ==
2350                 Intrinsic::call_preallocated_arg &&
2351             "preallocated token use was not a llvm.call.preallocated.arg");
2352      uint64_t AllocArgIndex =
2353          cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
2354      Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
2355      if (!AllocaReplacement) {
2356        auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
2357        auto *ArgType = UseCall
2358                            ->getAttribute(AttributeList::FunctionIndex,
2359                                           Attribute::Preallocated)
2360                            .getValueAsType();
2361        auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
2362        Builder.SetInsertPoint(InsertBefore);
2363        auto *Alloca =
2364            Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
2365        auto *BitCast = Builder.CreateBitCast(
2366            Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
2367        ArgAllocas[AllocArgIndex] = BitCast;
2368        AllocaReplacement = BitCast;
2369      }
2370
2371      UseCall->replaceAllUsesWith(AllocaReplacement);
2372      UseCall->eraseFromParent();
2373    }
2374    // Remove @llvm.call.preallocated.setup().
2375    cast<Instruction>(PreallocatedSetup)->eraseFromParent();
2376  }
2377}
2378
2379static bool
2380OptimizeFunctions(Module &M,
2381                  function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2382                  function_ref<TargetTransformInfo &(Function &)> GetTTI,
2383                  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2384                  function_ref<DominatorTree &(Function &)> LookupDomTree,
2385                  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2386
2387  bool Changed = false;
2388
2389  std::vector<Function *> AllCallsCold;
2390  for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
2391    Function *F = &*FI++;
2392    if (hasOnlyColdCalls(*F, GetBFI))
2393      AllCallsCold.push_back(F);
2394  }
2395
2396  // Optimize functions.
2397  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
2398    Function *F = &*FI++;
2399
2400    // Don't perform global opt pass on naked functions; we don't want fast
2401    // calling conventions for naked functions.
2402    if (F->hasFnAttribute(Attribute::Naked))
2403      continue;
2404
2405    // Functions without names cannot be referenced outside this module.
2406    if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
2407      F->setLinkage(GlobalValue::InternalLinkage);
2408
2409    if (deleteIfDead(*F, NotDiscardableComdats)) {
2410      Changed = true;
2411      continue;
2412    }
2413
2414    // LLVM's definition of dominance allows instructions that are cyclic
2415    // in unreachable blocks, e.g.:
2416    // %pat = select i1 %condition, @global, i16* %pat
2417    // because any instruction dominates an instruction in a block that's
2418    // not reachable from entry.
2419    // So, remove unreachable blocks from the function, because a) there's
2420    // no point in analyzing them and b) GlobalOpt should otherwise grow
2421    // some more complicated logic to break these cycles.
2422    // Removing unreachable blocks might invalidate the dominator so we
2423    // recalculate it.
2424    if (!F->isDeclaration()) {
2425      if (removeUnreachableBlocks(*F)) {
2426        auto &DT = LookupDomTree(*F);
2427        DT.recalculate(*F);
2428        Changed = true;
2429      }
2430    }
2431
2432    Changed |= processGlobal(*F, GetTLI, LookupDomTree);
2433
2434    if (!F->hasLocalLinkage())
2435      continue;
2436
2437    // If we have an inalloca parameter that we can safely remove the
2438    // inalloca attribute from, do so. This unlocks optimizations that
2439    // wouldn't be safe in the presence of inalloca.
2440    // FIXME: We should also hoist alloca affected by this to the entry
2441    // block if possible.
2442    if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
2443        !F->hasAddressTaken() && !hasMustTailCallers(F)) {
2444      RemoveAttribute(F, Attribute::InAlloca);
2445      Changed = true;
2446    }
2447
2448    // FIXME: handle invokes
2449    // FIXME: handle musttail
2450    if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
2451      if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
2452          !hasInvokeCallers(F)) {
2453        RemovePreallocated(F);
2454        Changed = true;
2455      }
2456      continue;
2457    }
2458
2459    if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
2460      NumInternalFunc++;
2461      TargetTransformInfo &TTI = GetTTI(*F);
2462      // Change the calling convention to coldcc if either stress testing is
2463      // enabled or the target would like to use coldcc on functions which are
2464      // cold at all call sites and the callers contain no other non coldcc
2465      // calls.
2466      if (EnableColdCCStressTest ||
2467          (TTI.useColdCCForColdCall(*F) &&
2468           isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
2469        F->setCallingConv(CallingConv::Cold);
2470        changeCallSitesToColdCC(F);
2471        Changed = true;
2472        NumColdCC++;
2473      }
2474    }
2475
2476    if (hasChangeableCC(F) && !F->isVarArg() &&
2477        !F->hasAddressTaken()) {
2478      // If this function has a calling convention worth changing, is not a
2479      // varargs function, and is only called directly, promote it to use the
2480      // Fast calling convention.
2481      F->setCallingConv(CallingConv::Fast);
2482      ChangeCalleesToFastCall(F);
2483      ++NumFastCallFns;
2484      Changed = true;
2485    }
2486
2487    if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2488        !F->hasAddressTaken()) {
2489      // The function is not used by a trampoline intrinsic, so it is safe
2490      // to remove the 'nest' attribute.
2491      RemoveAttribute(F, Attribute::Nest);
2492      ++NumNestRemoved;
2493      Changed = true;
2494    }
2495  }
2496  return Changed;
2497}
2498
2499static bool
2500OptimizeGlobalVars(Module &M,
2501                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2502                   function_ref<DominatorTree &(Function &)> LookupDomTree,
2503                   SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2504  bool Changed = false;
2505
2506  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
2507       GVI != E; ) {
2508    GlobalVariable *GV = &*GVI++;
2509    // Global variables without names cannot be referenced outside this module.
2510    if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
2511      GV->setLinkage(GlobalValue::InternalLinkage);
2512    // Simplify the initializer.
2513    if (GV->hasInitializer())
2514      if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
2515        auto &DL = M.getDataLayout();
2516        // TLI is not used in the case of a Constant, so use default nullptr
2517        // for that optional parameter, since we don't have a Function to
2518        // provide GetTLI anyway.
2519        Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2520        if (New != C)
2521          GV->setInitializer(New);
2522      }
2523
2524    if (deleteIfDead(*GV, NotDiscardableComdats)) {
2525      Changed = true;
2526      continue;
2527    }
2528
2529    Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
2530  }
2531  return Changed;
2532}
2533
2534/// Evaluate a piece of a constantexpr store into a global initializer.  This
2535/// returns 'Init' modified to reflect 'Val' stored into it.  At this point, the
2536/// GEP operands of Addr [0, OpNo) have been stepped into.
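///
/// For example (illustrative), storing i32 7 through the constant expression
/// "getelementptr (@G, i64 0, i32 2)" into @G = global {i32, i32, i32}
/// { i32 1, i32 2, i32 3 } rebuilds the initializer as { i32 1, i32 2, i32 7 }.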
2537static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2538                                   ConstantExpr *Addr, unsigned OpNo) {
2539  // Base case of the recursion.
2540  if (OpNo == Addr->getNumOperands()) {
2541    assert(Val->getType() == Init->getType() && "Type mismatch!");
2542    return Val;
2543  }
2544
2545  SmallVector<Constant*, 32> Elts;
2546  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2547    // Break up the constant into its elements.
2548    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2549      Elts.push_back(Init->getAggregateElement(i));
2550
2551    // Replace the element that we are supposed to.
2552    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2553    unsigned Idx = CU->getZExtValue();
2554    assert(Idx < STy->getNumElements() && "Struct index out of range!");
2555    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2556
2557    // Return the modified struct.
2558    return ConstantStruct::get(STy, Elts);
2559  }
2560
2561  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2562  uint64_t NumElts;
2563  if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType()))
2564    NumElts = ATy->getNumElements();
2565  else
2566    NumElts = cast<FixedVectorType>(Init->getType())->getNumElements();
2567
2568  // Break up the array into elements.
2569  for (uint64_t i = 0, e = NumElts; i != e; ++i)
2570    Elts.push_back(Init->getAggregateElement(i));
2571
2572  assert(CI->getZExtValue() < NumElts);
2573  Elts[CI->getZExtValue()] =
2574    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2575
2576  if (Init->getType()->isArrayTy())
2577    return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts);
2578  return ConstantVector::get(Elts);
2579}
2580
2581/// We have decided that Addr (which satisfies the predicate
2582/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
2583static void CommitValueTo(Constant *Val, Constant *Addr) {
2584  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2585    assert(GV->hasInitializer());
2586    GV->setInitializer(Val);
2587    return;
2588  }
2589
2590  ConstantExpr *CE = cast<ConstantExpr>(Addr);
2591  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2592  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2593}
2594
2595/// Given a map of address -> value, where addresses are expected to be some form
2596/// of either a global or a constant GEP, set the initializer for the address to
2597/// be the value. This performs mostly the same function as CommitValueTo()
2598/// and EvaluateStoreInto() but is optimized to be more efficient for the common
2599/// case where the set of addresses are GEPs sharing the same underlying global,
2600/// processing the GEPs in batches rather than individually.
2601///
2602/// To give an example, consider the following C++ code adapted from the clang
2603/// regression tests:
2604/// struct S {
2605///  int n = 10;
2606///  int m = 2 * n;
2607///  S(int a) : n(a) {}
2608/// };
2609///
2610/// template<typename T>
2611/// struct U {
2612///  T *r = &q;
2613///  T q = 42;
2614///  U *p = this;
2615/// };
2616///
2617/// U<S> e;
2618///
2619/// The global static constructor for 'e' will need to initialize 'r' and 'p' of
2620/// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm'
2621/// members. This batch algorithm simply uses the general CommitValueTo() method
2622/// to handle the complex nested S struct initialization of 'q', before
2623/// processing the outermost members in a single batch. Using CommitValueTo() to
2624/// handle each member in the outer struct is inefficient when the struct/array
2625/// is very large, as we end up creating and destroying constant arrays for each
2626/// initialization.
2627/// For the above case, we expect the following IR to be generated:
2628///
2629/// %struct.U = type { %struct.S*, %struct.S, %struct.U* }
2630/// %struct.S = type { i32, i32 }
2631/// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e,
2632///                                                  i64 0, i32 1),
2633///                         %struct.S { i32 42, i32 84 }, %struct.U* @e }
2634/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex
2635/// constant expression, while the other two elements of @e are "simple".
2636static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
2637  SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs;
2638  SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs;
2639  SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs;
2640  SimpleCEs.reserve(Mem.size());
2641
2642  for (const auto &I : Mem) {
2643    if (auto *GV = dyn_cast<GlobalVariable>(I.first)) {
2644      GVs.push_back(std::make_pair(GV, I.second));
2645    } else {
2646      ConstantExpr *GEP = cast<ConstantExpr>(I.first);
2647      // We don't handle the deeply recursive case using the batch method.
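      // (More than three operands means more than two GEP indices, i.e. the
      // address steps into a nested aggregate.)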
2648      if (GEP->getNumOperands() > 3)
2649        ComplexCEs.push_back(std::make_pair(GEP, I.second));
2650      else
2651        SimpleCEs.push_back(std::make_pair(GEP, I.second));
2652    }
2653  }

  // The algorithm below doesn't handle cases like nested structs, so use the
  // slower fully general method if we have to.
  for (auto ComplexCE : ComplexCEs)
    CommitValueTo(ComplexCE.second, ComplexCE.first);

  for (auto GVPair : GVs) {
    assert(GVPair.first->hasInitializer());
    GVPair.first->setInitializer(GVPair.second);
  }

  if (SimpleCEs.empty())
    return;

  // We cache a single global's initializer elements so that consecutive
  // address/value pairs referring to the same global can reuse them. This
  // avoids throwing away and rebuilding the constant struct/vector/array just
  // because one element is modified at a time.
  SmallVector<Constant *, 32> Elts;
  Elts.reserve(SimpleCEs.size());
  GlobalVariable *CurrentGV = nullptr;

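  // When Update is true, commit the cached element list back into CurrentGV's
  // initializer (if there is one) and, unless GV is the global already being
  // cached, repopulate the cache from GV's current initializer elements.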
  auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) {
    Constant *Init = GV->getInitializer();
    Type *Ty = Init->getType();
    if (Update) {
      if (CurrentGV) {
        assert(CurrentGV && "Expected a GV to commit to!");
        Type *CurrentInitTy = CurrentGV->getInitializer()->getType();
        // We have a valid cache that needs to be committed.
        if (StructType *STy = dyn_cast<StructType>(CurrentInitTy))
          CurrentGV->setInitializer(ConstantStruct::get(STy, Elts));
        else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy))
          CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts));
        else
          CurrentGV->setInitializer(ConstantVector::get(Elts));
      }
      if (CurrentGV == GV)
        return;
      // Need to clear and set up cache for new initializer.
      CurrentGV = GV;
      Elts.clear();
      unsigned NumElts;
      if (auto *STy = dyn_cast<StructType>(Ty))
        NumElts = STy->getNumElements();
      else if (auto *ATy = dyn_cast<ArrayType>(Ty))
        NumElts = ATy->getNumElements();
      else
        NumElts = cast<FixedVectorType>(Ty)->getNumElements();
      for (unsigned i = 0, e = NumElts; i != e; ++i)
        Elts.push_back(Init->getAggregateElement(i));
    }
  };

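  // Walk the simple GEP stores, overwriting individual cached elements; the
  // cache is flushed whenever a store targets a different global than the
  // previous one.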
  for (auto CEPair : SimpleCEs) {
    ConstantExpr *GEP = CEPair.first;
    Constant *Val = CEPair.second;

    GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0));
    commitAndSetupCache(GV, GV != CurrentGV);
    ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2));
    Elts[CI->getZExtValue()] = Val;
  }
  // The last initializer in the list needs to be committed explicitly; the
  // others are committed whenever a store to a different global is seen.
  commitAndSetupCache(CurrentGV, true);
}

/// Evaluate static constructors in the function, if we can.  Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
                                      TargetLibraryInfo *TLI) {
  // Call the function.
  Evaluator Eval(DL, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant*, 0>());

  if (EvalSuccess) {
    ++NumCtorsEvaluated;

    // We succeeded at evaluation: commit the result.
    LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
                      << F->getName() << "' to "
                      << Eval.getMutatedMemory().size() << " stores.\n");
    BatchCommitValueTo(Eval.getMutatedMemory());
    for (GlobalVariable *GV : Eval.getInvariants())
      GV->setConstant(true);
  }

  return EvalSuccess;
}

static int compareNames(Constant *const *A, Constant *const *B) {
  Value *AStripped = (*A)->stripPointerCasts();
  Value *BStripped = (*B)->stripPointerCasts();
  return AStripped->getName().compare(BStripped->getName());
}

static void setUsedInitializer(GlobalVariable &V,
                               const SmallPtrSetImpl<GlobalValue *> &Init) {
  if (Init.empty()) {
    V.eraseFromParent();
    return;
  }

  // Type of pointer to the array of pointers.
  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);

  SmallVector<Constant *, 8> UsedArray;
  for (GlobalValue *GV : Init) {
    Constant *Cast
      = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
    UsedArray.push_back(Cast);
  }
  // Sort to get deterministic order.
  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());

  Module *M = V.getParent();
  V.removeFromParent();
  GlobalVariable *NV =
      new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
                         ConstantArray::get(ATy, UsedArray), "");
  NV->takeName(&V);
  NV->setSection("llvm.metadata");
  delete &V;
}
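
// For illustration (hypothetical example): rewriting llvm.used for
// Init = {@f, @g} produces an array along the lines of
//   @llvm.used = appending global [2 x i8*]
//       [i8* bitcast (void ()* @f to i8*), i8* bitcast (i32* @g to i8*)],
//       section "llvm.metadata"
// with the entries sorted by the names of the stripped values.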

namespace {

/// An easy-to-access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
  SmallPtrSet<GlobalValue *, 8> Used;
  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
  GlobalVariable *UsedV;
  GlobalVariable *CompilerUsedV;

public:
  LLVMUsed(Module &M) {
    UsedV = collectUsedGlobalVariables(M, Used, false);
    CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
  }

  using iterator = SmallPtrSet<GlobalValue *, 8>::iterator;
  using used_iterator_range = iterator_range<iterator>;

  iterator usedBegin() { return Used.begin(); }
  iterator usedEnd() { return Used.end(); }

  used_iterator_range used() {
    return used_iterator_range(usedBegin(), usedEnd());
  }

  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
  iterator compilerUsedEnd() { return CompilerUsed.end(); }

  used_iterator_range compilerUsed() {
    return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
  }

  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }

  bool compilerUsedCount(GlobalValue *GV) const {
    return CompilerUsed.count(GV);
  }

  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
  bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }

  bool compilerUsedInsert(GlobalValue *GV) {
    return CompilerUsed.insert(GV).second;
  }

  void syncVariablesAndSets() {
    if (UsedV)
      setUsedInitializer(*UsedV, Used);
    if (CompilerUsedV)
      setUsedInitializer(*CompilerUsedV, CompilerUsed);
  }
};

} // end anonymous namespace
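
// Typical usage within this file (sketch): build an LLVMUsed once per module,
// adjust the Used/CompilerUsed sets while rewriting aliases, then call
// syncVariablesAndSets() to write the updated llvm.used and llvm.compiler.used
// arrays back into the module.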

static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
  if (GA.use_empty()) // No use at all.
    return false;

  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (!GA.hasOneUse())
    // Strictly more than one use. So at least one use is from something other
    // than llvm.used and llvm.compiler.used.
    return true;

  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}

static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
                                               const LLVMUsed &U) {
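  // "More than one use other than llvm.used" means at least two uses that do
  // not come from the llvm.used/llvm.compiler.used arrays, so raise the
  // threshold by one when one of those arrays references V.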
  unsigned N = 2;
  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (U.usedCount(&V) || U.compilerUsedCount(&V))
    ++N;
  return V.hasNUsesOrMore(N);
}

static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
  if (!GA.hasLocalLinkage())
    return true;

  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}

static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
                             bool &RenameTarget) {
  RenameTarget = false;
  bool Ret = false;
  if (hasUseOtherThanLLVMUsed(GA, U))
    Ret = true;

  // If the alias is externally visible, we may still be able to simplify it.
  if (!mayHaveOtherReferences(GA, U))
    return Ret;

  // If the aliasee has internal linkage, give it the name and linkage
  // of the alias, and delete the alias.  This turns:
  //   define internal ... @f(...)
  //   @a = alias ... @f
  // into:
  //   define ... @a(...)
  Constant *Aliasee = GA.getAliasee();
  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
  if (!Target->hasLocalLinkage())
    return Ret;

  // Do not perform the transform if multiple aliases potentially target the
  // aliasee. This check also ensures that it is safe to replace the section
  // and other attributes of the aliasee with those of the alias.
  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
    return Ret;

  RenameTarget = true;
  return true;
}

static bool
OptimizeGlobalAliases(Module &M,
                      SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
  bool Changed = false;
  LLVMUsed Used(M);

  for (GlobalValue *GV : Used.used())
    Used.compilerUsedErase(GV);

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    GlobalAlias *J = &*I++;

    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
      J->setLinkage(GlobalValue::InternalLinkage);

    if (deleteIfDead(*J, NotDiscardableComdats)) {
      Changed = true;
      continue;
    }

    // If the alias can change at link time, nothing can be done - bail out.
    if (J->isInterposable())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
    // We can't trivially replace the alias with the aliasee if the aliasee is
    // non-trivial in some way.
    // TODO: Try to handle non-zero GEPs of local aliasees.
    if (!Target)
      continue;
    Target->removeDeadConstantUsers();

    // Make all users of the alias use the aliasee instead.
    bool RenameTarget;
    if (!hasUsesToReplace(*J, Used, RenameTarget))
      continue;

    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
    ++NumAliasesResolved;
    Changed = true;

    if (RenameTarget) {
      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(&*J);
      Target->setLinkage(J->getLinkage());
      Target->setDSOLocal(J->isDSOLocal());
      Target->setVisibility(J->getVisibility());
      Target->setDLLStorageClass(J->getDLLStorageClass());

      if (Used.usedErase(&*J))
        Used.usedInsert(Target);

      if (Used.compilerUsedErase(&*J))
        Used.compilerUsedInsert(Target);
    } else if (mayHaveOtherReferences(*J, Used))
      continue;

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  Used.syncVariablesAndSets();

  return Changed;
}

static Function *
FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // Hack to get a default TLI before we have an actual Function.
  auto FuncIter = M.begin();
  if (FuncIter == M.end())
    return nullptr;
  auto *TLI = &GetTLI(*FuncIter);

  LibFunc F = LibFunc_cxa_atexit;
  if (!TLI->has(F))
    return nullptr;

  Function *Fn = M.getFunction(TLI->getName(F));
  if (!Fn)
    return nullptr;

  // Now get the actual TLI for Fn.
  TLI = &GetTLI(*Fn);

  // Make sure that the function has the correct prototype.
  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
    return nullptr;

  return Fn;
}

/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we simply check for 'ret'.
static bool cxxDtorIsEmpty(const Function &Fn) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
  // nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  for (auto &I : Fn.getEntryBlock()) {
    if (isa<DbgInfoIntrinsic>(I))
      continue;
    if (isa<ReturnInst>(I))
      return true;
    break;
  }
  return false;
}
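
// For illustration (hypothetical example): after earlier simplification an
// "empty" destructor is expected to look like
//   define linkonce_odr void @_ZN1SD2Ev(%struct.S* %this) {
//   entry:
//     ret void
//   }
// i.e. its entry block holds nothing but debug intrinsics and a return.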

static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object, that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.

  // This pass will look for calls to __cxa_atexit where the function is trivial
  // and remove them.
  bool Changed = false;

  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
       I != E;) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generates invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(*I++);
    if (!CI)
      continue;

    Function *DtorFn =
      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed |= true;
  }

  return Changed;
}
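
// For illustration (hypothetical example): a registration emitted by the
// front end such as
//   call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S*)* @_ZN1SD2Ev
//                            to void (i8*)*),
//                          i8* bitcast (%struct.S* @s to i8*),
//                          i8* @__dso_handle)
// is replaced by 0 and erased once @_ZN1SD2Ev is recognized as empty.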

static bool optimizeGlobalsInModule(
    Module &M, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<TargetTransformInfo &(Function &)> GetTTI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
  bool Changed = false;
  bool LocalChange = true;
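  // Iterate to a fixed point: each of the sub-passes below can expose new
  // opportunities for the others (e.g. evaluating a constructor may leave a
  // global unused and therefore deletable).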
  while (LocalChange) {
    LocalChange = false;

    NotDiscardableComdats.clear();
    for (const GlobalVariable &GV : M.globals())
      if (const Comdat *C = GV.getComdat())
        if (!GV.isDiscardableIfUnused() || !GV.use_empty())
          NotDiscardableComdats.insert(C);
    for (Function &F : M)
      if (const Comdat *C = F.getComdat())
        if (!F.isDefTriviallyDead())
          NotDiscardableComdats.insert(C);
    for (GlobalAlias &GA : M.aliases())
      if (const Comdat *C = GA.getComdat())
        if (!GA.isDiscardableIfUnused() || !GA.use_empty())
          NotDiscardableComdats.insert(C);

    // Delete functions that are trivially dead, ccc -> fastcc
    LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
                                     NotDiscardableComdats);

    // Optimize global_ctors list.
    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
      return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
    });

    // Optimize non-address-taken globals.
    LocalChange |=
        OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);

    // Try to remove trivial global destructors if they are not removed
    // already.
    Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}

PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &DL = M.getDataLayout();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
    return FAM.getResult<DominatorTreeAnalysis>(F);
  };
  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };

  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };

  if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
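
// Typical standalone invocation under the new pass manager (sketch, assuming
// the standard opt tool):
//   opt -passes=globalopt input.ll -S -o -
// The default per-module and LTO pipelines also schedule this pass
// automatically.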

namespace {

struct GlobalOptLegacyPass : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  GlobalOptLegacyPass() : ModulePass(ID) {
    initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    auto &DL = M.getDataLayout();
    auto LookupDomTree = [this](Function &F) -> DominatorTree & {
      return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
    };
    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    };

    auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
      return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
    };

    return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
                                   LookupDomTree);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
  }
};

} // end anonymous namespace

char GlobalOptLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
                      "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
                    "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() {
  return new GlobalOptLegacyPass();
}