1//===- StackColoring.cpp --------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the stack-coloring optimization that looks for
10// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
11// which represent the possible lifetime of stack slots. It attempts to
12// merge disjoint stack slots and reduce the used stack space.
13// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14//
15// TODO: In the future we plan to improve stack coloring in the following ways:
16// 1. Allow merging multiple small slots into a single larger slot at different
17//    offsets.
18// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19//    spill slots.
20//
21//===----------------------------------------------------------------------===//
22
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DepthFirstIterator.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/CodeGen/LiveInterval.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineFunctionPass.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineOperand.h"
38#include "llvm/CodeGen/Passes.h"
39#include "llvm/CodeGen/SelectionDAGNodes.h"
40#include "llvm/CodeGen/SlotIndexes.h"
41#include "llvm/CodeGen/TargetOpcodes.h"
42#include "llvm/CodeGen/WinEHFuncInfo.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DebugInfoMetadata.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/Metadata.h"
49#include "llvm/IR/Use.h"
50#include "llvm/IR/Value.h"
51#include "llvm/InitializePasses.h"
52#include "llvm/Pass.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Compiler.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/raw_ostream.h"
58#include <algorithm>
59#include <cassert>
60#include <limits>
61#include <memory>
62#include <utility>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "stack-coloring"
67
68static cl::opt<bool>
69DisableColoring("no-stack-coloring",
70        cl::init(false), cl::Hidden,
71        cl::desc("Disable stack coloring"));
72
73/// The user may write code that uses allocas outside of the declared lifetime
74/// zone. This can happen when the user returns a reference to a local
75/// data-structure. We can detect these cases and decide not to optimize the
76/// code. If this flag is enabled, we try to save the user. This option
77/// is treated as overriding LifetimeStartOnFirstUse below.
78static cl::opt<bool>
79ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80                          cl::init(false), cl::Hidden,
81                          cl::desc("Do not optimize lifetime zones that "
82                                   "are broken"));
83
84/// Enable enhanced dataflow scheme for lifetime analysis (treat first
85/// use of stack slot as start of slot lifetime, as opposed to looking
86/// for LIFETIME_START marker). See "Implementation notes" below for
87/// more info.
88static cl::opt<bool>
89LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90        cl::init(true), cl::Hidden,
91        cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92
93
94STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
95STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98
99//===----------------------------------------------------------------------===//
100//                           StackColoring Pass
101//===----------------------------------------------------------------------===//
102//
103// Stack Coloring reduces stack usage by merging stack slots when they
104// can't be used together. For example, consider the following C program:
105//
106//     void bar(char *, int);
107//     void foo(bool var) {
108//         A: {
109//             char z[4096];
110//             bar(z, 0);
111//         }
112//
113//         char *p;
114//         char x[4096];
115//         char y[4096];
116//         if (var) {
117//             p = x;
118//         } else {
119//             bar(y, 1);
120//             p = y + 1024;
121//         }
122//     B:
123//         bar(p, 2);
124//     }
125//
126// Naively-compiled, this program would use 12k of stack space. However, the
127// stack slot corresponding to `z` is always destroyed before either of the
128// stack slots for `x` or `y` are used, and then `x` is only used if `var`
129// is true, while `y` is only used if `var` is false. So in no time are 2
130// of the stack slots used together, and therefore we can merge them,
131// compiling the function using only a single 4k alloca:
132//
133//     void foo(bool var) { // equivalent
134//         char x[4096];
135//         char *p;
136//         bar(x, 0);
137//         if (var) {
138//             p = x;
139//         } else {
140//             bar(x, 1);
141//             p = x + 1024;
142//         }
143//         bar(p, 2);
144//     }
145//
146// This is an important optimization if we want stack space to be under
147// control in large functions, both open-coded ones and ones created by
148// inlining.
149//
150// Implementation Notes:
151// ---------------------
152//
153// An important part of the above reasoning is that `z` can't be accessed
154// while the latter 2 calls to `bar` are running. This is justified because
155// `z`'s lifetime is over after we exit from block `A:`, so any further
156// accesses to it would be UB. The way we represent this information
157// in LLVM is by having frontends delimit blocks with `lifetime.start`
158// and `lifetime.end` intrinsics.
159//
160// The effect of these intrinsics seems to be as follows (maybe I should
161// specify this in the reference?):
162//
163//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164//   lifetime intrinsic refers to that stack slot, in which case
165//   it is marked as *in-scope*.
166//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167//   the stack slot is overwritten with `undef`.
168//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169//   L4) on function exit, all stack slots are marked as *out-of-scope*.
170//   L5) `lifetime.end` is a no-op when called on a slot that is already
171//   *out-of-scope*.
172//   L6) memory accesses to *out-of-scope* stack slots are UB.
173//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174//   are invalidated, unless the slot is "degenerate". This is used to
175//   justify not marking slots as in-use until the pointer to them is
176//   used, but feels a bit hacky in the presence of things like LICM. See
177//   the "Degenerate Slots" section for more details.
178//
179// Now, let's ground stack coloring on these rules. We'll define a slot
180// as *in-use* at a (dynamic) point in execution if it either can be
181// written to at that point, or if it has a live and non-undef content
182// at that point.
183//
184// Obviously, slots that are never *in-use* together can be merged, and
185// in our example `foo`, the slots for `x`, `y` and `z` are never
186// in-use together (of course, sometimes slots that *are* in-use together
187// might still be mergable, but we don't care about that here).
188//
189// In this implementation, we successively merge pairs of slots that are
190// not *in-use* together. We could be smarter - for example, we could merge
191// a single large slot with 2 small slots, or we could construct the
192// interference graph and run a "smart" graph coloring algorithm, but with
193// that aside, how do we find out whether a pair of slots might be *in-use*
194// together?
195//
196// From our rules, we see that *out-of-scope* slots are never *in-use*,
197// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198// until their address is taken. Therefore, we can approximate slot activity
199// using dataflow.
200//
201// A subtle point: naively, we might try to figure out which pairs of
202// stack-slots interfere by propagating `S in-use` through the CFG for every
203// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204// which they are both *in-use*.
205//
206// That is sound, but overly conservative in some cases: in our (artificial)
207// example `foo`, either `x` or `y` might be in use at the label `B:`, but
208// as `x` is only in use if we came in from the `var` edge and `y` only
209// if we came from the `!var` edge, they still can't be in use together.
210// See PR32488 for an important real-life case.
211//
212// If we wanted to find all points of interference precisely, we could
213// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214// would be precise, but requires propagating `O(n^2)` dataflow facts.
215//
216// However, we aren't interested in the *set* of points of interference
217// between 2 stack slots, only *whether* there *is* such a point. So we
218// can rely on a little trick: for `S` and `T` to be in-use together,
219// one of them needs to become in-use while the other is in-use (or
220// they might both become in use simultaneously). We can check this
221// by also keeping track of the points at which a stack slot might *start*
222// being in-use.
223//
224// Exact first use:
225// ----------------
226//
227// Consider the following motivating example:
228//
229//     int foo() {
230//       char b1[1024], b2[1024];
231//       if (...) {
232//         char b3[1024];
233//         <uses of b1, b3>;
234//         return x;
235//       } else {
236//         char b4[1024], b5[1024];
237//         <uses of b2, b4, b5>;
238//         return y;
239//       }
240//     }
241//
242// In the code above, "b3" and "b4" are declared in distinct lexical
243// scopes, meaning that it is easy to prove that they can share the
244// same stack slot. Variables "b1" and "b2" are declared in the same
245// scope, meaning that from a lexical point of view, their lifetimes
246// overlap. From a control flow pointer of view, however, the two
247// variables are accessed in disjoint regions of the CFG, thus it
248// should be possible for them to share the same stack slot. An ideal
249// stack allocation for the function above would look like:
250//
251//     slot 0: b1, b2
252//     slot 1: b3, b4
253//     slot 2: b5
254//
255// Achieving this allocation is tricky, however, due to the way
256// lifetime markers are inserted. Here is a simplified view of the
257// control flow graph for the code above:
258//
259//                +------  block 0 -------+
260//               0| LIFETIME_START b1, b2 |
261//               1| <test 'if' condition> |
262//                +-----------------------+
263//                   ./              \.
264//   +------  block 1 -------+   +------  block 2 -------+
265//  2| LIFETIME_START b3     |  5| LIFETIME_START b4, b5 |
266//  3| <uses of b1, b3>      |  6| <uses of b2, b4, b5>  |
267//  4| LIFETIME_END b3       |  7| LIFETIME_END b4, b5   |
268//   +-----------------------+   +-----------------------+
269//                   \.              /.
270//                +------  block 3 -------+
271//               8| <cleanupcode>         |
272//               9| LIFETIME_END b1, b2   |
273//              10| return                |
274//                +-----------------------+
275//
276// If we create live intervals for the variables above strictly based
277// on the lifetime markers, we'll get the set of intervals on the
278// left. If we ignore the lifetime start markers and instead treat a
279// variable's lifetime as beginning with the first reference to the
280// var, then we get the intervals on the right.
281//
282//            LIFETIME_START      First Use
283//     b1:    [0,9]               [3,4] [8,9]
284//     b2:    [0,9]               [6,9]
285//     b3:    [2,4]               [3,4]
286//     b4:    [5,7]               [6,7]
287//     b5:    [5,7]               [6,7]
288//
289// For the intervals on the left, the best we can do is overlap two
290// variables (b3 and b4, for example); this gives us a stack size of
291// 4*1024 bytes, not ideal. When treating first-use as the start of a
292// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293// byte stack (better).
294//
295// Degenerate Slots:
296// -----------------
297//
298// Relying entirely on first-use of stack slots is problematic,
299// however, due to the fact that optimizations can sometimes migrate
300// uses of a variable outside of its lifetime start/end region. Here
301// is an example:
302//
303//     int bar() {
304//       char b1[1024], b2[1024];
305//       if (...) {
306//         <uses of b2>
307//         return y;
308//       } else {
309//         <uses of b1>
310//         while (...) {
311//           char b3[1024];
312//           <uses of b3>
313//         }
314//       }
315//     }
316//
317// Before optimization, the control flow graph for the code above
318// might look like the following:
319//
320//                +------  block 0 -------+
321//               0| LIFETIME_START b1, b2 |
322//               1| <test 'if' condition> |
323//                +-----------------------+
324//                   ./              \.
325//   +------  block 1 -------+    +------- block 2 -------+
326//  2| <uses of b2>          |   3| <uses of b1>          |
327//   +-----------------------+    +-----------------------+
328//              |                            |
329//              |                 +------- block 3 -------+ <-\.
330//              |                4| <while condition>     |    |
331//              |                 +-----------------------+    |
332//              |               /          |                   |
333//              |              /  +------- block 4 -------+
334//              \             /  5| LIFETIME_START b3     |    |
335//               \           /   6| <uses of b3>          |    |
336//                \         /    7| LIFETIME_END b3       |    |
337//                 \        |    +------------------------+    |
338//                  \       |                 \                /
339//                +------  block 5 -----+      \---------------
340//               8| <cleanupcode>       |
341//               9| LIFETIME_END b1, b2 |
342//              10| return              |
343//                +---------------------+
344//
345// During optimization, however, it can happen that an instruction
346// computing an address in "b3" (for example, a loop-invariant GEP) is
347// hoisted up out of the loop from block 4 to block 2.  [Note that
348// this is not an actual load from the stack, only an instruction that
349// computes the address to be loaded]. If this happens, there is now a
350// path leading from the first use of b3 to the return instruction
351// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352// now larger than if we were computing live intervals strictly based
353// on lifetime markers. In the example above, this lengthened lifetime
354// would mean that it would appear illegal to overlap b3 with b2.
355//
356// To deal with this such cases, the code in ::collectMarkers() below
357// tries to identify "degenerate" slots -- those slots where on a single
358// forward pass through the CFG we encounter a first reference to slot
359// K before we hit the slot K lifetime start marker. For such slots,
360// we fall back on using the lifetime start marker as the beginning of
361// the variable's lifetime.  NB: with this implementation, slots can
362// appear degenerate in cases where there is unstructured control flow:
363//
364//    if (q) goto mid;
365//    if (x > 9) {
366//         int b[100];
367//         memcpy(&b[0], ...);
368//    mid: b[k] = ...;
369//         abc(&b);
370//    }
371//
372// If in RPO ordering chosen to walk the CFG  we happen to visit the b[k]
373// before visiting the memcpy block (which will contain the lifetime start
374// for "b" then it will appear that 'b' has a degenerate lifetime.
375//
376
377namespace {
378
379/// StackColoring - A machine pass for merging disjoint stack allocations,
380/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
381class StackColoring : public MachineFunctionPass {
382  MachineFrameInfo *MFI;
383  MachineFunction *MF;
384
385  /// A class representing liveness information for a single basic block.
386  /// Each bit in the BitVector represents the liveness property
387  /// for a different stack slot.
388  struct BlockLifetimeInfo {
389    /// Which slots BEGINs in each basic block.
390    BitVector Begin;
391
392    /// Which slots ENDs in each basic block.
393    BitVector End;
394
395    /// Which slots are marked as LIVE_IN, coming into each basic block.
396    BitVector LiveIn;
397
398    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
399    BitVector LiveOut;
400  };
401
402  /// Maps active slots (per bit) for each basic block.
403  using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
404  LivenessMap BlockLiveness;
405
406  /// Maps serial numbers to basic blocks.
407  DenseMap<const MachineBasicBlock *, int> BasicBlocks;
408
409  /// Maps basic blocks to a serial number.
410  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
411
412  /// Maps slots to their use interval. Outside of this interval, slots
413  /// values are either dead or `undef` and they will not be written to.
414  SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
415
416  /// Maps slots to the points where they can become in-use.
417  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
418
419  /// VNInfo is used for the construction of LiveIntervals.
420  VNInfo::Allocator VNInfoAllocator;
421
422  /// SlotIndex analysis object.
423  SlotIndexes *Indexes;
424
425  /// The list of lifetime markers found. These markers are to be removed
426  /// once the coloring is done.
427  SmallVector<MachineInstr*, 8> Markers;
428
429  /// Record the FI slots for which we have seen some sort of
430  /// lifetime marker (either start or end).
431  BitVector InterestingSlots;
432
433  /// FI slots that need to be handled conservatively (for these
434  /// slots lifetime-start-on-first-use is disabled).
435  BitVector ConservativeSlots;
436
437  /// Number of iterations taken during data flow analysis.
438  unsigned NumIterations;
439
440public:
441  static char ID;
442
443  StackColoring() : MachineFunctionPass(ID) {
444    initializeStackColoringPass(*PassRegistry::getPassRegistry());
445  }
446
447  void getAnalysisUsage(AnalysisUsage &AU) const override;
448  bool runOnMachineFunction(MachineFunction &Func) override;
449
450private:
451  /// Used in collectMarkers
452  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
453
454  /// Debug.
455  void dump() const;
456  void dumpIntervals() const;
457  void dumpBB(MachineBasicBlock *MBB) const;
458  void dumpBV(const char *tag, const BitVector &BV) const;
459
460  /// Removes all of the lifetime marker instructions from the function.
461  /// \returns true if any markers were removed.
462  bool removeAllMarkers();
463
464  /// Scan the machine function and find all of the lifetime markers.
465  /// Record the findings in the BEGIN and END vectors.
466  /// \returns the number of markers found.
467  unsigned collectMarkers(unsigned NumSlot);
468
469  /// Perform the dataflow calculation and calculate the lifetime for each of
470  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
471  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
472  /// in and out blocks.
473  void calculateLocalLiveness();
474
475  /// Returns TRUE if we're using the first-use-begins-lifetime method for
476  /// this slot (if FALSE, then the start marker is treated as start of lifetime).
477  bool applyFirstUse(int Slot) {
478    if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
479      return false;
480    if (ConservativeSlots.test(Slot))
481      return false;
482    return true;
483  }
484
485  /// Examines the specified instruction and returns TRUE if the instruction
486  /// represents the start or end of an interesting lifetime. The slot or slots
487  /// starting or ending are added to the vector "slots" and "isStart" is set
488  /// accordingly.
489  /// \returns True if inst contains a lifetime start or end
490  bool isLifetimeStartOrEnd(const MachineInstr &MI,
491                            SmallVector<int, 4> &slots,
492                            bool &isStart);
493
494  /// Construct the LiveIntervals for the slots.
495  void calculateLiveIntervals(unsigned NumSlots);
496
497  /// Go over the machine function and change instructions which use stack
498  /// slots to use the joint slots.
499  void remapInstructions(DenseMap<int, int> &SlotRemap);
500
501  /// The input program may contain instructions which are not inside lifetime
502  /// markers. This can happen due to a bug in the compiler or due to a bug in
503  /// user code (for example, returning a reference to a local variable).
504  /// This procedure checks all of the instructions in the function and
505  /// invalidates lifetime ranges which do not contain all of the instructions
506  /// which access that frame slot.
507  void removeInvalidSlotRanges();
508
509  /// Map entries which point to other entries to their destination.
510  ///   A->B->C becomes A->C.
511  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
512};
513
514} // end anonymous namespace
515
516char StackColoring::ID = 0;
517
518char &llvm::StackColoringID = StackColoring::ID;
519
520INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
521                      "Merge disjoint stack slots", false, false)
522INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
523INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
524                    "Merge disjoint stack slots", false, false)
525
526void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
527  AU.addRequired<SlotIndexes>();
528  MachineFunctionPass::getAnalysisUsage(AU);
529}
530
531#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
532LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
533                                            const BitVector &BV) const {
534  dbgs() << tag << " : { ";
535  for (unsigned I = 0, E = BV.size(); I != E; ++I)
536    dbgs() << BV.test(I) << " ";
537  dbgs() << "}\n";
538}
539
540LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
541  LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
542  assert(BI != BlockLiveness.end() && "Block not found");
543  const BlockLifetimeInfo &BlockInfo = BI->second;
544
545  dumpBV("BEGIN", BlockInfo.Begin);
546  dumpBV("END", BlockInfo.End);
547  dumpBV("LIVE_IN", BlockInfo.LiveIn);
548  dumpBV("LIVE_OUT", BlockInfo.LiveOut);
549}
550
551LLVM_DUMP_METHOD void StackColoring::dump() const {
552  for (MachineBasicBlock *MBB : depth_first(MF)) {
553    dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
554           << MBB->getName() << "]\n";
555    dumpBB(MBB);
556  }
557}
558
559LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
560  for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
561    dbgs() << "Interval[" << I << "]:\n";
562    Intervals[I]->dump();
563  }
564}
565#endif
566
567static inline int getStartOrEndSlot(const MachineInstr &MI)
568{
569  assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
570          MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
571         "Expected LIFETIME_START or LIFETIME_END op");
572  const MachineOperand &MO = MI.getOperand(0);
573  int Slot = MO.getIndex();
574  if (Slot >= 0)
575    return Slot;
576  return -1;
577}
578
579// At the moment the only way to end a variable lifetime is with
580// a VARIABLE_LIFETIME op (which can't contain a start). If things
581// change and the IR allows for a single inst that both begins
582// and ends lifetime(s), this interface will need to be reworked.
583bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
584                                         SmallVector<int, 4> &slots,
585                                         bool &isStart) {
586  if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
587      MI.getOpcode() == TargetOpcode::LIFETIME_END) {
588    int Slot = getStartOrEndSlot(MI);
589    if (Slot < 0)
590      return false;
591    if (!InterestingSlots.test(Slot))
592      return false;
593    slots.push_back(Slot);
594    if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
595      isStart = false;
596      return true;
597    }
598    if (!applyFirstUse(Slot)) {
599      isStart = true;
600      return true;
601    }
602  } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
603    if (!MI.isDebugInstr()) {
604      bool found = false;
605      for (const MachineOperand &MO : MI.operands()) {
606        if (!MO.isFI())
607          continue;
608        int Slot = MO.getIndex();
609        if (Slot<0)
610          continue;
611        if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
612          slots.push_back(Slot);
613          found = true;
614        }
615      }
616      if (found) {
617        isStart = true;
618        return true;
619      }
620    }
621  }
622  return false;
623}
624
625unsigned StackColoring::collectMarkers(unsigned NumSlot) {
626  unsigned MarkersFound = 0;
627  BlockBitVecMap SeenStartMap;
628  InterestingSlots.clear();
629  InterestingSlots.resize(NumSlot);
630  ConservativeSlots.clear();
631  ConservativeSlots.resize(NumSlot);
632
633  // number of start and end lifetime ops for each slot
634  SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
635  SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
636
637  // Step 1: collect markers and populate the "InterestingSlots"
638  // and "ConservativeSlots" sets.
639  for (MachineBasicBlock *MBB : depth_first(MF)) {
640    // Compute the set of slots for which we've seen a START marker but have
641    // not yet seen an END marker at this point in the walk (e.g. on entry
642    // to this bb).
643    BitVector BetweenStartEnd;
644    BetweenStartEnd.resize(NumSlot);
645    for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
646             PE = MBB->pred_end(); PI != PE; ++PI) {
647      BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
648      if (I != SeenStartMap.end()) {
649        BetweenStartEnd |= I->second;
650      }
651    }
652
653    // Walk the instructions in the block to look for start/end ops.
654    for (MachineInstr &MI : *MBB) {
655      if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
656          MI.getOpcode() == TargetOpcode::LIFETIME_END) {
657        int Slot = getStartOrEndSlot(MI);
658        if (Slot < 0)
659          continue;
660        InterestingSlots.set(Slot);
661        if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
662          BetweenStartEnd.set(Slot);
663          NumStartLifetimes[Slot] += 1;
664        } else {
665          BetweenStartEnd.reset(Slot);
666          NumEndLifetimes[Slot] += 1;
667        }
668        const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
669        if (Allocation) {
670          LLVM_DEBUG(dbgs() << "Found a lifetime ");
671          LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
672                                    ? "start"
673                                    : "end"));
674          LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
675          LLVM_DEBUG(dbgs()
676                     << " with allocation: " << Allocation->getName() << "\n");
677        }
678        Markers.push_back(&MI);
679        MarkersFound += 1;
680      } else {
681        for (const MachineOperand &MO : MI.operands()) {
682          if (!MO.isFI())
683            continue;
684          int Slot = MO.getIndex();
685          if (Slot < 0)
686            continue;
687          if (! BetweenStartEnd.test(Slot)) {
688            ConservativeSlots.set(Slot);
689          }
690        }
691      }
692    }
693    BitVector &SeenStart = SeenStartMap[MBB];
694    SeenStart |= BetweenStartEnd;
695  }
696  if (!MarkersFound) {
697    return 0;
698  }
699
700  // PR27903: slots with multiple start or end lifetime ops are not
701  // safe to enable for "lifetime-start-on-first-use".
702  for (unsigned slot = 0; slot < NumSlot; ++slot)
703    if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
704      ConservativeSlots.set(slot);
705  LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
706
707  // Step 2: compute begin/end sets for each block
708
709  // NOTE: We use a depth-first iteration to ensure that we obtain a
710  // deterministic numbering.
711  for (MachineBasicBlock *MBB : depth_first(MF)) {
712    // Assign a serial number to this basic block.
713    BasicBlocks[MBB] = BasicBlockNumbering.size();
714    BasicBlockNumbering.push_back(MBB);
715
716    // Keep a reference to avoid repeated lookups.
717    BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
718
719    BlockInfo.Begin.resize(NumSlot);
720    BlockInfo.End.resize(NumSlot);
721
722    SmallVector<int, 4> slots;
723    for (MachineInstr &MI : *MBB) {
724      bool isStart = false;
725      slots.clear();
726      if (isLifetimeStartOrEnd(MI, slots, isStart)) {
727        if (!isStart) {
728          assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
729          int Slot = slots[0];
730          if (BlockInfo.Begin.test(Slot)) {
731            BlockInfo.Begin.reset(Slot);
732          }
733          BlockInfo.End.set(Slot);
734        } else {
735          for (auto Slot : slots) {
736            LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
737            LLVM_DEBUG(dbgs()
738                       << " at " << printMBBReference(*MBB) << " index ");
739            LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
740            const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
741            if (Allocation) {
742              LLVM_DEBUG(dbgs()
743                         << " with allocation: " << Allocation->getName());
744            }
745            LLVM_DEBUG(dbgs() << "\n");
746            if (BlockInfo.End.test(Slot)) {
747              BlockInfo.End.reset(Slot);
748            }
749            BlockInfo.Begin.set(Slot);
750          }
751        }
752      }
753    }
754  }
755
756  // Update statistics.
757  NumMarkerSeen += MarkersFound;
758  return MarkersFound;
759}
760
761void StackColoring::calculateLocalLiveness() {
762  unsigned NumIters = 0;
763  bool changed = true;
764  while (changed) {
765    changed = false;
766    ++NumIters;
767
768    for (const MachineBasicBlock *BB : BasicBlockNumbering) {
769      // Use an iterator to avoid repeated lookups.
770      LivenessMap::iterator BI = BlockLiveness.find(BB);
771      assert(BI != BlockLiveness.end() && "Block not found");
772      BlockLifetimeInfo &BlockInfo = BI->second;
773
774      // Compute LiveIn by unioning together the LiveOut sets of all preds.
775      BitVector LocalLiveIn;
776      for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
777           PE = BB->pred_end(); PI != PE; ++PI) {
778        LivenessMap::const_iterator I = BlockLiveness.find(*PI);
779        // PR37130: transformations prior to stack coloring can
780        // sometimes leave behind statically unreachable blocks; these
781        // can be safely skipped here.
782        if (I != BlockLiveness.end())
783          LocalLiveIn |= I->second.LiveOut;
784      }
785
786      // Compute LiveOut by subtracting out lifetimes that end in this
787      // block, then adding in lifetimes that begin in this block.  If
788      // we have both BEGIN and END markers in the same basic block
789      // then we know that the BEGIN marker comes after the END,
790      // because we already handle the case where the BEGIN comes
791      // before the END when collecting the markers (and building the
792      // BEGIN/END vectors).
793      BitVector LocalLiveOut = LocalLiveIn;
794      LocalLiveOut.reset(BlockInfo.End);
795      LocalLiveOut |= BlockInfo.Begin;
796
797      // Update block LiveIn set, noting whether it has changed.
798      if (LocalLiveIn.test(BlockInfo.LiveIn)) {
799        changed = true;
800        BlockInfo.LiveIn |= LocalLiveIn;
801      }
802
803      // Update block LiveOut set, noting whether it has changed.
804      if (LocalLiveOut.test(BlockInfo.LiveOut)) {
805        changed = true;
806        BlockInfo.LiveOut |= LocalLiveOut;
807      }
808    }
809  } // while changed.
810
811  NumIterations = NumIters;
812}
813
814void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
815  SmallVector<SlotIndex, 16> Starts;
816  SmallVector<bool, 16> DefinitelyInUse;
817
818  // For each block, find which slots are active within this block
819  // and update the live intervals.
820  for (const MachineBasicBlock &MBB : *MF) {
821    Starts.clear();
822    Starts.resize(NumSlots);
823    DefinitelyInUse.clear();
824    DefinitelyInUse.resize(NumSlots);
825
826    // Start the interval of the slots that we previously found to be 'in-use'.
827    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
828    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
829         pos = MBBLiveness.LiveIn.find_next(pos)) {
830      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
831    }
832
833    // Create the interval for the basic blocks containing lifetime begin/end.
834    for (const MachineInstr &MI : MBB) {
835      SmallVector<int, 4> slots;
836      bool IsStart = false;
837      if (!isLifetimeStartOrEnd(MI, slots, IsStart))
838        continue;
839      SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
840      for (auto Slot : slots) {
841        if (IsStart) {
842          // If a slot is already definitely in use, we don't have to emit
843          // a new start marker because there is already a pre-existing
844          // one.
845          if (!DefinitelyInUse[Slot]) {
846            LiveStarts[Slot].push_back(ThisIndex);
847            DefinitelyInUse[Slot] = true;
848          }
849          if (!Starts[Slot].isValid())
850            Starts[Slot] = ThisIndex;
851        } else {
852          if (Starts[Slot].isValid()) {
853            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
854            Intervals[Slot]->addSegment(
855                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
856            Starts[Slot] = SlotIndex(); // Invalidate the start index
857            DefinitelyInUse[Slot] = false;
858          }
859        }
860      }
861    }
862
863    // Finish up started segments
864    for (unsigned i = 0; i < NumSlots; ++i) {
865      if (!Starts[i].isValid())
866        continue;
867
868      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
869      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
870      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
871    }
872  }
873}
874
875bool StackColoring::removeAllMarkers() {
876  unsigned Count = 0;
877  for (MachineInstr *MI : Markers) {
878    MI->eraseFromParent();
879    Count++;
880  }
881  Markers.clear();
882
883  LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
884  return Count;
885}
886
887void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
888  unsigned FixedInstr = 0;
889  unsigned FixedMemOp = 0;
890  unsigned FixedDbg = 0;
891
892  // Remap debug information that refers to stack slots.
893  for (auto &VI : MF->getVariableDbgInfo()) {
894    if (!VI.Var)
895      continue;
896    if (SlotRemap.count(VI.Slot)) {
897      LLVM_DEBUG(dbgs() << "Remapping debug info for ["
898                        << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
899      VI.Slot = SlotRemap[VI.Slot];
900      FixedDbg++;
901    }
902  }
903
904  // Keep a list of *allocas* which need to be remapped.
905  DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
906
907  // Keep a list of allocas which has been affected by the remap.
908  SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
909
910  for (const std::pair<int, int> &SI : SlotRemap) {
911    const AllocaInst *From = MFI->getObjectAllocation(SI.first);
912    const AllocaInst *To = MFI->getObjectAllocation(SI.second);
913    assert(To && From && "Invalid allocation object");
914    Allocas[From] = To;
915
916    // If From is before wo, its possible that there is a use of From between
917    // them.
918    if (From->comesBefore(To))
919      const_cast<AllocaInst*>(To)->moveBefore(const_cast<AllocaInst*>(From));
920
921    // AA might be used later for instruction scheduling, and we need it to be
922    // able to deduce the correct aliasing releationships between pointers
923    // derived from the alloca being remapped and the target of that remapping.
924    // The only safe way, without directly informing AA about the remapping
925    // somehow, is to directly update the IR to reflect the change being made
926    // here.
927    Instruction *Inst = const_cast<AllocaInst *>(To);
928    if (From->getType() != To->getType()) {
929      BitCastInst *Cast = new BitCastInst(Inst, From->getType());
930      Cast->insertAfter(Inst);
931      Inst = Cast;
932    }
933
934    // We keep both slots to maintain AliasAnalysis metadata later.
935    MergedAllocas.insert(From);
936    MergedAllocas.insert(To);
937
938    // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
939    // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
940    // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
941    MachineFrameInfo::SSPLayoutKind FromKind
942        = MFI->getObjectSSPLayout(SI.first);
943    MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second);
944    if (FromKind != MachineFrameInfo::SSPLK_None &&
945        (ToKind == MachineFrameInfo::SSPLK_None ||
946         (ToKind != MachineFrameInfo::SSPLK_LargeArray &&
947          FromKind != MachineFrameInfo::SSPLK_AddrOf)))
948      MFI->setObjectSSPLayout(SI.second, FromKind);
949
950    // The new alloca might not be valid in a llvm.dbg.declare for this
951    // variable, so undef out the use to make the verifier happy.
952    AllocaInst *FromAI = const_cast<AllocaInst *>(From);
953    if (FromAI->isUsedByMetadata())
954      ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
955    for (auto &Use : FromAI->uses()) {
956      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
957        if (BCI->isUsedByMetadata())
958          ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
959    }
960
961    // Note that this will not replace uses in MMOs (which we'll update below),
962    // or anywhere else (which is why we won't delete the original
963    // instruction).
964    FromAI->replaceAllUsesWith(Inst);
965  }
966
967  // Remap all instructions to the new stack slots.
968  std::vector<std::vector<MachineMemOperand *>> SSRefs(
969      MFI->getObjectIndexEnd());
970  for (MachineBasicBlock &BB : *MF)
971    for (MachineInstr &I : BB) {
972      // Skip lifetime markers. We'll remove them soon.
973      if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
974          I.getOpcode() == TargetOpcode::LIFETIME_END)
975        continue;
976
977      // Update the MachineMemOperand to use the new alloca.
978      for (MachineMemOperand *MMO : I.memoperands()) {
979        // We've replaced IR-level uses of the remapped allocas, so we only
980        // need to replace direct uses here.
981        const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
982        if (!AI)
983          continue;
984
985        if (!Allocas.count(AI))
986          continue;
987
988        MMO->setValue(Allocas[AI]);
989        FixedMemOp++;
990      }
991
992      // Update all of the machine instruction operands.
993      for (MachineOperand &MO : I.operands()) {
994        if (!MO.isFI())
995          continue;
996        int FromSlot = MO.getIndex();
997
998        // Don't touch arguments.
999        if (FromSlot<0)
1000          continue;
1001
1002        // Only look at mapped slots.
1003        if (!SlotRemap.count(FromSlot))
1004          continue;
1005
1006        // In a debug build, check that the instruction that we are modifying is
1007        // inside the expected live range. If the instruction is not inside
1008        // the calculated range then it means that the alloca usage moved
1009        // outside of the lifetime markers, or that the user has a bug.
1010        // NOTE: Alloca address calculations which happen outside the lifetime
1011        // zone are okay, despite the fact that we don't have a good way
1012        // for validating all of the usages of the calculation.
1013#ifndef NDEBUG
1014        bool TouchesMemory = I.mayLoadOrStore();
1015        // If we *don't* protect the user from escaped allocas, don't bother
1016        // validating the instructions.
1017        if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1018          SlotIndex Index = Indexes->getInstructionIndex(I);
1019          const LiveInterval *Interval = &*Intervals[FromSlot];
1020          assert(Interval->find(Index) != Interval->end() &&
1021                 "Found instruction usage outside of live range.");
1022        }
1023#endif
1024
1025        // Fix the machine instructions.
1026        int ToSlot = SlotRemap[FromSlot];
1027        MO.setIndex(ToSlot);
1028        FixedInstr++;
1029      }
1030
1031      // We adjust AliasAnalysis information for merged stack slots.
1032      SmallVector<MachineMemOperand *, 2> NewMMOs;
1033      bool ReplaceMemOps = false;
1034      for (MachineMemOperand *MMO : I.memoperands()) {
1035        // Collect MachineMemOperands which reference
1036        // FixedStackPseudoSourceValues with old frame indices.
1037        if (const auto *FSV = dyn_cast_or_null<FixedStackPseudoSourceValue>(
1038                MMO->getPseudoValue())) {
1039          int FI = FSV->getFrameIndex();
1040          auto To = SlotRemap.find(FI);
1041          if (To != SlotRemap.end())
1042            SSRefs[FI].push_back(MMO);
1043        }
1044
1045        // If this memory location can be a slot remapped here,
1046        // we remove AA information.
1047        bool MayHaveConflictingAAMD = false;
1048        if (MMO->getAAInfo()) {
1049          if (const Value *MMOV = MMO->getValue()) {
1050            SmallVector<Value *, 4> Objs;
1051            getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
1052
1053            if (Objs.empty())
1054              MayHaveConflictingAAMD = true;
1055            else
1056              for (Value *V : Objs) {
1057                // If this memory location comes from a known stack slot
1058                // that is not remapped, we continue checking.
1059                // Otherwise, we need to invalidate AA infomation.
1060                const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1061                if (AI && MergedAllocas.count(AI)) {
1062                  MayHaveConflictingAAMD = true;
1063                  break;
1064                }
1065              }
1066          }
1067        }
1068        if (MayHaveConflictingAAMD) {
1069          NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1070          ReplaceMemOps = true;
1071        } else {
1072          NewMMOs.push_back(MMO);
1073        }
1074      }
1075
1076      // If any memory operand is updated, set memory references of
1077      // this instruction.
1078      if (ReplaceMemOps)
1079        I.setMemRefs(*MF, NewMMOs);
1080    }
1081
1082  // Rewrite MachineMemOperands that reference old frame indices.
1083  for (auto E : enumerate(SSRefs))
1084    if (!E.value().empty()) {
1085      const PseudoSourceValue *NewSV =
1086          MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
1087      for (MachineMemOperand *Ref : E.value())
1088        Ref->setValue(NewSV);
1089    }
1090
1091  // Update the location of C++ catch objects for the MSVC personality routine.
1092  if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1093    for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1094      for (WinEHHandlerType &H : TBME.HandlerArray)
1095        if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
1096            SlotRemap.count(H.CatchObj.FrameIndex))
1097          H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
1098
1099  LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1100  LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1101  LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1102}
1103
1104void StackColoring::removeInvalidSlotRanges() {
1105  for (MachineBasicBlock &BB : *MF)
1106    for (MachineInstr &I : BB) {
1107      if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1108          I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1109        continue;
1110
1111      // Some intervals are suspicious! In some cases we find address
1112      // calculations outside of the lifetime zone, but not actual memory
1113      // read or write. Memory accesses outside of the lifetime zone are a clear
1114      // violation, but address calculations are okay. This can happen when
1115      // GEPs are hoisted outside of the lifetime zone.
1116      // So, in here we only check instructions which can read or write memory.
1117      if (!I.mayLoad() && !I.mayStore())
1118        continue;
1119
1120      // Check all of the machine operands.
1121      for (const MachineOperand &MO : I.operands()) {
1122        if (!MO.isFI())
1123          continue;
1124
1125        int Slot = MO.getIndex();
1126
1127        if (Slot<0)
1128          continue;
1129
1130        if (Intervals[Slot]->empty())
1131          continue;
1132
1133        // Check that the used slot is inside the calculated lifetime range.
1134        // If it is not, warn about it and invalidate the range.
1135        LiveInterval *Interval = &*Intervals[Slot];
1136        SlotIndex Index = Indexes->getInstructionIndex(I);
1137        if (Interval->find(Index) == Interval->end()) {
1138          Interval->clear();
1139          LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1140          EscapedAllocas++;
1141        }
1142      }
1143    }
1144}
1145
1146void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1147                                   unsigned NumSlots) {
1148  // Expunge slot remap map.
1149  for (unsigned i=0; i < NumSlots; ++i) {
1150    // If we are remapping i
1151    if (SlotRemap.count(i)) {
1152      int Target = SlotRemap[i];
1153      // As long as our target is mapped to something else, follow it.
1154      while (SlotRemap.count(Target)) {
1155        Target = SlotRemap[Target];
1156        SlotRemap[i] = Target;
1157      }
1158    }
1159  }
1160}
1161
1162bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1163  LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1164                    << "********** Function: " << Func.getName() << '\n');
1165  MF = &Func;
1166  MFI = &MF->getFrameInfo();
1167  Indexes = &getAnalysis<SlotIndexes>();
1168  BlockLiveness.clear();
1169  BasicBlocks.clear();
1170  BasicBlockNumbering.clear();
1171  Markers.clear();
1172  Intervals.clear();
1173  LiveStarts.clear();
1174  VNInfoAllocator.Reset();
1175
1176  unsigned NumSlots = MFI->getObjectIndexEnd();
1177
1178  // If there are no stack slots then there are no markers to remove.
1179  if (!NumSlots)
1180    return false;
1181
1182  SmallVector<int, 8> SortedSlots;
1183  SortedSlots.reserve(NumSlots);
1184  Intervals.reserve(NumSlots);
1185  LiveStarts.resize(NumSlots);
1186
1187  unsigned NumMarkers = collectMarkers(NumSlots);
1188
1189  unsigned TotalSize = 0;
1190  LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1191                    << " slots\n");
1192  LLVM_DEBUG(dbgs() << "Slot structure:\n");
1193
1194  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1195    LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1196                      << " bytes.\n");
1197    TotalSize += MFI->getObjectSize(i);
1198  }
1199
1200  LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1201
1202  // Don't continue because there are not enough lifetime markers, or the
1203  // stack is too small, or we are told not to optimize the slots.
1204  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1205      skipFunction(Func.getFunction())) {
1206    LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1207    return removeAllMarkers();
1208  }
1209
1210  for (unsigned i=0; i < NumSlots; ++i) {
1211    std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1212    LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1213    Intervals.push_back(std::move(LI));
1214    SortedSlots.push_back(i);
1215  }
1216
1217  // Calculate the liveness of each block.
1218  calculateLocalLiveness();
1219  LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1220  LLVM_DEBUG(dump());
1221
1222  // Propagate the liveness information.
1223  calculateLiveIntervals(NumSlots);
1224  LLVM_DEBUG(dumpIntervals());
1225
1226  // Search for allocas which are used outside of the declared lifetime
1227  // markers.
1228  if (ProtectFromEscapedAllocas)
1229    removeInvalidSlotRanges();
1230
1231  // Maps old slots to new slots.
1232  DenseMap<int, int> SlotRemap;
1233  unsigned RemovedSlots = 0;
1234  unsigned ReducedSize = 0;
1235
1236  // Do not bother looking at empty intervals.
1237  for (unsigned I = 0; I < NumSlots; ++I) {
1238    if (Intervals[SortedSlots[I]]->empty())
1239      SortedSlots[I] = -1;
1240  }
1241
1242  // This is a simple greedy algorithm for merging allocas. First, sort the
1243  // slots, placing the largest slots first. Next, perform an n^2 scan and look
1244  // for disjoint slots. When you find disjoint slots, merge the samller one
1245  // into the bigger one and update the live interval. Remove the small alloca
1246  // and continue.
1247
1248  // Sort the slots according to their size. Place unused slots at the end.
1249  // Use stable sort to guarantee deterministic code generation.
1250  llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
1251    // We use -1 to denote a uninteresting slot. Place these slots at the end.
1252    if (LHS == -1)
1253      return false;
1254    if (RHS == -1)
1255      return true;
1256    // Sort according to size.
1257    return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1258  });
1259
1260  for (auto &s : LiveStarts)
1261    llvm::sort(s);
1262
1263  bool Changed = true;
1264  while (Changed) {
1265    Changed = false;
1266    for (unsigned I = 0; I < NumSlots; ++I) {
1267      if (SortedSlots[I] == -1)
1268        continue;
1269
1270      for (unsigned J=I+1; J < NumSlots; ++J) {
1271        if (SortedSlots[J] == -1)
1272          continue;
1273
1274        int FirstSlot = SortedSlots[I];
1275        int SecondSlot = SortedSlots[J];
1276        LiveInterval *First = &*Intervals[FirstSlot];
1277        LiveInterval *Second = &*Intervals[SecondSlot];
1278        auto &FirstS = LiveStarts[FirstSlot];
1279        auto &SecondS = LiveStarts[SecondSlot];
1280        assert(!First->empty() && !Second->empty() && "Found an empty range");
1281
1282        // Merge disjoint slots. This is a little bit tricky - see the
1283        // Implementation Notes section for an explanation.
1284        if (!First->isLiveAtIndexes(SecondS) &&
1285            !Second->isLiveAtIndexes(FirstS)) {
1286          Changed = true;
1287          First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1288
1289          int OldSize = FirstS.size();
1290          FirstS.append(SecondS.begin(), SecondS.end());
1291          auto Mid = FirstS.begin() + OldSize;
1292          std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1293
1294          SlotRemap[SecondSlot] = FirstSlot;
1295          SortedSlots[J] = -1;
1296          LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1297                            << SecondSlot << " together.\n");
1298          Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
1299                                        MFI->getObjectAlign(SecondSlot));
1300
1301          assert(MFI->getObjectSize(FirstSlot) >=
1302                 MFI->getObjectSize(SecondSlot) &&
1303                 "Merging a small object into a larger one");
1304
1305          RemovedSlots+=1;
1306          ReducedSize += MFI->getObjectSize(SecondSlot);
1307          MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1308          MFI->RemoveStackObject(SecondSlot);
1309        }
1310      }
1311    }
1312  }// While changed.
1313
1314  // Record statistics.
1315  StackSpaceSaved += ReducedSize;
1316  StackSlotMerged += RemovedSlots;
1317  LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1318                    << ReducedSize << " bytes\n");
1319
1320  // Scan the entire function and update all machine operands that use frame
1321  // indices to use the remapped frame index.
1322  expungeSlotMap(SlotRemap, NumSlots);
1323  remapInstructions(SlotRemap);
1324
1325  return removeAllMarkers();
1326}
1327