1//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a shrink wrapping variant of prolog/epilog insertion:
11// - Spills and restores of callee-saved registers (CSRs) are placed in the
12//   machine CFG to tightly surround their uses so that execution paths that
13//   do not use CSRs do not pay the spill/restore penalty.
14//
15// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
16//   loop the spills are placed in the loop preheader, and restores are
17//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
18//
19// - Covering paths without CSR uses:
20//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
21//   the use info for the CSRs inside the region is propagated outward in the
22//   CFG to ensure validity of the spill/restore placements. This decreases
23//   the effectiveness of shrink wrapping but does not require edge splitting
24//   in the machine CFG.
25//
26// This shrink wrapping implementation uses an iterative analysis to determine
27// which basic blocks require spills and restores for CSRs.
28//
29// This pass uses MachineDominators and MachineLoopInfo. Loop information
30// is used to prevent placement of callee-saved register spills/restores
31// in the bodies of loops.
32//
33//===----------------------------------------------------------------------===//
34
35#define DEBUG_TYPE "shrink-wrap"
36
37#include "PrologEpilogInserter.h"
38#include "llvm/CodeGen/MachineDominators.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineInstr.h"
41#include "llvm/CodeGen/MachineFrameInfo.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/Target/TargetMachine.h"
44#include "llvm/Target/TargetRegisterInfo.h"
45#include "llvm/ADT/SparseBitVector.h"
46#include "llvm/ADT/DenseMap.h"
47#include "llvm/ADT/PostOrderIterator.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/ADT/STLExtras.h"
53#include "llvm/ADT/Statistic.h"
54#include <sstream>
55
56using namespace llvm;
57
58STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
59
60// Shrink Wrapping:
61static cl::opt<bool>
62ShrinkWrapping("shrink-wrap",
63               cl::desc("Shrink wrap callee-saved register spills/restores"));
64
65// Shrink wrap only the specified function, a debugging aid.
66static cl::opt<std::string>
67ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
68               cl::desc("Shrink wrap the specified function"),
69               cl::value_desc("funcname"),
70               cl::init(""));
71
72// Debugging level for shrink wrapping.
73enum ShrinkWrapDebugLevel {
74  None, BasicInfo, Iterations, Details
75};
76
77static cl::opt<enum ShrinkWrapDebugLevel>
78ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
79  cl::desc("Print shrink wrapping debugging information"),
80  cl::values(
81    clEnumVal(None      , "disable debug output"),
82    clEnumVal(BasicInfo , "print basic DF sets"),
83    clEnumVal(Iterations, "print SR sets for each iteration"),
84    clEnumVal(Details   , "print all DF sets"),
85    clEnumValEnd));
86
87
88void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
89  AU.setPreservesCFG();
90  if (ShrinkWrapping || ShrinkWrapFunc != "") {
91    AU.addRequired<MachineLoopInfo>();
92    AU.addRequired<MachineDominatorTree>();
93  }
94  AU.addPreserved<MachineLoopInfo>();
95  AU.addPreserved<MachineDominatorTree>();
96  AU.addRequired<TargetPassConfig>();
97  MachineFunctionPass::getAnalysisUsage(AU);
98}
99
100//===----------------------------------------------------------------------===//
101//  ShrinkWrapping implementation
102//===----------------------------------------------------------------------===//
103
104// Convienences for dealing with machine loops.
105MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
106  assert(LP && "Machine loop is NULL.");
107  MachineBasicBlock* PHDR = LP->getLoopPreheader();
108  MachineLoop* PLP = LP->getParentLoop();
109  while (PLP) {
110    PHDR = PLP->getLoopPreheader();
111    PLP = PLP->getParentLoop();
112  }
113  return PHDR;
114}
115
116MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
117  if (LP == 0)
118    return 0;
119  MachineLoop* PLP = LP->getParentLoop();
120  while (PLP) {
121    LP = PLP;
122    PLP = PLP->getParentLoop();
123  }
124  return LP;
125}
126
127bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
128  return (MBB && !MBB->empty() && MBB->back().isReturn());
129}
130
131// Initialize shrink wrapping DFA sets, called before iterations.
132void PEI::clearAnticAvailSets() {
133  AnticIn.clear();
134  AnticOut.clear();
135  AvailIn.clear();
136  AvailOut.clear();
137}
138
139// Clear all sets constructed by shrink wrapping.
140void PEI::clearAllSets() {
141  ReturnBlocks.clear();
142  clearAnticAvailSets();
143  UsedCSRegs.clear();
144  CSRUsed.clear();
145  TLLoops.clear();
146  CSRSave.clear();
147  CSRRestore.clear();
148}
149
150// Initialize all shrink wrapping data.
151void PEI::initShrinkWrappingInfo() {
152  clearAllSets();
153  EntryBlock = 0;
154#ifndef NDEBUG
155  HasFastExitPath = false;
156#endif
157  ShrinkWrapThisFunction = ShrinkWrapping;
158  // DEBUG: enable or disable shrink wrapping for the current function
159  // via --shrink-wrap-func=<funcname>.
160#ifndef NDEBUG
161  if (ShrinkWrapFunc != "") {
162    std::string MFName = MF->getName().str();
163    ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
164  }
165#endif
166}
167
168
169/// placeCSRSpillsAndRestores - determine which MBBs of the function
170/// need save, restore code for callee-saved registers by doing a DF analysis
171/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
172/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
173/// is used to ensure that CSR save/restore code is not placed inside loops.
174/// This function computes the maps of MBBs -> CSRs to spill and restore
175/// in CSRSave, CSRRestore.
176///
177/// If shrink wrapping is not being performed, place all spills in
178/// the entry block, all restores in return blocks. In this case,
179/// CSRSave has a single mapping, CSRRestore has mappings for each
180/// return block.
181///
182void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
183
184  DEBUG(MF = &Fn);
185
186  initShrinkWrappingInfo();
187
188  DEBUG(if (ShrinkWrapThisFunction) {
189      dbgs() << "Place CSR spills/restores for "
190             << MF->getName() << "\n";
191    });
192
193  if (calculateSets(Fn))
194    placeSpillsAndRestores(Fn);
195}
196
197/// calcAnticInOut - calculate the anticipated in/out reg sets
198/// for the given MBB by looking forward in the MCFG at MBB's
199/// successors.
200///
201bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
202  bool changed = false;
203
204  // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
205  SmallVector<MachineBasicBlock*, 4> successors;
206  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
207         SE = MBB->succ_end(); SI != SE; ++SI) {
208    MachineBasicBlock* SUCC = *SI;
209    if (SUCC != MBB)
210      successors.push_back(SUCC);
211  }
212
213  unsigned i = 0, e = successors.size();
214  if (i != e) {
215    CSRegSet prevAnticOut = AnticOut[MBB];
216    MachineBasicBlock* SUCC = successors[i];
217
218    AnticOut[MBB] = AnticIn[SUCC];
219    for (++i; i != e; ++i) {
220      SUCC = successors[i];
221      AnticOut[MBB] &= AnticIn[SUCC];
222    }
223    if (prevAnticOut != AnticOut[MBB])
224      changed = true;
225  }
226
227  // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
228  CSRegSet prevAnticIn = AnticIn[MBB];
229  AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
230  if (prevAnticIn != AnticIn[MBB])
231    changed = true;
232  return changed;
233}
234
235/// calcAvailInOut - calculate the available in/out reg sets
236/// for the given MBB by looking backward in the MCFG at MBB's
237/// predecessors.
238///
239bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
240  bool changed = false;
241
242  // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
243  SmallVector<MachineBasicBlock*, 4> predecessors;
244  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
245         PE = MBB->pred_end(); PI != PE; ++PI) {
246    MachineBasicBlock* PRED = *PI;
247    if (PRED != MBB)
248      predecessors.push_back(PRED);
249  }
250
251  unsigned i = 0, e = predecessors.size();
252  if (i != e) {
253    CSRegSet prevAvailIn = AvailIn[MBB];
254    MachineBasicBlock* PRED = predecessors[i];
255
256    AvailIn[MBB] = AvailOut[PRED];
257    for (++i; i != e; ++i) {
258      PRED = predecessors[i];
259      AvailIn[MBB] &= AvailOut[PRED];
260    }
261    if (prevAvailIn != AvailIn[MBB])
262      changed = true;
263  }
264
265  // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
266  CSRegSet prevAvailOut = AvailOut[MBB];
267  AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
268  if (prevAvailOut != AvailOut[MBB])
269    changed = true;
270  return changed;
271}
272
273/// calculateAnticAvail - build the sets anticipated and available
274/// registers in the MCFG of the current function iteratively,
275/// doing a combined forward and backward analysis.
276///
277void PEI::calculateAnticAvail(MachineFunction &Fn) {
278  // Initialize data flow sets.
279  clearAnticAvailSets();
280
281  // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
282  bool changed = true;
283  unsigned iterations = 0;
284  while (changed) {
285    changed = false;
286    ++iterations;
287    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
288         MBBI != MBBE; ++MBBI) {
289      MachineBasicBlock* MBB = MBBI;
290
291      // Calculate anticipated in, out regs at MBB from
292      // anticipated at successors of MBB.
293      changed |= calcAnticInOut(MBB);
294
295      // Calculate available in, out regs at MBB from
296      // available at predecessors of MBB.
297      changed |= calcAvailInOut(MBB);
298    }
299  }
300
301  DEBUG({
302      if (ShrinkWrapDebugging >= Details) {
303        dbgs()
304          << "-----------------------------------------------------------\n"
305          << " Antic/Avail Sets:\n"
306          << "-----------------------------------------------------------\n"
307          << "iterations = " << iterations << "\n"
308          << "-----------------------------------------------------------\n"
309          << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
310          << "-----------------------------------------------------------\n";
311
312        for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
313             MBBI != MBBE; ++MBBI) {
314          MachineBasicBlock* MBB = MBBI;
315          dumpSets(MBB);
316        }
317
318        dbgs()
319          << "-----------------------------------------------------------\n";
320      }
321    });
322}
323
324/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
325/// of the loop given by LP and its parent loops. This prevents spills/restores
326/// from being placed in the bodies of loops.
327///
328void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
329  if (! MBB || !LP)
330    return;
331
332  std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
333  for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
334    MachineBasicBlock* LBB = loopBlocks[i];
335    if (LBB == MBB)
336      continue;
337    if (CSRUsed[LBB].contains(CSRUsed[MBB]))
338      continue;
339    CSRUsed[LBB] |= CSRUsed[MBB];
340  }
341}
342
343/// calculateSets - collect the CSRs used in this function, compute
344/// the DF sets that describe the initial minimal regions in the
345/// Machine CFG around which CSR spills and restores must be placed.
346///
347/// Additionally, this function decides if shrink wrapping should
348/// be disabled for the current function, checking the following:
349///  1. the current function has more than 500 MBBs: heuristic limit
350///     on function size to reduce compile time impact of the current
351///     iterative algorithm.
352///  2. all CSRs are used in the entry block.
353///  3. all CSRs are used in all immediate successors of the entry block.
354///  4. all CSRs are used in a subset of blocks, each of which dominates
355///     all return blocks. These blocks, taken as a subgraph of the MCFG,
356///     are equivalent to the entry block since all execution paths pass
357///     through them.
358///
359bool PEI::calculateSets(MachineFunction &Fn) {
360  // Sets used to compute spill, restore placement sets.
361  const std::vector<CalleeSavedInfo> CSI =
362    Fn.getFrameInfo()->getCalleeSavedInfo();
363
364  // If no CSRs used, we are done.
365  if (CSI.empty()) {
366    DEBUG(if (ShrinkWrapThisFunction)
367            dbgs() << "DISABLED: " << Fn.getName()
368                   << ": uses no callee-saved registers\n");
369    return false;
370  }
371
372  // Save refs to entry and return blocks.
373  EntryBlock = Fn.begin();
374  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
375       MBB != E; ++MBB)
376    if (isReturnBlock(MBB))
377      ReturnBlocks.push_back(MBB);
378
379  // Determine if this function has fast exit paths.
380  DEBUG(if (ShrinkWrapThisFunction)
381          findFastExitPath());
382
383  // Limit shrink wrapping via the current iterative bit vector
384  // implementation to functions with <= 500 MBBs.
385  if (Fn.size() > 500) {
386    DEBUG(if (ShrinkWrapThisFunction)
387            dbgs() << "DISABLED: " << Fn.getName()
388                   << ": too large (" << Fn.size() << " MBBs)\n");
389    ShrinkWrapThisFunction = false;
390  }
391
392  // Return now if not shrink wrapping.
393  if (! ShrinkWrapThisFunction)
394    return false;
395
396  // Collect set of used CSRs.
397  for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
398    UsedCSRegs.set(inx);
399  }
400
401  // Walk instructions in all MBBs, create CSRUsed[] sets, choose
402  // whether or not to shrink wrap this function.
403  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
404  MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
405  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
406
407  bool allCSRUsesInEntryBlock = true;
408  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
409       MBBI != MBBE; ++MBBI) {
410    MachineBasicBlock* MBB = MBBI;
411    for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
412      for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
413        unsigned Reg = CSI[inx].getReg();
414        // If instruction I reads or modifies Reg, add it to UsedCSRegs,
415        // CSRUsed map for the current block.
416        for (unsigned opInx = 0, opEnd = I->getNumOperands();
417             opInx != opEnd; ++opInx) {
418          const MachineOperand &MO = I->getOperand(opInx);
419          if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
420            continue;
421          unsigned MOReg = MO.getReg();
422          if (!MOReg)
423            continue;
424          if (MOReg == Reg ||
425              (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
426               TargetRegisterInfo::isPhysicalRegister(Reg) &&
427               TRI->isSubRegister(Reg, MOReg))) {
428            // CSR Reg is defined/used in block MBB.
429            CSRUsed[MBB].set(inx);
430            // Check for uses in EntryBlock.
431            if (MBB != EntryBlock)
432              allCSRUsesInEntryBlock = false;
433          }
434        }
435      }
436    }
437
438    if (CSRUsed[MBB].empty())
439      continue;
440
441    // Propagate CSRUsed[MBB] in loops
442    if (MachineLoop* LP = LI.getLoopFor(MBB)) {
443      // Add top level loop to work list.
444      MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
445      MachineLoop* PLP = getTopLevelLoopParent(LP);
446
447      if (! HDR) {
448        HDR = PLP->getHeader();
449        assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
450        MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
451        HDR = *PI;
452      }
453      TLLoops[HDR] = PLP;
454
455      // Push uses from inside loop to its parent loops,
456      // or to all other MBBs in its loop.
457      if (LP->getLoopDepth() > 1) {
458        for (MachineLoop* PLP = LP->getParentLoop(); PLP;
459             PLP = PLP->getParentLoop()) {
460          propagateUsesAroundLoop(MBB, PLP);
461        }
462      } else {
463        propagateUsesAroundLoop(MBB, LP);
464      }
465    }
466  }
467
468  if (allCSRUsesInEntryBlock) {
469    DEBUG(dbgs() << "DISABLED: " << Fn.getName()
470                 << ": all CSRs used in EntryBlock\n");
471    ShrinkWrapThisFunction = false;
472  } else {
473    bool allCSRsUsedInEntryFanout = true;
474    for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
475           SE = EntryBlock->succ_end(); SI != SE; ++SI) {
476      MachineBasicBlock* SUCC = *SI;
477      if (CSRUsed[SUCC] != UsedCSRegs)
478        allCSRsUsedInEntryFanout = false;
479    }
480    if (allCSRsUsedInEntryFanout) {
481      DEBUG(dbgs() << "DISABLED: " << Fn.getName()
482                   << ": all CSRs used in imm successors of EntryBlock\n");
483      ShrinkWrapThisFunction = false;
484    }
485  }
486
487  if (ShrinkWrapThisFunction) {
488    // Check if MBB uses CSRs and dominates all exit nodes.
489    // Such nodes are equiv. to the entry node w.r.t.
490    // CSR uses: every path through the function must
491    // pass through this node. If each CSR is used at least
492    // once by these nodes, shrink wrapping is disabled.
493    CSRegSet CSRUsedInChokePoints;
494    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
495         MBBI != MBBE; ++MBBI) {
496      MachineBasicBlock* MBB = MBBI;
497      if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
498        continue;
499      bool dominatesExitNodes = true;
500      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
501        if (! DT.dominates(MBB, ReturnBlocks[ri])) {
502          dominatesExitNodes = false;
503          break;
504        }
505      if (dominatesExitNodes) {
506        CSRUsedInChokePoints |= CSRUsed[MBB];
507        if (CSRUsedInChokePoints == UsedCSRegs) {
508          DEBUG(dbgs() << "DISABLED: " << Fn.getName()
509                       << ": all CSRs used in choke point(s) at "
510                       << getBasicBlockName(MBB) << "\n");
511          ShrinkWrapThisFunction = false;
512          break;
513        }
514      }
515    }
516  }
517
518  // Return now if we have decided not to apply shrink wrapping
519  // to the current function.
520  if (! ShrinkWrapThisFunction)
521    return false;
522
523  DEBUG({
524      dbgs() << "ENABLED: " << Fn.getName();
525      if (HasFastExitPath)
526        dbgs() << " (fast exit path)";
527      dbgs() << "\n";
528      if (ShrinkWrapDebugging >= BasicInfo) {
529        dbgs() << "------------------------------"
530             << "-----------------------------\n";
531        dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
532        if (ShrinkWrapDebugging >= Details) {
533          dbgs() << "------------------------------"
534               << "-----------------------------\n";
535          dumpAllUsed();
536        }
537      }
538    });
539
540  // Build initial DF sets to determine minimal regions in the
541  // Machine CFG around which CSRs must be spilled and restored.
542  calculateAnticAvail(Fn);
543
544  return true;
545}
546
547/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
548/// multi-entry, multi-exit (MEME) regions so spill and restore
549/// placement will not break code that enters or leaves a
550/// shrink-wrapped region by inducing spills with no matching
551/// restores or restores with no matching spills. A MEME region
552/// is a subgraph of the MCFG with multiple entry edges, multiple
553/// exit edges, or both. This code propagates use information
554/// through the MCFG until all paths requiring spills and restores
555/// _outside_ the computed minimal placement regions have been covered.
556///
557bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
558                               SmallVector<MachineBasicBlock*, 4>& blks) {
559  if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
560    bool processThisBlock = false;
561    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
562           SE = MBB->succ_end(); SI != SE; ++SI) {
563      MachineBasicBlock* SUCC = *SI;
564      if (SUCC->pred_size() > 1) {
565        processThisBlock = true;
566        break;
567      }
568    }
569    if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
570      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
571             PE = MBB->pred_end(); PI != PE; ++PI) {
572        MachineBasicBlock* PRED = *PI;
573        if (PRED->succ_size() > 1) {
574          processThisBlock = true;
575          break;
576        }
577      }
578    }
579    if (! processThisBlock)
580      return false;
581  }
582
583  CSRegSet prop;
584  if (!CSRSave[MBB].empty())
585    prop = CSRSave[MBB];
586  else if (!CSRRestore[MBB].empty())
587    prop = CSRRestore[MBB];
588  else
589    prop = CSRUsed[MBB];
590  if (prop.empty())
591    return false;
592
593  // Propagate selected bits to successors, predecessors of MBB.
594  bool addedUses = false;
595  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
596         SE = MBB->succ_end(); SI != SE; ++SI) {
597    MachineBasicBlock* SUCC = *SI;
598    // Self-loop
599    if (SUCC == MBB)
600      continue;
601    if (! CSRUsed[SUCC].contains(prop)) {
602      CSRUsed[SUCC] |= prop;
603      addedUses = true;
604      blks.push_back(SUCC);
605      DEBUG(if (ShrinkWrapDebugging >= Iterations)
606              dbgs() << getBasicBlockName(MBB)
607                   << "(" << stringifyCSRegSet(prop) << ")->"
608                   << "successor " << getBasicBlockName(SUCC) << "\n");
609    }
610  }
611  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
612         PE = MBB->pred_end(); PI != PE; ++PI) {
613    MachineBasicBlock* PRED = *PI;
614    // Self-loop
615    if (PRED == MBB)
616      continue;
617    if (! CSRUsed[PRED].contains(prop)) {
618      CSRUsed[PRED] |= prop;
619      addedUses = true;
620      blks.push_back(PRED);
621      DEBUG(if (ShrinkWrapDebugging >= Iterations)
622              dbgs() << getBasicBlockName(MBB)
623                   << "(" << stringifyCSRegSet(prop) << ")->"
624                   << "predecessor " << getBasicBlockName(PRED) << "\n");
625    }
626  }
627  return addedUses;
628}
629
630/// addUsesForTopLevelLoops - add uses for CSRs used inside top
631/// level loops to the exit blocks of those loops.
632///
633bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
634  bool addedUses = false;
635
636  // Place restores for top level loops where needed.
637  for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
638         I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
639    MachineBasicBlock* MBB = I->first;
640    MachineLoop* LP = I->second;
641    MachineBasicBlock* HDR = LP->getHeader();
642    SmallVector<MachineBasicBlock*, 4> exitBlocks;
643    CSRegSet loopSpills;
644
645    loopSpills = CSRSave[MBB];
646    if (CSRSave[MBB].empty()) {
647      loopSpills = CSRUsed[HDR];
648      assert(!loopSpills.empty() && "No CSRs used in loop?");
649    } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
650      continue;
651
652    LP->getExitBlocks(exitBlocks);
653    assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
654    for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
655      MachineBasicBlock* EXB = exitBlocks[i];
656      if (! CSRUsed[EXB].contains(loopSpills)) {
657        CSRUsed[EXB] |= loopSpills;
658        addedUses = true;
659        DEBUG(if (ShrinkWrapDebugging >= Iterations)
660                dbgs() << "LOOP " << getBasicBlockName(MBB)
661                     << "(" << stringifyCSRegSet(loopSpills) << ")->"
662                     << getBasicBlockName(EXB) << "\n");
663        if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
664          blks.push_back(EXB);
665      }
666    }
667  }
668  return addedUses;
669}
670
671/// calcSpillPlacements - determine which CSRs should be spilled
672/// in MBB using AnticIn sets of MBB's predecessors, keeping track
673/// of changes to spilled reg sets. Add MBB to the set of blocks
674/// that need to be processed for propagating use info to cover
675/// multi-entry/exit regions.
676///
677bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
678                              SmallVector<MachineBasicBlock*, 4> &blks,
679                              CSRegBlockMap &prevSpills) {
680  bool placedSpills = false;
681  // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
682  CSRegSet anticInPreds;
683  SmallVector<MachineBasicBlock*, 4> predecessors;
684  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
685         PE = MBB->pred_end(); PI != PE; ++PI) {
686    MachineBasicBlock* PRED = *PI;
687    if (PRED != MBB)
688      predecessors.push_back(PRED);
689  }
690  unsigned i = 0, e = predecessors.size();
691  if (i != e) {
692    MachineBasicBlock* PRED = predecessors[i];
693    anticInPreds = UsedCSRegs - AnticIn[PRED];
694    for (++i; i != e; ++i) {
695      PRED = predecessors[i];
696      anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
697    }
698  } else {
699    // Handle uses in entry blocks (which have no predecessors).
700    // This is necessary because the DFA formulation assumes the
701    // entry and (multiple) exit nodes cannot have CSR uses, which
702    // is not the case in the real world.
703    anticInPreds = UsedCSRegs;
704  }
705  // Compute spills required at MBB:
706  CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
707
708  if (! CSRSave[MBB].empty()) {
709    if (MBB == EntryBlock) {
710      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
711        CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
712    } else {
713      // Reset all regs spilled in MBB that are also spilled in EntryBlock.
714      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
715        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
716      }
717    }
718  }
719  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
720  prevSpills[MBB] = CSRSave[MBB];
721  // Remember this block for adding restores to successor
722  // blocks for multi-entry region.
723  if (placedSpills)
724    blks.push_back(MBB);
725
726  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
727          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
728               << stringifyCSRegSet(CSRSave[MBB]) << "\n");
729
730  return placedSpills;
731}
732
733/// calcRestorePlacements - determine which CSRs should be restored
734/// in MBB using AvailOut sets of MBB's succcessors, keeping track
735/// of changes to restored reg sets. Add MBB to the set of blocks
736/// that need to be processed for propagating use info to cover
737/// multi-entry/exit regions.
738///
739bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
740                                SmallVector<MachineBasicBlock*, 4> &blks,
741                                CSRegBlockMap &prevRestores) {
742  bool placedRestores = false;
743  // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
744  CSRegSet availOutSucc;
745  SmallVector<MachineBasicBlock*, 4> successors;
746  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
747         SE = MBB->succ_end(); SI != SE; ++SI) {
748    MachineBasicBlock* SUCC = *SI;
749    if (SUCC != MBB)
750      successors.push_back(SUCC);
751  }
752  unsigned i = 0, e = successors.size();
753  if (i != e) {
754    MachineBasicBlock* SUCC = successors[i];
755    availOutSucc = UsedCSRegs - AvailOut[SUCC];
756    for (++i; i != e; ++i) {
757      SUCC = successors[i];
758      availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
759    }
760  } else {
761    if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
762      // Handle uses in return blocks (which have no successors).
763      // This is necessary because the DFA formulation assumes the
764      // entry and (multiple) exit nodes cannot have CSR uses, which
765      // is not the case in the real world.
766      availOutSucc = UsedCSRegs;
767    }
768  }
769  // Compute restores required at MBB:
770  CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
771
772  // Postprocess restore placements at MBB.
773  // Remove the CSRs that are restored in the return blocks.
774  // Lest this be confusing, note that:
775  // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
776  if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
777    if (! CSRSave[EntryBlock].empty())
778      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
779  }
780  placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
781  prevRestores[MBB] = CSRRestore[MBB];
782  // Remember this block for adding saves to predecessor
783  // blocks for multi-entry region.
784  if (placedRestores)
785    blks.push_back(MBB);
786
787  DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
788          dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
789               << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
790
791  return placedRestores;
792}
793
794/// placeSpillsAndRestores - place spills and restores of CSRs
795/// used in MBBs in minimal regions that contain the uses.
796///
797void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
798  CSRegBlockMap prevCSRSave;
799  CSRegBlockMap prevCSRRestore;
800  SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
801  bool changed = true;
802  unsigned iterations = 0;
803
804  // Iterate computation of spill and restore placements in the MCFG until:
805  //   1. CSR use info has been fully propagated around the MCFG, and
806  //   2. computation of CSRSave[], CSRRestore[] reach fixed points.
807  while (changed) {
808    changed = false;
809    ++iterations;
810
811    DEBUG(if (ShrinkWrapDebugging >= Iterations)
812            dbgs() << "iter " << iterations
813                 << " --------------------------------------------------\n");
814
815    // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
816    // which determines the placements of spills and restores.
817    // Keep track of changes to spills, restores in each iteration to
818    // minimize the total iterations.
819    bool SRChanged = false;
820    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
821         MBBI != MBBE; ++MBBI) {
822      MachineBasicBlock* MBB = MBBI;
823
824      // Place spills for CSRs in MBB.
825      SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
826
827      // Place restores for CSRs in MBB.
828      SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
829    }
830
831    // Add uses of CSRs used inside loops where needed.
832    changed |= addUsesForTopLevelLoops(cvBlocks);
833
834    // Add uses for CSRs spilled or restored at branch, join points.
835    if (changed || SRChanged) {
836      while (! cvBlocks.empty()) {
837        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
838        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
839      }
840      if (! ncvBlocks.empty()) {
841        cvBlocks = ncvBlocks;
842        ncvBlocks.clear();
843      }
844    }
845
846    if (changed) {
847      calculateAnticAvail(Fn);
848      CSRSave.clear();
849      CSRRestore.clear();
850    }
851  }
852
853  // Check for effectiveness:
854  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
855  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
856  // Gives a measure of how many CSR spills have been moved from EntryBlock
857  // to minimal regions enclosing their uses.
858  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
859  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
860  numSRReduced += numSRReducedThisFunc;
861  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
862      dbgs() << "-----------------------------------------------------------\n";
863      dbgs() << "total iterations = " << iterations << " ( "
864           << Fn.getName()
865           << " " << numSRReducedThisFunc
866           << " " << Fn.size()
867           << " )\n";
868      dbgs() << "-----------------------------------------------------------\n";
869      dumpSRSets();
870      dbgs() << "-----------------------------------------------------------\n";
871      if (numSRReducedThisFunc)
872        verifySpillRestorePlacement();
873    });
874}
875
876// Debugging methods.
877#ifndef NDEBUG
878/// findFastExitPath - debugging method used to detect functions
879/// with at least one path from the entry block to a return block
880/// directly or which has a very small number of edges.
881///
882void PEI::findFastExitPath() {
883  if (! EntryBlock)
884    return;
885  // Fina a path from EntryBlock to any return block that does not branch:
886  //        Entry
887  //          |     ...
888  //          v      |
889  //         B1<-----+
890  //          |
891  //          v
892  //       Return
893  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
894         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
895    MachineBasicBlock* SUCC = *SI;
896
897    // Assume positive, disprove existence of fast path.
898    HasFastExitPath = true;
899
900    // Check the immediate successors.
901    if (isReturnBlock(SUCC)) {
902      if (ShrinkWrapDebugging >= BasicInfo)
903        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
904             << "->" << getBasicBlockName(SUCC) << "\n";
905      break;
906    }
907    // Traverse df from SUCC, look for a branch block.
908    std::string exitPath = getBasicBlockName(SUCC);
909    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
910           BE = df_end(SUCC); BI != BE; ++BI) {
911      MachineBasicBlock* SBB = *BI;
912      // Reject paths with branch nodes.
913      if (SBB->succ_size() > 1) {
914        HasFastExitPath = false;
915        break;
916      }
917      exitPath += "->" + getBasicBlockName(SBB);
918    }
919    if (HasFastExitPath) {
920      if (ShrinkWrapDebugging >= BasicInfo)
921        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
922             << "->" << exitPath << "\n";
923      break;
924    }
925  }
926}
927
928/// verifySpillRestorePlacement - check the current spill/restore
929/// sets for safety. Attempt to find spills without restores or
930/// restores without spills.
931/// Spills: walk df from each MBB in spill set ensuring that
932///         all CSRs spilled at MMBB are restored on all paths
933///         from MBB to all exit blocks.
934/// Restores: walk idf from each MBB in restore set ensuring that
935///           all CSRs restored at MBB are spilled on all paths
936///           reaching MBB.
937///
938void PEI::verifySpillRestorePlacement() {
939  unsigned numReturnBlocks = 0;
940  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
941       MBBI != MBBE; ++MBBI) {
942    MachineBasicBlock* MBB = MBBI;
943    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
944      ++numReturnBlocks;
945  }
946  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
947         BE = CSRSave.end(); BI != BE; ++BI) {
948    MachineBasicBlock* MBB = BI->first;
949    CSRegSet spilled = BI->second;
950    CSRegSet restored;
951
952    if (spilled.empty())
953      continue;
954
955    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
956                 << stringifyCSRegSet(spilled)
957                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
958                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
959
960    if (CSRRestore[MBB].intersects(spilled)) {
961      restored |= (CSRRestore[MBB] & spilled);
962    }
963
964    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
965    // we must find restores for all spills w/no intervening spills on all
966    // paths from MBB to all return blocks.
967    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
968           BE = df_end(MBB); BI != BE; ++BI) {
969      MachineBasicBlock* SBB = *BI;
970      if (SBB == MBB)
971        continue;
972      // Stop when we encounter spills of any CSRs spilled at MBB that
973      // have not yet been seen to be restored.
974      if (CSRSave[SBB].intersects(spilled) &&
975          !restored.contains(CSRSave[SBB] & spilled))
976        break;
977      // Collect the CSRs spilled at MBB that are restored
978      // at this DF successor of MBB.
979      if (CSRRestore[SBB].intersects(spilled))
980        restored |= (CSRRestore[SBB] & spilled);
981      // If we are at a retun block, check that the restores
982      // we have seen so far exhaust the spills at MBB, then
983      // reset the restores.
984      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
985        if (restored != spilled) {
986          CSRegSet notRestored = (spilled - restored);
987          DEBUG(dbgs() << MF->getName() << ": "
988                       << stringifyCSRegSet(notRestored)
989                       << " spilled at " << getBasicBlockName(MBB)
990                       << " are never restored on path to return "
991                       << getBasicBlockName(SBB) << "\n");
992        }
993        restored.clear();
994      }
995    }
996  }
997
998  // Check restore placements.
999  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1000         BE = CSRRestore.end(); BI != BE; ++BI) {
1001    MachineBasicBlock* MBB = BI->first;
1002    CSRegSet restored = BI->second;
1003    CSRegSet spilled;
1004
1005    if (restored.empty())
1006      continue;
1007
1008    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
1009                 << stringifyCSRegSet(CSRSave[MBB])
1010                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1011                 << stringifyCSRegSet(restored) << "\n");
1012
1013    if (CSRSave[MBB].intersects(restored)) {
1014      spilled |= (CSRSave[MBB] & restored);
1015    }
1016    // Walk inverse depth first from MBB to find spills of all
1017    // CSRs restored at MBB:
1018    for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1019           BE = idf_end(MBB); BI != BE; ++BI) {
1020      MachineBasicBlock* PBB = *BI;
1021      if (PBB == MBB)
1022        continue;
1023      // Stop when we encounter restores of any CSRs restored at MBB that
1024      // have not yet been seen to be spilled.
1025      if (CSRRestore[PBB].intersects(restored) &&
1026          !spilled.contains(CSRRestore[PBB] & restored))
1027        break;
1028      // Collect the CSRs restored at MBB that are spilled
1029      // at this DF predecessor of MBB.
1030      if (CSRSave[PBB].intersects(restored))
1031        spilled |= (CSRSave[PBB] & restored);
1032    }
1033    if (spilled != restored) {
1034      CSRegSet notSpilled = (restored - spilled);
1035      DEBUG(dbgs() << MF->getName() << ": "
1036                   << stringifyCSRegSet(notSpilled)
1037                   << " restored at " << getBasicBlockName(MBB)
1038                   << " are never spilled\n");
1039    }
1040  }
1041}
1042
1043// Debugging print methods.
1044std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1045  if (!MBB)
1046    return "";
1047
1048  if (MBB->getBasicBlock())
1049    return MBB->getBasicBlock()->getName().str();
1050
1051  std::ostringstream name;
1052  name << "_MBB_" << MBB->getNumber();
1053  return name.str();
1054}
1055
1056std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1057  const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1058  const std::vector<CalleeSavedInfo> CSI =
1059    MF->getFrameInfo()->getCalleeSavedInfo();
1060
1061  std::ostringstream srep;
1062  if (CSI.size() == 0) {
1063    srep << "[]";
1064    return srep.str();
1065  }
1066  srep << "[";
1067  CSRegSet::iterator I = s.begin(), E = s.end();
1068  if (I != E) {
1069    unsigned reg = CSI[*I].getReg();
1070    srep << TRI->getName(reg);
1071    for (++I; I != E; ++I) {
1072      reg = CSI[*I].getReg();
1073      srep << ",";
1074      srep << TRI->getName(reg);
1075    }
1076  }
1077  srep << "]";
1078  return srep.str();
1079}
1080
1081void PEI::dumpSet(const CSRegSet& s) {
1082  DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
1083}
1084
1085void PEI::dumpUsed(MachineBasicBlock* MBB) {
1086  DEBUG({
1087      if (MBB)
1088        dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1089               << stringifyCSRegSet(CSRUsed[MBB])  << "\n";
1090    });
1091}
1092
1093void PEI::dumpAllUsed() {
1094    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1095         MBBI != MBBE; ++MBBI) {
1096      MachineBasicBlock* MBB = MBBI;
1097      dumpUsed(MBB);
1098    }
1099}
1100
1101void PEI::dumpSets(MachineBasicBlock* MBB) {
1102  DEBUG({
1103      if (MBB)
1104        dbgs() << getBasicBlockName(MBB)           << " | "
1105               << stringifyCSRegSet(CSRUsed[MBB])  << " | "
1106               << stringifyCSRegSet(AnticIn[MBB])  << " | "
1107               << stringifyCSRegSet(AnticOut[MBB]) << " | "
1108               << stringifyCSRegSet(AvailIn[MBB])  << " | "
1109               << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1110    });
1111}
1112
1113void PEI::dumpSets1(MachineBasicBlock* MBB) {
1114  DEBUG({
1115      if (MBB)
1116        dbgs() << getBasicBlockName(MBB)             << " | "
1117               << stringifyCSRegSet(CSRUsed[MBB])    << " | "
1118               << stringifyCSRegSet(AnticIn[MBB])    << " | "
1119               << stringifyCSRegSet(AnticOut[MBB])   << " | "
1120               << stringifyCSRegSet(AvailIn[MBB])    << " | "
1121               << stringifyCSRegSet(AvailOut[MBB])   << " | "
1122               << stringifyCSRegSet(CSRSave[MBB])    << " | "
1123               << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1124    });
1125}
1126
1127void PEI::dumpAllSets() {
1128    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1129         MBBI != MBBE; ++MBBI) {
1130      MachineBasicBlock* MBB = MBBI;
1131      dumpSets1(MBB);
1132    }
1133}
1134
1135void PEI::dumpSRSets() {
1136  DEBUG({
1137      for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1138           MBB != E; ++MBB) {
1139        if (!CSRSave[MBB].empty()) {
1140          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
1141                 << stringifyCSRegSet(CSRSave[MBB]);
1142          if (CSRRestore[MBB].empty())
1143            dbgs() << '\n';
1144        }
1145
1146        if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
1147          dbgs() << "    "
1148                 << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1149                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1150      }
1151    });
1152}
1153#endif
1154