RegAllocPBQP.cpp revision 212904
1240116Smarcel//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
2240116Smarcel//
3240116Smarcel//                     The LLVM Compiler Infrastructure
4240116Smarcel//
5240116Smarcel// This file is distributed under the University of Illinois Open Source
6240116Smarcel// License. See LICENSE.TXT for details.
7240116Smarcel//
8240116Smarcel//===----------------------------------------------------------------------===//
9240116Smarcel//
10240116Smarcel// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
11240116Smarcel// register allocator for LLVM. This allocator works by constructing a PBQP
12240116Smarcel// problem representing the register allocation problem under consideration,
13240116Smarcel// solving this using a PBQP solver, and mapping the solution back to a
14240116Smarcel// register assignment. If any variables are selected for spilling then spill
15240116Smarcel// code is inserted and the process repeated.
16240116Smarcel//
17240116Smarcel// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
18240116Smarcel// for register allocation. For more information on PBQP for register
19240116Smarcel// allocation, see the following papers:
20240116Smarcel//
21240116Smarcel//   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
22240116Smarcel//   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
23240116Smarcel//   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
24240116Smarcel//
25240116Smarcel//   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
26275988Sngie//   architectures. In Proceedings of the Joint Conference on Languages,
27275988Sngie//   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
28240116Smarcel//   NY, USA, 139-148.
29240116Smarcel//
30240116Smarcel//===----------------------------------------------------------------------===//
31240116Smarcel
32240116Smarcel#define DEBUG_TYPE "regalloc"
33240116Smarcel
34240116Smarcel#include "PBQP/HeuristicSolver.h"
35240116Smarcel#include "PBQP/Graph.h"
36240116Smarcel#include "PBQP/Heuristics/Briggs.h"
37240116Smarcel#include "RenderMachineFunction.h"
38275988Sngie#include "Splitter.h"
39240116Smarcel#include "VirtRegMap.h"
40275988Sngie#include "VirtRegRewriter.h"
41275988Sngie#include "llvm/CodeGen/CalcSpillWeights.h"
42275988Sngie#include "llvm/CodeGen/LiveIntervalAnalysis.h"
43275988Sngie#include "llvm/CodeGen/LiveStackAnalysis.h"
44275988Sngie#include "llvm/CodeGen/MachineFunctionPass.h"
45275988Sngie#include "llvm/CodeGen/MachineLoopInfo.h"
46240116Smarcel#include "llvm/CodeGen/MachineRegisterInfo.h"
47240116Smarcel#include "llvm/CodeGen/RegAllocRegistry.h"
48240116Smarcel#include "llvm/CodeGen/RegisterCoalescer.h"
49240116Smarcel#include "llvm/Support/Debug.h"
50240116Smarcel#include "llvm/Support/raw_ostream.h"
51240116Smarcel#include "llvm/Target/TargetInstrInfo.h"
52240116Smarcel#include "llvm/Target/TargetMachine.h"
53240116Smarcel#include <limits>
54240116Smarcel#include <map>
55240116Smarcel#include <memory>
56240116Smarcel#include <set>
57240116Smarcel#include <vector>
58240116Smarcel
59240116Smarcelusing namespace llvm;
60240116Smarcel
61240116Smarcelstatic RegisterRegAlloc
62240116SmarcelregisterPBQPRepAlloc("pbqp", "PBQP register allocator",
63240116Smarcel                       llvm::createPBQPRegisterAllocator);
64240116Smarcel
65240116Smarcelstatic cl::opt<bool>
66240116SmarcelpbqpCoalescing("pbqp-coalescing",
67240116Smarcel                cl::desc("Attempt coalescing during PBQP register allocation."),
68240116Smarcel                cl::init(false), cl::Hidden);
69240116Smarcel
70240116Smarcelstatic cl::opt<bool>
71240116SmarcelpbqpPreSplitting("pbqp-pre-splitting",
72240116Smarcel                 cl::desc("Pre-splite before PBQP register allocation."),
73240116Smarcel                 cl::init(false), cl::Hidden);
74240116Smarcel
75240116Smarcelnamespace {
76240116Smarcel
77240116Smarcel  ///
78240116Smarcel  /// PBQP based allocators solve the register allocation problem by mapping
79240116Smarcel  /// register allocation problems to Partitioned Boolean Quadratic
80240116Smarcel  /// Programming problems.
81240116Smarcel  class PBQPRegAlloc : public MachineFunctionPass {
82240116Smarcel  public:
83240116Smarcel
84240116Smarcel    static char ID;
85240116Smarcel
86240116Smarcel    /// Construct a PBQP register allocator.
87240116Smarcel    PBQPRegAlloc() : MachineFunctionPass(ID) {}
88240116Smarcel
89240116Smarcel    /// Return the pass name.
90240116Smarcel    virtual const char* getPassName() const {
91240116Smarcel      return "PBQP Register Allocator";
92240116Smarcel    }
93240116Smarcel
94240116Smarcel    /// PBQP analysis usage.
95240116Smarcel    virtual void getAnalysisUsage(AnalysisUsage &au) const {
96240116Smarcel      au.addRequired<SlotIndexes>();
97240116Smarcel      au.addPreserved<SlotIndexes>();
98240116Smarcel      au.addRequired<LiveIntervals>();
99240116Smarcel      //au.addRequiredID(SplitCriticalEdgesID);
100240116Smarcel      au.addRequired<RegisterCoalescer>();
101240116Smarcel      au.addRequired<CalculateSpillWeights>();
102240116Smarcel      au.addRequired<LiveStacks>();
103240116Smarcel      au.addPreserved<LiveStacks>();
104240116Smarcel      au.addRequired<MachineLoopInfo>();
105240116Smarcel      au.addPreserved<MachineLoopInfo>();
106240116Smarcel      if (pbqpPreSplitting)
107240116Smarcel        au.addRequired<LoopSplitter>();
108240116Smarcel      au.addRequired<VirtRegMap>();
109240116Smarcel      au.addRequired<RenderMachineFunction>();
110240116Smarcel      MachineFunctionPass::getAnalysisUsage(au);
111240116Smarcel    }
112240116Smarcel
113240116Smarcel    /// Perform register allocation
114240116Smarcel    virtual bool runOnMachineFunction(MachineFunction &MF);
115240116Smarcel
116240116Smarcel  private:
117240116Smarcel
118240116Smarcel    class LIOrdering {
119240116Smarcel    public:
120240116Smarcel      bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
121240116Smarcel        return li1->reg < li2->reg;
122240116Smarcel      }
123240116Smarcel    };
124240116Smarcel
125240116Smarcel    typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
126240116Smarcel    typedef std::vector<const LiveInterval*> Node2LIMap;
127240116Smarcel    typedef std::vector<unsigned> AllowedSet;
128240116Smarcel    typedef std::vector<AllowedSet> AllowedSetMap;
129240116Smarcel    typedef std::set<unsigned> RegSet;
130240116Smarcel    typedef std::pair<unsigned, unsigned> RegPair;
131240116Smarcel    typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
132240116Smarcel
133240116Smarcel    typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
134240116Smarcel
135240116Smarcel    typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
136240116Smarcel
137240116Smarcel    MachineFunction *mf;
138240116Smarcel    const TargetMachine *tm;
139240116Smarcel    const TargetRegisterInfo *tri;
140240116Smarcel    const TargetInstrInfo *tii;
141240116Smarcel    const MachineLoopInfo *loopInfo;
142240116Smarcel    MachineRegisterInfo *mri;
143240116Smarcel    RenderMachineFunction *rmf;
144240116Smarcel
145240116Smarcel    LiveIntervals *lis;
146240116Smarcel    LiveStacks *lss;
147240116Smarcel    VirtRegMap *vrm;
148240116Smarcel
149240116Smarcel    LI2NodeMap li2Node;
150240116Smarcel    Node2LIMap node2LI;
151240116Smarcel    AllowedSetMap allowedSets;
152240116Smarcel    LiveIntervalSet vregIntervalsToAlloc,
153240116Smarcel                    emptyVRegIntervals;
154240116Smarcel    NodeVector problemNodes;
155240116Smarcel
156240116Smarcel
157240116Smarcel    /// Builds a PBQP cost vector.
158240116Smarcel    template <typename RegContainer>
159240116Smarcel    PBQP::Vector buildCostVector(unsigned vReg,
160240116Smarcel                                 const RegContainer &allowed,
161240116Smarcel                                 const CoalesceMap &cealesces,
162240116Smarcel                                 PBQP::PBQPNum spillCost) const;
163240116Smarcel
164240116Smarcel    /// \brief Builds a PBQP interference matrix.
165240116Smarcel    ///
166240116Smarcel    /// @return Either a pointer to a non-zero PBQP matrix representing the
167240116Smarcel    ///         allocation option costs, or a null pointer for a zero matrix.
168240116Smarcel    ///
169240116Smarcel    /// Expects allowed sets for two interfering LiveIntervals. These allowed
170240116Smarcel    /// sets should contain only allocable registers from the LiveInterval's
171240116Smarcel    /// register class, with any interfering pre-colored registers removed.
172240116Smarcel    template <typename RegContainer>
173240116Smarcel    PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
174240116Smarcel                                          const RegContainer &allowed2) const;
175240116Smarcel
176240116Smarcel    ///
177240116Smarcel    /// Expects allowed sets for two potentially coalescable LiveIntervals,
178240116Smarcel    /// and an estimated benefit due to coalescing. The allowed sets should
179240116Smarcel    /// contain only allocable registers from the LiveInterval's register
180240116Smarcel    /// classes, with any interfering pre-colored registers removed.
181240116Smarcel    template <typename RegContainer>
182240116Smarcel    PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
183240116Smarcel                                        const RegContainer &allowed2,
184240116Smarcel                                        PBQP::PBQPNum cBenefit) const;
185240116Smarcel
186240116Smarcel    /// \brief Finds coalescing opportunities and returns them as a map.
187240116Smarcel    ///
188240116Smarcel    /// Any entries in the map are guaranteed coalescable, even if their
189240116Smarcel    /// corresponding live intervals overlap.
190240116Smarcel    CoalesceMap findCoalesces();
191240116Smarcel
192240116Smarcel    /// \brief Finds the initial set of vreg intervals to allocate.
193240116Smarcel    void findVRegIntervalsToAlloc();
194240116Smarcel
195240116Smarcel    /// \brief Constructs a PBQP problem representation of the register
196240116Smarcel    /// allocation problem for this function.
197240116Smarcel    ///
198240116Smarcel    /// @return a PBQP solver object for the register allocation problem.
199240116Smarcel    PBQP::Graph constructPBQPProblem();
200240116Smarcel
201240116Smarcel    /// \brief Adds a stack interval if the given live interval has been
202240116Smarcel    /// spilled. Used to support stack slot coloring.
203240116Smarcel    void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
204240116Smarcel
205240116Smarcel    /// \brief Given a solved PBQP problem maps this solution back to a register
206240116Smarcel    /// assignment.
207240116Smarcel    bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
208240116Smarcel
209240116Smarcel    /// \brief Postprocessing before final spilling. Sets basic block "live in"
210240116Smarcel    /// variables.
211240116Smarcel    void finalizeAlloc() const;
212240116Smarcel
213240116Smarcel  };
214240116Smarcel
215240116Smarcel  char PBQPRegAlloc::ID = 0;
216240116Smarcel}
217240116Smarcel
218240116Smarcel
219240116Smarceltemplate <typename RegContainer>
220240116SmarcelPBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
221240116Smarcel                                           const RegContainer &allowed,
222240116Smarcel                                           const CoalesceMap &coalesces,
223240116Smarcel                                           PBQP::PBQPNum spillCost) const {
224240116Smarcel
225240116Smarcel  typedef typename RegContainer::const_iterator AllowedItr;
226240116Smarcel
227240116Smarcel  // Allocate vector. Additional element (0th) used for spill option
228240116Smarcel  PBQP::Vector v(allowed.size() + 1, 0);
229240116Smarcel
230240116Smarcel  v[0] = spillCost;
231240116Smarcel
232240116Smarcel  // Iterate over the allowed registers inserting coalesce benefits if there
233240116Smarcel  // are any.
234240116Smarcel  unsigned ai = 0;
235240116Smarcel  for (AllowedItr itr = allowed.begin(), end = allowed.end();
236240116Smarcel       itr != end; ++itr, ++ai) {
237240116Smarcel
238240116Smarcel    unsigned pReg = *itr;
239240116Smarcel
240240116Smarcel    CoalesceMap::const_iterator cmItr =
241240116Smarcel      coalesces.find(RegPair(vReg, pReg));
242240116Smarcel
243240116Smarcel    // No coalesce - on to the next preg.
244240116Smarcel    if (cmItr == coalesces.end())
245240116Smarcel      continue;
246240116Smarcel
247240116Smarcel    // We have a coalesce - insert the benefit.
248240116Smarcel    v[ai + 1] = -cmItr->second;
249240116Smarcel  }
250240116Smarcel
251240116Smarcel  return v;
252240116Smarcel}
253240116Smarcel
254240116Smarceltemplate <typename RegContainer>
255240116SmarcelPBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
256240116Smarcel      const RegContainer &allowed1, const RegContainer &allowed2) const {
257240116Smarcel
258240116Smarcel  typedef typename RegContainer::const_iterator RegContainerIterator;
259240116Smarcel
260240116Smarcel  // Construct a PBQP matrix representing the cost of allocation options. The
261240116Smarcel  // rows and columns correspond to the allocation options for the two live
262240116Smarcel  // intervals.  Elements will be infinite where corresponding registers alias,
263240116Smarcel  // since we cannot allocate aliasing registers to interfering live intervals.
264240116Smarcel  // All other elements (non-aliasing combinations) will have zero cost. Note
265240116Smarcel  // that the spill option (element 0,0) has zero cost, since we can allocate
266240116Smarcel  // both intervals to memory safely (the cost for each individual allocation
267240116Smarcel  // to memory is accounted for by the cost vectors for each live interval).
268240116Smarcel  PBQP::Matrix *m =
269240116Smarcel    new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
270240116Smarcel
271240116Smarcel  // Assume this is a zero matrix until proven otherwise.  Zero matrices occur
272240116Smarcel  // between interfering live ranges with non-overlapping register sets (e.g.
273240116Smarcel  // non-overlapping reg classes, or disjoint sets of allowed regs within the
274240116Smarcel  // same class). The term "overlapping" is used advisedly: sets which do not
275240116Smarcel  // intersect, but contain registers which alias, will have non-zero matrices.
276240116Smarcel  // We optimize zero matrices away to improve solver speed.
277240116Smarcel  bool isZeroMatrix = true;
278240116Smarcel
279240116Smarcel
280240116Smarcel  // Row index. Starts at 1, since the 0th row is for the spill option, which
281240116Smarcel  // is always zero.
282240116Smarcel  unsigned ri = 1;
283240116Smarcel
284240116Smarcel  // Iterate over allowed sets, insert infinities where required.
285240116Smarcel  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
286240116Smarcel       a1Itr != a1End; ++a1Itr) {
287240116Smarcel
288240116Smarcel    // Column index, starts at 1 as for row index.
289240116Smarcel    unsigned ci = 1;
290240116Smarcel    unsigned reg1 = *a1Itr;
291240116Smarcel
292240116Smarcel    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
293260029Sjmmv         a2Itr != a2End; ++a2Itr) {
294240116Smarcel
295240116Smarcel      unsigned reg2 = *a2Itr;
296240116Smarcel
297240116Smarcel      // If the row/column regs are identical or alias insert an infinity.
298240116Smarcel      if (tri->regsOverlap(reg1, reg2)) {
299240116Smarcel        (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
300240116Smarcel        isZeroMatrix = false;
301240116Smarcel      }
302240116Smarcel
303240116Smarcel      ++ci;
304240116Smarcel    }
305240116Smarcel
306240116Smarcel    ++ri;
307260029Sjmmv  }
308240116Smarcel
309240116Smarcel  // If this turns out to be a zero matrix...
310240116Smarcel  if (isZeroMatrix) {
311240116Smarcel    // free it and return null.
312240116Smarcel    delete m;
313240116Smarcel    return 0;
314240116Smarcel  }
315240116Smarcel
316240116Smarcel  // ...otherwise return the cost matrix.
317240116Smarcel  return m;
318240116Smarcel}
319240116Smarcel
320240116Smarceltemplate <typename RegContainer>
321260029SjmmvPBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
322260029Sjmmv      const RegContainer &allowed1, const RegContainer &allowed2,
323240116Smarcel      PBQP::PBQPNum cBenefit) const {
324240116Smarcel
325240116Smarcel  typedef typename RegContainer::const_iterator RegContainerIterator;
326240116Smarcel
327240116Smarcel  // Construct a PBQP Matrix representing the benefits of coalescing. As with
328240116Smarcel  // interference matrices the rows and columns represent allowed registers
329240116Smarcel  // for the LiveIntervals which are (potentially) to be coalesced. The amount
330240116Smarcel  // -cBenefit will be placed in any element representing the same register
331240116Smarcel  // for both intervals.
332240116Smarcel  PBQP::Matrix *m =
333240116Smarcel    new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
334240116Smarcel
335240116Smarcel  // Reset costs to zero.
336240116Smarcel  m->reset(0);
337240116Smarcel
338240116Smarcel  // Assume the matrix is zero till proven otherwise. Zero matrices will be
339240116Smarcel  // optimized away as in the interference case.
340240116Smarcel  bool isZeroMatrix = true;
341240116Smarcel
342240116Smarcel  // Row index. Starts at 1, since the 0th row is for the spill option, which
343240116Smarcel  // is always zero.
344240116Smarcel  unsigned ri = 1;
345240116Smarcel
346240116Smarcel  // Iterate over the allowed sets, insert coalescing benefits where
347240116Smarcel  // appropriate.
348240116Smarcel  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
349240116Smarcel       a1Itr != a1End; ++a1Itr) {
350240116Smarcel
351240116Smarcel    // Column index, starts at 1 as for row index.
352240116Smarcel    unsigned ci = 1;
353240116Smarcel    unsigned reg1 = *a1Itr;
354240116Smarcel
355240116Smarcel    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
356240116Smarcel         a2Itr != a2End; ++a2Itr) {
357260029Sjmmv
358240116Smarcel      // If the row and column represent the same register insert a beneficial
359240116Smarcel      // cost to preference this allocation - it would allow us to eliminate a
360260029Sjmmv      // move instruction.
361260029Sjmmv      if (reg1 == *a2Itr) {
362240116Smarcel        (*m)[ri][ci] = -cBenefit;
363240116Smarcel        isZeroMatrix = false;
364240116Smarcel      }
365240116Smarcel
366240116Smarcel      ++ci;
367240116Smarcel    }
368240116Smarcel
369240116Smarcel    ++ri;
370240116Smarcel  }
371240116Smarcel
372240116Smarcel  // If this turns out to be a zero matrix...
373240116Smarcel  if (isZeroMatrix) {
374240116Smarcel    // ...free it and return null.
375240116Smarcel    delete m;
376240116Smarcel    return 0;
377240116Smarcel  }
378240116Smarcel
379240116Smarcel  return m;
380240116Smarcel}
381240116Smarcel
382240116SmarcelPBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
383240116Smarcel
384240116Smarcel  typedef MachineFunction::const_iterator MFIterator;
385240116Smarcel  typedef MachineBasicBlock::const_iterator MBBIterator;
386240116Smarcel  typedef LiveInterval::const_vni_iterator VNIIterator;
387240116Smarcel
388240116Smarcel  CoalesceMap coalescesFound;
389240116Smarcel
390240116Smarcel  // To find coalesces we need to iterate over the function looking for
391240116Smarcel  // copy instructions.
392240116Smarcel  for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
393240116Smarcel       bbItr != bbEnd; ++bbItr) {
394240116Smarcel
395240116Smarcel    const MachineBasicBlock *mbb = &*bbItr;
396240116Smarcel
397240116Smarcel    for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
398240116Smarcel         iItr != iEnd; ++iItr) {
399240116Smarcel
400240116Smarcel      const MachineInstr *instr = &*iItr;
401240116Smarcel
402240116Smarcel      // If this isn't a copy then continue to the next instruction.
403240116Smarcel      if (!instr->isCopy())
404240116Smarcel        continue;
405240116Smarcel
406240116Smarcel      unsigned srcReg = instr->getOperand(1).getReg();
407260029Sjmmv      unsigned dstReg = instr->getOperand(0).getReg();
408240116Smarcel
409240116Smarcel      // If the registers are already the same our job is nice and easy.
410260029Sjmmv      if (dstReg == srcReg)
411240116Smarcel        continue;
412240116Smarcel
413240116Smarcel      bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
414240116Smarcel           dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
415240116Smarcel
416240116Smarcel      // If both registers are physical then we can't coalesce.
417240116Smarcel      if (srcRegIsPhysical && dstRegIsPhysical)
418240116Smarcel        continue;
419240116Smarcel
420240116Smarcel      // If it's a copy that includes two virtual register but the source and
421240116Smarcel      // destination classes differ then we can't coalesce.
422240116Smarcel      if (!srcRegIsPhysical && !dstRegIsPhysical &&
423240116Smarcel          mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
424240116Smarcel        continue;
425240116Smarcel
426240116Smarcel      // If one is physical and one is virtual, check that the physical is
427240116Smarcel      // allocatable in the class of the virtual.
428240116Smarcel      if (srcRegIsPhysical && !dstRegIsPhysical) {
429240116Smarcel        const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
430240116Smarcel        if (std::find(dstRegClass->allocation_order_begin(*mf),
431240116Smarcel                      dstRegClass->allocation_order_end(*mf), srcReg) ==
432240116Smarcel            dstRegClass->allocation_order_end(*mf))
433240116Smarcel          continue;
434240116Smarcel      }
435240116Smarcel      if (!srcRegIsPhysical && dstRegIsPhysical) {
436240116Smarcel        const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
437240116Smarcel        if (std::find(srcRegClass->allocation_order_begin(*mf),
438240116Smarcel                      srcRegClass->allocation_order_end(*mf), dstReg) ==
439240116Smarcel            srcRegClass->allocation_order_end(*mf))
440240116Smarcel          continue;
441240116Smarcel      }
442240116Smarcel
443240116Smarcel      // If we've made it here we have a copy with compatible register classes.
444240116Smarcel      // We can probably coalesce, but we need to consider overlap.
445240116Smarcel      const LiveInterval *srcLI = &lis->getInterval(srcReg),
446240116Smarcel                         *dstLI = &lis->getInterval(dstReg);
447240116Smarcel
448240116Smarcel      if (srcLI->overlaps(*dstLI)) {
449240116Smarcel        // Even in the case of an overlap we might still be able to coalesce,
450240116Smarcel        // but we need to make sure that no definition of either range occurs
451240116Smarcel        // while the other range is live.
452260029Sjmmv
453240116Smarcel        // Otherwise start by assuming we're ok.
454240116Smarcel        bool badDef = false;
455260029Sjmmv
456240116Smarcel        // Test all defs of the source range.
457240116Smarcel        for (VNIIterator
458240116Smarcel               vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
459240116Smarcel               vniItr != vniEnd; ++vniItr) {
460240116Smarcel
461240116Smarcel          // If we find a poorly defined def we err on the side of caution.
462240116Smarcel          if (!(*vniItr)->def.isValid()) {
463240116Smarcel            badDef = true;
464240116Smarcel            break;
465240116Smarcel          }
466240116Smarcel
467240116Smarcel          // If we find a def that kills the coalescing opportunity then
468240116Smarcel          // record it and break from the loop.
469240116Smarcel          if (dstLI->liveAt((*vniItr)->def)) {
470240116Smarcel            badDef = true;
471240116Smarcel            break;
472240116Smarcel          }
473240116Smarcel        }
474240116Smarcel
475240116Smarcel        // If we have a bad def give up, continue to the next instruction.
476240116Smarcel        if (badDef)
477240116Smarcel          continue;
478240116Smarcel
479240116Smarcel        // Otherwise test definitions of the destination range.
480240116Smarcel        for (VNIIterator
481240116Smarcel               vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
482240116Smarcel               vniItr != vniEnd; ++vniItr) {
483240116Smarcel
484240116Smarcel          // We want to make sure we skip the copy instruction itself.
485240116Smarcel          if ((*vniItr)->getCopy() == instr)
486240116Smarcel            continue;
487240116Smarcel
488240116Smarcel          if (!(*vniItr)->def.isValid()) {
489240116Smarcel            badDef = true;
490240116Smarcel            break;
491240116Smarcel          }
492240116Smarcel
493240116Smarcel          if (srcLI->liveAt((*vniItr)->def)) {
494240116Smarcel            badDef = true;
495240116Smarcel            break;
496240116Smarcel          }
497240116Smarcel        }
498240116Smarcel
499260029Sjmmv        // As before a bad def we give up and continue to the next instr.
500240116Smarcel        if (badDef)
501240116Smarcel          continue;
502260029Sjmmv      }
503240116Smarcel
504240116Smarcel      // If we make it to here then either the ranges didn't overlap, or they
505240116Smarcel      // did, but none of their definitions would prevent us from coalescing.
506240116Smarcel      // We're good to go with the coalesce.
507240116Smarcel
508240116Smarcel      float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
509240116Smarcel
510240116Smarcel      coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
511240116Smarcel      coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
512240116Smarcel    }
513240116Smarcel
514240116Smarcel  }
515240116Smarcel
516240116Smarcel  return coalescesFound;
517240116Smarcel}
518240116Smarcel
519240116Smarcelvoid PBQPRegAlloc::findVRegIntervalsToAlloc() {
520240116Smarcel
521240116Smarcel  // Iterate over all live ranges.
522240116Smarcel  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
523240116Smarcel       itr != end; ++itr) {
524240116Smarcel
525240116Smarcel    // Ignore physical ones.
526240116Smarcel    if (TargetRegisterInfo::isPhysicalRegister(itr->first))
527240116Smarcel      continue;
528240116Smarcel
529240116Smarcel    LiveInterval *li = itr->second;
530240116Smarcel
531240116Smarcel    // If this live interval is non-empty we will use pbqp to allocate it.
532240116Smarcel    // Empty intervals we allocate in a simple post-processing stage in
533240116Smarcel    // finalizeAlloc.
534240116Smarcel    if (!li->empty()) {
535240116Smarcel      vregIntervalsToAlloc.insert(li);
536240116Smarcel    }
537240116Smarcel    else {
538240116Smarcel      emptyVRegIntervals.insert(li);
539240116Smarcel    }
540240116Smarcel  }
541240116Smarcel}
542240116Smarcel
543240116SmarcelPBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
544260029Sjmmv
545240116Smarcel  typedef std::vector<const LiveInterval*> LIVector;
546240116Smarcel  typedef std::vector<unsigned> RegVector;
547260029Sjmmv
548240116Smarcel  // This will store the physical intervals for easy reference.
549240116Smarcel  LIVector physIntervals;
550240116Smarcel
551240116Smarcel  // Start by clearing the old node <-> live interval mappings & allowed sets
552240116Smarcel  li2Node.clear();
553240116Smarcel  node2LI.clear();
554240116Smarcel  allowedSets.clear();
555240116Smarcel
556240116Smarcel  // Populate physIntervals, update preg use:
557240116Smarcel  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
558240116Smarcel       itr != end; ++itr) {
559240116Smarcel
560240116Smarcel    if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
561240116Smarcel      physIntervals.push_back(itr->second);
562240116Smarcel      mri->setPhysRegUsed(itr->second->reg);
563240116Smarcel    }
564240116Smarcel  }
565240116Smarcel
566240116Smarcel  // Iterate over vreg intervals, construct live interval <-> node number
567240116Smarcel  //  mappings.
568240116Smarcel  for (LiveIntervalSet::const_iterator
569240116Smarcel       itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
570240116Smarcel       itr != end; ++itr) {
571240116Smarcel    const LiveInterval *li = *itr;
572240116Smarcel
573240116Smarcel    li2Node[li] = node2LI.size();
574240116Smarcel    node2LI.push_back(li);
575240116Smarcel  }
576240116Smarcel
577240116Smarcel  // Get the set of potential coalesces.
578240116Smarcel  CoalesceMap coalesces;
579240116Smarcel
580240116Smarcel  if (pbqpCoalescing) {
581240116Smarcel    coalesces = findCoalesces();
582240116Smarcel  }
583240116Smarcel
584240116Smarcel  // Construct a PBQP solver for this problem
585240116Smarcel  PBQP::Graph problem;
586240116Smarcel  problemNodes.resize(vregIntervalsToAlloc.size());
587240116Smarcel
588240116Smarcel  // Resize allowedSets container appropriately.
589240116Smarcel  allowedSets.resize(vregIntervalsToAlloc.size());
590260029Sjmmv
591240116Smarcel  BitVector ReservedRegs = tri->getReservedRegs(*mf);
592240116Smarcel
593240116Smarcel  // Iterate over virtual register intervals to compute allowed sets...
594240116Smarcel  for (unsigned node = 0; node < node2LI.size(); ++node) {
595240116Smarcel
596260029Sjmmv    // Grab pointers to the interval and its register class.
597240116Smarcel    const LiveInterval *li = node2LI[node];
598240116Smarcel    const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
599240116Smarcel
600240116Smarcel    // Start by assuming all allocable registers in the class are allowed...
601240116Smarcel    RegVector liAllowed;
602240116Smarcel    TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
603240116Smarcel    TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
604240116Smarcel    for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
605240116Smarcel      if (!ReservedRegs.test(*it))
606240116Smarcel        liAllowed.push_back(*it);
607240116Smarcel
608240116Smarcel    // Eliminate the physical registers which overlap with this range, along
609240116Smarcel    // with all their aliases.
610240116Smarcel    for (LIVector::iterator pItr = physIntervals.begin(),
611240116Smarcel       pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
612240116Smarcel
613240116Smarcel      if (!li->overlaps(**pItr))
614240116Smarcel        continue;
615240116Smarcel
616240116Smarcel      unsigned pReg = (*pItr)->reg;
617240116Smarcel
618240116Smarcel      // If we get here then the live intervals overlap, but we're still ok
619240116Smarcel      // if they're coalescable.
620240116Smarcel      if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
621240116Smarcel        continue;
622240116Smarcel
623240116Smarcel      // If we get here then we have a genuine exclusion.
624240116Smarcel
625240116Smarcel      // Remove the overlapping reg...
626240116Smarcel      RegVector::iterator eraseItr =
627240116Smarcel        std::find(liAllowed.begin(), liAllowed.end(), pReg);
628240116Smarcel
629240116Smarcel      if (eraseItr != liAllowed.end())
630240116Smarcel        liAllowed.erase(eraseItr);
631240116Smarcel
632240116Smarcel      const unsigned *aliasItr = tri->getAliasSet(pReg);
633240116Smarcel
634240116Smarcel      if (aliasItr != 0) {
635240116Smarcel        // ...and its aliases.
636240116Smarcel        for (; *aliasItr != 0; ++aliasItr) {
637240116Smarcel          RegVector::iterator eraseItr =
638240116Smarcel            std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
639240116Smarcel
640240116Smarcel          if (eraseItr != liAllowed.end()) {
641240116Smarcel            liAllowed.erase(eraseItr);
642260029Sjmmv          }
643240116Smarcel        }
644240116Smarcel      }
645240116Smarcel    }
646240116Smarcel
647240116Smarcel    // Copy the allowed set into a member vector for use when constructing cost
648260029Sjmmv    // vectors & matrices, and mapping PBQP solutions back to assignments.
649240116Smarcel    allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
650240116Smarcel
651240116Smarcel    // Set the spill cost to the interval weight, or epsilon if the
652240116Smarcel    // interval weight is zero
653240116Smarcel    PBQP::PBQPNum spillCost = (li->weight != 0.0) ?
654240116Smarcel        li->weight : std::numeric_limits<PBQP::PBQPNum>::min();
655240116Smarcel
656240116Smarcel    // Build a cost vector for this interval.
657240116Smarcel    problemNodes[node] =
658240116Smarcel      problem.addNode(
659240116Smarcel        buildCostVector(li->reg, allowedSets[node], coalesces, spillCost));
660240116Smarcel
661240116Smarcel  }
662240116Smarcel
663240116Smarcel
664240116Smarcel  // Now add the cost matrices...
665240116Smarcel  for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
666240116Smarcel    const LiveInterval *li = node2LI[node1];
667240116Smarcel
668240116Smarcel    // Test for live range overlaps and insert interference matrices.
669240116Smarcel    for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
670240116Smarcel      const LiveInterval *li2 = node2LI[node2];
671240116Smarcel
672240116Smarcel      CoalesceMap::const_iterator cmItr =
673240116Smarcel        coalesces.find(RegPair(li->reg, li2->reg));
674240116Smarcel
675240116Smarcel      PBQP::Matrix *m = 0;
676240116Smarcel
677240116Smarcel      if (cmItr != coalesces.end()) {
678240116Smarcel        m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
679240116Smarcel                                  cmItr->second);
680240116Smarcel      }
681240116Smarcel      else if (li->overlaps(*li2)) {
682240116Smarcel        m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
683240116Smarcel      }
684240116Smarcel
685240116Smarcel      if (m != 0) {
686240116Smarcel        problem.addEdge(problemNodes[node1],
687240116Smarcel                        problemNodes[node2],
688240116Smarcel                        *m);
689240116Smarcel
690240116Smarcel        delete m;
691240116Smarcel      }
692260029Sjmmv    }
693240116Smarcel  }
694260029Sjmmv
695240116Smarcel  assert(problem.getNumNodes() == allowedSets.size());
696240116Smarcel/*
697240116Smarcel  std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, "
698260029Sjmmv            << problem.getNumEdges() << " edges.\n";
699240116Smarcel
700240116Smarcel  problem.printDot(std::cerr);
701240116Smarcel*/
702240116Smarcel  // We're done, PBQP problem constructed - return it.
703240116Smarcel  return problem;
704240116Smarcel}
705240116Smarcel
706240116Smarcelvoid PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
707240116Smarcel                                    MachineRegisterInfo* mri) {
708240116Smarcel  int stackSlot = vrm->getStackSlot(spilled->reg);
709240116Smarcel
710240116Smarcel  if (stackSlot == VirtRegMap::NO_STACK_SLOT)
711240116Smarcel    return;
712240116Smarcel
713240116Smarcel  const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
714240116Smarcel  LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
715240116Smarcel
716240116Smarcel  VNInfo *vni;
717240116Smarcel  if (stackInterval.getNumValNums() != 0)
718240116Smarcel    vni = stackInterval.getValNumInfo(0);
719240116Smarcel  else
720240116Smarcel    vni = stackInterval.getNextValue(
721240116Smarcel      SlotIndex(), 0, false, lss->getVNInfoAllocator());
722240116Smarcel
723240116Smarcel  LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
724240116Smarcel  stackInterval.MergeRangesInAsValue(rhsInterval, vni);
725240116Smarcel}
726240116Smarcel
727240116Smarcelbool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
728240116Smarcel
729240116Smarcel  // Set to true if we have any spills
730240116Smarcel  bool anotherRoundNeeded = false;
731240116Smarcel
732240116Smarcel  // Clear the existing allocation.
733240116Smarcel  vrm->clearAllVirt();
734240116Smarcel
735240116Smarcel  // Iterate over the nodes mapping the PBQP solution to a register assignment.
736240116Smarcel  for (unsigned node = 0; node < node2LI.size(); ++node) {
737240116Smarcel    unsigned virtReg = node2LI[node]->reg,
738260029Sjmmv             allocSelection = solution.getSelection(problemNodes[node]);
739240116Smarcel
740240116Smarcel
741240116Smarcel    // If the PBQP solution is non-zero it's a physical register...
742240116Smarcel    if (allocSelection != 0) {
743260029Sjmmv      // Get the physical reg, subtracting 1 to account for the spill option.
744240116Smarcel      unsigned physReg = allowedSets[node][allocSelection - 1];
745240116Smarcel
746240116Smarcel      DEBUG(dbgs() << "VREG " << virtReg << " -> "
747240116Smarcel                   << tri->getName(physReg) << "\n");
748240116Smarcel
749240116Smarcel      assert(physReg != 0);
750240116Smarcel
751240116Smarcel      // Add to the virt reg map and update the used phys regs.
752240116Smarcel      vrm->assignVirt2Phys(virtReg, physReg);
753240116Smarcel    }
754240116Smarcel    // ...Otherwise it's a spill.
755240116Smarcel    else {
756240116Smarcel
757240116Smarcel      // Make sure we ignore this virtual reg on the next round
758240116Smarcel      // of allocation
759240116Smarcel      vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
760240116Smarcel
761240116Smarcel      // Insert spill ranges for this live range
762240116Smarcel      const LiveInterval *spillInterval = node2LI[node];
763240116Smarcel      double oldSpillWeight = spillInterval->weight;
764262855Sjmmv      SmallVector<LiveInterval*, 8> spillIs;
765262855Sjmmv      rmf->rememberUseDefs(spillInterval);
766262855Sjmmv      std::vector<LiveInterval*> newSpills =
767262855Sjmmv        lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
768262855Sjmmv      addStackInterval(spillInterval, mri);
769262855Sjmmv      rmf->rememberSpills(spillInterval, newSpills);
770262855Sjmmv
771262855Sjmmv      (void) oldSpillWeight;
772262855Sjmmv      DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
773262855Sjmmv                   << oldSpillWeight << ", New vregs: ");
774262855Sjmmv
775262855Sjmmv      // Copy any newly inserted live intervals into the list of regs to
776262855Sjmmv      // allocate.
777262855Sjmmv      for (std::vector<LiveInterval*>::const_iterator
778262855Sjmmv           itr = newSpills.begin(), end = newSpills.end();
779262855Sjmmv           itr != end; ++itr) {
780262855Sjmmv
781262855Sjmmv        assert(!(*itr)->empty() && "Empty spill range.");
782262855Sjmmv
783262855Sjmmv        DEBUG(dbgs() << (*itr)->reg << " ");
784262855Sjmmv
785262855Sjmmv        vregIntervalsToAlloc.insert(*itr);
786262855Sjmmv      }
787262855Sjmmv
788240116Smarcel      DEBUG(dbgs() << ")\n");
789240116Smarcel
790240116Smarcel      // We need another round if spill intervals were added.
791240116Smarcel      anotherRoundNeeded |= !newSpills.empty();
792240116Smarcel    }
793240116Smarcel  }
794240116Smarcel
795240116Smarcel  return !anotherRoundNeeded;
796240116Smarcel}
797240116Smarcel
798240116Smarcelvoid PBQPRegAlloc::finalizeAlloc() const {
799240116Smarcel  typedef LiveIntervals::iterator LIIterator;
800240116Smarcel  typedef LiveInterval::Ranges::const_iterator LRIterator;
801240116Smarcel
802240116Smarcel  // First allocate registers for the empty intervals.
803240116Smarcel  for (LiveIntervalSet::const_iterator
804240116Smarcel         itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
805240116Smarcel         itr != end; ++itr) {
806240116Smarcel    LiveInterval *li = *itr;
807240116Smarcel
808240116Smarcel    unsigned physReg = vrm->getRegAllocPref(li->reg);
809240116Smarcel
810240116Smarcel    if (physReg == 0) {
811240116Smarcel      const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
812      physReg = *liRC->allocation_order_begin(*mf);
813    }
814
815    vrm->assignVirt2Phys(li->reg, physReg);
816  }
817
818  // Finally iterate over the basic blocks to compute and set the live-in sets.
819  SmallVector<MachineBasicBlock*, 8> liveInMBBs;
820  MachineBasicBlock *entryMBB = &*mf->begin();
821
822  for (LIIterator liItr = lis->begin(), liEnd = lis->end();
823       liItr != liEnd; ++liItr) {
824
825    const LiveInterval *li = liItr->second;
826    unsigned reg = 0;
827
828    // Get the physical register for this interval
829    if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
830      reg = li->reg;
831    }
832    else if (vrm->isAssignedReg(li->reg)) {
833      reg = vrm->getPhys(li->reg);
834    }
835    else {
836      // Ranges which are assigned a stack slot only are ignored.
837      continue;
838    }
839
840    if (reg == 0) {
841      // Filter out zero regs - they're for intervals that were spilled.
842      continue;
843    }
844
845    // Iterate over the ranges of the current interval...
846    for (LRIterator lrItr = li->begin(), lrEnd = li->end();
847         lrItr != lrEnd; ++lrItr) {
848
849      // Find the set of basic blocks which this range is live into...
850      if (lis->findLiveInMBBs(lrItr->start, lrItr->end,  liveInMBBs)) {
851        // And add the physreg for this interval to their live-in sets.
852        for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
853          if (liveInMBBs[i] != entryMBB) {
854            if (!liveInMBBs[i]->isLiveIn(reg)) {
855              liveInMBBs[i]->addLiveIn(reg);
856            }
857          }
858        }
859        liveInMBBs.clear();
860      }
861    }
862  }
863
864}
865
866bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
867
868  mf = &MF;
869  tm = &mf->getTarget();
870  tri = tm->getRegisterInfo();
871  tii = tm->getInstrInfo();
872  mri = &mf->getRegInfo();
873
874  lis = &getAnalysis<LiveIntervals>();
875  lss = &getAnalysis<LiveStacks>();
876  loopInfo = &getAnalysis<MachineLoopInfo>();
877  rmf = &getAnalysis<RenderMachineFunction>();
878
879  vrm = &getAnalysis<VirtRegMap>();
880
881
882  DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
883
884  // Allocator main loop:
885  //
886  // * Map current regalloc problem to a PBQP problem
887  // * Solve the PBQP problem
888  // * Map the solution back to a register allocation
889  // * Spill if necessary
890  //
891  // This process is continued till no more spills are generated.
892
893  // Find the vreg intervals in need of allocation.
894  findVRegIntervalsToAlloc();
895
896  // If there are non-empty intervals allocate them using pbqp.
897  if (!vregIntervalsToAlloc.empty()) {
898
899    bool pbqpAllocComplete = false;
900    unsigned round = 0;
901
902    while (!pbqpAllocComplete) {
903      DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
904
905      PBQP::Graph problem = constructPBQPProblem();
906      PBQP::Solution solution =
907        PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem);
908
909      pbqpAllocComplete = mapPBQPToRegAlloc(solution);
910
911      ++round;
912    }
913  }
914
915  // Finalise allocation, allocate empty ranges.
916  finalizeAlloc();
917
918  rmf->renderMachineFunction("After PBQP register allocation.", vrm);
919
920  vregIntervalsToAlloc.clear();
921  emptyVRegIntervals.clear();
922  li2Node.clear();
923  node2LI.clear();
924  allowedSets.clear();
925  problemNodes.clear();
926
927  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
928
929  // Run rewriter
930  std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
931
932  rewriter->runOnMachineFunction(*mf, *vrm, lis);
933
934  return true;
935}
936
937FunctionPass* llvm::createPBQPRegisterAllocator() {
938  return new PBQPRegAlloc();
939}
940
941
942#undef DEBUG_TYPE
943