RegAllocPBQP.cpp revision 296417
1193323Sed//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based 11193323Sed// register allocator for LLVM. This allocator works by constructing a PBQP 12193323Sed// problem representing the register allocation problem under consideration, 13193323Sed// solving this using a PBQP solver, and mapping the solution back to a 14193323Sed// register assignment. If any variables are selected for spilling then spill 15193323Sed// code is inserted and the process repeated. 16193323Sed// 17193323Sed// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned 18193323Sed// for register allocation. For more information on PBQP for register 19193323Sed// allocation, see the following papers: 20193323Sed// 21193323Sed// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with 22193323Sed// PBQP. In Proceedings of the 7th Joint Modular Languages Conference 23193323Sed// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361. 24193323Sed// 25193323Sed// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular 26193323Sed// architectures. In Proceedings of the Joint Conference on Languages, 27193323Sed// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York, 28193323Sed// NY, USA, 139-148. 29193323Sed// 30193323Sed//===----------------------------------------------------------------------===// 31193323Sed 32249423Sdim#include "llvm/CodeGen/RegAllocPBQP.h" 33249423Sdim#include "RegisterCoalescer.h" 34234353Sdim#include "Spiller.h" 35234353Sdim#include "llvm/Analysis/AliasAnalysis.h" 36200581Srdivacky#include "llvm/CodeGen/CalcSpillWeights.h" 37193323Sed#include "llvm/CodeGen/LiveIntervalAnalysis.h" 38234353Sdim#include "llvm/CodeGen/LiveRangeEdit.h" 39193323Sed#include "llvm/CodeGen/LiveStackAnalysis.h" 40261991Sdim#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 41234353Sdim#include "llvm/CodeGen/MachineDominators.h" 42193323Sed#include "llvm/CodeGen/MachineFunctionPass.h" 43193323Sed#include "llvm/CodeGen/MachineLoopInfo.h" 44193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h" 45193323Sed#include "llvm/CodeGen/RegAllocRegistry.h" 46249423Sdim#include "llvm/CodeGen/VirtRegMap.h" 47249423Sdim#include "llvm/IR/Module.h" 48193323Sed#include "llvm/Support/Debug.h" 49276479Sdim#include "llvm/Support/FileSystem.h" 50296417Sdim#include "llvm/Support/Printable.h" 51198090Srdivacky#include "llvm/Support/raw_ostream.h" 52193323Sed#include "llvm/Target/TargetInstrInfo.h" 53280031Sdim#include "llvm/Target/TargetSubtargetInfo.h" 54193323Sed#include <limits> 55193323Sed#include <memory> 56280031Sdim#include <queue> 57193323Sed#include <set> 58234353Sdim#include <sstream> 59193323Sed#include <vector> 60193323Sed 61193323Sedusing namespace llvm; 62193323Sed 63276479Sdim#define DEBUG_TYPE "regalloc" 64276479Sdim 65193323Sedstatic RegisterRegAlloc 66280031SdimRegisterPBQPRepAlloc("pbqp", "PBQP register allocator", 67218893Sdim createDefaultPBQPRegisterAllocator); 68193323Sed 69198090Srdivackystatic cl::opt<bool> 70280031SdimPBQPCoalescing("pbqp-coalescing", 71203954Srdivacky cl::desc("Attempt coalescing during PBQP register allocation."), 72203954Srdivacky cl::init(false), cl::Hidden); 73198090Srdivacky 74234353Sdim#ifndef NDEBUG 75212904Sdimstatic cl::opt<bool> 76280031SdimPBQPDumpGraphs("pbqp-dump-graphs", 77234353Sdim cl::desc("Dump graphs for each function/round in the compilation unit."), 78234353Sdim cl::init(false), cl::Hidden); 79234353Sdim#endif 80212904Sdim 81193323Sednamespace { 82193323Sed 83218893Sdim/// 84218893Sdim/// PBQP based allocators solve the register allocation problem by mapping 85218893Sdim/// register allocation problems to Partitioned Boolean Quadratic 86218893Sdim/// Programming problems. 87218893Sdimclass RegAllocPBQP : public MachineFunctionPass { 88218893Sdimpublic: 89193323Sed 90218893Sdim static char ID; 91193323Sed 92218893Sdim /// Construct a PBQP register allocator. 93280031Sdim RegAllocPBQP(char *cPassID = nullptr) 94280031Sdim : MachineFunctionPass(ID), customPassID(cPassID) { 95218893Sdim initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 96218893Sdim initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); 97218893Sdim initializeLiveStacksPass(*PassRegistry::getPassRegistry()); 98218893Sdim initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); 99218893Sdim } 100193323Sed 101218893Sdim /// Return the pass name. 102276479Sdim const char* getPassName() const override { 103218893Sdim return "PBQP Register Allocator"; 104218893Sdim } 105193323Sed 106218893Sdim /// PBQP analysis usage. 107276479Sdim void getAnalysisUsage(AnalysisUsage &au) const override; 108193323Sed 109218893Sdim /// Perform register allocation 110276479Sdim bool runOnMachineFunction(MachineFunction &MF) override; 111193323Sed 112218893Sdimprivate: 113212904Sdim 114218893Sdim typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; 115218893Sdim typedef std::vector<const LiveInterval*> Node2LIMap; 116218893Sdim typedef std::vector<unsigned> AllowedSet; 117218893Sdim typedef std::vector<AllowedSet> AllowedSetMap; 118218893Sdim typedef std::pair<unsigned, unsigned> RegPair; 119218893Sdim typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; 120218893Sdim typedef std::set<unsigned> RegSet; 121212904Sdim 122224145Sdim char *customPassID; 123224145Sdim 124280031Sdim RegSet VRegsToAlloc, EmptyIntervalVRegs; 125203954Srdivacky 126280031Sdim /// \brief Finds the initial set of vreg intervals to allocate. 127280031Sdim void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); 128193323Sed 129280031Sdim /// \brief Constructs an initial graph. 130288943Sdim void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); 131193323Sed 132288943Sdim /// \brief Spill the given VReg. 133288943Sdim void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, 134288943Sdim MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, 135288943Sdim Spiller &VRegSpiller); 136288943Sdim 137218893Sdim /// \brief Given a solved PBQP problem maps this solution back to a register 138218893Sdim /// assignment. 139280031Sdim bool mapPBQPToRegAlloc(const PBQPRAGraph &G, 140280031Sdim const PBQP::Solution &Solution, 141280031Sdim VirtRegMap &VRM, 142280031Sdim Spiller &VRegSpiller); 143193323Sed 144218893Sdim /// \brief Postprocessing before final spilling. Sets basic block "live in" 145218893Sdim /// variables. 146280031Sdim void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, 147280031Sdim VirtRegMap &VRM) const; 148193323Sed 149218893Sdim}; 150193323Sed 151218893Sdimchar RegAllocPBQP::ID = 0; 152193323Sed 153280031Sdim/// @brief Set spill costs for each node in the PBQP reg-alloc graph. 154280031Sdimclass SpillCosts : public PBQPRAConstraint { 155280031Sdimpublic: 156280031Sdim void apply(PBQPRAGraph &G) override { 157280031Sdim LiveIntervals &LIS = G.getMetadata().LIS; 158193323Sed 159280031Sdim // A minimum spill costs, so that register constraints can can be set 160280031Sdim // without normalization in the [0.0:MinSpillCost( interval. 161280031Sdim const PBQP::PBQPNum MinSpillCost = 10.0; 162193323Sed 163280031Sdim for (auto NId : G.nodeIds()) { 164280031Sdim PBQP::PBQPNum SpillCost = 165280031Sdim LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight; 166280031Sdim if (SpillCost == 0.0) 167280031Sdim SpillCost = std::numeric_limits<PBQP::PBQPNum>::min(); 168280031Sdim else 169280031Sdim SpillCost += MinSpillCost; 170280031Sdim PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId)); 171280031Sdim NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost; 172280031Sdim G.setNodeCosts(NId, std::move(NodeCosts)); 173280031Sdim } 174280031Sdim } 175280031Sdim}; 176234353Sdim 177280031Sdim/// @brief Add interference edges between overlapping vregs. 178280031Sdimclass Interference : public PBQPRAConstraint { 179280031Sdimprivate: 180193323Sed 181280031Sdim typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; 182288943Sdim typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey; 183288943Sdim typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache; 184288943Sdim typedef DenseSet<IKey> DisjointAllowedRegsCache; 185288943Sdim typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey; 186288943Sdim typedef DenseSet<IEdgeKey> IEdgeCache; 187193323Sed 188288943Sdim bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, 189288943Sdim PBQPRAGraph::NodeId MId, 190288943Sdim const DisjointAllowedRegsCache &D) const { 191288943Sdim const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); 192288943Sdim const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); 193288943Sdim 194288943Sdim if (NRegs == MRegs) 195288943Sdim return false; 196288943Sdim 197288943Sdim if (NRegs < MRegs) 198288943Sdim return D.count(IKey(NRegs, MRegs)) > 0; 199288943Sdim 200288943Sdim return D.count(IKey(MRegs, NRegs)) > 0; 201288943Sdim } 202288943Sdim 203288943Sdim void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, 204288943Sdim PBQPRAGraph::NodeId MId, 205288943Sdim DisjointAllowedRegsCache &D) { 206288943Sdim const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); 207288943Sdim const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); 208288943Sdim 209288943Sdim assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself"); 210288943Sdim 211288943Sdim if (NRegs < MRegs) 212288943Sdim D.insert(IKey(NRegs, MRegs)); 213288943Sdim else 214288943Sdim D.insert(IKey(MRegs, NRegs)); 215288943Sdim } 216288943Sdim 217280031Sdim // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required 218280031Sdim // for the fast interference graph construction algorithm. The last is there 219280031Sdim // to save us from looking up node ids via the VRegToNode map in the graph 220280031Sdim // metadata. 221280031Sdim typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId> 222280031Sdim IntervalInfo; 223193323Sed 224280031Sdim static SlotIndex getStartPoint(const IntervalInfo &I) { 225280031Sdim return std::get<0>(I)->segments[std::get<1>(I)].start; 226280031Sdim } 227193323Sed 228280031Sdim static SlotIndex getEndPoint(const IntervalInfo &I) { 229280031Sdim return std::get<0>(I)->segments[std::get<1>(I)].end; 230280031Sdim } 231193323Sed 232280031Sdim static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) { 233280031Sdim return std::get<2>(I); 234280031Sdim } 235193323Sed 236280031Sdim static bool lowestStartPoint(const IntervalInfo &I1, 237280031Sdim const IntervalInfo &I2) { 238280031Sdim // Condition reversed because priority queue has the *highest* element at 239280031Sdim // the front, rather than the lowest. 240280031Sdim return getStartPoint(I1) > getStartPoint(I2); 241218893Sdim } 242193323Sed 243280031Sdim static bool lowestEndPoint(const IntervalInfo &I1, 244280031Sdim const IntervalInfo &I2) { 245280031Sdim SlotIndex E1 = getEndPoint(I1); 246280031Sdim SlotIndex E2 = getEndPoint(I2); 247193323Sed 248280031Sdim if (E1 < E2) 249280031Sdim return true; 250239462Sdim 251280031Sdim if (E1 > E2) 252280031Sdim return false; 253193323Sed 254280031Sdim // If two intervals end at the same point, we need a way to break the tie or 255280031Sdim // the set will assume they're actually equal and refuse to insert a 256280031Sdim // "duplicate". Just compare the vregs - fast and guaranteed unique. 257280031Sdim return std::get<0>(I1)->reg < std::get<0>(I2)->reg; 258280031Sdim } 259193323Sed 260280031Sdim static bool isAtLastSegment(const IntervalInfo &I) { 261280031Sdim return std::get<1>(I) == std::get<0>(I)->size() - 1; 262280031Sdim } 263193323Sed 264280031Sdim static IntervalInfo nextSegment(const IntervalInfo &I) { 265280031Sdim return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I)); 266280031Sdim } 267234353Sdim 268280031Sdimpublic: 269276479Sdim 270280031Sdim void apply(PBQPRAGraph &G) override { 271280031Sdim // The following is loosely based on the linear scan algorithm introduced in 272280031Sdim // "Linear Scan Register Allocation" by Poletto and Sarkar. This version 273280031Sdim // isn't linear, because the size of the active set isn't bound by the 274280031Sdim // number of registers, but rather the size of the largest clique in the 275280031Sdim // graph. Still, we expect this to be better than N^2. 276280031Sdim LiveIntervals &LIS = G.getMetadata().LIS; 277276479Sdim 278280031Sdim // Interferenc matrices are incredibly regular - they're only a function of 279280031Sdim // the allowed sets, so we cache them to avoid the overhead of constructing 280280031Sdim // and uniquing them. 281280031Sdim IMatrixCache C; 282276479Sdim 283288943Sdim // Finding an edge is expensive in the worst case (O(max_clique(G))). So 284288943Sdim // cache locally edges we have already seen. 285288943Sdim IEdgeCache EC; 286288943Sdim 287288943Sdim // Cache known disjoint allowed registers pairs 288288943Sdim DisjointAllowedRegsCache D; 289288943Sdim 290280031Sdim typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet; 291280031Sdim typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>, 292280031Sdim decltype(&lowestStartPoint)> IntervalQueue; 293280031Sdim IntervalSet Active(lowestEndPoint); 294280031Sdim IntervalQueue Inactive(lowestStartPoint); 295193323Sed 296280031Sdim // Start by building the inactive set. 297280031Sdim for (auto NId : G.nodeIds()) { 298280031Sdim unsigned VReg = G.getNodeMetadata(NId).getVReg(); 299280031Sdim LiveInterval &LI = LIS.getInterval(VReg); 300280031Sdim assert(!LI.empty() && "PBQP graph contains node for empty interval"); 301280031Sdim Inactive.push(std::make_tuple(&LI, 0, NId)); 302280031Sdim } 303193323Sed 304280031Sdim while (!Inactive.empty()) { 305280031Sdim // Tentatively grab the "next" interval - this choice may be overriden 306280031Sdim // below. 307280031Sdim IntervalInfo Cur = Inactive.top(); 308193323Sed 309280031Sdim // Retire any active intervals that end before Cur starts. 310280031Sdim IntervalSet::iterator RetireItr = Active.begin(); 311280031Sdim while (RetireItr != Active.end() && 312280031Sdim (getEndPoint(*RetireItr) <= getStartPoint(Cur))) { 313280031Sdim // If this interval has subsequent segments, add the next one to the 314280031Sdim // inactive list. 315280031Sdim if (!isAtLastSegment(*RetireItr)) 316280031Sdim Inactive.push(nextSegment(*RetireItr)); 317193323Sed 318280031Sdim ++RetireItr; 319280031Sdim } 320280031Sdim Active.erase(Active.begin(), RetireItr); 321193323Sed 322280031Sdim // One of the newly retired segments may actually start before the 323280031Sdim // Cur segment, so re-grab the front of the inactive list. 324280031Sdim Cur = Inactive.top(); 325280031Sdim Inactive.pop(); 326218893Sdim 327280031Sdim // At this point we know that Cur overlaps all active intervals. Add the 328280031Sdim // interference edges. 329280031Sdim PBQP::GraphBase::NodeId NId = getNodeId(Cur); 330280031Sdim for (const auto &A : Active) { 331280031Sdim PBQP::GraphBase::NodeId MId = getNodeId(A); 332280031Sdim 333288943Sdim // Do not add an edge when the nodes' allowed registers do not 334288943Sdim // intersect: there is obviously no interference. 335288943Sdim if (haveDisjointAllowedRegs(G, NId, MId, D)) 336288943Sdim continue; 337288943Sdim 338280031Sdim // Check that we haven't already added this edge 339288943Sdim IEdgeKey EK(std::min(NId, MId), std::max(NId, MId)); 340288943Sdim if (EC.count(EK)) 341280031Sdim continue; 342280031Sdim 343280031Sdim // This is a new edge - add it to the graph. 344288943Sdim if (!createInterferenceEdge(G, NId, MId, C)) 345288943Sdim setDisjointAllowedRegs(G, NId, MId, D); 346288943Sdim else 347288943Sdim EC.insert(EK); 348193323Sed } 349280031Sdim 350280031Sdim // Finally, add Cur to the Active set. 351280031Sdim Active.insert(Cur); 352193323Sed } 353193323Sed } 354193323Sed 355280031Sdimprivate: 356193323Sed 357288943Sdim // Create an Interference edge and add it to the graph, unless it is 358288943Sdim // a null matrix, meaning the nodes' allowed registers do not have any 359288943Sdim // interference. This case occurs frequently between integer and floating 360288943Sdim // point registers for example. 361288943Sdim // return true iff both nodes interferes. 362288943Sdim bool createInterferenceEdge(PBQPRAGraph &G, 363288943Sdim PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, 364288943Sdim IMatrixCache &C) { 365193323Sed 366280031Sdim const TargetRegisterInfo &TRI = 367288943Sdim *G.getMetadata().MF.getSubtarget().getRegisterInfo(); 368280031Sdim const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); 369280031Sdim const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); 370193323Sed 371280031Sdim // Try looking the edge costs up in the IMatrixCache first. 372288943Sdim IKey K(&NRegs, &MRegs); 373280031Sdim IMatrixCache::iterator I = C.find(K); 374280031Sdim if (I != C.end()) { 375280031Sdim G.addEdgeBypassingCostAllocator(NId, MId, I->second); 376288943Sdim return true; 377280031Sdim } 378193323Sed 379280031Sdim PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); 380288943Sdim bool NodesInterfere = false; 381280031Sdim for (unsigned I = 0; I != NRegs.size(); ++I) { 382280031Sdim unsigned PRegN = NRegs[I]; 383280031Sdim for (unsigned J = 0; J != MRegs.size(); ++J) { 384280031Sdim unsigned PRegM = MRegs[J]; 385288943Sdim if (TRI.regsOverlap(PRegN, PRegM)) { 386280031Sdim M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); 387288943Sdim NodesInterfere = true; 388288943Sdim } 389193323Sed } 390193323Sed } 391280031Sdim 392288943Sdim if (!NodesInterfere) 393288943Sdim return false; 394288943Sdim 395280031Sdim PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M)); 396280031Sdim C[K] = G.getEdgeCostsPtr(EId); 397288943Sdim 398288943Sdim return true; 399193323Sed } 400280031Sdim}; 401193323Sed 402193323Sed 403280031Sdimclass Coalescing : public PBQPRAConstraint { 404280031Sdimpublic: 405280031Sdim void apply(PBQPRAGraph &G) override { 406280031Sdim MachineFunction &MF = G.getMetadata().MF; 407280031Sdim MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI; 408288943Sdim CoalescerPair CP(*MF.getSubtarget().getRegisterInfo()); 409193323Sed 410280031Sdim // Scan the machine function and add a coalescing cost whenever CoalescerPair 411280031Sdim // gives the Ok. 412280031Sdim for (const auto &MBB : MF) { 413280031Sdim for (const auto &MI : MBB) { 414193323Sed 415280031Sdim // Skip not-coalescable or already coalesced copies. 416280031Sdim if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg()) 417280031Sdim continue; 418193323Sed 419280031Sdim unsigned DstReg = CP.getDstReg(); 420280031Sdim unsigned SrcReg = CP.getSrcReg(); 421193323Sed 422280031Sdim const float Scale = 1.0f / MBFI.getEntryFreq(); 423280031Sdim PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale; 424193323Sed 425280031Sdim if (CP.isPhys()) { 426280031Sdim if (!MF.getRegInfo().isAllocatable(DstReg)) 427280031Sdim continue; 428234353Sdim 429280031Sdim PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg); 430212904Sdim 431280031Sdim const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed = 432280031Sdim G.getNodeMetadata(NId).getAllowedRegs(); 433193323Sed 434280031Sdim unsigned PRegOpt = 0; 435280031Sdim while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) 436280031Sdim ++PRegOpt; 437280031Sdim 438280031Sdim if (PRegOpt < Allowed.size()) { 439280031Sdim PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId)); 440280031Sdim NewCosts[PRegOpt + 1] -= CBenefit; 441280031Sdim G.setNodeCosts(NId, std::move(NewCosts)); 442280031Sdim } 443218893Sdim } else { 444280031Sdim PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg); 445280031Sdim PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg); 446280031Sdim const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 = 447280031Sdim &G.getNodeMetadata(N1Id).getAllowedRegs(); 448280031Sdim const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 = 449280031Sdim &G.getNodeMetadata(N2Id).getAllowedRegs(); 450280031Sdim 451280031Sdim PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id); 452280031Sdim if (EId == G.invalidEdgeId()) { 453280031Sdim PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1, 454280031Sdim Allowed2->size() + 1, 0); 455280031Sdim addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); 456280031Sdim G.addEdge(N1Id, N2Id, std::move(Costs)); 457280031Sdim } else { 458280031Sdim if (G.getEdgeNode1Id(EId) == N2Id) { 459280031Sdim std::swap(N1Id, N2Id); 460280031Sdim std::swap(Allowed1, Allowed2); 461280031Sdim } 462280031Sdim PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); 463280031Sdim addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); 464288943Sdim G.updateEdgeCosts(EId, std::move(Costs)); 465203954Srdivacky } 466193323Sed } 467218893Sdim } 468218893Sdim } 469218893Sdim } 470193323Sed 471280031Sdimprivate: 472193323Sed 473280031Sdim void addVirtRegCoalesce( 474280031Sdim PBQPRAGraph::RawMatrix &CostMat, 475280031Sdim const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1, 476280031Sdim const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2, 477280031Sdim PBQP::PBQPNum Benefit) { 478280031Sdim assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); 479280031Sdim assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); 480280031Sdim for (unsigned I = 0; I != Allowed1.size(); ++I) { 481280031Sdim unsigned PReg1 = Allowed1[I]; 482280031Sdim for (unsigned J = 0; J != Allowed2.size(); ++J) { 483280031Sdim unsigned PReg2 = Allowed2[J]; 484280031Sdim if (PReg1 == PReg2) 485280031Sdim CostMat[I + 1][J + 1] -= Benefit; 486234353Sdim } 487193323Sed } 488193323Sed } 489193323Sed 490280031Sdim}; 491218893Sdim 492280031Sdim} // End anonymous namespace. 493280031Sdim 494280031Sdim// Out-of-line destructor/anchor for PBQPRAConstraint. 495280031SdimPBQPRAConstraint::~PBQPRAConstraint() {} 496280031Sdimvoid PBQPRAConstraint::anchor() {} 497280031Sdimvoid PBQPRAConstraintList::anchor() {} 498280031Sdim 499218893Sdimvoid RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { 500234353Sdim au.setPreservesCFG(); 501296417Sdim au.addRequired<AAResultsWrapperPass>(); 502296417Sdim au.addPreserved<AAResultsWrapperPass>(); 503218893Sdim au.addRequired<SlotIndexes>(); 504218893Sdim au.addPreserved<SlotIndexes>(); 505218893Sdim au.addRequired<LiveIntervals>(); 506243830Sdim au.addPreserved<LiveIntervals>(); 507218893Sdim //au.addRequiredID(SplitCriticalEdgesID); 508224145Sdim if (customPassID) 509224145Sdim au.addRequiredID(*customPassID); 510218893Sdim au.addRequired<LiveStacks>(); 511218893Sdim au.addPreserved<LiveStacks>(); 512261991Sdim au.addRequired<MachineBlockFrequencyInfo>(); 513261991Sdim au.addPreserved<MachineBlockFrequencyInfo>(); 514261991Sdim au.addRequired<MachineLoopInfo>(); 515261991Sdim au.addPreserved<MachineLoopInfo>(); 516234353Sdim au.addRequired<MachineDominatorTree>(); 517234353Sdim au.addPreserved<MachineDominatorTree>(); 518218893Sdim au.addRequired<VirtRegMap>(); 519243830Sdim au.addPreserved<VirtRegMap>(); 520218893Sdim MachineFunctionPass::getAnalysisUsage(au); 521193323Sed} 522193323Sed 523280031Sdimvoid RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, 524280031Sdim LiveIntervals &LIS) { 525280031Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 526193323Sed 527193323Sed // Iterate over all live ranges. 528280031Sdim for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { 529280031Sdim unsigned Reg = TargetRegisterInfo::index2VirtReg(I); 530280031Sdim if (MRI.reg_nodbg_empty(Reg)) 531193323Sed continue; 532280031Sdim LiveInterval &LI = LIS.getInterval(Reg); 533193323Sed 534193323Sed // If this live interval is non-empty we will use pbqp to allocate it. 535193323Sed // Empty intervals we allocate in a simple post-processing stage in 536193323Sed // finalizeAlloc. 537280031Sdim if (!LI.empty()) { 538280031Sdim VRegsToAlloc.insert(LI.reg); 539218893Sdim } else { 540280031Sdim EmptyIntervalVRegs.insert(LI.reg); 541193323Sed } 542193323Sed } 543193323Sed} 544193323Sed 545280031Sdimstatic bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, 546280031Sdim const MachineFunction &MF) { 547280031Sdim const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); 548280031Sdim for (unsigned i = 0; CSR[i] != 0; ++i) 549280031Sdim if (TRI.regsOverlap(reg, CSR[i])) 550280031Sdim return true; 551280031Sdim return false; 552280031Sdim} 553280031Sdim 554288943Sdimvoid RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, 555288943Sdim Spiller &VRegSpiller) { 556280031Sdim MachineFunction &MF = G.getMetadata().MF; 557280031Sdim 558280031Sdim LiveIntervals &LIS = G.getMetadata().LIS; 559280031Sdim const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); 560280031Sdim const TargetRegisterInfo &TRI = 561288943Sdim *G.getMetadata().MF.getSubtarget().getRegisterInfo(); 562280031Sdim 563288943Sdim std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); 564288943Sdim 565288943Sdim while (!Worklist.empty()) { 566288943Sdim unsigned VReg = Worklist.back(); 567288943Sdim Worklist.pop_back(); 568288943Sdim 569280031Sdim const TargetRegisterClass *TRC = MRI.getRegClass(VReg); 570280031Sdim LiveInterval &VRegLI = LIS.getInterval(VReg); 571280031Sdim 572280031Sdim // Record any overlaps with regmask operands. 573280031Sdim BitVector RegMaskOverlaps; 574280031Sdim LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); 575280031Sdim 576280031Sdim // Compute an initial allowed set for the current vreg. 577280031Sdim std::vector<unsigned> VRegAllowed; 578280031Sdim ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF); 579280031Sdim for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { 580280031Sdim unsigned PReg = RawPRegOrder[I]; 581280031Sdim if (MRI.isReserved(PReg)) 582280031Sdim continue; 583280031Sdim 584280031Sdim // vregLI crosses a regmask operand that clobbers preg. 585280031Sdim if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg)) 586280031Sdim continue; 587280031Sdim 588280031Sdim // vregLI overlaps fixed regunit interference. 589280031Sdim bool Interference = false; 590280031Sdim for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) { 591280031Sdim if (VRegLI.overlaps(LIS.getRegUnit(*Units))) { 592280031Sdim Interference = true; 593280031Sdim break; 594280031Sdim } 595280031Sdim } 596280031Sdim if (Interference) 597280031Sdim continue; 598280031Sdim 599280031Sdim // preg is usable for this virtual register. 600280031Sdim VRegAllowed.push_back(PReg); 601280031Sdim } 602280031Sdim 603288943Sdim // Check for vregs that have no allowed registers. These should be 604288943Sdim // pre-spilled and the new vregs added to the worklist. 605288943Sdim if (VRegAllowed.empty()) { 606288943Sdim SmallVector<unsigned, 8> NewVRegs; 607288943Sdim spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); 608288943Sdim Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); 609288943Sdim continue; 610288943Sdim } 611288943Sdim 612280031Sdim PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); 613280031Sdim 614280031Sdim // Tweak cost of callee saved registers, as using then force spilling and 615280031Sdim // restoring them. This would only happen in the prologue / epilogue though. 616280031Sdim for (unsigned i = 0; i != VRegAllowed.size(); ++i) 617280031Sdim if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF)) 618280031Sdim NodeCosts[1 + i] += 1.0; 619280031Sdim 620280031Sdim PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts)); 621280031Sdim G.getNodeMetadata(NId).setVReg(VReg); 622280031Sdim G.getNodeMetadata(NId).setAllowedRegs( 623280031Sdim G.getMetadata().getAllowedRegs(std::move(VRegAllowed))); 624280031Sdim G.getMetadata().setNodeIdForVReg(VReg, NId); 625280031Sdim } 626280031Sdim} 627280031Sdim 628288943Sdimvoid RegAllocPBQP::spillVReg(unsigned VReg, 629288943Sdim SmallVectorImpl<unsigned> &NewIntervals, 630288943Sdim MachineFunction &MF, LiveIntervals &LIS, 631288943Sdim VirtRegMap &VRM, Spiller &VRegSpiller) { 632288943Sdim 633288943Sdim VRegsToAlloc.erase(VReg); 634288943Sdim LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM); 635288943Sdim VRegSpiller.spill(LRE); 636288943Sdim 637288943Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 638288943Sdim (void)TRI; 639288943Sdim DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " 640288943Sdim << LRE.getParent().weight << ", New vregs: "); 641288943Sdim 642288943Sdim // Copy any newly inserted live intervals into the list of regs to 643288943Sdim // allocate. 644288943Sdim for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); 645288943Sdim I != E; ++I) { 646288943Sdim const LiveInterval &LI = LIS.getInterval(*I); 647288943Sdim assert(!LI.empty() && "Empty spill range."); 648288943Sdim DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); 649288943Sdim VRegsToAlloc.insert(LI.reg); 650288943Sdim } 651288943Sdim 652288943Sdim DEBUG(dbgs() << ")\n"); 653288943Sdim} 654288943Sdim 655280031Sdimbool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, 656280031Sdim const PBQP::Solution &Solution, 657280031Sdim VirtRegMap &VRM, 658280031Sdim Spiller &VRegSpiller) { 659280031Sdim MachineFunction &MF = G.getMetadata().MF; 660280031Sdim LiveIntervals &LIS = G.getMetadata().LIS; 661288943Sdim const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 662280031Sdim (void)TRI; 663280031Sdim 664193323Sed // Set to true if we have any spills 665280031Sdim bool AnotherRoundNeeded = false; 666193323Sed 667193323Sed // Clear the existing allocation. 668280031Sdim VRM.clearAllVirt(); 669193323Sed 670218893Sdim // Iterate over the nodes mapping the PBQP solution to a register 671218893Sdim // assignment. 672280031Sdim for (auto NId : G.nodeIds()) { 673280031Sdim unsigned VReg = G.getNodeMetadata(NId).getVReg(); 674280031Sdim unsigned AllocOption = Solution.getSelection(NId); 675193323Sed 676280031Sdim if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { 677280031Sdim unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; 678280031Sdim DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> " 679280031Sdim << TRI.getName(PReg) << "\n"); 680280031Sdim assert(PReg != 0 && "Invalid preg selected."); 681280031Sdim VRM.assignVirt2Phys(VReg, PReg); 682280031Sdim } else { 683288943Sdim // Spill VReg. If this introduces new intervals we'll need another round 684288943Sdim // of allocation. 685288943Sdim SmallVector<unsigned, 8> NewVRegs; 686288943Sdim spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); 687288943Sdim AnotherRoundNeeded |= !NewVRegs.empty(); 688193323Sed } 689193323Sed } 690193323Sed 691280031Sdim return !AnotherRoundNeeded; 692193323Sed} 693193323Sed 694280031Sdimvoid RegAllocPBQP::finalizeAlloc(MachineFunction &MF, 695280031Sdim LiveIntervals &LIS, 696280031Sdim VirtRegMap &VRM) const { 697280031Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 698218893Sdim 699193323Sed // First allocate registers for the empty intervals. 700218893Sdim for (RegSet::const_iterator 701280031Sdim I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); 702280031Sdim I != E; ++I) { 703280031Sdim LiveInterval &LI = LIS.getInterval(*I); 704193323Sed 705280031Sdim unsigned PReg = MRI.getSimpleHint(LI.reg); 706198090Srdivacky 707280031Sdim if (PReg == 0) { 708280031Sdim const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); 709280031Sdim PReg = RC.getRawAllocationOrder(MF).front(); 710193323Sed } 711193323Sed 712280031Sdim VRM.assignVirt2Phys(LI.reg, PReg); 713193323Sed } 714193323Sed} 715193323Sed 716280031Sdimstatic inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, 717280031Sdim unsigned NumInstr) { 718280031Sdim // All intervals have a spill weight that is mostly proportional to the number 719280031Sdim // of uses, with uses in loops having a bigger weight. 720280031Sdim return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); 721280031Sdim} 722280031Sdim 723218893Sdimbool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { 724280031Sdim LiveIntervals &LIS = getAnalysis<LiveIntervals>(); 725280031Sdim MachineBlockFrequencyInfo &MBFI = 726280031Sdim getAnalysis<MachineBlockFrequencyInfo>(); 727193323Sed 728280031Sdim VirtRegMap &VRM = getAnalysis<VirtRegMap>(); 729193323Sed 730296417Sdim calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(), 731296417Sdim MBFI, normalizePBQPSpillWeight); 732296417Sdim 733280031Sdim std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); 734261991Sdim 735280031Sdim MF.getRegInfo().freezeReservedRegs(MF); 736193323Sed 737280031Sdim DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); 738212904Sdim 739193323Sed // Allocator main loop: 740193323Sed // 741193323Sed // * Map current regalloc problem to a PBQP problem 742193323Sed // * Solve the PBQP problem 743193323Sed // * Map the solution back to a register allocation 744193323Sed // * Spill if necessary 745193323Sed // 746193323Sed // This process is continued till no more spills are generated. 747193323Sed 748193323Sed // Find the vreg intervals in need of allocation. 749280031Sdim findVRegIntervalsToAlloc(MF, LIS); 750193323Sed 751243830Sdim#ifndef NDEBUG 752280031Sdim const Function &F = *MF.getFunction(); 753280031Sdim std::string FullyQualifiedName = 754280031Sdim F.getParent()->getModuleIdentifier() + "." + F.getName().str(); 755243830Sdim#endif 756234353Sdim 757193323Sed // If there are non-empty intervals allocate them using pbqp. 758280031Sdim if (!VRegsToAlloc.empty()) { 759193323Sed 760288943Sdim const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); 761280031Sdim std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = 762280031Sdim llvm::make_unique<PBQPRAConstraintList>(); 763280031Sdim ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); 764280031Sdim ConstraintsRoot->addConstraint(llvm::make_unique<Interference>()); 765280031Sdim if (PBQPCoalescing) 766280031Sdim ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>()); 767280031Sdim ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); 768193323Sed 769280031Sdim bool PBQPAllocComplete = false; 770280031Sdim unsigned Round = 0; 771193323Sed 772280031Sdim while (!PBQPAllocComplete) { 773280031Sdim DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); 774234353Sdim 775280031Sdim PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); 776288943Sdim initializeGraph(G, VRM, *VRegSpiller); 777280031Sdim ConstraintsRoot->apply(G); 778280031Sdim 779234353Sdim#ifndef NDEBUG 780280031Sdim if (PBQPDumpGraphs) { 781280031Sdim std::ostringstream RS; 782280031Sdim RS << Round; 783280031Sdim std::string GraphFileName = FullyQualifiedName + "." + RS.str() + 784280031Sdim ".pbqpgraph"; 785280031Sdim std::error_code EC; 786280031Sdim raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); 787280031Sdim DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" 788280031Sdim << GraphFileName << "\"\n"); 789288943Sdim G.dump(OS); 790234353Sdim } 791234353Sdim#endif 792234353Sdim 793280031Sdim PBQP::Solution Solution = PBQP::RegAlloc::solve(G); 794280031Sdim PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); 795280031Sdim ++Round; 796193323Sed } 797193323Sed } 798193323Sed 799193323Sed // Finalise allocation, allocate empty ranges. 800280031Sdim finalizeAlloc(MF, LIS, VRM); 801280031Sdim VRegsToAlloc.clear(); 802280031Sdim EmptyIntervalVRegs.clear(); 803193323Sed 804280031Sdim DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); 805193323Sed 806193323Sed return true; 807193323Sed} 808193323Sed 809296417Sdim/// Create Printable object for node and register info. 810296417Sdimstatic Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, 811296417Sdim const PBQP::RegAlloc::PBQPRAGraph &G) { 812296417Sdim return Printable([NId, &G](raw_ostream &OS) { 813288943Sdim const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); 814288943Sdim const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 815288943Sdim unsigned VReg = G.getNodeMetadata(NId).getVReg(); 816288943Sdim const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); 817288943Sdim OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; 818296417Sdim }); 819288943Sdim} 820288943Sdim 821288943Sdimvoid PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { 822288943Sdim for (auto NId : nodeIds()) { 823288943Sdim const Vector &Costs = getNodeCosts(NId); 824288943Sdim assert(Costs.getLength() != 0 && "Empty vector in graph."); 825288943Sdim OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; 826288943Sdim } 827288943Sdim OS << '\n'; 828288943Sdim 829288943Sdim for (auto EId : edgeIds()) { 830288943Sdim NodeId N1Id = getEdgeNode1Id(EId); 831288943Sdim NodeId N2Id = getEdgeNode2Id(EId); 832288943Sdim assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); 833288943Sdim const Matrix &M = getEdgeCosts(EId); 834288943Sdim assert(M.getRows() != 0 && "No rows in matrix."); 835288943Sdim assert(M.getCols() != 0 && "No cols in matrix."); 836288943Sdim OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; 837288943Sdim OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; 838288943Sdim OS << M << '\n'; 839288943Sdim } 840288943Sdim} 841288943Sdim 842288943Sdimvoid PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } 843288943Sdim 844288943Sdimvoid PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { 845288943Sdim OS << "graph {\n"; 846288943Sdim for (auto NId : nodeIds()) { 847288943Sdim OS << " node" << NId << " [ label=\"" 848288943Sdim << PrintNodeInfo(NId, *this) << "\\n" 849288943Sdim << getNodeCosts(NId) << "\" ]\n"; 850288943Sdim } 851288943Sdim 852288943Sdim OS << " edge [ len=" << nodeIds().size() << " ]\n"; 853288943Sdim for (auto EId : edgeIds()) { 854288943Sdim OS << " node" << getEdgeNode1Id(EId) 855288943Sdim << " -- node" << getEdgeNode2Id(EId) 856288943Sdim << " [ label=\""; 857288943Sdim const Matrix &EdgeCosts = getEdgeCosts(EId); 858288943Sdim for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { 859288943Sdim OS << EdgeCosts.getRowAsVector(i) << "\\n"; 860288943Sdim } 861288943Sdim OS << "\" ]\n"; 862288943Sdim } 863288943Sdim OS << "}\n"; 864288943Sdim} 865288943Sdim 866280031SdimFunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { 867280031Sdim return new RegAllocPBQP(customPassID); 868193323Sed} 869193323Sed 870218893SdimFunctionPass* llvm::createDefaultPBQPRegisterAllocator() { 871280031Sdim return createPBQPRegisterAllocator(); 872218893Sdim} 873193323Sed 874193323Sed#undef DEBUG_TYPE 875