HexagonGenInsert.cpp revision 296417
1286425Sdim//===--- HexagonGenInsert.cpp ---------------------------------------------===// 2286425Sdim// 3286425Sdim// The LLVM Compiler Infrastructure 4286425Sdim// 5286425Sdim// This file is distributed under the University of Illinois Open Source 6286425Sdim// License. See LICENSE.TXT for details. 7286425Sdim// 8286425Sdim//===----------------------------------------------------------------------===// 9286425Sdim 10286425Sdim#define DEBUG_TYPE "hexinsert" 11286425Sdim 12286425Sdim#include "llvm/Pass.h" 13286425Sdim#include "llvm/PassRegistry.h" 14286425Sdim#include "llvm/ADT/BitVector.h" 15286425Sdim#include "llvm/ADT/DenseMap.h" 16286425Sdim#include "llvm/ADT/DenseSet.h" 17286425Sdim#include "llvm/ADT/PostOrderIterator.h" 18286425Sdim#include "llvm/CodeGen/MachineDominators.h" 19286425Sdim#include "llvm/CodeGen/MachineFunction.h" 20286425Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 21286425Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 22286425Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 23286425Sdim#include "llvm/IR/Constants.h" 24286425Sdim#include "llvm/Support/CommandLine.h" 25286425Sdim#include "llvm/Support/Debug.h" 26286425Sdim#include "llvm/Support/raw_ostream.h" 27286425Sdim#include "llvm/Support/Timer.h" 28286425Sdim#include "llvm/Target/TargetMachine.h" 29286425Sdim#include "llvm/Target/TargetRegisterInfo.h" 30286425Sdim 31286425Sdim#include "Hexagon.h" 32286425Sdim#include "HexagonRegisterInfo.h" 33286425Sdim#include "HexagonTargetMachine.h" 34286425Sdim#include "HexagonBitTracker.h" 35286425Sdim 36286425Sdim#include <map> 37286425Sdim#include <vector> 38286425Sdim 39286425Sdimusing namespace llvm; 40286425Sdim 41286425Sdimstatic cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), 42286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); 43286425Sdim// The distance cutoff is selected based on the precheckin-perf results: 44286425Sdim// cutoffs 20, 25, 35, and 40 are worse than 30. 45286425Sdimstatic cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), 46286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " 47286425Sdim "generation.")); 48286425Sdim 49286425Sdimstatic cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, 50286425Sdim cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); 51286425Sdimstatic cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), 52286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " 53286425Sdim "generation")); 54286425Sdim 55286425Sdimstatic cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, 56286425Sdim cl::ZeroOrMore); 57286425Sdimstatic cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, 58286425Sdim cl::ZeroOrMore); 59286425Sdim// Whether to construct constant values via "insert". Could eliminate constant 60286425Sdim// extenders, but often not practical. 61286425Sdimstatic cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, 62286425Sdim cl::ZeroOrMore); 63286425Sdim 64286425Sdimnamespace { 65286425Sdim // The preprocessor gets confused when the DEBUG macro is passed larger 66286425Sdim // chunks of code. Use this function to detect debugging. 67286425Sdim inline bool isDebug() { 68286425Sdim#ifndef NDEBUG 69286425Sdim return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); 70286425Sdim#else 71286425Sdim return false; 72286425Sdim#endif 73286425Sdim } 74286425Sdim} 75286425Sdim 76286425Sdim 77286425Sdimnamespace { 78286425Sdim // Set of virtual registers, based on BitVector. 79286425Sdim struct RegisterSet : private BitVector { 80296417Sdim RegisterSet() = default; 81286425Sdim explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} 82286425Sdim 83286425Sdim using BitVector::clear; 84286425Sdim 85286425Sdim unsigned find_first() const { 86286425Sdim int First = BitVector::find_first(); 87286425Sdim if (First < 0) 88286425Sdim return 0; 89286425Sdim return x2v(First); 90286425Sdim } 91286425Sdim 92286425Sdim unsigned find_next(unsigned Prev) const { 93286425Sdim int Next = BitVector::find_next(v2x(Prev)); 94286425Sdim if (Next < 0) 95286425Sdim return 0; 96286425Sdim return x2v(Next); 97286425Sdim } 98286425Sdim 99286425Sdim RegisterSet &insert(unsigned R) { 100286425Sdim unsigned Idx = v2x(R); 101286425Sdim ensure(Idx); 102286425Sdim return static_cast<RegisterSet&>(BitVector::set(Idx)); 103286425Sdim } 104286425Sdim RegisterSet &remove(unsigned R) { 105286425Sdim unsigned Idx = v2x(R); 106286425Sdim if (Idx >= size()) 107286425Sdim return *this; 108286425Sdim return static_cast<RegisterSet&>(BitVector::reset(Idx)); 109286425Sdim } 110286425Sdim 111286425Sdim RegisterSet &insert(const RegisterSet &Rs) { 112286425Sdim return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); 113286425Sdim } 114286425Sdim RegisterSet &remove(const RegisterSet &Rs) { 115286425Sdim return static_cast<RegisterSet&>(BitVector::reset(Rs)); 116286425Sdim } 117286425Sdim 118286425Sdim reference operator[](unsigned R) { 119286425Sdim unsigned Idx = v2x(R); 120286425Sdim ensure(Idx); 121286425Sdim return BitVector::operator[](Idx); 122286425Sdim } 123286425Sdim bool operator[](unsigned R) const { 124286425Sdim unsigned Idx = v2x(R); 125286425Sdim assert(Idx < size()); 126286425Sdim return BitVector::operator[](Idx); 127286425Sdim } 128286425Sdim bool has(unsigned R) const { 129286425Sdim unsigned Idx = v2x(R); 130286425Sdim if (Idx >= size()) 131286425Sdim return false; 132286425Sdim return BitVector::test(Idx); 133286425Sdim } 134286425Sdim 135286425Sdim bool empty() const { 136286425Sdim return !BitVector::any(); 137286425Sdim } 138286425Sdim bool includes(const RegisterSet &Rs) const { 139286425Sdim // A.BitVector::test(B) <=> A-B != {} 140286425Sdim return !Rs.BitVector::test(*this); 141286425Sdim } 142286425Sdim bool intersects(const RegisterSet &Rs) const { 143286425Sdim return BitVector::anyCommon(Rs); 144286425Sdim } 145286425Sdim 146286425Sdim private: 147286425Sdim void ensure(unsigned Idx) { 148286425Sdim if (size() <= Idx) 149286425Sdim resize(std::max(Idx+1, 32U)); 150286425Sdim } 151286425Sdim static inline unsigned v2x(unsigned v) { 152286425Sdim return TargetRegisterInfo::virtReg2Index(v); 153286425Sdim } 154286425Sdim static inline unsigned x2v(unsigned x) { 155286425Sdim return TargetRegisterInfo::index2VirtReg(x); 156286425Sdim } 157286425Sdim }; 158286425Sdim 159286425Sdim 160286425Sdim struct PrintRegSet { 161286425Sdim PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 162286425Sdim : RS(S), TRI(RI) {} 163286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, 164286425Sdim const PrintRegSet &P); 165286425Sdim private: 166286425Sdim const RegisterSet &RS; 167286425Sdim const TargetRegisterInfo *TRI; 168286425Sdim }; 169286425Sdim 170286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 171286425Sdim OS << '{'; 172286425Sdim for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 173286425Sdim OS << ' ' << PrintReg(R, P.TRI); 174286425Sdim OS << " }"; 175286425Sdim return OS; 176286425Sdim } 177286425Sdim} 178286425Sdim 179286425Sdim 180286425Sdimnamespace { 181286425Sdim // A convenience class to associate unsigned numbers (such as virtual 182286425Sdim // registers) with unsigned numbers. 183286425Sdim struct UnsignedMap : public DenseMap<unsigned,unsigned> { 184286425Sdim UnsignedMap() : BaseType() {} 185286425Sdim private: 186286425Sdim typedef DenseMap<unsigned,unsigned> BaseType; 187286425Sdim }; 188286425Sdim 189286425Sdim // A utility to establish an ordering between virtual registers: 190286425Sdim // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] 191286425Sdim // This is meant as a cache for the ordering of virtual registers defined 192286425Sdim // by a potentially expensive comparison function, or obtained by a proce- 193286425Sdim // dure that should not be repeated each time two registers are compared. 194286425Sdim struct RegisterOrdering : public UnsignedMap { 195286425Sdim RegisterOrdering() : UnsignedMap() {} 196286425Sdim unsigned operator[](unsigned VR) const { 197286425Sdim const_iterator F = find(VR); 198286425Sdim assert(F != end()); 199286425Sdim return F->second; 200286425Sdim } 201286425Sdim // Add operator(), so that objects of this class can be used as 202286425Sdim // comparators in std::sort et al. 203286425Sdim bool operator() (unsigned VR1, unsigned VR2) const { 204286425Sdim return operator[](VR1) < operator[](VR2); 205286425Sdim } 206286425Sdim }; 207286425Sdim} 208286425Sdim 209286425Sdim 210286425Sdimnamespace { 211286425Sdim // Ordering of bit values. This class does not have operator[], but 212286425Sdim // is supplies a comparison operator() for use in std:: algorithms. 213286425Sdim // The order is as follows: 214286425Sdim // - 0 < 1 < ref 215286425Sdim // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), 216286425Sdim // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. 217286425Sdim struct BitValueOrdering { 218286425Sdim BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} 219286425Sdim bool operator() (const BitTracker::BitValue &V1, 220286425Sdim const BitTracker::BitValue &V2) const; 221286425Sdim const RegisterOrdering &BaseOrd; 222286425Sdim }; 223286425Sdim} 224286425Sdim 225286425Sdim 226286425Sdimbool BitValueOrdering::operator() (const BitTracker::BitValue &V1, 227286425Sdim const BitTracker::BitValue &V2) const { 228286425Sdim if (V1 == V2) 229286425Sdim return false; 230286425Sdim // V1==0 => true, V2==0 => false 231286425Sdim if (V1.is(0) || V2.is(0)) 232286425Sdim return V1.is(0); 233286425Sdim // Neither of V1,V2 is 0, and V1!=V2. 234286425Sdim // V2==1 => false, V1==1 => true 235286425Sdim if (V2.is(1) || V1.is(1)) 236286425Sdim return !V2.is(1); 237286425Sdim // Both V1,V2 are refs. 238286425Sdim unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; 239286425Sdim if (Ind1 != Ind2) 240286425Sdim return Ind1 < Ind2; 241286425Sdim // If V1.Pos==V2.Pos 242286425Sdim assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); 243286425Sdim return V1.RefI.Pos < V2.RefI.Pos; 244286425Sdim} 245286425Sdim 246286425Sdim 247286425Sdimnamespace { 248286425Sdim // Cache for the BitTracker's cell map. Map lookup has a logarithmic 249286425Sdim // complexity, this class will memoize the lookup results to reduce 250286425Sdim // the access time for repeated lookups of the same cell. 251286425Sdim struct CellMapShadow { 252286425Sdim CellMapShadow(const BitTracker &T) : BT(T) {} 253286425Sdim const BitTracker::RegisterCell &lookup(unsigned VR) { 254286425Sdim unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); 255286425Sdim // Grow the vector to at least 32 elements. 256286425Sdim if (RInd >= CVect.size()) 257286425Sdim CVect.resize(std::max(RInd+16, 32U), 0); 258286425Sdim const BitTracker::RegisterCell *CP = CVect[RInd]; 259286425Sdim if (CP == 0) 260286425Sdim CP = CVect[RInd] = &BT.lookup(VR); 261286425Sdim return *CP; 262286425Sdim } 263286425Sdim 264286425Sdim const BitTracker &BT; 265286425Sdim 266286425Sdim private: 267286425Sdim typedef std::vector<const BitTracker::RegisterCell*> CellVectType; 268286425Sdim CellVectType CVect; 269286425Sdim }; 270286425Sdim} 271286425Sdim 272286425Sdim 273286425Sdimnamespace { 274286425Sdim // Comparator class for lexicographic ordering of virtual registers 275286425Sdim // according to the corresponding BitTracker::RegisterCell objects. 276286425Sdim struct RegisterCellLexCompare { 277286425Sdim RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) 278286425Sdim : BitOrd(BO), CM(M) {} 279286425Sdim bool operator() (unsigned VR1, unsigned VR2) const; 280286425Sdim private: 281286425Sdim const BitValueOrdering &BitOrd; 282286425Sdim CellMapShadow &CM; 283286425Sdim }; 284286425Sdim 285286425Sdim // Comparator class for lexicographic ordering of virtual registers 286286425Sdim // according to the specified bits of the corresponding BitTracker:: 287286425Sdim // RegisterCell objects. 288286425Sdim // Specifically, this class will be used to compare bit B of a register 289286425Sdim // cell for a selected virtual register R with bit N of any register 290286425Sdim // other than R. 291286425Sdim struct RegisterCellBitCompareSel { 292286425Sdim RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, 293286425Sdim const BitValueOrdering &BO, CellMapShadow &M) 294286425Sdim : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} 295286425Sdim bool operator() (unsigned VR1, unsigned VR2) const; 296286425Sdim private: 297286425Sdim const unsigned SelR, SelB; 298286425Sdim const unsigned BitN; 299286425Sdim const BitValueOrdering &BitOrd; 300286425Sdim CellMapShadow &CM; 301286425Sdim }; 302286425Sdim} 303286425Sdim 304286425Sdim 305286425Sdimbool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { 306286425Sdim // Ordering of registers, made up from two given orderings: 307286425Sdim // - the ordering of the register numbers, and 308286425Sdim // - the ordering of register cells. 309286425Sdim // Def. R1 < R2 if: 310286425Sdim // - cell(R1) < cell(R2), or 311286425Sdim // - cell(R1) == cell(R2), and index(R1) < index(R2). 312286425Sdim // 313286425Sdim // For register cells, the ordering is lexicographic, with index 0 being 314286425Sdim // the most significant. 315286425Sdim if (VR1 == VR2) 316286425Sdim return false; 317286425Sdim 318286425Sdim const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); 319286425Sdim uint16_t W1 = RC1.width(), W2 = RC2.width(); 320286425Sdim for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { 321286425Sdim const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; 322286425Sdim if (V1 != V2) 323286425Sdim return BitOrd(V1, V2); 324286425Sdim } 325286425Sdim // Cells are equal up until the common length. 326286425Sdim if (W1 != W2) 327286425Sdim return W1 < W2; 328286425Sdim 329286425Sdim return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; 330286425Sdim} 331286425Sdim 332286425Sdim 333286425Sdimbool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { 334286425Sdim if (VR1 == VR2) 335286425Sdim return false; 336286425Sdim const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); 337286425Sdim const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); 338286425Sdim uint16_t W1 = RC1.width(), W2 = RC2.width(); 339286425Sdim uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; 340286425Sdim uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; 341286425Sdim // If Bit1 exceeds the width of VR1, then: 342286425Sdim // - return false, if at the same time Bit2 exceeds VR2, or 343286425Sdim // - return true, otherwise. 344286425Sdim // (I.e. "a bit value that does not exist is less than any bit value 345286425Sdim // that does exist".) 346286425Sdim if (W1 <= Bit1) 347286425Sdim return Bit2 < W2; 348286425Sdim // If Bit1 is within VR1, but Bit2 is not within VR2, return false. 349286425Sdim if (W2 <= Bit2) 350286425Sdim return false; 351286425Sdim 352286425Sdim const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; 353286425Sdim if (V1 != V2) 354286425Sdim return BitOrd(V1, V2); 355286425Sdim return false; 356286425Sdim} 357286425Sdim 358286425Sdim 359286425Sdimnamespace { 360286425Sdim class OrderedRegisterList { 361286425Sdim typedef std::vector<unsigned> ListType; 362286425Sdim public: 363286425Sdim OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} 364286425Sdim void insert(unsigned VR); 365286425Sdim void remove(unsigned VR); 366286425Sdim unsigned operator[](unsigned Idx) const { 367286425Sdim assert(Idx < Seq.size()); 368286425Sdim return Seq[Idx]; 369286425Sdim } 370286425Sdim unsigned size() const { 371286425Sdim return Seq.size(); 372286425Sdim } 373286425Sdim 374286425Sdim typedef ListType::iterator iterator; 375286425Sdim typedef ListType::const_iterator const_iterator; 376286425Sdim iterator begin() { return Seq.begin(); } 377286425Sdim iterator end() { return Seq.end(); } 378286425Sdim const_iterator begin() const { return Seq.begin(); } 379286425Sdim const_iterator end() const { return Seq.end(); } 380286425Sdim 381286425Sdim // Convenience function to convert an iterator to the corresponding index. 382286425Sdim unsigned idx(iterator It) const { return It-begin(); } 383286425Sdim private: 384286425Sdim ListType Seq; 385286425Sdim const RegisterOrdering &Ord; 386286425Sdim }; 387286425Sdim 388286425Sdim 389286425Sdim struct PrintORL { 390286425Sdim PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) 391286425Sdim : RL(L), TRI(RI) {} 392286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); 393286425Sdim private: 394286425Sdim const OrderedRegisterList &RL; 395286425Sdim const TargetRegisterInfo *TRI; 396286425Sdim }; 397286425Sdim 398286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { 399286425Sdim OS << '('; 400286425Sdim OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); 401286425Sdim for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { 402286425Sdim if (I != B) 403286425Sdim OS << ", "; 404286425Sdim OS << PrintReg(*I, P.TRI); 405286425Sdim } 406286425Sdim OS << ')'; 407286425Sdim return OS; 408286425Sdim } 409286425Sdim} 410286425Sdim 411286425Sdim 412286425Sdimvoid OrderedRegisterList::insert(unsigned VR) { 413286425Sdim iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); 414286425Sdim if (L == Seq.end()) 415286425Sdim Seq.push_back(VR); 416286425Sdim else 417286425Sdim Seq.insert(L, VR); 418286425Sdim} 419286425Sdim 420286425Sdim 421286425Sdimvoid OrderedRegisterList::remove(unsigned VR) { 422286425Sdim iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); 423286425Sdim assert(L != Seq.end()); 424286425Sdim Seq.erase(L); 425286425Sdim} 426286425Sdim 427286425Sdim 428286425Sdimnamespace { 429286425Sdim // A record of the insert form. The fields correspond to the operands 430286425Sdim // of the "insert" instruction: 431286425Sdim // ... = insert(SrcR, InsR, #Wdh, #Off) 432286425Sdim struct IFRecord { 433286425Sdim IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) 434286425Sdim : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} 435286425Sdim unsigned SrcR, InsR; 436286425Sdim uint16_t Wdh, Off; 437286425Sdim }; 438286425Sdim 439286425Sdim struct PrintIFR { 440286425Sdim PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) 441286425Sdim : IFR(R), TRI(RI) {} 442286425Sdim private: 443286425Sdim const IFRecord &IFR; 444286425Sdim const TargetRegisterInfo *TRI; 445286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); 446286425Sdim }; 447286425Sdim 448286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { 449286425Sdim unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; 450286425Sdim OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI) 451286425Sdim << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; 452286425Sdim return OS; 453286425Sdim } 454286425Sdim 455286425Sdim typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; 456286425Sdim} 457286425Sdim 458286425Sdim 459286425Sdimnamespace llvm { 460286425Sdim void initializeHexagonGenInsertPass(PassRegistry&); 461286425Sdim FunctionPass *createHexagonGenInsert(); 462286425Sdim} 463286425Sdim 464286425Sdim 465286425Sdimnamespace { 466286425Sdim class HexagonGenInsert : public MachineFunctionPass { 467286425Sdim public: 468286425Sdim static char ID; 469286425Sdim HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { 470286425Sdim initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); 471286425Sdim } 472286425Sdim virtual const char *getPassName() const { 473286425Sdim return "Hexagon generate \"insert\" instructions"; 474286425Sdim } 475286425Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 476286425Sdim AU.addRequired<MachineDominatorTree>(); 477286425Sdim AU.addPreserved<MachineDominatorTree>(); 478286425Sdim MachineFunctionPass::getAnalysisUsage(AU); 479286425Sdim } 480286425Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 481286425Sdim 482286425Sdim private: 483286425Sdim typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; 484286425Sdim 485286425Sdim void buildOrderingMF(RegisterOrdering &RO) const; 486286425Sdim void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; 487286425Sdim bool isIntClass(const TargetRegisterClass *RC) const; 488286425Sdim bool isConstant(unsigned VR) const; 489286425Sdim bool isSmallConstant(unsigned VR) const; 490286425Sdim bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, 491286425Sdim uint16_t L, uint16_t S) const; 492286425Sdim bool findSelfReference(unsigned VR) const; 493286425Sdim bool findNonSelfReference(unsigned VR) const; 494286425Sdim void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; 495286425Sdim void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; 496286425Sdim unsigned distance(const MachineBasicBlock *FromB, 497286425Sdim const MachineBasicBlock *ToB, const UnsignedMap &RPO, 498286425Sdim PairMapType &M) const; 499286425Sdim unsigned distance(MachineBasicBlock::const_iterator FromI, 500286425Sdim MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 501286425Sdim PairMapType &M) const; 502286425Sdim bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); 503286425Sdim void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); 504286425Sdim void findRemovableRegisters(unsigned VR, IFRecord IF, 505286425Sdim RegisterSet &RMs) const; 506286425Sdim void computeRemovableRegisters(); 507286425Sdim 508286425Sdim void pruneEmptyLists(); 509286425Sdim void pruneCoveredSets(unsigned VR); 510286425Sdim void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); 511286425Sdim void pruneRegCopies(unsigned VR); 512286425Sdim void pruneCandidates(); 513286425Sdim void selectCandidates(); 514286425Sdim bool generateInserts(); 515286425Sdim 516286425Sdim bool removeDeadCode(MachineDomTreeNode *N); 517286425Sdim 518286425Sdim // IFRecord coupled with a set of potentially removable registers: 519286425Sdim typedef std::vector<IFRecordWithRegSet> IFListType; 520286425Sdim typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType 521286425Sdim 522286425Sdim void dump_map() const; 523286425Sdim 524286425Sdim const HexagonInstrInfo *HII; 525286425Sdim const HexagonRegisterInfo *HRI; 526286425Sdim 527286425Sdim MachineFunction *MFN; 528286425Sdim MachineRegisterInfo *MRI; 529286425Sdim MachineDominatorTree *MDT; 530286425Sdim CellMapShadow *CMS; 531286425Sdim 532286425Sdim RegisterOrdering BaseOrd; 533286425Sdim RegisterOrdering CellOrd; 534286425Sdim IFMapType IFMap; 535286425Sdim }; 536286425Sdim 537286425Sdim char HexagonGenInsert::ID = 0; 538286425Sdim} 539286425Sdim 540286425Sdim 541286425Sdimvoid HexagonGenInsert::dump_map() const { 542286425Sdim typedef IFMapType::const_iterator iterator; 543286425Sdim for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 544286425Sdim dbgs() << " " << PrintReg(I->first, HRI) << ":\n"; 545286425Sdim const IFListType &LL = I->second; 546286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 547286425Sdim dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " 548286425Sdim << PrintRegSet(LL[i].second, HRI) << '\n'; 549286425Sdim } 550286425Sdim} 551286425Sdim 552286425Sdim 553286425Sdimvoid HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { 554286425Sdim unsigned Index = 0; 555286425Sdim typedef MachineFunction::const_iterator mf_iterator; 556286425Sdim for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { 557286425Sdim const MachineBasicBlock &B = *A; 558286425Sdim if (!CMS->BT.reached(&B)) 559286425Sdim continue; 560286425Sdim typedef MachineBasicBlock::const_iterator mb_iterator; 561286425Sdim for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { 562286425Sdim const MachineInstr *MI = &*I; 563286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 564286425Sdim const MachineOperand &MO = MI->getOperand(i); 565286425Sdim if (MO.isReg() && MO.isDef()) { 566286425Sdim unsigned R = MO.getReg(); 567286425Sdim assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); 568286425Sdim if (TargetRegisterInfo::isVirtualRegister(R)) 569286425Sdim RO.insert(std::make_pair(R, Index++)); 570286425Sdim } 571286425Sdim } 572286425Sdim } 573286425Sdim } 574286425Sdim // Since some virtual registers may have had their def and uses eliminated, 575286425Sdim // they are no longer referenced in the code, and so they will not appear 576286425Sdim // in the map. 577286425Sdim} 578286425Sdim 579286425Sdim 580286425Sdimvoid HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, 581286425Sdim RegisterOrdering &RO) const { 582286425Sdim // Create a vector of all virtual registers (collect them from the base 583286425Sdim // ordering RB), and then sort it using the RegisterCell comparator. 584286425Sdim BitValueOrdering BVO(RB); 585286425Sdim RegisterCellLexCompare LexCmp(BVO, *CMS); 586286425Sdim typedef std::vector<unsigned> SortableVectorType; 587286425Sdim SortableVectorType VRs; 588286425Sdim for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) 589286425Sdim VRs.push_back(I->first); 590286425Sdim std::sort(VRs.begin(), VRs.end(), LexCmp); 591286425Sdim // Transfer the results to the outgoing register ordering. 592286425Sdim for (unsigned i = 0, n = VRs.size(); i < n; ++i) 593286425Sdim RO.insert(std::make_pair(VRs[i], i)); 594286425Sdim} 595286425Sdim 596286425Sdim 597286425Sdiminline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { 598286425Sdim return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; 599286425Sdim} 600286425Sdim 601286425Sdim 602286425Sdimbool HexagonGenInsert::isConstant(unsigned VR) const { 603286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 604286425Sdim uint16_t W = RC.width(); 605286425Sdim for (uint16_t i = 0; i < W; ++i) { 606286425Sdim const BitTracker::BitValue &BV = RC[i]; 607286425Sdim if (BV.is(0) || BV.is(1)) 608286425Sdim continue; 609286425Sdim return false; 610286425Sdim } 611286425Sdim return true; 612286425Sdim} 613286425Sdim 614286425Sdim 615286425Sdimbool HexagonGenInsert::isSmallConstant(unsigned VR) const { 616286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 617286425Sdim uint16_t W = RC.width(); 618286425Sdim if (W > 64) 619286425Sdim return false; 620286425Sdim uint64_t V = 0, B = 1; 621286425Sdim for (uint16_t i = 0; i < W; ++i) { 622286425Sdim const BitTracker::BitValue &BV = RC[i]; 623286425Sdim if (BV.is(1)) 624286425Sdim V |= B; 625286425Sdim else if (!BV.is(0)) 626286425Sdim return false; 627286425Sdim B <<= 1; 628286425Sdim } 629286425Sdim 630286425Sdim // For 32-bit registers, consider: Rd = #s16. 631286425Sdim if (W == 32) 632286425Sdim return isInt<16>(V); 633286425Sdim 634286425Sdim // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) 635286425Sdim return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); 636286425Sdim} 637286425Sdim 638286425Sdim 639286425Sdimbool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, 640286425Sdim unsigned InsR, uint16_t L, uint16_t S) const { 641286425Sdim const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); 642286425Sdim const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); 643286425Sdim const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); 644286425Sdim // Only integet (32-/64-bit) register classes. 645286425Sdim if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) 646286425Sdim return false; 647286425Sdim // The "source" register must be of the same class as DstR. 648286425Sdim if (DstRC != SrcRC) 649286425Sdim return false; 650286425Sdim if (DstRC == InsRC) 651286425Sdim return true; 652286425Sdim // A 64-bit register can only be generated from other 64-bit registers. 653286425Sdim if (DstRC == &Hexagon::DoubleRegsRegClass) 654286425Sdim return false; 655286425Sdim // Otherwise, the L and S cannot span 32-bit word boundary. 656286425Sdim if (S < 32 && S+L > 32) 657286425Sdim return false; 658286425Sdim return true; 659286425Sdim} 660286425Sdim 661286425Sdim 662286425Sdimbool HexagonGenInsert::findSelfReference(unsigned VR) const { 663286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 664286425Sdim for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 665286425Sdim const BitTracker::BitValue &V = RC[i]; 666286425Sdim if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) 667286425Sdim return true; 668286425Sdim } 669286425Sdim return false; 670286425Sdim} 671286425Sdim 672286425Sdim 673286425Sdimbool HexagonGenInsert::findNonSelfReference(unsigned VR) const { 674286425Sdim BitTracker::RegisterCell RC = CMS->lookup(VR); 675286425Sdim for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 676286425Sdim const BitTracker::BitValue &V = RC[i]; 677286425Sdim if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) 678286425Sdim return true; 679286425Sdim } 680286425Sdim return false; 681286425Sdim} 682286425Sdim 683286425Sdim 684286425Sdimvoid HexagonGenInsert::getInstrDefs(const MachineInstr *MI, 685286425Sdim RegisterSet &Defs) const { 686286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 687286425Sdim const MachineOperand &MO = MI->getOperand(i); 688286425Sdim if (!MO.isReg() || !MO.isDef()) 689286425Sdim continue; 690286425Sdim unsigned R = MO.getReg(); 691286425Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 692286425Sdim continue; 693286425Sdim Defs.insert(R); 694286425Sdim } 695286425Sdim} 696286425Sdim 697286425Sdim 698286425Sdimvoid HexagonGenInsert::getInstrUses(const MachineInstr *MI, 699286425Sdim RegisterSet &Uses) const { 700286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 701286425Sdim const MachineOperand &MO = MI->getOperand(i); 702286425Sdim if (!MO.isReg() || !MO.isUse()) 703286425Sdim continue; 704286425Sdim unsigned R = MO.getReg(); 705286425Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 706286425Sdim continue; 707286425Sdim Uses.insert(R); 708286425Sdim } 709286425Sdim} 710286425Sdim 711286425Sdim 712286425Sdimunsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, 713286425Sdim const MachineBasicBlock *ToB, const UnsignedMap &RPO, 714286425Sdim PairMapType &M) const { 715286425Sdim // Forward distance from the end of a block to the beginning of it does 716286425Sdim // not make sense. This function should not be called with FromB == ToB. 717286425Sdim assert(FromB != ToB); 718286425Sdim 719286425Sdim unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); 720286425Sdim // If we have already computed it, return the cached result. 721286425Sdim PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); 722286425Sdim if (F != M.end()) 723286425Sdim return F->second; 724286425Sdim unsigned ToRPO = RPO.lookup(ToN); 725286425Sdim 726286425Sdim unsigned MaxD = 0; 727286425Sdim typedef MachineBasicBlock::const_pred_iterator pred_iterator; 728286425Sdim for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { 729286425Sdim const MachineBasicBlock *PB = *I; 730286425Sdim // Skip back edges. Also, if FromB is a predecessor of ToB, the distance 731286425Sdim // along that path will be 0, and we don't need to do any calculations 732286425Sdim // on it. 733286425Sdim if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) 734286425Sdim continue; 735286425Sdim unsigned D = PB->size() + distance(FromB, PB, RPO, M); 736286425Sdim if (D > MaxD) 737286425Sdim MaxD = D; 738286425Sdim } 739286425Sdim 740286425Sdim // Memoize the result for later lookup. 741286425Sdim M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); 742286425Sdim return MaxD; 743286425Sdim} 744286425Sdim 745286425Sdim 746286425Sdimunsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, 747286425Sdim MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 748286425Sdim PairMapType &M) const { 749286425Sdim const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); 750286425Sdim if (FB == TB) 751286425Sdim return std::distance(FromI, ToI); 752286425Sdim unsigned D1 = std::distance(TB->begin(), ToI); 753286425Sdim unsigned D2 = distance(FB, TB, RPO, M); 754286425Sdim unsigned D3 = std::distance(FromI, FB->end()); 755286425Sdim return D1+D2+D3; 756286425Sdim} 757286425Sdim 758286425Sdim 759286425Sdimbool HexagonGenInsert::findRecordInsertForms(unsigned VR, 760286425Sdim OrderedRegisterList &AVs) { 761286425Sdim if (isDebug()) { 762286425Sdim dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) 763286425Sdim << " AVs: " << PrintORL(AVs, HRI) << "\n"; 764286425Sdim } 765286425Sdim if (AVs.size() == 0) 766286425Sdim return false; 767286425Sdim 768286425Sdim typedef OrderedRegisterList::iterator iterator; 769286425Sdim BitValueOrdering BVO(BaseOrd); 770286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 771286425Sdim uint16_t W = RC.width(); 772286425Sdim 773286425Sdim typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift) 774286425Sdim typedef std::vector<RSRecord> RSListType; 775286425Sdim // Have a map, with key being the matching prefix length, and the value 776286425Sdim // being the list of pairs (R,S), where R's prefix matches VR at S. 777286425Sdim // (DenseMap<uint16_t,RSListType> fails to instantiate.) 778286425Sdim typedef DenseMap<unsigned,RSListType> LRSMapType; 779286425Sdim LRSMapType LM; 780286425Sdim 781286425Sdim // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, 782286425Sdim // and find matching prefixes from AVs with the rotated RC. Such a prefix 783286425Sdim // would match a string of bits (of length L) in RC starting at S. 784286425Sdim for (uint16_t S = 0; S < W; ++S) { 785286425Sdim iterator B = AVs.begin(), E = AVs.end(); 786286425Sdim // The registers in AVs are ordered according to the lexical order of 787286425Sdim // the corresponding register cells. This means that the range of regis- 788286425Sdim // ters in AVs that match a prefix of length L+1 will be contained in 789286425Sdim // the range that matches a prefix of length L. This means that we can 790286425Sdim // keep narrowing the search space as the prefix length goes up. This 791286425Sdim // helps reduce the overall complexity of the search. 792286425Sdim uint16_t L; 793286425Sdim for (L = 0; L < W-S; ++L) { 794286425Sdim // Compare against VR's bits starting at S, which emulates rotation 795286425Sdim // of VR by S. 796286425Sdim RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); 797286425Sdim iterator NewB = std::lower_bound(B, E, VR, RCB); 798286425Sdim iterator NewE = std::upper_bound(NewB, E, VR, RCB); 799286425Sdim // For the registers that are eliminated from the next range, L is 800286425Sdim // the longest prefix matching VR at position S (their prefixes 801286425Sdim // differ from VR at S+L). If L>0, record this information for later 802286425Sdim // use. 803286425Sdim if (L > 0) { 804286425Sdim for (iterator I = B; I != NewB; ++I) 805286425Sdim LM[L].push_back(std::make_pair(*I, S)); 806286425Sdim for (iterator I = NewE; I != E; ++I) 807286425Sdim LM[L].push_back(std::make_pair(*I, S)); 808286425Sdim } 809286425Sdim B = NewB, E = NewE; 810286425Sdim if (B == E) 811286425Sdim break; 812286425Sdim } 813286425Sdim // Record the final register range. If this range is non-empty, then 814286425Sdim // L=W-S. 815286425Sdim assert(B == E || L == W-S); 816286425Sdim if (B != E) { 817286425Sdim for (iterator I = B; I != E; ++I) 818286425Sdim LM[L].push_back(std::make_pair(*I, S)); 819286425Sdim // If B!=E, then we found a range of registers whose prefixes cover the 820286425Sdim // rest of VR from position S. There is no need to further advance S. 821286425Sdim break; 822286425Sdim } 823286425Sdim } 824286425Sdim 825286425Sdim if (isDebug()) { 826286425Sdim dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n"; 827286425Sdim for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { 828286425Sdim dbgs() << " L=" << I->first << ':'; 829286425Sdim const RSListType &LL = I->second; 830286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 831286425Sdim dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@" 832286425Sdim << LL[i].second << ')'; 833286425Sdim dbgs() << '\n'; 834286425Sdim } 835286425Sdim } 836286425Sdim 837286425Sdim 838286425Sdim bool Recorded = false; 839286425Sdim 840286425Sdim for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { 841286425Sdim unsigned SrcR = *I; 842286425Sdim int FDi = -1, LDi = -1; // First/last different bit. 843286425Sdim const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); 844286425Sdim uint16_t AW = AC.width(); 845286425Sdim for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { 846286425Sdim if (RC[i] == AC[i]) 847286425Sdim continue; 848286425Sdim if (FDi == -1) 849286425Sdim FDi = i; 850286425Sdim LDi = i; 851286425Sdim } 852286425Sdim if (FDi == -1) 853286425Sdim continue; // TODO (future): Record identical registers. 854286425Sdim // Look for a register whose prefix could patch the range [FD..LD] 855286425Sdim // where VR and SrcR differ. 856286425Sdim uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. 857286425Sdim uint16_t MinL = LD-FD+1; 858286425Sdim for (uint16_t L = MinL; L < W; ++L) { 859286425Sdim LRSMapType::iterator F = LM.find(L); 860286425Sdim if (F == LM.end()) 861286425Sdim continue; 862286425Sdim RSListType &LL = F->second; 863286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) { 864286425Sdim uint16_t S = LL[i].second; 865286425Sdim // MinL is the minimum length of the prefix. Any length above MinL 866286425Sdim // allows some flexibility as to where the prefix can start: 867286425Sdim // given the extra length EL=L-MinL, the prefix must start between 868286425Sdim // max(0,FD-EL) and FD. 869286425Sdim if (S > FD) // Starts too late. 870286425Sdim continue; 871286425Sdim uint16_t EL = L-MinL; 872286425Sdim uint16_t LowS = (EL < FD) ? FD-EL : 0; 873286425Sdim if (S < LowS) // Starts too early. 874286425Sdim continue; 875286425Sdim unsigned InsR = LL[i].first; 876286425Sdim if (!isValidInsertForm(VR, SrcR, InsR, L, S)) 877286425Sdim continue; 878286425Sdim if (isDebug()) { 879286425Sdim dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI) 880286425Sdim << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#" 881286425Sdim << S << ")\n"; 882286425Sdim } 883286425Sdim IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); 884286425Sdim IFMap[VR].push_back(RR); 885286425Sdim Recorded = true; 886286425Sdim } 887286425Sdim } 888286425Sdim } 889286425Sdim 890286425Sdim return Recorded; 891286425Sdim} 892286425Sdim 893286425Sdim 894286425Sdimvoid HexagonGenInsert::collectInBlock(MachineBasicBlock *B, 895286425Sdim OrderedRegisterList &AVs) { 896286425Sdim if (isDebug()) 897286425Sdim dbgs() << "visiting block BB#" << B->getNumber() << "\n"; 898286425Sdim 899286425Sdim // First, check if this block is reachable at all. If not, the bit tracker 900286425Sdim // will not have any information about registers in it. 901286425Sdim if (!CMS->BT.reached(B)) 902286425Sdim return; 903286425Sdim 904286425Sdim bool DoConst = OptConst; 905286425Sdim // Keep a separate set of registers defined in this block, so that we 906286425Sdim // can remove them from the list of available registers once all DT 907286425Sdim // successors have been processed. 908286425Sdim RegisterSet BlockDefs, InsDefs; 909286425Sdim for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { 910286425Sdim MachineInstr *MI = &*I; 911286425Sdim InsDefs.clear(); 912286425Sdim getInstrDefs(MI, InsDefs); 913286425Sdim // Leave those alone. They are more transparent than "insert". 914286425Sdim bool Skip = MI->isCopy() || MI->isRegSequence(); 915286425Sdim 916286425Sdim if (!Skip) { 917286425Sdim // Visit all defined registers, and attempt to find the corresponding 918286425Sdim // "insert" representations. 919286425Sdim for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { 920286425Sdim // Do not collect registers that are known to be compile-time cons- 921286425Sdim // tants, unless requested. 922286425Sdim if (!DoConst && isConstant(VR)) 923286425Sdim continue; 924286425Sdim // If VR's cell contains a reference to VR, then VR cannot be defined 925286425Sdim // via "insert". If VR is a constant that can be generated in a single 926286425Sdim // instruction (without constant extenders), generating it via insert 927286425Sdim // makes no sense. 928286425Sdim if (findSelfReference(VR) || isSmallConstant(VR)) 929286425Sdim continue; 930286425Sdim 931286425Sdim findRecordInsertForms(VR, AVs); 932286425Sdim } 933286425Sdim } 934286425Sdim 935286425Sdim // Insert the defined registers into the list of available registers 936286425Sdim // after they have been processed. 937286425Sdim for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) 938286425Sdim AVs.insert(VR); 939286425Sdim BlockDefs.insert(InsDefs); 940286425Sdim } 941286425Sdim 942286425Sdim MachineDomTreeNode *N = MDT->getNode(B); 943286425Sdim typedef GraphTraits<MachineDomTreeNode*> GTN; 944286425Sdim typedef GTN::ChildIteratorType ChildIter; 945286425Sdim for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { 946286425Sdim MachineBasicBlock *SB = (*I)->getBlock(); 947286425Sdim collectInBlock(SB, AVs); 948286425Sdim } 949286425Sdim 950286425Sdim for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) 951286425Sdim AVs.remove(VR); 952286425Sdim} 953286425Sdim 954286425Sdim 955286425Sdimvoid HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, 956286425Sdim RegisterSet &RMs) const { 957286425Sdim // For a given register VR and a insert form, find the registers that are 958286425Sdim // used by the current definition of VR, and which would no longer be 959286425Sdim // needed for it after the definition of VR is replaced with the insert 960286425Sdim // form. These are the registers that could potentially become dead. 961286425Sdim RegisterSet Regs[2]; 962286425Sdim 963286425Sdim unsigned S = 0; // Register set selector. 964286425Sdim Regs[S].insert(VR); 965286425Sdim 966286425Sdim while (!Regs[S].empty()) { 967286425Sdim // Breadth-first search. 968286425Sdim unsigned OtherS = 1-S; 969286425Sdim Regs[OtherS].clear(); 970286425Sdim for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { 971286425Sdim Regs[S].remove(R); 972286425Sdim if (R == IF.SrcR || R == IF.InsR) 973286425Sdim continue; 974286425Sdim // Check if a given register has bits that are references to any other 975286425Sdim // registers. This is to detect situations where the instruction that 976286425Sdim // defines register R takes register Q as an operand, but R itself does 977286425Sdim // not contain any bits from Q. Loads are examples of how this could 978286425Sdim // happen: 979286425Sdim // R = load Q 980286425Sdim // In this case (assuming we do not have any knowledge about the loaded 981286425Sdim // value), we must not treat R as a "conveyance" of the bits from Q. 982286425Sdim // (The information in BT about R's bits would have them as constants, 983286425Sdim // in case of zero-extending loads, or refs to R.) 984286425Sdim if (!findNonSelfReference(R)) 985286425Sdim continue; 986286425Sdim RMs.insert(R); 987286425Sdim const MachineInstr *DefI = MRI->getVRegDef(R); 988286425Sdim assert(DefI); 989286425Sdim // Do not iterate past PHI nodes to avoid infinite loops. This can 990286425Sdim // make the final set a bit less accurate, but the removable register 991286425Sdim // sets are an approximation anyway. 992286425Sdim if (DefI->isPHI()) 993286425Sdim continue; 994286425Sdim getInstrUses(DefI, Regs[OtherS]); 995286425Sdim } 996286425Sdim S = OtherS; 997286425Sdim } 998286425Sdim // The register VR is added to the list as a side-effect of the algorithm, 999286425Sdim // but it is not "potentially removable". A potentially removable register 1000286425Sdim // is one that may become unused (dead) after conversion to the insert form 1001286425Sdim // IF, and obviously VR (or its replacement) will not become dead by apply- 1002286425Sdim // ing IF. 1003286425Sdim RMs.remove(VR); 1004286425Sdim} 1005286425Sdim 1006286425Sdim 1007286425Sdimvoid HexagonGenInsert::computeRemovableRegisters() { 1008286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1009286425Sdim IFListType &LL = I->second; 1010286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 1011286425Sdim findRemovableRegisters(I->first, LL[i].first, LL[i].second); 1012286425Sdim } 1013286425Sdim} 1014286425Sdim 1015286425Sdim 1016286425Sdimvoid HexagonGenInsert::pruneEmptyLists() { 1017286425Sdim // Remove all entries from the map, where the register has no insert forms 1018286425Sdim // associated with it. 1019286425Sdim typedef SmallVector<IFMapType::iterator,16> IterListType; 1020286425Sdim IterListType Prune; 1021286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1022286425Sdim if (I->second.size() == 0) 1023286425Sdim Prune.push_back(I); 1024286425Sdim } 1025286425Sdim for (unsigned i = 0, n = Prune.size(); i < n; ++i) 1026286425Sdim IFMap.erase(Prune[i]); 1027286425Sdim} 1028286425Sdim 1029286425Sdim 1030286425Sdimvoid HexagonGenInsert::pruneCoveredSets(unsigned VR) { 1031286425Sdim IFMapType::iterator F = IFMap.find(VR); 1032286425Sdim assert(F != IFMap.end()); 1033286425Sdim IFListType &LL = F->second; 1034286425Sdim 1035286425Sdim // First, examine the IF candidates for register VR whose removable-regis- 1036286425Sdim // ter sets are empty. This means that a given candidate will not help eli- 1037286425Sdim // minate any registers, but since "insert" is not a constant-extendable 1038286425Sdim // instruction, using such a candidate may reduce code size if the defini- 1039286425Sdim // tion of VR is constant-extended. 1040286425Sdim // If there exists a candidate with a non-empty set, the ones with empty 1041286425Sdim // sets will not be used and can be removed. 1042286425Sdim MachineInstr *DefVR = MRI->getVRegDef(VR); 1043286425Sdim bool DefEx = HII->isConstExtended(DefVR); 1044286425Sdim bool HasNE = false; 1045286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) { 1046286425Sdim if (LL[i].second.empty()) 1047286425Sdim continue; 1048286425Sdim HasNE = true; 1049286425Sdim break; 1050286425Sdim } 1051286425Sdim if (!DefEx || HasNE) { 1052286425Sdim // The definition of VR is not constant-extended, or there is a candidate 1053286425Sdim // with a non-empty set. Remove all candidates with empty sets. 1054286425Sdim auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { 1055286425Sdim return IR.second.empty(); 1056286425Sdim }; 1057286425Sdim auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); 1058286425Sdim if (End != LL.end()) 1059286425Sdim LL.erase(End, LL.end()); 1060286425Sdim } else { 1061286425Sdim // The definition of VR is constant-extended, and all candidates have 1062286425Sdim // empty removable-register sets. Pick the maximum candidate, and remove 1063286425Sdim // all others. The "maximum" does not have any special meaning here, it 1064286425Sdim // is only so that the candidate that will remain on the list is selec- 1065286425Sdim // ted deterministically. 1066286425Sdim IFRecord MaxIF = LL[0].first; 1067286425Sdim for (unsigned i = 1, n = LL.size(); i < n; ++i) { 1068286425Sdim // If LL[MaxI] < LL[i], then MaxI = i. 1069286425Sdim const IFRecord &IF = LL[i].first; 1070286425Sdim unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; 1071286425Sdim unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; 1072286425Sdim if (M0 > R0) 1073286425Sdim continue; 1074286425Sdim if (M0 == R0) { 1075286425Sdim if (M1 > R1) 1076286425Sdim continue; 1077286425Sdim if (M1 == R1) { 1078286425Sdim if (MaxIF.Wdh > IF.Wdh) 1079286425Sdim continue; 1080286425Sdim if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) 1081286425Sdim continue; 1082286425Sdim } 1083286425Sdim } 1084286425Sdim // MaxIF < IF. 1085286425Sdim MaxIF = IF; 1086286425Sdim } 1087286425Sdim // Remove everything except the maximum candidate. All register sets 1088286425Sdim // are empty, so no need to preserve anything. 1089286425Sdim LL.clear(); 1090286425Sdim LL.push_back(std::make_pair(MaxIF, RegisterSet())); 1091286425Sdim } 1092286425Sdim 1093286425Sdim // Now, remove those whose sets of potentially removable registers are 1094286425Sdim // contained in another IF candidate for VR. For example, given these 1095286425Sdim // candidates for vreg45, 1096286425Sdim // %vreg45: 1097286425Sdim // (%vreg44,%vreg41,#9,#8), { %vreg42 } 1098286425Sdim // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } 1099286425Sdim // remove the first one, since it is contained in the second one. 1100286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ) { 1101286425Sdim const RegisterSet &RMi = LL[i].second; 1102286425Sdim unsigned j = 0; 1103286425Sdim while (j < n) { 1104286425Sdim if (j != i && LL[j].second.includes(RMi)) 1105286425Sdim break; 1106286425Sdim j++; 1107286425Sdim } 1108286425Sdim if (j == n) { // RMi not contained in anything else. 1109286425Sdim i++; 1110286425Sdim continue; 1111286425Sdim } 1112286425Sdim LL.erase(LL.begin()+i); 1113286425Sdim n = LL.size(); 1114286425Sdim } 1115286425Sdim} 1116286425Sdim 1117286425Sdim 1118286425Sdimvoid HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, 1119286425Sdim PairMapType &M) { 1120286425Sdim IFMapType::iterator F = IFMap.find(VR); 1121286425Sdim assert(F != IFMap.end()); 1122286425Sdim IFListType &LL = F->second; 1123286425Sdim unsigned Cutoff = VRegDistCutoff; 1124286425Sdim const MachineInstr *DefV = MRI->getVRegDef(VR); 1125286425Sdim 1126286425Sdim for (unsigned i = LL.size(); i > 0; --i) { 1127286425Sdim unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; 1128286425Sdim const MachineInstr *DefS = MRI->getVRegDef(SR); 1129286425Sdim const MachineInstr *DefI = MRI->getVRegDef(IR); 1130286425Sdim unsigned DSV = distance(DefS, DefV, RPO, M); 1131286425Sdim if (DSV < Cutoff) { 1132286425Sdim unsigned DIV = distance(DefI, DefV, RPO, M); 1133286425Sdim if (DIV < Cutoff) 1134286425Sdim continue; 1135286425Sdim } 1136286425Sdim LL.erase(LL.begin()+(i-1)); 1137286425Sdim } 1138286425Sdim} 1139286425Sdim 1140286425Sdim 1141286425Sdimvoid HexagonGenInsert::pruneRegCopies(unsigned VR) { 1142286425Sdim IFMapType::iterator F = IFMap.find(VR); 1143286425Sdim assert(F != IFMap.end()); 1144286425Sdim IFListType &LL = F->second; 1145286425Sdim 1146286425Sdim auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { 1147286425Sdim return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); 1148286425Sdim }; 1149286425Sdim auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); 1150286425Sdim if (End != LL.end()) 1151286425Sdim LL.erase(End, LL.end()); 1152286425Sdim} 1153286425Sdim 1154286425Sdim 1155286425Sdimvoid HexagonGenInsert::pruneCandidates() { 1156286425Sdim // Remove candidates that are not beneficial, regardless of the final 1157286425Sdim // selection method. 1158286425Sdim // First, remove candidates whose potentially removable set is a subset 1159286425Sdim // of another candidate's set. 1160286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1161286425Sdim pruneCoveredSets(I->first); 1162286425Sdim 1163286425Sdim UnsignedMap RPO; 1164286425Sdim typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; 1165286425Sdim RPOTType RPOT(MFN); 1166286425Sdim unsigned RPON = 0; 1167286425Sdim for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 1168286425Sdim RPO[(*I)->getNumber()] = RPON++; 1169286425Sdim 1170286425Sdim PairMapType Memo; // Memoization map for distance calculation. 1171286425Sdim // Remove candidates that would use registers defined too far away. 1172286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1173286425Sdim pruneUsesTooFar(I->first, RPO, Memo); 1174286425Sdim 1175286425Sdim pruneEmptyLists(); 1176286425Sdim 1177286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1178286425Sdim pruneRegCopies(I->first); 1179286425Sdim} 1180286425Sdim 1181286425Sdim 1182286425Sdimnamespace { 1183286425Sdim // Class for comparing IF candidates for registers that have multiple of 1184286425Sdim // them. The smaller the candidate, according to this ordering, the better. 1185286425Sdim // First, compare the number of zeros in the associated potentially remova- 1186286425Sdim // ble register sets. "Zero" indicates that the register is very likely to 1187286425Sdim // become dead after this transformation. 1188286425Sdim // Second, compare "averages", i.e. use-count per size. The lower wins. 1189286425Sdim // After that, it does not really matter which one is smaller. Resolve 1190286425Sdim // the tie in some deterministic way. 1191286425Sdim struct IFOrdering { 1192286425Sdim IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) 1193286425Sdim : UseC(UC), BaseOrd(BO) {} 1194286425Sdim bool operator() (const IFRecordWithRegSet &A, 1195286425Sdim const IFRecordWithRegSet &B) const; 1196286425Sdim private: 1197286425Sdim void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1198286425Sdim unsigned &Sum) const; 1199286425Sdim const UnsignedMap &UseC; 1200286425Sdim const RegisterOrdering &BaseOrd; 1201286425Sdim }; 1202286425Sdim} 1203286425Sdim 1204286425Sdim 1205286425Sdimbool IFOrdering::operator() (const IFRecordWithRegSet &A, 1206286425Sdim const IFRecordWithRegSet &B) const { 1207286425Sdim unsigned SizeA = 0, ZeroA = 0, SumA = 0; 1208286425Sdim unsigned SizeB = 0, ZeroB = 0, SumB = 0; 1209286425Sdim stats(A.second, SizeA, ZeroA, SumA); 1210286425Sdim stats(B.second, SizeB, ZeroB, SumB); 1211286425Sdim 1212286425Sdim // We will pick the minimum element. The more zeros, the better. 1213286425Sdim if (ZeroA != ZeroB) 1214286425Sdim return ZeroA > ZeroB; 1215286425Sdim // Compare SumA/SizeA with SumB/SizeB, lower is better. 1216286425Sdim uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; 1217286425Sdim if (AvgA != AvgB) 1218286425Sdim return AvgA < AvgB; 1219286425Sdim 1220286425Sdim // The sets compare identical so far. Resort to comparing the IF records. 1221286425Sdim // The actual values don't matter, this is only for determinism. 1222286425Sdim unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; 1223286425Sdim if (OSA != OSB) 1224286425Sdim return OSA < OSB; 1225286425Sdim unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; 1226286425Sdim if (OIA != OIB) 1227286425Sdim return OIA < OIB; 1228286425Sdim if (A.first.Wdh != B.first.Wdh) 1229286425Sdim return A.first.Wdh < B.first.Wdh; 1230286425Sdim return A.first.Off < B.first.Off; 1231286425Sdim} 1232286425Sdim 1233286425Sdim 1234286425Sdimvoid IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1235286425Sdim unsigned &Sum) const { 1236286425Sdim for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { 1237286425Sdim UnsignedMap::const_iterator F = UseC.find(R); 1238286425Sdim assert(F != UseC.end()); 1239286425Sdim unsigned UC = F->second; 1240286425Sdim if (UC == 0) 1241286425Sdim Zero++; 1242286425Sdim Sum += UC; 1243286425Sdim Size++; 1244286425Sdim } 1245286425Sdim} 1246286425Sdim 1247286425Sdim 1248286425Sdimvoid HexagonGenInsert::selectCandidates() { 1249286425Sdim // Some registers may have multiple valid candidates. Pick the best one 1250286425Sdim // (or decide not to use any). 1251286425Sdim 1252286425Sdim // Compute the "removability" measure of R: 1253286425Sdim // For each potentially removable register R, record the number of regis- 1254286425Sdim // ters with IF candidates, where R appears in at least one set. 1255286425Sdim RegisterSet AllRMs; 1256286425Sdim UnsignedMap UseC, RemC; 1257286425Sdim IFMapType::iterator End = IFMap.end(); 1258286425Sdim 1259286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1260286425Sdim const IFListType &LL = I->second; 1261286425Sdim RegisterSet TT; 1262286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 1263286425Sdim TT.insert(LL[i].second); 1264286425Sdim for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) 1265286425Sdim RemC[R]++; 1266286425Sdim AllRMs.insert(TT); 1267286425Sdim } 1268286425Sdim 1269286425Sdim for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { 1270286425Sdim typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; 1271286425Sdim typedef SmallSet<const MachineInstr*,16> InstrSet; 1272286425Sdim InstrSet UIs; 1273286425Sdim // Count as the number of instructions in which R is used, not the 1274286425Sdim // number of operands. 1275286425Sdim use_iterator E = MRI->use_nodbg_end(); 1276286425Sdim for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) 1277286425Sdim UIs.insert(I->getParent()); 1278286425Sdim unsigned C = UIs.size(); 1279286425Sdim // Calculate a measure, which is the number of instructions using R, 1280286425Sdim // minus the "removability" count computed earlier. 1281286425Sdim unsigned D = RemC[R]; 1282286425Sdim UseC[R] = (C > D) ? C-D : 0; // doz 1283286425Sdim } 1284286425Sdim 1285286425Sdim 1286286425Sdim bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; 1287286425Sdim if (!SelectAll0 && !SelectHas0) 1288286425Sdim SelectAll0 = true; 1289286425Sdim 1290286425Sdim // The smaller the number UseC for a given register R, the "less used" 1291286425Sdim // R is aside from the opportunities for removal offered by generating 1292286425Sdim // "insert" instructions. 1293286425Sdim // Iterate over the IF map, and for those registers that have multiple 1294286425Sdim // candidates, pick the minimum one according to IFOrdering. 1295286425Sdim IFOrdering IFO(UseC, BaseOrd); 1296286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1297286425Sdim IFListType &LL = I->second; 1298286425Sdim if (LL.empty()) 1299286425Sdim continue; 1300286425Sdim // Get the minimum element, remember it and clear the list. If the 1301286425Sdim // element found is adequate, we will put it back on the list, other- 1302286425Sdim // wise the list will remain empty, and the entry for this register 1303286425Sdim // will be removed (i.e. this register will not be replaced by insert). 1304286425Sdim IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); 1305286425Sdim assert(MinI != LL.end()); 1306286425Sdim IFRecordWithRegSet M = *MinI; 1307286425Sdim LL.clear(); 1308286425Sdim 1309286425Sdim // We want to make sure that this replacement will have a chance to be 1310286425Sdim // beneficial, and that means that we want to have indication that some 1311286425Sdim // register will be removed. The most likely registers to be eliminated 1312286425Sdim // are the use operands in the definition of I->first. Accept/reject a 1313286425Sdim // candidate based on how many of its uses it can potentially eliminate. 1314286425Sdim 1315286425Sdim RegisterSet Us; 1316286425Sdim const MachineInstr *DefI = MRI->getVRegDef(I->first); 1317286425Sdim getInstrUses(DefI, Us); 1318286425Sdim bool Accept = false; 1319286425Sdim 1320286425Sdim if (SelectAll0) { 1321286425Sdim bool All0 = true; 1322286425Sdim for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1323286425Sdim if (UseC[R] == 0) 1324286425Sdim continue; 1325286425Sdim All0 = false; 1326286425Sdim break; 1327286425Sdim } 1328286425Sdim Accept = All0; 1329286425Sdim } else if (SelectHas0) { 1330286425Sdim bool Has0 = false; 1331286425Sdim for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1332286425Sdim if (UseC[R] != 0) 1333286425Sdim continue; 1334286425Sdim Has0 = true; 1335286425Sdim break; 1336286425Sdim } 1337286425Sdim Accept = Has0; 1338286425Sdim } 1339286425Sdim if (Accept) 1340286425Sdim LL.push_back(M); 1341286425Sdim } 1342286425Sdim 1343286425Sdim // Remove candidates that add uses of removable registers, unless the 1344286425Sdim // removable registers are among replacement candidates. 1345286425Sdim // Recompute the removable registers, since some candidates may have 1346286425Sdim // been eliminated. 1347286425Sdim AllRMs.clear(); 1348286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1349286425Sdim const IFListType &LL = I->second; 1350286425Sdim if (LL.size() > 0) 1351286425Sdim AllRMs.insert(LL[0].second); 1352286425Sdim } 1353286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1354286425Sdim IFListType &LL = I->second; 1355286425Sdim if (LL.size() == 0) 1356286425Sdim continue; 1357286425Sdim unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; 1358286425Sdim if (AllRMs[SR] || AllRMs[IR]) 1359286425Sdim LL.clear(); 1360286425Sdim } 1361286425Sdim 1362286425Sdim pruneEmptyLists(); 1363286425Sdim} 1364286425Sdim 1365286425Sdim 1366286425Sdimbool HexagonGenInsert::generateInserts() { 1367286425Sdim // Create a new register for each one from IFMap, and store them in the 1368286425Sdim // map. 1369286425Sdim UnsignedMap RegMap; 1370286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1371286425Sdim unsigned VR = I->first; 1372286425Sdim const TargetRegisterClass *RC = MRI->getRegClass(VR); 1373286425Sdim unsigned NewVR = MRI->createVirtualRegister(RC); 1374286425Sdim RegMap[VR] = NewVR; 1375286425Sdim } 1376286425Sdim 1377286425Sdim // We can generate the "insert" instructions using potentially stale re- 1378286425Sdim // gisters: SrcR and InsR for a given VR may be among other registers that 1379286425Sdim // are also replaced. This is fine, we will do the mass "rauw" a bit later. 1380286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1381286425Sdim MachineInstr *MI = MRI->getVRegDef(I->first); 1382286425Sdim MachineBasicBlock &B = *MI->getParent(); 1383286425Sdim DebugLoc DL = MI->getDebugLoc(); 1384286425Sdim unsigned NewR = RegMap[I->first]; 1385286425Sdim bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; 1386286425Sdim const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) 1387286425Sdim : HII->get(Hexagon::S2_insertp); 1388286425Sdim IFRecord IF = I->second[0].first; 1389286425Sdim unsigned Wdh = IF.Wdh, Off = IF.Off; 1390286425Sdim unsigned InsS = 0; 1391286425Sdim if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { 1392286425Sdim InsS = Hexagon::subreg_loreg; 1393286425Sdim if (Off >= 32) { 1394286425Sdim InsS = Hexagon::subreg_hireg; 1395286425Sdim Off -= 32; 1396286425Sdim } 1397286425Sdim } 1398286425Sdim // Advance to the proper location for inserting instructions. This could 1399286425Sdim // be B.end(). 1400286425Sdim MachineBasicBlock::iterator At = MI; 1401286425Sdim if (MI->isPHI()) 1402286425Sdim At = B.getFirstNonPHI(); 1403286425Sdim 1404286425Sdim BuildMI(B, At, DL, D, NewR) 1405286425Sdim .addReg(IF.SrcR) 1406286425Sdim .addReg(IF.InsR, 0, InsS) 1407286425Sdim .addImm(Wdh) 1408286425Sdim .addImm(Off); 1409286425Sdim 1410286425Sdim MRI->clearKillFlags(IF.SrcR); 1411286425Sdim MRI->clearKillFlags(IF.InsR); 1412286425Sdim } 1413286425Sdim 1414286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1415286425Sdim MachineInstr *DefI = MRI->getVRegDef(I->first); 1416286425Sdim MRI->replaceRegWith(I->first, RegMap[I->first]); 1417286425Sdim DefI->eraseFromParent(); 1418286425Sdim } 1419286425Sdim 1420286425Sdim return true; 1421286425Sdim} 1422286425Sdim 1423286425Sdim 1424286425Sdimbool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { 1425286425Sdim bool Changed = false; 1426286425Sdim typedef GraphTraits<MachineDomTreeNode*> GTN; 1427286425Sdim for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) 1428286425Sdim Changed |= removeDeadCode(*I); 1429286425Sdim 1430286425Sdim MachineBasicBlock *B = N->getBlock(); 1431286425Sdim std::vector<MachineInstr*> Instrs; 1432286425Sdim for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) 1433286425Sdim Instrs.push_back(&*I); 1434286425Sdim 1435286425Sdim for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { 1436286425Sdim MachineInstr *MI = *I; 1437286425Sdim unsigned Opc = MI->getOpcode(); 1438286425Sdim // Do not touch lifetime markers. This is why the target-independent DCE 1439286425Sdim // cannot be used. 1440286425Sdim if (Opc == TargetOpcode::LIFETIME_START || 1441286425Sdim Opc == TargetOpcode::LIFETIME_END) 1442286425Sdim continue; 1443286425Sdim bool Store = false; 1444286425Sdim if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) 1445286425Sdim continue; 1446286425Sdim 1447286425Sdim bool AllDead = true; 1448286425Sdim SmallVector<unsigned,2> Regs; 1449286425Sdim for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { 1450286425Sdim if (!Op->isReg() || !Op->isDef()) 1451286425Sdim continue; 1452286425Sdim unsigned R = Op->getReg(); 1453286425Sdim if (!TargetRegisterInfo::isVirtualRegister(R) || 1454286425Sdim !MRI->use_nodbg_empty(R)) { 1455286425Sdim AllDead = false; 1456286425Sdim break; 1457286425Sdim } 1458286425Sdim Regs.push_back(R); 1459286425Sdim } 1460286425Sdim if (!AllDead) 1461286425Sdim continue; 1462286425Sdim 1463286425Sdim B->erase(MI); 1464286425Sdim for (unsigned I = 0, N = Regs.size(); I != N; ++I) 1465286425Sdim MRI->markUsesInDebugValueAsUndef(Regs[I]); 1466286425Sdim Changed = true; 1467286425Sdim } 1468286425Sdim 1469286425Sdim return Changed; 1470286425Sdim} 1471286425Sdim 1472286425Sdim 1473286425Sdimbool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { 1474286425Sdim bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; 1475286425Sdim bool Changed = false; 1476286425Sdim TimerGroup __G("hexinsert"); 1477286425Sdim NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); 1478286425Sdim 1479286425Sdim // Sanity check: one, but not both. 1480286425Sdim assert(!OptSelectAll0 || !OptSelectHas0); 1481286425Sdim 1482286425Sdim IFMap.clear(); 1483286425Sdim BaseOrd.clear(); 1484286425Sdim CellOrd.clear(); 1485286425Sdim 1486286425Sdim const auto &ST = MF.getSubtarget<HexagonSubtarget>(); 1487286425Sdim HII = ST.getInstrInfo(); 1488286425Sdim HRI = ST.getRegisterInfo(); 1489286425Sdim MFN = &MF; 1490286425Sdim MRI = &MF.getRegInfo(); 1491286425Sdim MDT = &getAnalysis<MachineDominatorTree>(); 1492286425Sdim 1493286425Sdim // Clean up before any further processing, so that dead code does not 1494286425Sdim // get used in a newly generated "insert" instruction. Have a custom 1495286425Sdim // version of DCE that preserves lifetime markers. Without it, merging 1496286425Sdim // of stack objects can fail to recognize and merge disjoint objects 1497286425Sdim // leading to unnecessary stack growth. 1498296417Sdim Changed = removeDeadCode(MDT->getRootNode()); 1499286425Sdim 1500286425Sdim const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); 1501286425Sdim BitTracker BTLoc(HE, MF); 1502286425Sdim BTLoc.trace(isDebug()); 1503286425Sdim BTLoc.run(); 1504286425Sdim CellMapShadow MS(BTLoc); 1505286425Sdim CMS = &MS; 1506286425Sdim 1507286425Sdim buildOrderingMF(BaseOrd); 1508286425Sdim buildOrderingBT(BaseOrd, CellOrd); 1509286425Sdim 1510286425Sdim if (isDebug()) { 1511286425Sdim dbgs() << "Cell ordering:\n"; 1512286425Sdim for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); 1513286425Sdim I != E; ++I) { 1514286425Sdim unsigned VR = I->first, Pos = I->second; 1515286425Sdim dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n"; 1516286425Sdim } 1517286425Sdim } 1518286425Sdim 1519286425Sdim // Collect candidates for conversion into the insert forms. 1520286425Sdim MachineBasicBlock *RootB = MDT->getRoot(); 1521286425Sdim OrderedRegisterList AvailR(CellOrd); 1522286425Sdim 1523286425Sdim { 1524286425Sdim NamedRegionTimer _T("collection", "hexinsert", TimingDetail); 1525286425Sdim collectInBlock(RootB, AvailR); 1526286425Sdim // Complete the information gathered in IFMap. 1527286425Sdim computeRemovableRegisters(); 1528286425Sdim } 1529286425Sdim 1530286425Sdim if (isDebug()) { 1531286425Sdim dbgs() << "Candidates after collection:\n"; 1532286425Sdim dump_map(); 1533286425Sdim } 1534286425Sdim 1535286425Sdim if (IFMap.empty()) 1536296417Sdim return Changed; 1537286425Sdim 1538286425Sdim { 1539286425Sdim NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); 1540286425Sdim pruneCandidates(); 1541286425Sdim } 1542286425Sdim 1543286425Sdim if (isDebug()) { 1544286425Sdim dbgs() << "Candidates after pruning:\n"; 1545286425Sdim dump_map(); 1546286425Sdim } 1547286425Sdim 1548286425Sdim if (IFMap.empty()) 1549296417Sdim return Changed; 1550286425Sdim 1551286425Sdim { 1552286425Sdim NamedRegionTimer _T("selection", "hexinsert", TimingDetail); 1553286425Sdim selectCandidates(); 1554286425Sdim } 1555286425Sdim 1556286425Sdim if (isDebug()) { 1557286425Sdim dbgs() << "Candidates after selection:\n"; 1558286425Sdim dump_map(); 1559286425Sdim } 1560286425Sdim 1561286425Sdim // Filter out vregs beyond the cutoff. 1562286425Sdim if (VRegIndexCutoff.getPosition()) { 1563286425Sdim unsigned Cutoff = VRegIndexCutoff; 1564286425Sdim typedef SmallVector<IFMapType::iterator,16> IterListType; 1565286425Sdim IterListType Out; 1566286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1567286425Sdim unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); 1568286425Sdim if (Idx >= Cutoff) 1569286425Sdim Out.push_back(I); 1570286425Sdim } 1571286425Sdim for (unsigned i = 0, n = Out.size(); i < n; ++i) 1572286425Sdim IFMap.erase(Out[i]); 1573286425Sdim } 1574296417Sdim if (IFMap.empty()) 1575296417Sdim return Changed; 1576286425Sdim 1577286425Sdim { 1578286425Sdim NamedRegionTimer _T("generation", "hexinsert", TimingDetail); 1579296417Sdim generateInserts(); 1580286425Sdim } 1581286425Sdim 1582296417Sdim return true; 1583286425Sdim} 1584286425Sdim 1585286425Sdim 1586286425SdimFunctionPass *llvm::createHexagonGenInsert() { 1587286425Sdim return new HexagonGenInsert(); 1588286425Sdim} 1589286425Sdim 1590286425Sdim 1591286425Sdim//===----------------------------------------------------------------------===// 1592286425Sdim// Public Constructor Functions 1593286425Sdim//===----------------------------------------------------------------------===// 1594286425Sdim 1595286425SdimINITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", 1596286425Sdim "Hexagon generate \"insert\" instructions", false, false) 1597286425SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1598286425SdimINITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", 1599286425Sdim "Hexagon generate \"insert\" instructions", false, false) 1600