HexagonGenInsert.cpp revision 353358
1327952Sdim//===- HexagonGenInsert.cpp -----------------------------------------------===// 2286425Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6286425Sdim// 7286425Sdim//===----------------------------------------------------------------------===// 8286425Sdim 9314564Sdim#include "BitTracker.h" 10314564Sdim#include "HexagonBitTracker.h" 11314564Sdim#include "HexagonInstrInfo.h" 12314564Sdim#include "HexagonRegisterInfo.h" 13314564Sdim#include "HexagonSubtarget.h" 14286425Sdim#include "llvm/ADT/BitVector.h" 15286425Sdim#include "llvm/ADT/DenseMap.h" 16327952Sdim#include "llvm/ADT/GraphTraits.h" 17286425Sdim#include "llvm/ADT/PostOrderIterator.h" 18321369Sdim#include "llvm/ADT/STLExtras.h" 19314564Sdim#include "llvm/ADT/SmallSet.h" 20314564Sdim#include "llvm/ADT/SmallVector.h" 21314564Sdim#include "llvm/ADT/StringRef.h" 22314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 23286425Sdim#include "llvm/CodeGen/MachineDominators.h" 24286425Sdim#include "llvm/CodeGen/MachineFunction.h" 25286425Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 26314564Sdim#include "llvm/CodeGen/MachineInstr.h" 27286425Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 28314564Sdim#include "llvm/CodeGen/MachineOperand.h" 29286425Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 30327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 31314564Sdim#include "llvm/IR/DebugLoc.h" 32309124Sdim#include "llvm/Pass.h" 33286425Sdim#include "llvm/Support/CommandLine.h" 34286425Sdim#include "llvm/Support/Debug.h" 35314564Sdim#include "llvm/Support/MathExtras.h" 36321369Sdim#include "llvm/Support/Timer.h" 37314564Sdim#include "llvm/Support/raw_ostream.h" 38314564Sdim#include <algorithm> 39314564Sdim#include <cassert> 40314564Sdim#include <cstdint> 41314564Sdim#include <iterator> 42314564Sdim#include <utility> 43286425Sdim#include <vector> 44286425Sdim 45321369Sdim#define DEBUG_TYPE "hexinsert" 46321369Sdim 47286425Sdimusing namespace llvm; 48286425Sdim 49286425Sdimstatic cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), 50286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); 51286425Sdim// The distance cutoff is selected based on the precheckin-perf results: 52286425Sdim// cutoffs 20, 25, 35, and 40 are worse than 30. 53286425Sdimstatic cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), 54286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " 55286425Sdim "generation.")); 56286425Sdim 57341825Sdim// Limit the container sizes for extreme cases where we run out of memory. 58341825Sdimstatic cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096), 59341825Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList")); 60341825Sdimstatic cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024), 61341825Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap")); 62341825Sdim 63286425Sdimstatic cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, 64286425Sdim cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); 65286425Sdimstatic cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), 66286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " 67286425Sdim "generation")); 68286425Sdim 69286425Sdimstatic cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, 70286425Sdim cl::ZeroOrMore); 71286425Sdimstatic cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, 72286425Sdim cl::ZeroOrMore); 73286425Sdim// Whether to construct constant values via "insert". Could eliminate constant 74286425Sdim// extenders, but often not practical. 75286425Sdimstatic cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, 76286425Sdim cl::ZeroOrMore); 77286425Sdim 78314564Sdim// The preprocessor gets confused when the DEBUG macro is passed larger 79314564Sdim// chunks of code. Use this function to detect debugging. 80314564Sdiminline static bool isDebug() { 81286425Sdim#ifndef NDEBUG 82314564Sdim return DebugFlag && isCurrentDebugType(DEBUG_TYPE); 83286425Sdim#else 84314564Sdim return false; 85286425Sdim#endif 86286425Sdim} 87286425Sdim 88314564Sdimnamespace { 89286425Sdim 90286425Sdim // Set of virtual registers, based on BitVector. 91286425Sdim struct RegisterSet : private BitVector { 92296417Sdim RegisterSet() = default; 93286425Sdim explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} 94341825Sdim RegisterSet(const RegisterSet &RS) : BitVector(RS) {} 95286425Sdim 96286425Sdim using BitVector::clear; 97286425Sdim 98286425Sdim unsigned find_first() const { 99286425Sdim int First = BitVector::find_first(); 100286425Sdim if (First < 0) 101286425Sdim return 0; 102286425Sdim return x2v(First); 103286425Sdim } 104286425Sdim 105286425Sdim unsigned find_next(unsigned Prev) const { 106286425Sdim int Next = BitVector::find_next(v2x(Prev)); 107286425Sdim if (Next < 0) 108286425Sdim return 0; 109286425Sdim return x2v(Next); 110286425Sdim } 111286425Sdim 112286425Sdim RegisterSet &insert(unsigned R) { 113286425Sdim unsigned Idx = v2x(R); 114286425Sdim ensure(Idx); 115286425Sdim return static_cast<RegisterSet&>(BitVector::set(Idx)); 116286425Sdim } 117286425Sdim RegisterSet &remove(unsigned R) { 118286425Sdim unsigned Idx = v2x(R); 119286425Sdim if (Idx >= size()) 120286425Sdim return *this; 121286425Sdim return static_cast<RegisterSet&>(BitVector::reset(Idx)); 122286425Sdim } 123286425Sdim 124286425Sdim RegisterSet &insert(const RegisterSet &Rs) { 125286425Sdim return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); 126286425Sdim } 127286425Sdim RegisterSet &remove(const RegisterSet &Rs) { 128286425Sdim return static_cast<RegisterSet&>(BitVector::reset(Rs)); 129286425Sdim } 130286425Sdim 131286425Sdim reference operator[](unsigned R) { 132286425Sdim unsigned Idx = v2x(R); 133286425Sdim ensure(Idx); 134286425Sdim return BitVector::operator[](Idx); 135286425Sdim } 136286425Sdim bool operator[](unsigned R) const { 137286425Sdim unsigned Idx = v2x(R); 138286425Sdim assert(Idx < size()); 139286425Sdim return BitVector::operator[](Idx); 140286425Sdim } 141286425Sdim bool has(unsigned R) const { 142286425Sdim unsigned Idx = v2x(R); 143286425Sdim if (Idx >= size()) 144286425Sdim return false; 145286425Sdim return BitVector::test(Idx); 146286425Sdim } 147286425Sdim 148286425Sdim bool empty() const { 149286425Sdim return !BitVector::any(); 150286425Sdim } 151286425Sdim bool includes(const RegisterSet &Rs) const { 152286425Sdim // A.BitVector::test(B) <=> A-B != {} 153286425Sdim return !Rs.BitVector::test(*this); 154286425Sdim } 155286425Sdim bool intersects(const RegisterSet &Rs) const { 156286425Sdim return BitVector::anyCommon(Rs); 157286425Sdim } 158286425Sdim 159286425Sdim private: 160286425Sdim void ensure(unsigned Idx) { 161286425Sdim if (size() <= Idx) 162286425Sdim resize(std::max(Idx+1, 32U)); 163286425Sdim } 164314564Sdim 165286425Sdim static inline unsigned v2x(unsigned v) { 166286425Sdim return TargetRegisterInfo::virtReg2Index(v); 167286425Sdim } 168314564Sdim 169286425Sdim static inline unsigned x2v(unsigned x) { 170286425Sdim return TargetRegisterInfo::index2VirtReg(x); 171286425Sdim } 172286425Sdim }; 173286425Sdim 174286425Sdim struct PrintRegSet { 175286425Sdim PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 176286425Sdim : RS(S), TRI(RI) {} 177314564Sdim 178286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, 179286425Sdim const PrintRegSet &P); 180314564Sdim 181286425Sdim private: 182286425Sdim const RegisterSet &RS; 183286425Sdim const TargetRegisterInfo *TRI; 184286425Sdim }; 185286425Sdim 186286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 187286425Sdim OS << '{'; 188286425Sdim for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 189327952Sdim OS << ' ' << printReg(R, P.TRI); 190286425Sdim OS << " }"; 191286425Sdim return OS; 192286425Sdim } 193286425Sdim 194286425Sdim // A convenience class to associate unsigned numbers (such as virtual 195286425Sdim // registers) with unsigned numbers. 196286425Sdim struct UnsignedMap : public DenseMap<unsigned,unsigned> { 197314564Sdim UnsignedMap() = default; 198314564Sdim 199286425Sdim private: 200327952Sdim using BaseType = DenseMap<unsigned, unsigned>; 201286425Sdim }; 202286425Sdim 203286425Sdim // A utility to establish an ordering between virtual registers: 204286425Sdim // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] 205286425Sdim // This is meant as a cache for the ordering of virtual registers defined 206286425Sdim // by a potentially expensive comparison function, or obtained by a proce- 207286425Sdim // dure that should not be repeated each time two registers are compared. 208286425Sdim struct RegisterOrdering : public UnsignedMap { 209314564Sdim RegisterOrdering() = default; 210314564Sdim 211286425Sdim unsigned operator[](unsigned VR) const { 212286425Sdim const_iterator F = find(VR); 213286425Sdim assert(F != end()); 214286425Sdim return F->second; 215286425Sdim } 216314564Sdim 217286425Sdim // Add operator(), so that objects of this class can be used as 218286425Sdim // comparators in std::sort et al. 219286425Sdim bool operator() (unsigned VR1, unsigned VR2) const { 220286425Sdim return operator[](VR1) < operator[](VR2); 221286425Sdim } 222286425Sdim }; 223286425Sdim 224286425Sdim // Ordering of bit values. This class does not have operator[], but 225286425Sdim // is supplies a comparison operator() for use in std:: algorithms. 226286425Sdim // The order is as follows: 227286425Sdim // - 0 < 1 < ref 228286425Sdim // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), 229286425Sdim // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. 230286425Sdim struct BitValueOrdering { 231286425Sdim BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} 232314564Sdim 233286425Sdim bool operator() (const BitTracker::BitValue &V1, 234286425Sdim const BitTracker::BitValue &V2) const; 235314564Sdim 236286425Sdim const RegisterOrdering &BaseOrd; 237286425Sdim }; 238286425Sdim 239314564Sdim} // end anonymous namespace 240286425Sdim 241286425Sdimbool BitValueOrdering::operator() (const BitTracker::BitValue &V1, 242286425Sdim const BitTracker::BitValue &V2) const { 243286425Sdim if (V1 == V2) 244286425Sdim return false; 245286425Sdim // V1==0 => true, V2==0 => false 246286425Sdim if (V1.is(0) || V2.is(0)) 247286425Sdim return V1.is(0); 248286425Sdim // Neither of V1,V2 is 0, and V1!=V2. 249286425Sdim // V2==1 => false, V1==1 => true 250286425Sdim if (V2.is(1) || V1.is(1)) 251286425Sdim return !V2.is(1); 252286425Sdim // Both V1,V2 are refs. 253286425Sdim unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; 254286425Sdim if (Ind1 != Ind2) 255286425Sdim return Ind1 < Ind2; 256286425Sdim // If V1.Pos==V2.Pos 257286425Sdim assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); 258286425Sdim return V1.RefI.Pos < V2.RefI.Pos; 259286425Sdim} 260286425Sdim 261314564Sdimnamespace { 262286425Sdim 263286425Sdim // Cache for the BitTracker's cell map. Map lookup has a logarithmic 264286425Sdim // complexity, this class will memoize the lookup results to reduce 265286425Sdim // the access time for repeated lookups of the same cell. 266286425Sdim struct CellMapShadow { 267286425Sdim CellMapShadow(const BitTracker &T) : BT(T) {} 268314564Sdim 269286425Sdim const BitTracker::RegisterCell &lookup(unsigned VR) { 270286425Sdim unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); 271286425Sdim // Grow the vector to at least 32 elements. 272286425Sdim if (RInd >= CVect.size()) 273314564Sdim CVect.resize(std::max(RInd+16, 32U), nullptr); 274286425Sdim const BitTracker::RegisterCell *CP = CVect[RInd]; 275314564Sdim if (CP == nullptr) 276286425Sdim CP = CVect[RInd] = &BT.lookup(VR); 277286425Sdim return *CP; 278286425Sdim } 279286425Sdim 280286425Sdim const BitTracker &BT; 281286425Sdim 282286425Sdim private: 283327952Sdim using CellVectType = std::vector<const BitTracker::RegisterCell *>; 284327952Sdim 285286425Sdim CellVectType CVect; 286286425Sdim }; 287286425Sdim 288286425Sdim // Comparator class for lexicographic ordering of virtual registers 289286425Sdim // according to the corresponding BitTracker::RegisterCell objects. 290286425Sdim struct RegisterCellLexCompare { 291286425Sdim RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) 292286425Sdim : BitOrd(BO), CM(M) {} 293314564Sdim 294286425Sdim bool operator() (unsigned VR1, unsigned VR2) const; 295314564Sdim 296286425Sdim private: 297286425Sdim const BitValueOrdering &BitOrd; 298286425Sdim CellMapShadow &CM; 299286425Sdim }; 300286425Sdim 301286425Sdim // Comparator class for lexicographic ordering of virtual registers 302286425Sdim // according to the specified bits of the corresponding BitTracker:: 303286425Sdim // RegisterCell objects. 304286425Sdim // Specifically, this class will be used to compare bit B of a register 305286425Sdim // cell for a selected virtual register R with bit N of any register 306286425Sdim // other than R. 307286425Sdim struct RegisterCellBitCompareSel { 308286425Sdim RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, 309286425Sdim const BitValueOrdering &BO, CellMapShadow &M) 310286425Sdim : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} 311314564Sdim 312286425Sdim bool operator() (unsigned VR1, unsigned VR2) const; 313314564Sdim 314286425Sdim private: 315286425Sdim const unsigned SelR, SelB; 316286425Sdim const unsigned BitN; 317286425Sdim const BitValueOrdering &BitOrd; 318286425Sdim CellMapShadow &CM; 319286425Sdim }; 320286425Sdim 321314564Sdim} // end anonymous namespace 322286425Sdim 323286425Sdimbool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { 324286425Sdim // Ordering of registers, made up from two given orderings: 325286425Sdim // - the ordering of the register numbers, and 326286425Sdim // - the ordering of register cells. 327286425Sdim // Def. R1 < R2 if: 328286425Sdim // - cell(R1) < cell(R2), or 329286425Sdim // - cell(R1) == cell(R2), and index(R1) < index(R2). 330286425Sdim // 331286425Sdim // For register cells, the ordering is lexicographic, with index 0 being 332286425Sdim // the most significant. 333286425Sdim if (VR1 == VR2) 334286425Sdim return false; 335286425Sdim 336286425Sdim const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); 337286425Sdim uint16_t W1 = RC1.width(), W2 = RC2.width(); 338286425Sdim for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { 339286425Sdim const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; 340286425Sdim if (V1 != V2) 341286425Sdim return BitOrd(V1, V2); 342286425Sdim } 343286425Sdim // Cells are equal up until the common length. 344286425Sdim if (W1 != W2) 345286425Sdim return W1 < W2; 346286425Sdim 347286425Sdim return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; 348286425Sdim} 349286425Sdim 350286425Sdimbool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { 351286425Sdim if (VR1 == VR2) 352286425Sdim return false; 353286425Sdim const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); 354286425Sdim const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); 355286425Sdim uint16_t W1 = RC1.width(), W2 = RC2.width(); 356286425Sdim uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; 357286425Sdim uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; 358286425Sdim // If Bit1 exceeds the width of VR1, then: 359286425Sdim // - return false, if at the same time Bit2 exceeds VR2, or 360286425Sdim // - return true, otherwise. 361286425Sdim // (I.e. "a bit value that does not exist is less than any bit value 362286425Sdim // that does exist".) 363286425Sdim if (W1 <= Bit1) 364286425Sdim return Bit2 < W2; 365286425Sdim // If Bit1 is within VR1, but Bit2 is not within VR2, return false. 366286425Sdim if (W2 <= Bit2) 367286425Sdim return false; 368286425Sdim 369286425Sdim const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; 370286425Sdim if (V1 != V2) 371286425Sdim return BitOrd(V1, V2); 372286425Sdim return false; 373286425Sdim} 374286425Sdim 375314564Sdimnamespace { 376286425Sdim 377286425Sdim class OrderedRegisterList { 378327952Sdim using ListType = std::vector<unsigned>; 379341825Sdim const unsigned MaxSize; 380314564Sdim 381286425Sdim public: 382341825Sdim OrderedRegisterList(const RegisterOrdering &RO) 383341825Sdim : MaxSize(MaxORLSize), Ord(RO) {} 384314564Sdim 385286425Sdim void insert(unsigned VR); 386286425Sdim void remove(unsigned VR); 387314564Sdim 388286425Sdim unsigned operator[](unsigned Idx) const { 389286425Sdim assert(Idx < Seq.size()); 390286425Sdim return Seq[Idx]; 391286425Sdim } 392314564Sdim 393286425Sdim unsigned size() const { 394286425Sdim return Seq.size(); 395286425Sdim } 396286425Sdim 397327952Sdim using iterator = ListType::iterator; 398327952Sdim using const_iterator = ListType::const_iterator; 399327952Sdim 400286425Sdim iterator begin() { return Seq.begin(); } 401286425Sdim iterator end() { return Seq.end(); } 402286425Sdim const_iterator begin() const { return Seq.begin(); } 403286425Sdim const_iterator end() const { return Seq.end(); } 404286425Sdim 405286425Sdim // Convenience function to convert an iterator to the corresponding index. 406286425Sdim unsigned idx(iterator It) const { return It-begin(); } 407314564Sdim 408286425Sdim private: 409286425Sdim ListType Seq; 410286425Sdim const RegisterOrdering &Ord; 411286425Sdim }; 412286425Sdim 413286425Sdim struct PrintORL { 414286425Sdim PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) 415286425Sdim : RL(L), TRI(RI) {} 416314564Sdim 417286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); 418314564Sdim 419286425Sdim private: 420286425Sdim const OrderedRegisterList &RL; 421286425Sdim const TargetRegisterInfo *TRI; 422286425Sdim }; 423286425Sdim 424286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { 425286425Sdim OS << '('; 426286425Sdim OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); 427286425Sdim for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { 428286425Sdim if (I != B) 429286425Sdim OS << ", "; 430327952Sdim OS << printReg(*I, P.TRI); 431286425Sdim } 432286425Sdim OS << ')'; 433286425Sdim return OS; 434286425Sdim } 435286425Sdim 436314564Sdim} // end anonymous namespace 437286425Sdim 438286425Sdimvoid OrderedRegisterList::insert(unsigned VR) { 439353358Sdim iterator L = llvm::lower_bound(Seq, VR, Ord); 440286425Sdim if (L == Seq.end()) 441286425Sdim Seq.push_back(VR); 442286425Sdim else 443286425Sdim Seq.insert(L, VR); 444341825Sdim 445341825Sdim unsigned S = Seq.size(); 446341825Sdim if (S > MaxSize) 447341825Sdim Seq.resize(MaxSize); 448341825Sdim assert(Seq.size() <= MaxSize); 449286425Sdim} 450286425Sdim 451286425Sdimvoid OrderedRegisterList::remove(unsigned VR) { 452353358Sdim iterator L = llvm::lower_bound(Seq, VR, Ord); 453341825Sdim if (L != Seq.end()) 454341825Sdim Seq.erase(L); 455286425Sdim} 456286425Sdim 457314564Sdimnamespace { 458286425Sdim 459286425Sdim // A record of the insert form. The fields correspond to the operands 460286425Sdim // of the "insert" instruction: 461286425Sdim // ... = insert(SrcR, InsR, #Wdh, #Off) 462286425Sdim struct IFRecord { 463286425Sdim IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) 464286425Sdim : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} 465314564Sdim 466286425Sdim unsigned SrcR, InsR; 467286425Sdim uint16_t Wdh, Off; 468286425Sdim }; 469286425Sdim 470286425Sdim struct PrintIFR { 471286425Sdim PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) 472286425Sdim : IFR(R), TRI(RI) {} 473314564Sdim 474286425Sdim private: 475314564Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); 476314564Sdim 477286425Sdim const IFRecord &IFR; 478286425Sdim const TargetRegisterInfo *TRI; 479286425Sdim }; 480286425Sdim 481286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { 482286425Sdim unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; 483327952Sdim OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI) 484286425Sdim << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; 485286425Sdim return OS; 486286425Sdim } 487286425Sdim 488327952Sdim using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>; 489286425Sdim 490314564Sdim} // end anonymous namespace 491286425Sdim 492286425Sdimnamespace llvm { 493314564Sdim 494286425Sdim void initializeHexagonGenInsertPass(PassRegistry&); 495286425Sdim FunctionPass *createHexagonGenInsert(); 496286425Sdim 497314564Sdim} // end namespace llvm 498286425Sdim 499286425Sdimnamespace { 500314564Sdim 501286425Sdim class HexagonGenInsert : public MachineFunctionPass { 502286425Sdim public: 503286425Sdim static char ID; 504314564Sdim 505327952Sdim HexagonGenInsert() : MachineFunctionPass(ID) { 506286425Sdim initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); 507286425Sdim } 508314564Sdim 509314564Sdim StringRef getPassName() const override { 510286425Sdim return "Hexagon generate \"insert\" instructions"; 511286425Sdim } 512314564Sdim 513314564Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 514286425Sdim AU.addRequired<MachineDominatorTree>(); 515286425Sdim AU.addPreserved<MachineDominatorTree>(); 516286425Sdim MachineFunctionPass::getAnalysisUsage(AU); 517286425Sdim } 518286425Sdim 519314564Sdim bool runOnMachineFunction(MachineFunction &MF) override; 520314564Sdim 521286425Sdim private: 522327952Sdim using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>; 523286425Sdim 524286425Sdim void buildOrderingMF(RegisterOrdering &RO) const; 525286425Sdim void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; 526286425Sdim bool isIntClass(const TargetRegisterClass *RC) const; 527286425Sdim bool isConstant(unsigned VR) const; 528286425Sdim bool isSmallConstant(unsigned VR) const; 529286425Sdim bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, 530286425Sdim uint16_t L, uint16_t S) const; 531286425Sdim bool findSelfReference(unsigned VR) const; 532286425Sdim bool findNonSelfReference(unsigned VR) const; 533286425Sdim void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; 534286425Sdim void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; 535286425Sdim unsigned distance(const MachineBasicBlock *FromB, 536286425Sdim const MachineBasicBlock *ToB, const UnsignedMap &RPO, 537286425Sdim PairMapType &M) const; 538286425Sdim unsigned distance(MachineBasicBlock::const_iterator FromI, 539286425Sdim MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 540286425Sdim PairMapType &M) const; 541286425Sdim bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); 542286425Sdim void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); 543286425Sdim void findRemovableRegisters(unsigned VR, IFRecord IF, 544286425Sdim RegisterSet &RMs) const; 545286425Sdim void computeRemovableRegisters(); 546286425Sdim 547286425Sdim void pruneEmptyLists(); 548286425Sdim void pruneCoveredSets(unsigned VR); 549286425Sdim void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); 550286425Sdim void pruneRegCopies(unsigned VR); 551286425Sdim void pruneCandidates(); 552286425Sdim void selectCandidates(); 553286425Sdim bool generateInserts(); 554286425Sdim 555286425Sdim bool removeDeadCode(MachineDomTreeNode *N); 556286425Sdim 557286425Sdim // IFRecord coupled with a set of potentially removable registers: 558327952Sdim using IFListType = std::vector<IFRecordWithRegSet>; 559327952Sdim using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType 560286425Sdim 561286425Sdim void dump_map() const; 562286425Sdim 563327952Sdim const HexagonInstrInfo *HII = nullptr; 564327952Sdim const HexagonRegisterInfo *HRI = nullptr; 565286425Sdim 566286425Sdim MachineFunction *MFN; 567286425Sdim MachineRegisterInfo *MRI; 568286425Sdim MachineDominatorTree *MDT; 569286425Sdim CellMapShadow *CMS; 570286425Sdim 571286425Sdim RegisterOrdering BaseOrd; 572286425Sdim RegisterOrdering CellOrd; 573286425Sdim IFMapType IFMap; 574286425Sdim }; 575286425Sdim 576314564Sdim} // end anonymous namespace 577286425Sdim 578327952Sdimchar HexagonGenInsert::ID = 0; 579327952Sdim 580286425Sdimvoid HexagonGenInsert::dump_map() const { 581327952Sdim using iterator = IFMapType::const_iterator; 582327952Sdim 583286425Sdim for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 584327952Sdim dbgs() << " " << printReg(I->first, HRI) << ":\n"; 585286425Sdim const IFListType &LL = I->second; 586286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 587286425Sdim dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " 588286425Sdim << PrintRegSet(LL[i].second, HRI) << '\n'; 589286425Sdim } 590286425Sdim} 591286425Sdim 592286425Sdimvoid HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { 593286425Sdim unsigned Index = 0; 594327952Sdim 595327952Sdim using mf_iterator = MachineFunction::const_iterator; 596327952Sdim 597286425Sdim for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { 598286425Sdim const MachineBasicBlock &B = *A; 599286425Sdim if (!CMS->BT.reached(&B)) 600286425Sdim continue; 601327952Sdim 602327952Sdim using mb_iterator = MachineBasicBlock::const_iterator; 603327952Sdim 604286425Sdim for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { 605286425Sdim const MachineInstr *MI = &*I; 606286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 607286425Sdim const MachineOperand &MO = MI->getOperand(i); 608286425Sdim if (MO.isReg() && MO.isDef()) { 609286425Sdim unsigned R = MO.getReg(); 610286425Sdim assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); 611286425Sdim if (TargetRegisterInfo::isVirtualRegister(R)) 612286425Sdim RO.insert(std::make_pair(R, Index++)); 613286425Sdim } 614286425Sdim } 615286425Sdim } 616286425Sdim } 617286425Sdim // Since some virtual registers may have had their def and uses eliminated, 618286425Sdim // they are no longer referenced in the code, and so they will not appear 619286425Sdim // in the map. 620286425Sdim} 621286425Sdim 622286425Sdimvoid HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, 623286425Sdim RegisterOrdering &RO) const { 624286425Sdim // Create a vector of all virtual registers (collect them from the base 625286425Sdim // ordering RB), and then sort it using the RegisterCell comparator. 626286425Sdim BitValueOrdering BVO(RB); 627286425Sdim RegisterCellLexCompare LexCmp(BVO, *CMS); 628327952Sdim 629327952Sdim using SortableVectorType = std::vector<unsigned>; 630327952Sdim 631286425Sdim SortableVectorType VRs; 632286425Sdim for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) 633286425Sdim VRs.push_back(I->first); 634344779Sdim llvm::sort(VRs, LexCmp); 635286425Sdim // Transfer the results to the outgoing register ordering. 636286425Sdim for (unsigned i = 0, n = VRs.size(); i < n; ++i) 637286425Sdim RO.insert(std::make_pair(VRs[i], i)); 638286425Sdim} 639286425Sdim 640286425Sdiminline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { 641286425Sdim return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; 642286425Sdim} 643286425Sdim 644286425Sdimbool HexagonGenInsert::isConstant(unsigned VR) const { 645286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 646286425Sdim uint16_t W = RC.width(); 647286425Sdim for (uint16_t i = 0; i < W; ++i) { 648286425Sdim const BitTracker::BitValue &BV = RC[i]; 649286425Sdim if (BV.is(0) || BV.is(1)) 650286425Sdim continue; 651286425Sdim return false; 652286425Sdim } 653286425Sdim return true; 654286425Sdim} 655286425Sdim 656286425Sdimbool HexagonGenInsert::isSmallConstant(unsigned VR) const { 657286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 658286425Sdim uint16_t W = RC.width(); 659286425Sdim if (W > 64) 660286425Sdim return false; 661286425Sdim uint64_t V = 0, B = 1; 662286425Sdim for (uint16_t i = 0; i < W; ++i) { 663286425Sdim const BitTracker::BitValue &BV = RC[i]; 664286425Sdim if (BV.is(1)) 665286425Sdim V |= B; 666286425Sdim else if (!BV.is(0)) 667286425Sdim return false; 668286425Sdim B <<= 1; 669286425Sdim } 670286425Sdim 671286425Sdim // For 32-bit registers, consider: Rd = #s16. 672286425Sdim if (W == 32) 673286425Sdim return isInt<16>(V); 674286425Sdim 675286425Sdim // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) 676286425Sdim return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); 677286425Sdim} 678286425Sdim 679286425Sdimbool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, 680286425Sdim unsigned InsR, uint16_t L, uint16_t S) const { 681286425Sdim const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); 682286425Sdim const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); 683286425Sdim const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); 684286425Sdim // Only integet (32-/64-bit) register classes. 685286425Sdim if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) 686286425Sdim return false; 687286425Sdim // The "source" register must be of the same class as DstR. 688286425Sdim if (DstRC != SrcRC) 689286425Sdim return false; 690286425Sdim if (DstRC == InsRC) 691286425Sdim return true; 692286425Sdim // A 64-bit register can only be generated from other 64-bit registers. 693286425Sdim if (DstRC == &Hexagon::DoubleRegsRegClass) 694286425Sdim return false; 695286425Sdim // Otherwise, the L and S cannot span 32-bit word boundary. 696286425Sdim if (S < 32 && S+L > 32) 697286425Sdim return false; 698286425Sdim return true; 699286425Sdim} 700286425Sdim 701286425Sdimbool HexagonGenInsert::findSelfReference(unsigned VR) const { 702286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 703286425Sdim for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 704286425Sdim const BitTracker::BitValue &V = RC[i]; 705286425Sdim if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) 706286425Sdim return true; 707286425Sdim } 708286425Sdim return false; 709286425Sdim} 710286425Sdim 711286425Sdimbool HexagonGenInsert::findNonSelfReference(unsigned VR) const { 712286425Sdim BitTracker::RegisterCell RC = CMS->lookup(VR); 713286425Sdim for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 714286425Sdim const BitTracker::BitValue &V = RC[i]; 715286425Sdim if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) 716286425Sdim return true; 717286425Sdim } 718286425Sdim return false; 719286425Sdim} 720286425Sdim 721286425Sdimvoid HexagonGenInsert::getInstrDefs(const MachineInstr *MI, 722286425Sdim RegisterSet &Defs) const { 723286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 724286425Sdim const MachineOperand &MO = MI->getOperand(i); 725286425Sdim if (!MO.isReg() || !MO.isDef()) 726286425Sdim continue; 727286425Sdim unsigned R = MO.getReg(); 728286425Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 729286425Sdim continue; 730286425Sdim Defs.insert(R); 731286425Sdim } 732286425Sdim} 733286425Sdim 734286425Sdimvoid HexagonGenInsert::getInstrUses(const MachineInstr *MI, 735286425Sdim RegisterSet &Uses) const { 736286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 737286425Sdim const MachineOperand &MO = MI->getOperand(i); 738286425Sdim if (!MO.isReg() || !MO.isUse()) 739286425Sdim continue; 740286425Sdim unsigned R = MO.getReg(); 741286425Sdim if (!TargetRegisterInfo::isVirtualRegister(R)) 742286425Sdim continue; 743286425Sdim Uses.insert(R); 744286425Sdim } 745286425Sdim} 746286425Sdim 747286425Sdimunsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, 748286425Sdim const MachineBasicBlock *ToB, const UnsignedMap &RPO, 749286425Sdim PairMapType &M) const { 750286425Sdim // Forward distance from the end of a block to the beginning of it does 751286425Sdim // not make sense. This function should not be called with FromB == ToB. 752286425Sdim assert(FromB != ToB); 753286425Sdim 754286425Sdim unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); 755286425Sdim // If we have already computed it, return the cached result. 756286425Sdim PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); 757286425Sdim if (F != M.end()) 758286425Sdim return F->second; 759286425Sdim unsigned ToRPO = RPO.lookup(ToN); 760286425Sdim 761286425Sdim unsigned MaxD = 0; 762327952Sdim 763327952Sdim using pred_iterator = MachineBasicBlock::const_pred_iterator; 764327952Sdim 765286425Sdim for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { 766286425Sdim const MachineBasicBlock *PB = *I; 767286425Sdim // Skip back edges. Also, if FromB is a predecessor of ToB, the distance 768286425Sdim // along that path will be 0, and we don't need to do any calculations 769286425Sdim // on it. 770286425Sdim if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) 771286425Sdim continue; 772286425Sdim unsigned D = PB->size() + distance(FromB, PB, RPO, M); 773286425Sdim if (D > MaxD) 774286425Sdim MaxD = D; 775286425Sdim } 776286425Sdim 777286425Sdim // Memoize the result for later lookup. 778286425Sdim M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); 779286425Sdim return MaxD; 780286425Sdim} 781286425Sdim 782286425Sdimunsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, 783286425Sdim MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 784286425Sdim PairMapType &M) const { 785286425Sdim const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); 786286425Sdim if (FB == TB) 787286425Sdim return std::distance(FromI, ToI); 788286425Sdim unsigned D1 = std::distance(TB->begin(), ToI); 789286425Sdim unsigned D2 = distance(FB, TB, RPO, M); 790286425Sdim unsigned D3 = std::distance(FromI, FB->end()); 791286425Sdim return D1+D2+D3; 792286425Sdim} 793286425Sdim 794286425Sdimbool HexagonGenInsert::findRecordInsertForms(unsigned VR, 795286425Sdim OrderedRegisterList &AVs) { 796286425Sdim if (isDebug()) { 797327952Sdim dbgs() << __func__ << ": " << printReg(VR, HRI) 798286425Sdim << " AVs: " << PrintORL(AVs, HRI) << "\n"; 799286425Sdim } 800286425Sdim if (AVs.size() == 0) 801286425Sdim return false; 802286425Sdim 803327952Sdim using iterator = OrderedRegisterList::iterator; 804327952Sdim 805286425Sdim BitValueOrdering BVO(BaseOrd); 806286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 807286425Sdim uint16_t W = RC.width(); 808286425Sdim 809327952Sdim using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift) 810327952Sdim using RSListType = std::vector<RSRecord>; 811286425Sdim // Have a map, with key being the matching prefix length, and the value 812286425Sdim // being the list of pairs (R,S), where R's prefix matches VR at S. 813286425Sdim // (DenseMap<uint16_t,RSListType> fails to instantiate.) 814327952Sdim using LRSMapType = DenseMap<unsigned, RSListType>; 815286425Sdim LRSMapType LM; 816286425Sdim 817286425Sdim // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, 818286425Sdim // and find matching prefixes from AVs with the rotated RC. Such a prefix 819286425Sdim // would match a string of bits (of length L) in RC starting at S. 820286425Sdim for (uint16_t S = 0; S < W; ++S) { 821286425Sdim iterator B = AVs.begin(), E = AVs.end(); 822286425Sdim // The registers in AVs are ordered according to the lexical order of 823286425Sdim // the corresponding register cells. This means that the range of regis- 824286425Sdim // ters in AVs that match a prefix of length L+1 will be contained in 825286425Sdim // the range that matches a prefix of length L. This means that we can 826286425Sdim // keep narrowing the search space as the prefix length goes up. This 827286425Sdim // helps reduce the overall complexity of the search. 828286425Sdim uint16_t L; 829286425Sdim for (L = 0; L < W-S; ++L) { 830286425Sdim // Compare against VR's bits starting at S, which emulates rotation 831286425Sdim // of VR by S. 832286425Sdim RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); 833286425Sdim iterator NewB = std::lower_bound(B, E, VR, RCB); 834286425Sdim iterator NewE = std::upper_bound(NewB, E, VR, RCB); 835286425Sdim // For the registers that are eliminated from the next range, L is 836286425Sdim // the longest prefix matching VR at position S (their prefixes 837286425Sdim // differ from VR at S+L). If L>0, record this information for later 838286425Sdim // use. 839286425Sdim if (L > 0) { 840286425Sdim for (iterator I = B; I != NewB; ++I) 841286425Sdim LM[L].push_back(std::make_pair(*I, S)); 842286425Sdim for (iterator I = NewE; I != E; ++I) 843286425Sdim LM[L].push_back(std::make_pair(*I, S)); 844286425Sdim } 845286425Sdim B = NewB, E = NewE; 846286425Sdim if (B == E) 847286425Sdim break; 848286425Sdim } 849286425Sdim // Record the final register range. If this range is non-empty, then 850286425Sdim // L=W-S. 851286425Sdim assert(B == E || L == W-S); 852286425Sdim if (B != E) { 853286425Sdim for (iterator I = B; I != E; ++I) 854286425Sdim LM[L].push_back(std::make_pair(*I, S)); 855286425Sdim // If B!=E, then we found a range of registers whose prefixes cover the 856286425Sdim // rest of VR from position S. There is no need to further advance S. 857286425Sdim break; 858286425Sdim } 859286425Sdim } 860286425Sdim 861286425Sdim if (isDebug()) { 862327952Sdim dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n"; 863286425Sdim for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { 864286425Sdim dbgs() << " L=" << I->first << ':'; 865286425Sdim const RSListType &LL = I->second; 866286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 867327952Sdim dbgs() << " (" << printReg(LL[i].first, HRI) << ",@" 868286425Sdim << LL[i].second << ')'; 869286425Sdim dbgs() << '\n'; 870286425Sdim } 871286425Sdim } 872286425Sdim 873286425Sdim bool Recorded = false; 874286425Sdim 875286425Sdim for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { 876286425Sdim unsigned SrcR = *I; 877286425Sdim int FDi = -1, LDi = -1; // First/last different bit. 878286425Sdim const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); 879286425Sdim uint16_t AW = AC.width(); 880286425Sdim for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { 881286425Sdim if (RC[i] == AC[i]) 882286425Sdim continue; 883286425Sdim if (FDi == -1) 884286425Sdim FDi = i; 885286425Sdim LDi = i; 886286425Sdim } 887286425Sdim if (FDi == -1) 888286425Sdim continue; // TODO (future): Record identical registers. 889286425Sdim // Look for a register whose prefix could patch the range [FD..LD] 890286425Sdim // where VR and SrcR differ. 891286425Sdim uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. 892286425Sdim uint16_t MinL = LD-FD+1; 893286425Sdim for (uint16_t L = MinL; L < W; ++L) { 894286425Sdim LRSMapType::iterator F = LM.find(L); 895286425Sdim if (F == LM.end()) 896286425Sdim continue; 897286425Sdim RSListType &LL = F->second; 898286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) { 899286425Sdim uint16_t S = LL[i].second; 900286425Sdim // MinL is the minimum length of the prefix. Any length above MinL 901286425Sdim // allows some flexibility as to where the prefix can start: 902286425Sdim // given the extra length EL=L-MinL, the prefix must start between 903286425Sdim // max(0,FD-EL) and FD. 904286425Sdim if (S > FD) // Starts too late. 905286425Sdim continue; 906286425Sdim uint16_t EL = L-MinL; 907286425Sdim uint16_t LowS = (EL < FD) ? FD-EL : 0; 908286425Sdim if (S < LowS) // Starts too early. 909286425Sdim continue; 910286425Sdim unsigned InsR = LL[i].first; 911286425Sdim if (!isValidInsertForm(VR, SrcR, InsR, L, S)) 912286425Sdim continue; 913286425Sdim if (isDebug()) { 914327952Sdim dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI) 915327952Sdim << ',' << printReg(InsR, HRI) << ",#" << L << ",#" 916286425Sdim << S << ")\n"; 917286425Sdim } 918286425Sdim IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); 919286425Sdim IFMap[VR].push_back(RR); 920286425Sdim Recorded = true; 921286425Sdim } 922286425Sdim } 923286425Sdim } 924286425Sdim 925286425Sdim return Recorded; 926286425Sdim} 927286425Sdim 928286425Sdimvoid HexagonGenInsert::collectInBlock(MachineBasicBlock *B, 929286425Sdim OrderedRegisterList &AVs) { 930286425Sdim if (isDebug()) 931327952Sdim dbgs() << "visiting block " << printMBBReference(*B) << "\n"; 932286425Sdim 933286425Sdim // First, check if this block is reachable at all. If not, the bit tracker 934286425Sdim // will not have any information about registers in it. 935286425Sdim if (!CMS->BT.reached(B)) 936286425Sdim return; 937286425Sdim 938286425Sdim bool DoConst = OptConst; 939286425Sdim // Keep a separate set of registers defined in this block, so that we 940286425Sdim // can remove them from the list of available registers once all DT 941286425Sdim // successors have been processed. 942286425Sdim RegisterSet BlockDefs, InsDefs; 943286425Sdim for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { 944286425Sdim MachineInstr *MI = &*I; 945286425Sdim InsDefs.clear(); 946286425Sdim getInstrDefs(MI, InsDefs); 947286425Sdim // Leave those alone. They are more transparent than "insert". 948286425Sdim bool Skip = MI->isCopy() || MI->isRegSequence(); 949286425Sdim 950286425Sdim if (!Skip) { 951286425Sdim // Visit all defined registers, and attempt to find the corresponding 952286425Sdim // "insert" representations. 953286425Sdim for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { 954286425Sdim // Do not collect registers that are known to be compile-time cons- 955286425Sdim // tants, unless requested. 956286425Sdim if (!DoConst && isConstant(VR)) 957286425Sdim continue; 958286425Sdim // If VR's cell contains a reference to VR, then VR cannot be defined 959286425Sdim // via "insert". If VR is a constant that can be generated in a single 960286425Sdim // instruction (without constant extenders), generating it via insert 961286425Sdim // makes no sense. 962286425Sdim if (findSelfReference(VR) || isSmallConstant(VR)) 963286425Sdim continue; 964286425Sdim 965286425Sdim findRecordInsertForms(VR, AVs); 966341825Sdim // Stop if the map size is too large. 967341825Sdim if (IFMap.size() > MaxIFMSize) 968341825Sdim return; 969286425Sdim } 970286425Sdim } 971286425Sdim 972286425Sdim // Insert the defined registers into the list of available registers 973286425Sdim // after they have been processed. 974286425Sdim for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) 975286425Sdim AVs.insert(VR); 976286425Sdim BlockDefs.insert(InsDefs); 977286425Sdim } 978286425Sdim 979321369Sdim for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) { 980321369Sdim MachineBasicBlock *SB = DTN->getBlock(); 981286425Sdim collectInBlock(SB, AVs); 982286425Sdim } 983286425Sdim 984286425Sdim for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) 985286425Sdim AVs.remove(VR); 986286425Sdim} 987286425Sdim 988286425Sdimvoid HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, 989286425Sdim RegisterSet &RMs) const { 990286425Sdim // For a given register VR and a insert form, find the registers that are 991286425Sdim // used by the current definition of VR, and which would no longer be 992286425Sdim // needed for it after the definition of VR is replaced with the insert 993286425Sdim // form. These are the registers that could potentially become dead. 994286425Sdim RegisterSet Regs[2]; 995286425Sdim 996286425Sdim unsigned S = 0; // Register set selector. 997286425Sdim Regs[S].insert(VR); 998286425Sdim 999286425Sdim while (!Regs[S].empty()) { 1000286425Sdim // Breadth-first search. 1001286425Sdim unsigned OtherS = 1-S; 1002286425Sdim Regs[OtherS].clear(); 1003286425Sdim for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { 1004286425Sdim Regs[S].remove(R); 1005286425Sdim if (R == IF.SrcR || R == IF.InsR) 1006286425Sdim continue; 1007286425Sdim // Check if a given register has bits that are references to any other 1008286425Sdim // registers. This is to detect situations where the instruction that 1009286425Sdim // defines register R takes register Q as an operand, but R itself does 1010286425Sdim // not contain any bits from Q. Loads are examples of how this could 1011286425Sdim // happen: 1012286425Sdim // R = load Q 1013286425Sdim // In this case (assuming we do not have any knowledge about the loaded 1014286425Sdim // value), we must not treat R as a "conveyance" of the bits from Q. 1015286425Sdim // (The information in BT about R's bits would have them as constants, 1016286425Sdim // in case of zero-extending loads, or refs to R.) 1017286425Sdim if (!findNonSelfReference(R)) 1018286425Sdim continue; 1019286425Sdim RMs.insert(R); 1020286425Sdim const MachineInstr *DefI = MRI->getVRegDef(R); 1021286425Sdim assert(DefI); 1022286425Sdim // Do not iterate past PHI nodes to avoid infinite loops. This can 1023286425Sdim // make the final set a bit less accurate, but the removable register 1024286425Sdim // sets are an approximation anyway. 1025286425Sdim if (DefI->isPHI()) 1026286425Sdim continue; 1027286425Sdim getInstrUses(DefI, Regs[OtherS]); 1028286425Sdim } 1029286425Sdim S = OtherS; 1030286425Sdim } 1031286425Sdim // The register VR is added to the list as a side-effect of the algorithm, 1032286425Sdim // but it is not "potentially removable". A potentially removable register 1033286425Sdim // is one that may become unused (dead) after conversion to the insert form 1034286425Sdim // IF, and obviously VR (or its replacement) will not become dead by apply- 1035286425Sdim // ing IF. 1036286425Sdim RMs.remove(VR); 1037286425Sdim} 1038286425Sdim 1039286425Sdimvoid HexagonGenInsert::computeRemovableRegisters() { 1040286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1041286425Sdim IFListType &LL = I->second; 1042286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 1043286425Sdim findRemovableRegisters(I->first, LL[i].first, LL[i].second); 1044286425Sdim } 1045286425Sdim} 1046286425Sdim 1047286425Sdimvoid HexagonGenInsert::pruneEmptyLists() { 1048286425Sdim // Remove all entries from the map, where the register has no insert forms 1049286425Sdim // associated with it. 1050327952Sdim using IterListType = SmallVector<IFMapType::iterator, 16>; 1051286425Sdim IterListType Prune; 1052286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1053314564Sdim if (I->second.empty()) 1054286425Sdim Prune.push_back(I); 1055286425Sdim } 1056286425Sdim for (unsigned i = 0, n = Prune.size(); i < n; ++i) 1057286425Sdim IFMap.erase(Prune[i]); 1058286425Sdim} 1059286425Sdim 1060286425Sdimvoid HexagonGenInsert::pruneCoveredSets(unsigned VR) { 1061286425Sdim IFMapType::iterator F = IFMap.find(VR); 1062286425Sdim assert(F != IFMap.end()); 1063286425Sdim IFListType &LL = F->second; 1064286425Sdim 1065286425Sdim // First, examine the IF candidates for register VR whose removable-regis- 1066286425Sdim // ter sets are empty. This means that a given candidate will not help eli- 1067286425Sdim // minate any registers, but since "insert" is not a constant-extendable 1068286425Sdim // instruction, using such a candidate may reduce code size if the defini- 1069286425Sdim // tion of VR is constant-extended. 1070286425Sdim // If there exists a candidate with a non-empty set, the ones with empty 1071286425Sdim // sets will not be used and can be removed. 1072286425Sdim MachineInstr *DefVR = MRI->getVRegDef(VR); 1073314564Sdim bool DefEx = HII->isConstExtended(*DefVR); 1074286425Sdim bool HasNE = false; 1075286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) { 1076286425Sdim if (LL[i].second.empty()) 1077286425Sdim continue; 1078286425Sdim HasNE = true; 1079286425Sdim break; 1080286425Sdim } 1081286425Sdim if (!DefEx || HasNE) { 1082286425Sdim // The definition of VR is not constant-extended, or there is a candidate 1083286425Sdim // with a non-empty set. Remove all candidates with empty sets. 1084286425Sdim auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { 1085286425Sdim return IR.second.empty(); 1086286425Sdim }; 1087314564Sdim auto End = llvm::remove_if(LL, IsEmpty); 1088286425Sdim if (End != LL.end()) 1089286425Sdim LL.erase(End, LL.end()); 1090286425Sdim } else { 1091286425Sdim // The definition of VR is constant-extended, and all candidates have 1092286425Sdim // empty removable-register sets. Pick the maximum candidate, and remove 1093286425Sdim // all others. The "maximum" does not have any special meaning here, it 1094286425Sdim // is only so that the candidate that will remain on the list is selec- 1095286425Sdim // ted deterministically. 1096286425Sdim IFRecord MaxIF = LL[0].first; 1097286425Sdim for (unsigned i = 1, n = LL.size(); i < n; ++i) { 1098286425Sdim // If LL[MaxI] < LL[i], then MaxI = i. 1099286425Sdim const IFRecord &IF = LL[i].first; 1100286425Sdim unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; 1101286425Sdim unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; 1102286425Sdim if (M0 > R0) 1103286425Sdim continue; 1104286425Sdim if (M0 == R0) { 1105286425Sdim if (M1 > R1) 1106286425Sdim continue; 1107286425Sdim if (M1 == R1) { 1108286425Sdim if (MaxIF.Wdh > IF.Wdh) 1109286425Sdim continue; 1110286425Sdim if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) 1111286425Sdim continue; 1112286425Sdim } 1113286425Sdim } 1114286425Sdim // MaxIF < IF. 1115286425Sdim MaxIF = IF; 1116286425Sdim } 1117286425Sdim // Remove everything except the maximum candidate. All register sets 1118286425Sdim // are empty, so no need to preserve anything. 1119286425Sdim LL.clear(); 1120286425Sdim LL.push_back(std::make_pair(MaxIF, RegisterSet())); 1121286425Sdim } 1122286425Sdim 1123286425Sdim // Now, remove those whose sets of potentially removable registers are 1124286425Sdim // contained in another IF candidate for VR. For example, given these 1125327952Sdim // candidates for %45, 1126327952Sdim // %45: 1127327952Sdim // (%44,%41,#9,#8), { %42 } 1128327952Sdim // (%43,%41,#9,#8), { %42 %44 } 1129286425Sdim // remove the first one, since it is contained in the second one. 1130286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ) { 1131286425Sdim const RegisterSet &RMi = LL[i].second; 1132286425Sdim unsigned j = 0; 1133286425Sdim while (j < n) { 1134286425Sdim if (j != i && LL[j].second.includes(RMi)) 1135286425Sdim break; 1136286425Sdim j++; 1137286425Sdim } 1138286425Sdim if (j == n) { // RMi not contained in anything else. 1139286425Sdim i++; 1140286425Sdim continue; 1141286425Sdim } 1142286425Sdim LL.erase(LL.begin()+i); 1143286425Sdim n = LL.size(); 1144286425Sdim } 1145286425Sdim} 1146286425Sdim 1147286425Sdimvoid HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, 1148286425Sdim PairMapType &M) { 1149286425Sdim IFMapType::iterator F = IFMap.find(VR); 1150286425Sdim assert(F != IFMap.end()); 1151286425Sdim IFListType &LL = F->second; 1152286425Sdim unsigned Cutoff = VRegDistCutoff; 1153286425Sdim const MachineInstr *DefV = MRI->getVRegDef(VR); 1154286425Sdim 1155286425Sdim for (unsigned i = LL.size(); i > 0; --i) { 1156286425Sdim unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; 1157286425Sdim const MachineInstr *DefS = MRI->getVRegDef(SR); 1158286425Sdim const MachineInstr *DefI = MRI->getVRegDef(IR); 1159286425Sdim unsigned DSV = distance(DefS, DefV, RPO, M); 1160286425Sdim if (DSV < Cutoff) { 1161286425Sdim unsigned DIV = distance(DefI, DefV, RPO, M); 1162286425Sdim if (DIV < Cutoff) 1163286425Sdim continue; 1164286425Sdim } 1165286425Sdim LL.erase(LL.begin()+(i-1)); 1166286425Sdim } 1167286425Sdim} 1168286425Sdim 1169286425Sdimvoid HexagonGenInsert::pruneRegCopies(unsigned VR) { 1170286425Sdim IFMapType::iterator F = IFMap.find(VR); 1171286425Sdim assert(F != IFMap.end()); 1172286425Sdim IFListType &LL = F->second; 1173286425Sdim 1174286425Sdim auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { 1175286425Sdim return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); 1176286425Sdim }; 1177314564Sdim auto End = llvm::remove_if(LL, IsCopy); 1178286425Sdim if (End != LL.end()) 1179286425Sdim LL.erase(End, LL.end()); 1180286425Sdim} 1181286425Sdim 1182286425Sdimvoid HexagonGenInsert::pruneCandidates() { 1183286425Sdim // Remove candidates that are not beneficial, regardless of the final 1184286425Sdim // selection method. 1185286425Sdim // First, remove candidates whose potentially removable set is a subset 1186286425Sdim // of another candidate's set. 1187286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1188286425Sdim pruneCoveredSets(I->first); 1189286425Sdim 1190286425Sdim UnsignedMap RPO; 1191327952Sdim 1192327952Sdim using RPOTType = ReversePostOrderTraversal<const MachineFunction *>; 1193327952Sdim 1194286425Sdim RPOTType RPOT(MFN); 1195286425Sdim unsigned RPON = 0; 1196286425Sdim for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 1197286425Sdim RPO[(*I)->getNumber()] = RPON++; 1198286425Sdim 1199286425Sdim PairMapType Memo; // Memoization map for distance calculation. 1200286425Sdim // Remove candidates that would use registers defined too far away. 1201286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1202286425Sdim pruneUsesTooFar(I->first, RPO, Memo); 1203286425Sdim 1204286425Sdim pruneEmptyLists(); 1205286425Sdim 1206286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1207286425Sdim pruneRegCopies(I->first); 1208286425Sdim} 1209286425Sdim 1210314564Sdimnamespace { 1211286425Sdim 1212286425Sdim // Class for comparing IF candidates for registers that have multiple of 1213286425Sdim // them. The smaller the candidate, according to this ordering, the better. 1214286425Sdim // First, compare the number of zeros in the associated potentially remova- 1215286425Sdim // ble register sets. "Zero" indicates that the register is very likely to 1216286425Sdim // become dead after this transformation. 1217286425Sdim // Second, compare "averages", i.e. use-count per size. The lower wins. 1218286425Sdim // After that, it does not really matter which one is smaller. Resolve 1219286425Sdim // the tie in some deterministic way. 1220286425Sdim struct IFOrdering { 1221286425Sdim IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) 1222286425Sdim : UseC(UC), BaseOrd(BO) {} 1223314564Sdim 1224286425Sdim bool operator() (const IFRecordWithRegSet &A, 1225327952Sdim const IFRecordWithRegSet &B) const; 1226314564Sdim 1227286425Sdim private: 1228286425Sdim void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1229286425Sdim unsigned &Sum) const; 1230314564Sdim 1231286425Sdim const UnsignedMap &UseC; 1232286425Sdim const RegisterOrdering &BaseOrd; 1233286425Sdim }; 1234286425Sdim 1235314564Sdim} // end anonymous namespace 1236286425Sdim 1237286425Sdimbool IFOrdering::operator() (const IFRecordWithRegSet &A, 1238286425Sdim const IFRecordWithRegSet &B) const { 1239286425Sdim unsigned SizeA = 0, ZeroA = 0, SumA = 0; 1240286425Sdim unsigned SizeB = 0, ZeroB = 0, SumB = 0; 1241286425Sdim stats(A.second, SizeA, ZeroA, SumA); 1242286425Sdim stats(B.second, SizeB, ZeroB, SumB); 1243286425Sdim 1244286425Sdim // We will pick the minimum element. The more zeros, the better. 1245286425Sdim if (ZeroA != ZeroB) 1246286425Sdim return ZeroA > ZeroB; 1247286425Sdim // Compare SumA/SizeA with SumB/SizeB, lower is better. 1248286425Sdim uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; 1249286425Sdim if (AvgA != AvgB) 1250286425Sdim return AvgA < AvgB; 1251286425Sdim 1252286425Sdim // The sets compare identical so far. Resort to comparing the IF records. 1253286425Sdim // The actual values don't matter, this is only for determinism. 1254286425Sdim unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; 1255286425Sdim if (OSA != OSB) 1256286425Sdim return OSA < OSB; 1257286425Sdim unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; 1258286425Sdim if (OIA != OIB) 1259286425Sdim return OIA < OIB; 1260286425Sdim if (A.first.Wdh != B.first.Wdh) 1261286425Sdim return A.first.Wdh < B.first.Wdh; 1262286425Sdim return A.first.Off < B.first.Off; 1263286425Sdim} 1264286425Sdim 1265286425Sdimvoid IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1266286425Sdim unsigned &Sum) const { 1267286425Sdim for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { 1268286425Sdim UnsignedMap::const_iterator F = UseC.find(R); 1269286425Sdim assert(F != UseC.end()); 1270286425Sdim unsigned UC = F->second; 1271286425Sdim if (UC == 0) 1272286425Sdim Zero++; 1273286425Sdim Sum += UC; 1274286425Sdim Size++; 1275286425Sdim } 1276286425Sdim} 1277286425Sdim 1278286425Sdimvoid HexagonGenInsert::selectCandidates() { 1279286425Sdim // Some registers may have multiple valid candidates. Pick the best one 1280286425Sdim // (or decide not to use any). 1281286425Sdim 1282286425Sdim // Compute the "removability" measure of R: 1283286425Sdim // For each potentially removable register R, record the number of regis- 1284286425Sdim // ters with IF candidates, where R appears in at least one set. 1285286425Sdim RegisterSet AllRMs; 1286286425Sdim UnsignedMap UseC, RemC; 1287286425Sdim IFMapType::iterator End = IFMap.end(); 1288286425Sdim 1289286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1290286425Sdim const IFListType &LL = I->second; 1291286425Sdim RegisterSet TT; 1292286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 1293286425Sdim TT.insert(LL[i].second); 1294286425Sdim for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) 1295286425Sdim RemC[R]++; 1296286425Sdim AllRMs.insert(TT); 1297286425Sdim } 1298286425Sdim 1299286425Sdim for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { 1300327952Sdim using use_iterator = MachineRegisterInfo::use_nodbg_iterator; 1301327952Sdim using InstrSet = SmallSet<const MachineInstr *, 16>; 1302327952Sdim 1303286425Sdim InstrSet UIs; 1304286425Sdim // Count as the number of instructions in which R is used, not the 1305286425Sdim // number of operands. 1306286425Sdim use_iterator E = MRI->use_nodbg_end(); 1307286425Sdim for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) 1308286425Sdim UIs.insert(I->getParent()); 1309286425Sdim unsigned C = UIs.size(); 1310286425Sdim // Calculate a measure, which is the number of instructions using R, 1311286425Sdim // minus the "removability" count computed earlier. 1312286425Sdim unsigned D = RemC[R]; 1313286425Sdim UseC[R] = (C > D) ? C-D : 0; // doz 1314286425Sdim } 1315286425Sdim 1316286425Sdim bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; 1317286425Sdim if (!SelectAll0 && !SelectHas0) 1318286425Sdim SelectAll0 = true; 1319286425Sdim 1320286425Sdim // The smaller the number UseC for a given register R, the "less used" 1321286425Sdim // R is aside from the opportunities for removal offered by generating 1322286425Sdim // "insert" instructions. 1323286425Sdim // Iterate over the IF map, and for those registers that have multiple 1324286425Sdim // candidates, pick the minimum one according to IFOrdering. 1325286425Sdim IFOrdering IFO(UseC, BaseOrd); 1326286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1327286425Sdim IFListType &LL = I->second; 1328286425Sdim if (LL.empty()) 1329286425Sdim continue; 1330286425Sdim // Get the minimum element, remember it and clear the list. If the 1331286425Sdim // element found is adequate, we will put it back on the list, other- 1332286425Sdim // wise the list will remain empty, and the entry for this register 1333286425Sdim // will be removed (i.e. this register will not be replaced by insert). 1334286425Sdim IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); 1335286425Sdim assert(MinI != LL.end()); 1336286425Sdim IFRecordWithRegSet M = *MinI; 1337286425Sdim LL.clear(); 1338286425Sdim 1339286425Sdim // We want to make sure that this replacement will have a chance to be 1340286425Sdim // beneficial, and that means that we want to have indication that some 1341286425Sdim // register will be removed. The most likely registers to be eliminated 1342286425Sdim // are the use operands in the definition of I->first. Accept/reject a 1343286425Sdim // candidate based on how many of its uses it can potentially eliminate. 1344286425Sdim 1345286425Sdim RegisterSet Us; 1346286425Sdim const MachineInstr *DefI = MRI->getVRegDef(I->first); 1347286425Sdim getInstrUses(DefI, Us); 1348286425Sdim bool Accept = false; 1349286425Sdim 1350286425Sdim if (SelectAll0) { 1351286425Sdim bool All0 = true; 1352286425Sdim for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1353286425Sdim if (UseC[R] == 0) 1354286425Sdim continue; 1355286425Sdim All0 = false; 1356286425Sdim break; 1357286425Sdim } 1358286425Sdim Accept = All0; 1359286425Sdim } else if (SelectHas0) { 1360286425Sdim bool Has0 = false; 1361286425Sdim for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1362286425Sdim if (UseC[R] != 0) 1363286425Sdim continue; 1364286425Sdim Has0 = true; 1365286425Sdim break; 1366286425Sdim } 1367286425Sdim Accept = Has0; 1368286425Sdim } 1369286425Sdim if (Accept) 1370286425Sdim LL.push_back(M); 1371286425Sdim } 1372286425Sdim 1373286425Sdim // Remove candidates that add uses of removable registers, unless the 1374286425Sdim // removable registers are among replacement candidates. 1375286425Sdim // Recompute the removable registers, since some candidates may have 1376286425Sdim // been eliminated. 1377286425Sdim AllRMs.clear(); 1378286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1379286425Sdim const IFListType &LL = I->second; 1380314564Sdim if (!LL.empty()) 1381286425Sdim AllRMs.insert(LL[0].second); 1382286425Sdim } 1383286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1384286425Sdim IFListType &LL = I->second; 1385314564Sdim if (LL.empty()) 1386286425Sdim continue; 1387286425Sdim unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; 1388286425Sdim if (AllRMs[SR] || AllRMs[IR]) 1389286425Sdim LL.clear(); 1390286425Sdim } 1391286425Sdim 1392286425Sdim pruneEmptyLists(); 1393286425Sdim} 1394286425Sdim 1395286425Sdimbool HexagonGenInsert::generateInserts() { 1396286425Sdim // Create a new register for each one from IFMap, and store them in the 1397286425Sdim // map. 1398286425Sdim UnsignedMap RegMap; 1399286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1400286425Sdim unsigned VR = I->first; 1401286425Sdim const TargetRegisterClass *RC = MRI->getRegClass(VR); 1402286425Sdim unsigned NewVR = MRI->createVirtualRegister(RC); 1403286425Sdim RegMap[VR] = NewVR; 1404286425Sdim } 1405286425Sdim 1406286425Sdim // We can generate the "insert" instructions using potentially stale re- 1407286425Sdim // gisters: SrcR and InsR for a given VR may be among other registers that 1408286425Sdim // are also replaced. This is fine, we will do the mass "rauw" a bit later. 1409286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1410286425Sdim MachineInstr *MI = MRI->getVRegDef(I->first); 1411286425Sdim MachineBasicBlock &B = *MI->getParent(); 1412286425Sdim DebugLoc DL = MI->getDebugLoc(); 1413286425Sdim unsigned NewR = RegMap[I->first]; 1414286425Sdim bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; 1415286425Sdim const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) 1416286425Sdim : HII->get(Hexagon::S2_insertp); 1417286425Sdim IFRecord IF = I->second[0].first; 1418286425Sdim unsigned Wdh = IF.Wdh, Off = IF.Off; 1419286425Sdim unsigned InsS = 0; 1420286425Sdim if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { 1421314564Sdim InsS = Hexagon::isub_lo; 1422286425Sdim if (Off >= 32) { 1423314564Sdim InsS = Hexagon::isub_hi; 1424286425Sdim Off -= 32; 1425286425Sdim } 1426286425Sdim } 1427286425Sdim // Advance to the proper location for inserting instructions. This could 1428286425Sdim // be B.end(). 1429286425Sdim MachineBasicBlock::iterator At = MI; 1430286425Sdim if (MI->isPHI()) 1431286425Sdim At = B.getFirstNonPHI(); 1432286425Sdim 1433286425Sdim BuildMI(B, At, DL, D, NewR) 1434286425Sdim .addReg(IF.SrcR) 1435286425Sdim .addReg(IF.InsR, 0, InsS) 1436286425Sdim .addImm(Wdh) 1437286425Sdim .addImm(Off); 1438286425Sdim 1439286425Sdim MRI->clearKillFlags(IF.SrcR); 1440286425Sdim MRI->clearKillFlags(IF.InsR); 1441286425Sdim } 1442286425Sdim 1443286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1444286425Sdim MachineInstr *DefI = MRI->getVRegDef(I->first); 1445286425Sdim MRI->replaceRegWith(I->first, RegMap[I->first]); 1446286425Sdim DefI->eraseFromParent(); 1447286425Sdim } 1448286425Sdim 1449286425Sdim return true; 1450286425Sdim} 1451286425Sdim 1452286425Sdimbool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { 1453286425Sdim bool Changed = false; 1454286425Sdim 1455321369Sdim for (auto *DTN : children<MachineDomTreeNode*>(N)) 1456321369Sdim Changed |= removeDeadCode(DTN); 1457321369Sdim 1458286425Sdim MachineBasicBlock *B = N->getBlock(); 1459286425Sdim std::vector<MachineInstr*> Instrs; 1460286425Sdim for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) 1461286425Sdim Instrs.push_back(&*I); 1462286425Sdim 1463286425Sdim for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { 1464286425Sdim MachineInstr *MI = *I; 1465286425Sdim unsigned Opc = MI->getOpcode(); 1466286425Sdim // Do not touch lifetime markers. This is why the target-independent DCE 1467286425Sdim // cannot be used. 1468286425Sdim if (Opc == TargetOpcode::LIFETIME_START || 1469286425Sdim Opc == TargetOpcode::LIFETIME_END) 1470286425Sdim continue; 1471286425Sdim bool Store = false; 1472286425Sdim if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) 1473286425Sdim continue; 1474286425Sdim 1475286425Sdim bool AllDead = true; 1476286425Sdim SmallVector<unsigned,2> Regs; 1477314564Sdim for (const MachineOperand &MO : MI->operands()) { 1478314564Sdim if (!MO.isReg() || !MO.isDef()) 1479286425Sdim continue; 1480314564Sdim unsigned R = MO.getReg(); 1481286425Sdim if (!TargetRegisterInfo::isVirtualRegister(R) || 1482286425Sdim !MRI->use_nodbg_empty(R)) { 1483286425Sdim AllDead = false; 1484286425Sdim break; 1485286425Sdim } 1486286425Sdim Regs.push_back(R); 1487286425Sdim } 1488286425Sdim if (!AllDead) 1489286425Sdim continue; 1490286425Sdim 1491286425Sdim B->erase(MI); 1492286425Sdim for (unsigned I = 0, N = Regs.size(); I != N; ++I) 1493286425Sdim MRI->markUsesInDebugValueAsUndef(Regs[I]); 1494286425Sdim Changed = true; 1495286425Sdim } 1496286425Sdim 1497286425Sdim return Changed; 1498286425Sdim} 1499286425Sdim 1500286425Sdimbool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { 1501327952Sdim if (skipFunction(MF.getFunction())) 1502309124Sdim return false; 1503309124Sdim 1504286425Sdim bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; 1505286425Sdim bool Changed = false; 1506286425Sdim 1507286425Sdim // Sanity check: one, but not both. 1508286425Sdim assert(!OptSelectAll0 || !OptSelectHas0); 1509286425Sdim 1510286425Sdim IFMap.clear(); 1511286425Sdim BaseOrd.clear(); 1512286425Sdim CellOrd.clear(); 1513286425Sdim 1514286425Sdim const auto &ST = MF.getSubtarget<HexagonSubtarget>(); 1515286425Sdim HII = ST.getInstrInfo(); 1516286425Sdim HRI = ST.getRegisterInfo(); 1517286425Sdim MFN = &MF; 1518286425Sdim MRI = &MF.getRegInfo(); 1519286425Sdim MDT = &getAnalysis<MachineDominatorTree>(); 1520286425Sdim 1521286425Sdim // Clean up before any further processing, so that dead code does not 1522286425Sdim // get used in a newly generated "insert" instruction. Have a custom 1523286425Sdim // version of DCE that preserves lifetime markers. Without it, merging 1524286425Sdim // of stack objects can fail to recognize and merge disjoint objects 1525286425Sdim // leading to unnecessary stack growth. 1526296417Sdim Changed = removeDeadCode(MDT->getRootNode()); 1527286425Sdim 1528286425Sdim const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); 1529286425Sdim BitTracker BTLoc(HE, MF); 1530286425Sdim BTLoc.trace(isDebug()); 1531286425Sdim BTLoc.run(); 1532286425Sdim CellMapShadow MS(BTLoc); 1533286425Sdim CMS = &MS; 1534286425Sdim 1535286425Sdim buildOrderingMF(BaseOrd); 1536286425Sdim buildOrderingBT(BaseOrd, CellOrd); 1537286425Sdim 1538286425Sdim if (isDebug()) { 1539286425Sdim dbgs() << "Cell ordering:\n"; 1540286425Sdim for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); 1541286425Sdim I != E; ++I) { 1542286425Sdim unsigned VR = I->first, Pos = I->second; 1543327952Sdim dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n"; 1544286425Sdim } 1545286425Sdim } 1546286425Sdim 1547286425Sdim // Collect candidates for conversion into the insert forms. 1548286425Sdim MachineBasicBlock *RootB = MDT->getRoot(); 1549286425Sdim OrderedRegisterList AvailR(CellOrd); 1550286425Sdim 1551314564Sdim const char *const TGName = "hexinsert"; 1552314564Sdim const char *const TGDesc = "Generate Insert Instructions"; 1553314564Sdim 1554286425Sdim { 1555314564Sdim NamedRegionTimer _T("collection", "collection", TGName, TGDesc, 1556314564Sdim TimingDetail); 1557286425Sdim collectInBlock(RootB, AvailR); 1558286425Sdim // Complete the information gathered in IFMap. 1559286425Sdim computeRemovableRegisters(); 1560286425Sdim } 1561286425Sdim 1562286425Sdim if (isDebug()) { 1563286425Sdim dbgs() << "Candidates after collection:\n"; 1564286425Sdim dump_map(); 1565286425Sdim } 1566286425Sdim 1567286425Sdim if (IFMap.empty()) 1568296417Sdim return Changed; 1569286425Sdim 1570286425Sdim { 1571314564Sdim NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail); 1572286425Sdim pruneCandidates(); 1573286425Sdim } 1574286425Sdim 1575286425Sdim if (isDebug()) { 1576286425Sdim dbgs() << "Candidates after pruning:\n"; 1577286425Sdim dump_map(); 1578286425Sdim } 1579286425Sdim 1580286425Sdim if (IFMap.empty()) 1581296417Sdim return Changed; 1582286425Sdim 1583286425Sdim { 1584314564Sdim NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail); 1585286425Sdim selectCandidates(); 1586286425Sdim } 1587286425Sdim 1588286425Sdim if (isDebug()) { 1589286425Sdim dbgs() << "Candidates after selection:\n"; 1590286425Sdim dump_map(); 1591286425Sdim } 1592286425Sdim 1593286425Sdim // Filter out vregs beyond the cutoff. 1594286425Sdim if (VRegIndexCutoff.getPosition()) { 1595286425Sdim unsigned Cutoff = VRegIndexCutoff; 1596327952Sdim 1597327952Sdim using IterListType = SmallVector<IFMapType::iterator, 16>; 1598327952Sdim 1599286425Sdim IterListType Out; 1600286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1601286425Sdim unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); 1602286425Sdim if (Idx >= Cutoff) 1603286425Sdim Out.push_back(I); 1604286425Sdim } 1605286425Sdim for (unsigned i = 0, n = Out.size(); i < n; ++i) 1606286425Sdim IFMap.erase(Out[i]); 1607286425Sdim } 1608296417Sdim if (IFMap.empty()) 1609296417Sdim return Changed; 1610286425Sdim 1611286425Sdim { 1612314564Sdim NamedRegionTimer _T("generation", "generation", TGName, TGDesc, 1613314564Sdim TimingDetail); 1614296417Sdim generateInserts(); 1615286425Sdim } 1616286425Sdim 1617296417Sdim return true; 1618286425Sdim} 1619286425Sdim 1620286425SdimFunctionPass *llvm::createHexagonGenInsert() { 1621286425Sdim return new HexagonGenInsert(); 1622286425Sdim} 1623286425Sdim 1624286425Sdim//===----------------------------------------------------------------------===// 1625286425Sdim// Public Constructor Functions 1626286425Sdim//===----------------------------------------------------------------------===// 1627286425Sdim 1628286425SdimINITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", 1629286425Sdim "Hexagon generate \"insert\" instructions", false, false) 1630286425SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1631286425SdimINITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", 1632286425Sdim "Hexagon generate \"insert\" instructions", false, false) 1633