1327952Sdim//===- HexagonGenInsert.cpp -----------------------------------------------===// 2286425Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6286425Sdim// 7286425Sdim//===----------------------------------------------------------------------===// 8286425Sdim 9314564Sdim#include "BitTracker.h" 10314564Sdim#include "HexagonBitTracker.h" 11314564Sdim#include "HexagonInstrInfo.h" 12314564Sdim#include "HexagonRegisterInfo.h" 13314564Sdim#include "HexagonSubtarget.h" 14286425Sdim#include "llvm/ADT/BitVector.h" 15286425Sdim#include "llvm/ADT/DenseMap.h" 16327952Sdim#include "llvm/ADT/GraphTraits.h" 17286425Sdim#include "llvm/ADT/PostOrderIterator.h" 18321369Sdim#include "llvm/ADT/STLExtras.h" 19314564Sdim#include "llvm/ADT/SmallSet.h" 20314564Sdim#include "llvm/ADT/SmallVector.h" 21314564Sdim#include "llvm/ADT/StringRef.h" 22314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 23286425Sdim#include "llvm/CodeGen/MachineDominators.h" 24286425Sdim#include "llvm/CodeGen/MachineFunction.h" 25286425Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 26314564Sdim#include "llvm/CodeGen/MachineInstr.h" 27286425Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 28314564Sdim#include "llvm/CodeGen/MachineOperand.h" 29286425Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 30327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 31314564Sdim#include "llvm/IR/DebugLoc.h" 32360784Sdim#include "llvm/InitializePasses.h" 33309124Sdim#include "llvm/Pass.h" 34286425Sdim#include "llvm/Support/CommandLine.h" 35286425Sdim#include "llvm/Support/Debug.h" 36314564Sdim#include "llvm/Support/MathExtras.h" 37321369Sdim#include "llvm/Support/Timer.h" 38314564Sdim#include "llvm/Support/raw_ostream.h" 39314564Sdim#include <algorithm> 40314564Sdim#include <cassert> 41314564Sdim#include <cstdint> 42314564Sdim#include <iterator> 43314564Sdim#include <utility> 44286425Sdim#include <vector> 45286425Sdim 46321369Sdim#define DEBUG_TYPE "hexinsert" 47321369Sdim 48286425Sdimusing namespace llvm; 49286425Sdim 50286425Sdimstatic cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), 51286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); 52286425Sdim// The distance cutoff is selected based on the precheckin-perf results: 53286425Sdim// cutoffs 20, 25, 35, and 40 are worse than 30. 54286425Sdimstatic cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), 55286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " 56286425Sdim "generation.")); 57286425Sdim 58341825Sdim// Limit the container sizes for extreme cases where we run out of memory. 59341825Sdimstatic cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096), 60341825Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList")); 61341825Sdimstatic cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024), 62341825Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap")); 63341825Sdim 64286425Sdimstatic cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, 65286425Sdim cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); 66286425Sdimstatic cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), 67286425Sdim cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " 68286425Sdim "generation")); 69286425Sdim 70286425Sdimstatic cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, 71286425Sdim cl::ZeroOrMore); 72286425Sdimstatic cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, 73286425Sdim cl::ZeroOrMore); 74286425Sdim// Whether to construct constant values via "insert". Could eliminate constant 75286425Sdim// extenders, but often not practical. 76286425Sdimstatic cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, 77286425Sdim cl::ZeroOrMore); 78286425Sdim 79314564Sdim// The preprocessor gets confused when the DEBUG macro is passed larger 80314564Sdim// chunks of code. Use this function to detect debugging. 81314564Sdiminline static bool isDebug() { 82286425Sdim#ifndef NDEBUG 83314564Sdim return DebugFlag && isCurrentDebugType(DEBUG_TYPE); 84286425Sdim#else 85314564Sdim return false; 86286425Sdim#endif 87286425Sdim} 88286425Sdim 89314564Sdimnamespace { 90286425Sdim 91286425Sdim // Set of virtual registers, based on BitVector. 92286425Sdim struct RegisterSet : private BitVector { 93296417Sdim RegisterSet() = default; 94286425Sdim explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} 95341825Sdim RegisterSet(const RegisterSet &RS) : BitVector(RS) {} 96360784Sdim RegisterSet &operator=(const RegisterSet &RS) { 97360784Sdim BitVector::operator=(RS); 98360784Sdim return *this; 99360784Sdim } 100286425Sdim 101286425Sdim using BitVector::clear; 102286425Sdim 103286425Sdim unsigned find_first() const { 104286425Sdim int First = BitVector::find_first(); 105286425Sdim if (First < 0) 106286425Sdim return 0; 107286425Sdim return x2v(First); 108286425Sdim } 109286425Sdim 110286425Sdim unsigned find_next(unsigned Prev) const { 111286425Sdim int Next = BitVector::find_next(v2x(Prev)); 112286425Sdim if (Next < 0) 113286425Sdim return 0; 114286425Sdim return x2v(Next); 115286425Sdim } 116286425Sdim 117286425Sdim RegisterSet &insert(unsigned R) { 118286425Sdim unsigned Idx = v2x(R); 119286425Sdim ensure(Idx); 120286425Sdim return static_cast<RegisterSet&>(BitVector::set(Idx)); 121286425Sdim } 122286425Sdim RegisterSet &remove(unsigned R) { 123286425Sdim unsigned Idx = v2x(R); 124286425Sdim if (Idx >= size()) 125286425Sdim return *this; 126286425Sdim return static_cast<RegisterSet&>(BitVector::reset(Idx)); 127286425Sdim } 128286425Sdim 129286425Sdim RegisterSet &insert(const RegisterSet &Rs) { 130286425Sdim return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); 131286425Sdim } 132286425Sdim RegisterSet &remove(const RegisterSet &Rs) { 133286425Sdim return static_cast<RegisterSet&>(BitVector::reset(Rs)); 134286425Sdim } 135286425Sdim 136286425Sdim reference operator[](unsigned R) { 137286425Sdim unsigned Idx = v2x(R); 138286425Sdim ensure(Idx); 139286425Sdim return BitVector::operator[](Idx); 140286425Sdim } 141286425Sdim bool operator[](unsigned R) const { 142286425Sdim unsigned Idx = v2x(R); 143286425Sdim assert(Idx < size()); 144286425Sdim return BitVector::operator[](Idx); 145286425Sdim } 146286425Sdim bool has(unsigned R) const { 147286425Sdim unsigned Idx = v2x(R); 148286425Sdim if (Idx >= size()) 149286425Sdim return false; 150286425Sdim return BitVector::test(Idx); 151286425Sdim } 152286425Sdim 153286425Sdim bool empty() const { 154286425Sdim return !BitVector::any(); 155286425Sdim } 156286425Sdim bool includes(const RegisterSet &Rs) const { 157286425Sdim // A.BitVector::test(B) <=> A-B != {} 158286425Sdim return !Rs.BitVector::test(*this); 159286425Sdim } 160286425Sdim bool intersects(const RegisterSet &Rs) const { 161286425Sdim return BitVector::anyCommon(Rs); 162286425Sdim } 163286425Sdim 164286425Sdim private: 165286425Sdim void ensure(unsigned Idx) { 166286425Sdim if (size() <= Idx) 167286425Sdim resize(std::max(Idx+1, 32U)); 168286425Sdim } 169314564Sdim 170286425Sdim static inline unsigned v2x(unsigned v) { 171360784Sdim return Register::virtReg2Index(v); 172286425Sdim } 173314564Sdim 174286425Sdim static inline unsigned x2v(unsigned x) { 175360784Sdim return Register::index2VirtReg(x); 176286425Sdim } 177286425Sdim }; 178286425Sdim 179286425Sdim struct PrintRegSet { 180286425Sdim PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 181286425Sdim : RS(S), TRI(RI) {} 182314564Sdim 183286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, 184286425Sdim const PrintRegSet &P); 185314564Sdim 186286425Sdim private: 187286425Sdim const RegisterSet &RS; 188286425Sdim const TargetRegisterInfo *TRI; 189286425Sdim }; 190286425Sdim 191286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 192286425Sdim OS << '{'; 193286425Sdim for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 194327952Sdim OS << ' ' << printReg(R, P.TRI); 195286425Sdim OS << " }"; 196286425Sdim return OS; 197286425Sdim } 198286425Sdim 199286425Sdim // A convenience class to associate unsigned numbers (such as virtual 200286425Sdim // registers) with unsigned numbers. 201286425Sdim struct UnsignedMap : public DenseMap<unsigned,unsigned> { 202314564Sdim UnsignedMap() = default; 203314564Sdim 204286425Sdim private: 205327952Sdim using BaseType = DenseMap<unsigned, unsigned>; 206286425Sdim }; 207286425Sdim 208286425Sdim // A utility to establish an ordering between virtual registers: 209286425Sdim // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] 210286425Sdim // This is meant as a cache for the ordering of virtual registers defined 211286425Sdim // by a potentially expensive comparison function, or obtained by a proce- 212286425Sdim // dure that should not be repeated each time two registers are compared. 213286425Sdim struct RegisterOrdering : public UnsignedMap { 214314564Sdim RegisterOrdering() = default; 215314564Sdim 216286425Sdim unsigned operator[](unsigned VR) const { 217286425Sdim const_iterator F = find(VR); 218286425Sdim assert(F != end()); 219286425Sdim return F->second; 220286425Sdim } 221314564Sdim 222286425Sdim // Add operator(), so that objects of this class can be used as 223286425Sdim // comparators in std::sort et al. 224286425Sdim bool operator() (unsigned VR1, unsigned VR2) const { 225286425Sdim return operator[](VR1) < operator[](VR2); 226286425Sdim } 227286425Sdim }; 228286425Sdim 229286425Sdim // Ordering of bit values. This class does not have operator[], but 230286425Sdim // is supplies a comparison operator() for use in std:: algorithms. 231286425Sdim // The order is as follows: 232286425Sdim // - 0 < 1 < ref 233286425Sdim // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), 234286425Sdim // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. 235286425Sdim struct BitValueOrdering { 236286425Sdim BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} 237314564Sdim 238286425Sdim bool operator() (const BitTracker::BitValue &V1, 239286425Sdim const BitTracker::BitValue &V2) const; 240314564Sdim 241286425Sdim const RegisterOrdering &BaseOrd; 242286425Sdim }; 243286425Sdim 244314564Sdim} // end anonymous namespace 245286425Sdim 246286425Sdimbool BitValueOrdering::operator() (const BitTracker::BitValue &V1, 247286425Sdim const BitTracker::BitValue &V2) const { 248286425Sdim if (V1 == V2) 249286425Sdim return false; 250286425Sdim // V1==0 => true, V2==0 => false 251286425Sdim if (V1.is(0) || V2.is(0)) 252286425Sdim return V1.is(0); 253286425Sdim // Neither of V1,V2 is 0, and V1!=V2. 254286425Sdim // V2==1 => false, V1==1 => true 255286425Sdim if (V2.is(1) || V1.is(1)) 256286425Sdim return !V2.is(1); 257286425Sdim // Both V1,V2 are refs. 258286425Sdim unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; 259286425Sdim if (Ind1 != Ind2) 260286425Sdim return Ind1 < Ind2; 261286425Sdim // If V1.Pos==V2.Pos 262286425Sdim assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); 263286425Sdim return V1.RefI.Pos < V2.RefI.Pos; 264286425Sdim} 265286425Sdim 266314564Sdimnamespace { 267286425Sdim 268286425Sdim // Cache for the BitTracker's cell map. Map lookup has a logarithmic 269286425Sdim // complexity, this class will memoize the lookup results to reduce 270286425Sdim // the access time for repeated lookups of the same cell. 271286425Sdim struct CellMapShadow { 272286425Sdim CellMapShadow(const BitTracker &T) : BT(T) {} 273314564Sdim 274286425Sdim const BitTracker::RegisterCell &lookup(unsigned VR) { 275360784Sdim unsigned RInd = Register::virtReg2Index(VR); 276286425Sdim // Grow the vector to at least 32 elements. 277286425Sdim if (RInd >= CVect.size()) 278314564Sdim CVect.resize(std::max(RInd+16, 32U), nullptr); 279286425Sdim const BitTracker::RegisterCell *CP = CVect[RInd]; 280314564Sdim if (CP == nullptr) 281286425Sdim CP = CVect[RInd] = &BT.lookup(VR); 282286425Sdim return *CP; 283286425Sdim } 284286425Sdim 285286425Sdim const BitTracker &BT; 286286425Sdim 287286425Sdim private: 288327952Sdim using CellVectType = std::vector<const BitTracker::RegisterCell *>; 289327952Sdim 290286425Sdim CellVectType CVect; 291286425Sdim }; 292286425Sdim 293286425Sdim // Comparator class for lexicographic ordering of virtual registers 294286425Sdim // according to the corresponding BitTracker::RegisterCell objects. 295286425Sdim struct RegisterCellLexCompare { 296286425Sdim RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) 297286425Sdim : BitOrd(BO), CM(M) {} 298314564Sdim 299286425Sdim bool operator() (unsigned VR1, unsigned VR2) const; 300314564Sdim 301286425Sdim private: 302286425Sdim const BitValueOrdering &BitOrd; 303286425Sdim CellMapShadow &CM; 304286425Sdim }; 305286425Sdim 306286425Sdim // Comparator class for lexicographic ordering of virtual registers 307286425Sdim // according to the specified bits of the corresponding BitTracker:: 308286425Sdim // RegisterCell objects. 309286425Sdim // Specifically, this class will be used to compare bit B of a register 310286425Sdim // cell for a selected virtual register R with bit N of any register 311286425Sdim // other than R. 312286425Sdim struct RegisterCellBitCompareSel { 313286425Sdim RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, 314286425Sdim const BitValueOrdering &BO, CellMapShadow &M) 315286425Sdim : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} 316314564Sdim 317286425Sdim bool operator() (unsigned VR1, unsigned VR2) const; 318314564Sdim 319286425Sdim private: 320286425Sdim const unsigned SelR, SelB; 321286425Sdim const unsigned BitN; 322286425Sdim const BitValueOrdering &BitOrd; 323286425Sdim CellMapShadow &CM; 324286425Sdim }; 325286425Sdim 326314564Sdim} // end anonymous namespace 327286425Sdim 328286425Sdimbool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { 329286425Sdim // Ordering of registers, made up from two given orderings: 330286425Sdim // - the ordering of the register numbers, and 331286425Sdim // - the ordering of register cells. 332286425Sdim // Def. R1 < R2 if: 333286425Sdim // - cell(R1) < cell(R2), or 334286425Sdim // - cell(R1) == cell(R2), and index(R1) < index(R2). 335286425Sdim // 336286425Sdim // For register cells, the ordering is lexicographic, with index 0 being 337286425Sdim // the most significant. 338286425Sdim if (VR1 == VR2) 339286425Sdim return false; 340286425Sdim 341286425Sdim const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); 342286425Sdim uint16_t W1 = RC1.width(), W2 = RC2.width(); 343286425Sdim for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { 344286425Sdim const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; 345286425Sdim if (V1 != V2) 346286425Sdim return BitOrd(V1, V2); 347286425Sdim } 348286425Sdim // Cells are equal up until the common length. 349286425Sdim if (W1 != W2) 350286425Sdim return W1 < W2; 351286425Sdim 352286425Sdim return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; 353286425Sdim} 354286425Sdim 355286425Sdimbool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { 356286425Sdim if (VR1 == VR2) 357286425Sdim return false; 358286425Sdim const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); 359286425Sdim const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); 360286425Sdim uint16_t W1 = RC1.width(), W2 = RC2.width(); 361286425Sdim uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; 362286425Sdim uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; 363286425Sdim // If Bit1 exceeds the width of VR1, then: 364286425Sdim // - return false, if at the same time Bit2 exceeds VR2, or 365286425Sdim // - return true, otherwise. 366286425Sdim // (I.e. "a bit value that does not exist is less than any bit value 367286425Sdim // that does exist".) 368286425Sdim if (W1 <= Bit1) 369286425Sdim return Bit2 < W2; 370286425Sdim // If Bit1 is within VR1, but Bit2 is not within VR2, return false. 371286425Sdim if (W2 <= Bit2) 372286425Sdim return false; 373286425Sdim 374286425Sdim const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; 375286425Sdim if (V1 != V2) 376286425Sdim return BitOrd(V1, V2); 377286425Sdim return false; 378286425Sdim} 379286425Sdim 380314564Sdimnamespace { 381286425Sdim 382286425Sdim class OrderedRegisterList { 383327952Sdim using ListType = std::vector<unsigned>; 384341825Sdim const unsigned MaxSize; 385314564Sdim 386286425Sdim public: 387341825Sdim OrderedRegisterList(const RegisterOrdering &RO) 388341825Sdim : MaxSize(MaxORLSize), Ord(RO) {} 389314564Sdim 390286425Sdim void insert(unsigned VR); 391286425Sdim void remove(unsigned VR); 392314564Sdim 393286425Sdim unsigned operator[](unsigned Idx) const { 394286425Sdim assert(Idx < Seq.size()); 395286425Sdim return Seq[Idx]; 396286425Sdim } 397314564Sdim 398286425Sdim unsigned size() const { 399286425Sdim return Seq.size(); 400286425Sdim } 401286425Sdim 402327952Sdim using iterator = ListType::iterator; 403327952Sdim using const_iterator = ListType::const_iterator; 404327952Sdim 405286425Sdim iterator begin() { return Seq.begin(); } 406286425Sdim iterator end() { return Seq.end(); } 407286425Sdim const_iterator begin() const { return Seq.begin(); } 408286425Sdim const_iterator end() const { return Seq.end(); } 409286425Sdim 410286425Sdim // Convenience function to convert an iterator to the corresponding index. 411286425Sdim unsigned idx(iterator It) const { return It-begin(); } 412314564Sdim 413286425Sdim private: 414286425Sdim ListType Seq; 415286425Sdim const RegisterOrdering &Ord; 416286425Sdim }; 417286425Sdim 418286425Sdim struct PrintORL { 419286425Sdim PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) 420286425Sdim : RL(L), TRI(RI) {} 421314564Sdim 422286425Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); 423314564Sdim 424286425Sdim private: 425286425Sdim const OrderedRegisterList &RL; 426286425Sdim const TargetRegisterInfo *TRI; 427286425Sdim }; 428286425Sdim 429286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { 430286425Sdim OS << '('; 431286425Sdim OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); 432286425Sdim for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { 433286425Sdim if (I != B) 434286425Sdim OS << ", "; 435327952Sdim OS << printReg(*I, P.TRI); 436286425Sdim } 437286425Sdim OS << ')'; 438286425Sdim return OS; 439286425Sdim } 440286425Sdim 441314564Sdim} // end anonymous namespace 442286425Sdim 443286425Sdimvoid OrderedRegisterList::insert(unsigned VR) { 444353358Sdim iterator L = llvm::lower_bound(Seq, VR, Ord); 445286425Sdim if (L == Seq.end()) 446286425Sdim Seq.push_back(VR); 447286425Sdim else 448286425Sdim Seq.insert(L, VR); 449341825Sdim 450341825Sdim unsigned S = Seq.size(); 451341825Sdim if (S > MaxSize) 452341825Sdim Seq.resize(MaxSize); 453341825Sdim assert(Seq.size() <= MaxSize); 454286425Sdim} 455286425Sdim 456286425Sdimvoid OrderedRegisterList::remove(unsigned VR) { 457353358Sdim iterator L = llvm::lower_bound(Seq, VR, Ord); 458341825Sdim if (L != Seq.end()) 459341825Sdim Seq.erase(L); 460286425Sdim} 461286425Sdim 462314564Sdimnamespace { 463286425Sdim 464286425Sdim // A record of the insert form. The fields correspond to the operands 465286425Sdim // of the "insert" instruction: 466286425Sdim // ... = insert(SrcR, InsR, #Wdh, #Off) 467286425Sdim struct IFRecord { 468286425Sdim IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) 469286425Sdim : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} 470314564Sdim 471286425Sdim unsigned SrcR, InsR; 472286425Sdim uint16_t Wdh, Off; 473286425Sdim }; 474286425Sdim 475286425Sdim struct PrintIFR { 476286425Sdim PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) 477286425Sdim : IFR(R), TRI(RI) {} 478314564Sdim 479286425Sdim private: 480314564Sdim friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); 481314564Sdim 482286425Sdim const IFRecord &IFR; 483286425Sdim const TargetRegisterInfo *TRI; 484286425Sdim }; 485286425Sdim 486286425Sdim raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { 487286425Sdim unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; 488327952Sdim OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI) 489286425Sdim << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; 490286425Sdim return OS; 491286425Sdim } 492286425Sdim 493327952Sdim using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>; 494286425Sdim 495314564Sdim} // end anonymous namespace 496286425Sdim 497286425Sdimnamespace llvm { 498314564Sdim 499286425Sdim void initializeHexagonGenInsertPass(PassRegistry&); 500286425Sdim FunctionPass *createHexagonGenInsert(); 501286425Sdim 502314564Sdim} // end namespace llvm 503286425Sdim 504286425Sdimnamespace { 505314564Sdim 506286425Sdim class HexagonGenInsert : public MachineFunctionPass { 507286425Sdim public: 508286425Sdim static char ID; 509314564Sdim 510327952Sdim HexagonGenInsert() : MachineFunctionPass(ID) { 511286425Sdim initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); 512286425Sdim } 513314564Sdim 514314564Sdim StringRef getPassName() const override { 515286425Sdim return "Hexagon generate \"insert\" instructions"; 516286425Sdim } 517314564Sdim 518314564Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 519286425Sdim AU.addRequired<MachineDominatorTree>(); 520286425Sdim AU.addPreserved<MachineDominatorTree>(); 521286425Sdim MachineFunctionPass::getAnalysisUsage(AU); 522286425Sdim } 523286425Sdim 524314564Sdim bool runOnMachineFunction(MachineFunction &MF) override; 525314564Sdim 526286425Sdim private: 527327952Sdim using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>; 528286425Sdim 529286425Sdim void buildOrderingMF(RegisterOrdering &RO) const; 530286425Sdim void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; 531286425Sdim bool isIntClass(const TargetRegisterClass *RC) const; 532286425Sdim bool isConstant(unsigned VR) const; 533286425Sdim bool isSmallConstant(unsigned VR) const; 534286425Sdim bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, 535286425Sdim uint16_t L, uint16_t S) const; 536286425Sdim bool findSelfReference(unsigned VR) const; 537286425Sdim bool findNonSelfReference(unsigned VR) const; 538286425Sdim void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; 539286425Sdim void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; 540286425Sdim unsigned distance(const MachineBasicBlock *FromB, 541286425Sdim const MachineBasicBlock *ToB, const UnsignedMap &RPO, 542286425Sdim PairMapType &M) const; 543286425Sdim unsigned distance(MachineBasicBlock::const_iterator FromI, 544286425Sdim MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 545286425Sdim PairMapType &M) const; 546286425Sdim bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); 547286425Sdim void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); 548286425Sdim void findRemovableRegisters(unsigned VR, IFRecord IF, 549286425Sdim RegisterSet &RMs) const; 550286425Sdim void computeRemovableRegisters(); 551286425Sdim 552286425Sdim void pruneEmptyLists(); 553286425Sdim void pruneCoveredSets(unsigned VR); 554286425Sdim void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); 555286425Sdim void pruneRegCopies(unsigned VR); 556286425Sdim void pruneCandidates(); 557286425Sdim void selectCandidates(); 558286425Sdim bool generateInserts(); 559286425Sdim 560286425Sdim bool removeDeadCode(MachineDomTreeNode *N); 561286425Sdim 562286425Sdim // IFRecord coupled with a set of potentially removable registers: 563327952Sdim using IFListType = std::vector<IFRecordWithRegSet>; 564327952Sdim using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType 565286425Sdim 566286425Sdim void dump_map() const; 567286425Sdim 568327952Sdim const HexagonInstrInfo *HII = nullptr; 569327952Sdim const HexagonRegisterInfo *HRI = nullptr; 570286425Sdim 571286425Sdim MachineFunction *MFN; 572286425Sdim MachineRegisterInfo *MRI; 573286425Sdim MachineDominatorTree *MDT; 574286425Sdim CellMapShadow *CMS; 575286425Sdim 576286425Sdim RegisterOrdering BaseOrd; 577286425Sdim RegisterOrdering CellOrd; 578286425Sdim IFMapType IFMap; 579286425Sdim }; 580286425Sdim 581314564Sdim} // end anonymous namespace 582286425Sdim 583327952Sdimchar HexagonGenInsert::ID = 0; 584327952Sdim 585286425Sdimvoid HexagonGenInsert::dump_map() const { 586327952Sdim using iterator = IFMapType::const_iterator; 587327952Sdim 588286425Sdim for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 589327952Sdim dbgs() << " " << printReg(I->first, HRI) << ":\n"; 590286425Sdim const IFListType &LL = I->second; 591286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 592286425Sdim dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " 593286425Sdim << PrintRegSet(LL[i].second, HRI) << '\n'; 594286425Sdim } 595286425Sdim} 596286425Sdim 597286425Sdimvoid HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { 598286425Sdim unsigned Index = 0; 599327952Sdim 600327952Sdim using mf_iterator = MachineFunction::const_iterator; 601327952Sdim 602286425Sdim for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { 603286425Sdim const MachineBasicBlock &B = *A; 604286425Sdim if (!CMS->BT.reached(&B)) 605286425Sdim continue; 606327952Sdim 607327952Sdim using mb_iterator = MachineBasicBlock::const_iterator; 608327952Sdim 609286425Sdim for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { 610286425Sdim const MachineInstr *MI = &*I; 611286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 612286425Sdim const MachineOperand &MO = MI->getOperand(i); 613286425Sdim if (MO.isReg() && MO.isDef()) { 614360784Sdim Register R = MO.getReg(); 615286425Sdim assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); 616360784Sdim if (Register::isVirtualRegister(R)) 617286425Sdim RO.insert(std::make_pair(R, Index++)); 618286425Sdim } 619286425Sdim } 620286425Sdim } 621286425Sdim } 622286425Sdim // Since some virtual registers may have had their def and uses eliminated, 623286425Sdim // they are no longer referenced in the code, and so they will not appear 624286425Sdim // in the map. 625286425Sdim} 626286425Sdim 627286425Sdimvoid HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, 628286425Sdim RegisterOrdering &RO) const { 629286425Sdim // Create a vector of all virtual registers (collect them from the base 630286425Sdim // ordering RB), and then sort it using the RegisterCell comparator. 631286425Sdim BitValueOrdering BVO(RB); 632286425Sdim RegisterCellLexCompare LexCmp(BVO, *CMS); 633327952Sdim 634327952Sdim using SortableVectorType = std::vector<unsigned>; 635327952Sdim 636286425Sdim SortableVectorType VRs; 637286425Sdim for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) 638286425Sdim VRs.push_back(I->first); 639344779Sdim llvm::sort(VRs, LexCmp); 640286425Sdim // Transfer the results to the outgoing register ordering. 641286425Sdim for (unsigned i = 0, n = VRs.size(); i < n; ++i) 642286425Sdim RO.insert(std::make_pair(VRs[i], i)); 643286425Sdim} 644286425Sdim 645286425Sdiminline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { 646286425Sdim return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; 647286425Sdim} 648286425Sdim 649286425Sdimbool HexagonGenInsert::isConstant(unsigned VR) const { 650286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 651286425Sdim uint16_t W = RC.width(); 652286425Sdim for (uint16_t i = 0; i < W; ++i) { 653286425Sdim const BitTracker::BitValue &BV = RC[i]; 654286425Sdim if (BV.is(0) || BV.is(1)) 655286425Sdim continue; 656286425Sdim return false; 657286425Sdim } 658286425Sdim return true; 659286425Sdim} 660286425Sdim 661286425Sdimbool HexagonGenInsert::isSmallConstant(unsigned VR) const { 662286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 663286425Sdim uint16_t W = RC.width(); 664286425Sdim if (W > 64) 665286425Sdim return false; 666286425Sdim uint64_t V = 0, B = 1; 667286425Sdim for (uint16_t i = 0; i < W; ++i) { 668286425Sdim const BitTracker::BitValue &BV = RC[i]; 669286425Sdim if (BV.is(1)) 670286425Sdim V |= B; 671286425Sdim else if (!BV.is(0)) 672286425Sdim return false; 673286425Sdim B <<= 1; 674286425Sdim } 675286425Sdim 676286425Sdim // For 32-bit registers, consider: Rd = #s16. 677286425Sdim if (W == 32) 678286425Sdim return isInt<16>(V); 679286425Sdim 680286425Sdim // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) 681286425Sdim return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); 682286425Sdim} 683286425Sdim 684286425Sdimbool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, 685286425Sdim unsigned InsR, uint16_t L, uint16_t S) const { 686286425Sdim const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); 687286425Sdim const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); 688286425Sdim const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); 689286425Sdim // Only integet (32-/64-bit) register classes. 690286425Sdim if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) 691286425Sdim return false; 692286425Sdim // The "source" register must be of the same class as DstR. 693286425Sdim if (DstRC != SrcRC) 694286425Sdim return false; 695286425Sdim if (DstRC == InsRC) 696286425Sdim return true; 697286425Sdim // A 64-bit register can only be generated from other 64-bit registers. 698286425Sdim if (DstRC == &Hexagon::DoubleRegsRegClass) 699286425Sdim return false; 700286425Sdim // Otherwise, the L and S cannot span 32-bit word boundary. 701286425Sdim if (S < 32 && S+L > 32) 702286425Sdim return false; 703286425Sdim return true; 704286425Sdim} 705286425Sdim 706286425Sdimbool HexagonGenInsert::findSelfReference(unsigned VR) const { 707286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 708286425Sdim for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 709286425Sdim const BitTracker::BitValue &V = RC[i]; 710286425Sdim if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) 711286425Sdim return true; 712286425Sdim } 713286425Sdim return false; 714286425Sdim} 715286425Sdim 716286425Sdimbool HexagonGenInsert::findNonSelfReference(unsigned VR) const { 717286425Sdim BitTracker::RegisterCell RC = CMS->lookup(VR); 718286425Sdim for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 719286425Sdim const BitTracker::BitValue &V = RC[i]; 720286425Sdim if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) 721286425Sdim return true; 722286425Sdim } 723286425Sdim return false; 724286425Sdim} 725286425Sdim 726286425Sdimvoid HexagonGenInsert::getInstrDefs(const MachineInstr *MI, 727286425Sdim RegisterSet &Defs) const { 728286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 729286425Sdim const MachineOperand &MO = MI->getOperand(i); 730286425Sdim if (!MO.isReg() || !MO.isDef()) 731286425Sdim continue; 732360784Sdim Register R = MO.getReg(); 733360784Sdim if (!Register::isVirtualRegister(R)) 734286425Sdim continue; 735286425Sdim Defs.insert(R); 736286425Sdim } 737286425Sdim} 738286425Sdim 739286425Sdimvoid HexagonGenInsert::getInstrUses(const MachineInstr *MI, 740286425Sdim RegisterSet &Uses) const { 741286425Sdim for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 742286425Sdim const MachineOperand &MO = MI->getOperand(i); 743286425Sdim if (!MO.isReg() || !MO.isUse()) 744286425Sdim continue; 745360784Sdim Register R = MO.getReg(); 746360784Sdim if (!Register::isVirtualRegister(R)) 747286425Sdim continue; 748286425Sdim Uses.insert(R); 749286425Sdim } 750286425Sdim} 751286425Sdim 752286425Sdimunsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, 753286425Sdim const MachineBasicBlock *ToB, const UnsignedMap &RPO, 754286425Sdim PairMapType &M) const { 755286425Sdim // Forward distance from the end of a block to the beginning of it does 756286425Sdim // not make sense. This function should not be called with FromB == ToB. 757286425Sdim assert(FromB != ToB); 758286425Sdim 759286425Sdim unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); 760286425Sdim // If we have already computed it, return the cached result. 761286425Sdim PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); 762286425Sdim if (F != M.end()) 763286425Sdim return F->second; 764286425Sdim unsigned ToRPO = RPO.lookup(ToN); 765286425Sdim 766286425Sdim unsigned MaxD = 0; 767327952Sdim 768327952Sdim using pred_iterator = MachineBasicBlock::const_pred_iterator; 769327952Sdim 770286425Sdim for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { 771286425Sdim const MachineBasicBlock *PB = *I; 772286425Sdim // Skip back edges. Also, if FromB is a predecessor of ToB, the distance 773286425Sdim // along that path will be 0, and we don't need to do any calculations 774286425Sdim // on it. 775286425Sdim if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) 776286425Sdim continue; 777286425Sdim unsigned D = PB->size() + distance(FromB, PB, RPO, M); 778286425Sdim if (D > MaxD) 779286425Sdim MaxD = D; 780286425Sdim } 781286425Sdim 782286425Sdim // Memoize the result for later lookup. 783286425Sdim M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); 784286425Sdim return MaxD; 785286425Sdim} 786286425Sdim 787286425Sdimunsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, 788286425Sdim MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 789286425Sdim PairMapType &M) const { 790286425Sdim const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); 791286425Sdim if (FB == TB) 792286425Sdim return std::distance(FromI, ToI); 793286425Sdim unsigned D1 = std::distance(TB->begin(), ToI); 794286425Sdim unsigned D2 = distance(FB, TB, RPO, M); 795286425Sdim unsigned D3 = std::distance(FromI, FB->end()); 796286425Sdim return D1+D2+D3; 797286425Sdim} 798286425Sdim 799286425Sdimbool HexagonGenInsert::findRecordInsertForms(unsigned VR, 800286425Sdim OrderedRegisterList &AVs) { 801286425Sdim if (isDebug()) { 802327952Sdim dbgs() << __func__ << ": " << printReg(VR, HRI) 803286425Sdim << " AVs: " << PrintORL(AVs, HRI) << "\n"; 804286425Sdim } 805286425Sdim if (AVs.size() == 0) 806286425Sdim return false; 807286425Sdim 808327952Sdim using iterator = OrderedRegisterList::iterator; 809327952Sdim 810286425Sdim BitValueOrdering BVO(BaseOrd); 811286425Sdim const BitTracker::RegisterCell &RC = CMS->lookup(VR); 812286425Sdim uint16_t W = RC.width(); 813286425Sdim 814327952Sdim using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift) 815327952Sdim using RSListType = std::vector<RSRecord>; 816286425Sdim // Have a map, with key being the matching prefix length, and the value 817286425Sdim // being the list of pairs (R,S), where R's prefix matches VR at S. 818286425Sdim // (DenseMap<uint16_t,RSListType> fails to instantiate.) 819327952Sdim using LRSMapType = DenseMap<unsigned, RSListType>; 820286425Sdim LRSMapType LM; 821286425Sdim 822286425Sdim // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, 823286425Sdim // and find matching prefixes from AVs with the rotated RC. Such a prefix 824286425Sdim // would match a string of bits (of length L) in RC starting at S. 825286425Sdim for (uint16_t S = 0; S < W; ++S) { 826286425Sdim iterator B = AVs.begin(), E = AVs.end(); 827286425Sdim // The registers in AVs are ordered according to the lexical order of 828286425Sdim // the corresponding register cells. This means that the range of regis- 829286425Sdim // ters in AVs that match a prefix of length L+1 will be contained in 830286425Sdim // the range that matches a prefix of length L. This means that we can 831286425Sdim // keep narrowing the search space as the prefix length goes up. This 832286425Sdim // helps reduce the overall complexity of the search. 833286425Sdim uint16_t L; 834286425Sdim for (L = 0; L < W-S; ++L) { 835286425Sdim // Compare against VR's bits starting at S, which emulates rotation 836286425Sdim // of VR by S. 837286425Sdim RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); 838286425Sdim iterator NewB = std::lower_bound(B, E, VR, RCB); 839286425Sdim iterator NewE = std::upper_bound(NewB, E, VR, RCB); 840286425Sdim // For the registers that are eliminated from the next range, L is 841286425Sdim // the longest prefix matching VR at position S (their prefixes 842286425Sdim // differ from VR at S+L). If L>0, record this information for later 843286425Sdim // use. 844286425Sdim if (L > 0) { 845286425Sdim for (iterator I = B; I != NewB; ++I) 846286425Sdim LM[L].push_back(std::make_pair(*I, S)); 847286425Sdim for (iterator I = NewE; I != E; ++I) 848286425Sdim LM[L].push_back(std::make_pair(*I, S)); 849286425Sdim } 850286425Sdim B = NewB, E = NewE; 851286425Sdim if (B == E) 852286425Sdim break; 853286425Sdim } 854286425Sdim // Record the final register range. If this range is non-empty, then 855286425Sdim // L=W-S. 856286425Sdim assert(B == E || L == W-S); 857286425Sdim if (B != E) { 858286425Sdim for (iterator I = B; I != E; ++I) 859286425Sdim LM[L].push_back(std::make_pair(*I, S)); 860286425Sdim // If B!=E, then we found a range of registers whose prefixes cover the 861286425Sdim // rest of VR from position S. There is no need to further advance S. 862286425Sdim break; 863286425Sdim } 864286425Sdim } 865286425Sdim 866286425Sdim if (isDebug()) { 867327952Sdim dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n"; 868286425Sdim for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { 869286425Sdim dbgs() << " L=" << I->first << ':'; 870286425Sdim const RSListType &LL = I->second; 871286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 872327952Sdim dbgs() << " (" << printReg(LL[i].first, HRI) << ",@" 873286425Sdim << LL[i].second << ')'; 874286425Sdim dbgs() << '\n'; 875286425Sdim } 876286425Sdim } 877286425Sdim 878286425Sdim bool Recorded = false; 879286425Sdim 880286425Sdim for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { 881286425Sdim unsigned SrcR = *I; 882286425Sdim int FDi = -1, LDi = -1; // First/last different bit. 883286425Sdim const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); 884286425Sdim uint16_t AW = AC.width(); 885286425Sdim for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { 886286425Sdim if (RC[i] == AC[i]) 887286425Sdim continue; 888286425Sdim if (FDi == -1) 889286425Sdim FDi = i; 890286425Sdim LDi = i; 891286425Sdim } 892286425Sdim if (FDi == -1) 893286425Sdim continue; // TODO (future): Record identical registers. 894286425Sdim // Look for a register whose prefix could patch the range [FD..LD] 895286425Sdim // where VR and SrcR differ. 896286425Sdim uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. 897286425Sdim uint16_t MinL = LD-FD+1; 898286425Sdim for (uint16_t L = MinL; L < W; ++L) { 899286425Sdim LRSMapType::iterator F = LM.find(L); 900286425Sdim if (F == LM.end()) 901286425Sdim continue; 902286425Sdim RSListType &LL = F->second; 903286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) { 904286425Sdim uint16_t S = LL[i].second; 905286425Sdim // MinL is the minimum length of the prefix. Any length above MinL 906286425Sdim // allows some flexibility as to where the prefix can start: 907286425Sdim // given the extra length EL=L-MinL, the prefix must start between 908286425Sdim // max(0,FD-EL) and FD. 909286425Sdim if (S > FD) // Starts too late. 910286425Sdim continue; 911286425Sdim uint16_t EL = L-MinL; 912286425Sdim uint16_t LowS = (EL < FD) ? FD-EL : 0; 913286425Sdim if (S < LowS) // Starts too early. 914286425Sdim continue; 915286425Sdim unsigned InsR = LL[i].first; 916286425Sdim if (!isValidInsertForm(VR, SrcR, InsR, L, S)) 917286425Sdim continue; 918286425Sdim if (isDebug()) { 919327952Sdim dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI) 920327952Sdim << ',' << printReg(InsR, HRI) << ",#" << L << ",#" 921286425Sdim << S << ")\n"; 922286425Sdim } 923286425Sdim IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); 924286425Sdim IFMap[VR].push_back(RR); 925286425Sdim Recorded = true; 926286425Sdim } 927286425Sdim } 928286425Sdim } 929286425Sdim 930286425Sdim return Recorded; 931286425Sdim} 932286425Sdim 933286425Sdimvoid HexagonGenInsert::collectInBlock(MachineBasicBlock *B, 934286425Sdim OrderedRegisterList &AVs) { 935286425Sdim if (isDebug()) 936327952Sdim dbgs() << "visiting block " << printMBBReference(*B) << "\n"; 937286425Sdim 938286425Sdim // First, check if this block is reachable at all. If not, the bit tracker 939286425Sdim // will not have any information about registers in it. 940286425Sdim if (!CMS->BT.reached(B)) 941286425Sdim return; 942286425Sdim 943286425Sdim bool DoConst = OptConst; 944286425Sdim // Keep a separate set of registers defined in this block, so that we 945286425Sdim // can remove them from the list of available registers once all DT 946286425Sdim // successors have been processed. 947286425Sdim RegisterSet BlockDefs, InsDefs; 948286425Sdim for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { 949286425Sdim MachineInstr *MI = &*I; 950286425Sdim InsDefs.clear(); 951286425Sdim getInstrDefs(MI, InsDefs); 952286425Sdim // Leave those alone. They are more transparent than "insert". 953286425Sdim bool Skip = MI->isCopy() || MI->isRegSequence(); 954286425Sdim 955286425Sdim if (!Skip) { 956286425Sdim // Visit all defined registers, and attempt to find the corresponding 957286425Sdim // "insert" representations. 958286425Sdim for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { 959286425Sdim // Do not collect registers that are known to be compile-time cons- 960286425Sdim // tants, unless requested. 961286425Sdim if (!DoConst && isConstant(VR)) 962286425Sdim continue; 963286425Sdim // If VR's cell contains a reference to VR, then VR cannot be defined 964286425Sdim // via "insert". If VR is a constant that can be generated in a single 965286425Sdim // instruction (without constant extenders), generating it via insert 966286425Sdim // makes no sense. 967286425Sdim if (findSelfReference(VR) || isSmallConstant(VR)) 968286425Sdim continue; 969286425Sdim 970286425Sdim findRecordInsertForms(VR, AVs); 971341825Sdim // Stop if the map size is too large. 972341825Sdim if (IFMap.size() > MaxIFMSize) 973341825Sdim return; 974286425Sdim } 975286425Sdim } 976286425Sdim 977286425Sdim // Insert the defined registers into the list of available registers 978286425Sdim // after they have been processed. 979286425Sdim for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) 980286425Sdim AVs.insert(VR); 981286425Sdim BlockDefs.insert(InsDefs); 982286425Sdim } 983286425Sdim 984321369Sdim for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) { 985321369Sdim MachineBasicBlock *SB = DTN->getBlock(); 986286425Sdim collectInBlock(SB, AVs); 987286425Sdim } 988286425Sdim 989286425Sdim for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) 990286425Sdim AVs.remove(VR); 991286425Sdim} 992286425Sdim 993286425Sdimvoid HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, 994286425Sdim RegisterSet &RMs) const { 995286425Sdim // For a given register VR and a insert form, find the registers that are 996286425Sdim // used by the current definition of VR, and which would no longer be 997286425Sdim // needed for it after the definition of VR is replaced with the insert 998286425Sdim // form. These are the registers that could potentially become dead. 999286425Sdim RegisterSet Regs[2]; 1000286425Sdim 1001286425Sdim unsigned S = 0; // Register set selector. 1002286425Sdim Regs[S].insert(VR); 1003286425Sdim 1004286425Sdim while (!Regs[S].empty()) { 1005286425Sdim // Breadth-first search. 1006286425Sdim unsigned OtherS = 1-S; 1007286425Sdim Regs[OtherS].clear(); 1008286425Sdim for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { 1009286425Sdim Regs[S].remove(R); 1010286425Sdim if (R == IF.SrcR || R == IF.InsR) 1011286425Sdim continue; 1012286425Sdim // Check if a given register has bits that are references to any other 1013286425Sdim // registers. This is to detect situations where the instruction that 1014286425Sdim // defines register R takes register Q as an operand, but R itself does 1015286425Sdim // not contain any bits from Q. Loads are examples of how this could 1016286425Sdim // happen: 1017286425Sdim // R = load Q 1018286425Sdim // In this case (assuming we do not have any knowledge about the loaded 1019286425Sdim // value), we must not treat R as a "conveyance" of the bits from Q. 1020286425Sdim // (The information in BT about R's bits would have them as constants, 1021286425Sdim // in case of zero-extending loads, or refs to R.) 1022286425Sdim if (!findNonSelfReference(R)) 1023286425Sdim continue; 1024286425Sdim RMs.insert(R); 1025286425Sdim const MachineInstr *DefI = MRI->getVRegDef(R); 1026286425Sdim assert(DefI); 1027286425Sdim // Do not iterate past PHI nodes to avoid infinite loops. This can 1028286425Sdim // make the final set a bit less accurate, but the removable register 1029286425Sdim // sets are an approximation anyway. 1030286425Sdim if (DefI->isPHI()) 1031286425Sdim continue; 1032286425Sdim getInstrUses(DefI, Regs[OtherS]); 1033286425Sdim } 1034286425Sdim S = OtherS; 1035286425Sdim } 1036286425Sdim // The register VR is added to the list as a side-effect of the algorithm, 1037286425Sdim // but it is not "potentially removable". A potentially removable register 1038286425Sdim // is one that may become unused (dead) after conversion to the insert form 1039286425Sdim // IF, and obviously VR (or its replacement) will not become dead by apply- 1040286425Sdim // ing IF. 1041286425Sdim RMs.remove(VR); 1042286425Sdim} 1043286425Sdim 1044286425Sdimvoid HexagonGenInsert::computeRemovableRegisters() { 1045286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1046286425Sdim IFListType &LL = I->second; 1047286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 1048286425Sdim findRemovableRegisters(I->first, LL[i].first, LL[i].second); 1049286425Sdim } 1050286425Sdim} 1051286425Sdim 1052286425Sdimvoid HexagonGenInsert::pruneEmptyLists() { 1053286425Sdim // Remove all entries from the map, where the register has no insert forms 1054286425Sdim // associated with it. 1055327952Sdim using IterListType = SmallVector<IFMapType::iterator, 16>; 1056286425Sdim IterListType Prune; 1057286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1058314564Sdim if (I->second.empty()) 1059286425Sdim Prune.push_back(I); 1060286425Sdim } 1061286425Sdim for (unsigned i = 0, n = Prune.size(); i < n; ++i) 1062286425Sdim IFMap.erase(Prune[i]); 1063286425Sdim} 1064286425Sdim 1065286425Sdimvoid HexagonGenInsert::pruneCoveredSets(unsigned VR) { 1066286425Sdim IFMapType::iterator F = IFMap.find(VR); 1067286425Sdim assert(F != IFMap.end()); 1068286425Sdim IFListType &LL = F->second; 1069286425Sdim 1070286425Sdim // First, examine the IF candidates for register VR whose removable-regis- 1071286425Sdim // ter sets are empty. This means that a given candidate will not help eli- 1072286425Sdim // minate any registers, but since "insert" is not a constant-extendable 1073286425Sdim // instruction, using such a candidate may reduce code size if the defini- 1074286425Sdim // tion of VR is constant-extended. 1075286425Sdim // If there exists a candidate with a non-empty set, the ones with empty 1076286425Sdim // sets will not be used and can be removed. 1077286425Sdim MachineInstr *DefVR = MRI->getVRegDef(VR); 1078314564Sdim bool DefEx = HII->isConstExtended(*DefVR); 1079286425Sdim bool HasNE = false; 1080286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) { 1081286425Sdim if (LL[i].second.empty()) 1082286425Sdim continue; 1083286425Sdim HasNE = true; 1084286425Sdim break; 1085286425Sdim } 1086286425Sdim if (!DefEx || HasNE) { 1087286425Sdim // The definition of VR is not constant-extended, or there is a candidate 1088286425Sdim // with a non-empty set. Remove all candidates with empty sets. 1089286425Sdim auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { 1090286425Sdim return IR.second.empty(); 1091286425Sdim }; 1092314564Sdim auto End = llvm::remove_if(LL, IsEmpty); 1093286425Sdim if (End != LL.end()) 1094286425Sdim LL.erase(End, LL.end()); 1095286425Sdim } else { 1096286425Sdim // The definition of VR is constant-extended, and all candidates have 1097286425Sdim // empty removable-register sets. Pick the maximum candidate, and remove 1098286425Sdim // all others. The "maximum" does not have any special meaning here, it 1099286425Sdim // is only so that the candidate that will remain on the list is selec- 1100286425Sdim // ted deterministically. 1101286425Sdim IFRecord MaxIF = LL[0].first; 1102286425Sdim for (unsigned i = 1, n = LL.size(); i < n; ++i) { 1103286425Sdim // If LL[MaxI] < LL[i], then MaxI = i. 1104286425Sdim const IFRecord &IF = LL[i].first; 1105286425Sdim unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; 1106286425Sdim unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; 1107286425Sdim if (M0 > R0) 1108286425Sdim continue; 1109286425Sdim if (M0 == R0) { 1110286425Sdim if (M1 > R1) 1111286425Sdim continue; 1112286425Sdim if (M1 == R1) { 1113286425Sdim if (MaxIF.Wdh > IF.Wdh) 1114286425Sdim continue; 1115286425Sdim if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) 1116286425Sdim continue; 1117286425Sdim } 1118286425Sdim } 1119286425Sdim // MaxIF < IF. 1120286425Sdim MaxIF = IF; 1121286425Sdim } 1122286425Sdim // Remove everything except the maximum candidate. All register sets 1123286425Sdim // are empty, so no need to preserve anything. 1124286425Sdim LL.clear(); 1125286425Sdim LL.push_back(std::make_pair(MaxIF, RegisterSet())); 1126286425Sdim } 1127286425Sdim 1128286425Sdim // Now, remove those whose sets of potentially removable registers are 1129286425Sdim // contained in another IF candidate for VR. For example, given these 1130327952Sdim // candidates for %45, 1131327952Sdim // %45: 1132327952Sdim // (%44,%41,#9,#8), { %42 } 1133327952Sdim // (%43,%41,#9,#8), { %42 %44 } 1134286425Sdim // remove the first one, since it is contained in the second one. 1135286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ) { 1136286425Sdim const RegisterSet &RMi = LL[i].second; 1137286425Sdim unsigned j = 0; 1138286425Sdim while (j < n) { 1139286425Sdim if (j != i && LL[j].second.includes(RMi)) 1140286425Sdim break; 1141286425Sdim j++; 1142286425Sdim } 1143286425Sdim if (j == n) { // RMi not contained in anything else. 1144286425Sdim i++; 1145286425Sdim continue; 1146286425Sdim } 1147286425Sdim LL.erase(LL.begin()+i); 1148286425Sdim n = LL.size(); 1149286425Sdim } 1150286425Sdim} 1151286425Sdim 1152286425Sdimvoid HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, 1153286425Sdim PairMapType &M) { 1154286425Sdim IFMapType::iterator F = IFMap.find(VR); 1155286425Sdim assert(F != IFMap.end()); 1156286425Sdim IFListType &LL = F->second; 1157286425Sdim unsigned Cutoff = VRegDistCutoff; 1158286425Sdim const MachineInstr *DefV = MRI->getVRegDef(VR); 1159286425Sdim 1160286425Sdim for (unsigned i = LL.size(); i > 0; --i) { 1161286425Sdim unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; 1162286425Sdim const MachineInstr *DefS = MRI->getVRegDef(SR); 1163286425Sdim const MachineInstr *DefI = MRI->getVRegDef(IR); 1164286425Sdim unsigned DSV = distance(DefS, DefV, RPO, M); 1165286425Sdim if (DSV < Cutoff) { 1166286425Sdim unsigned DIV = distance(DefI, DefV, RPO, M); 1167286425Sdim if (DIV < Cutoff) 1168286425Sdim continue; 1169286425Sdim } 1170286425Sdim LL.erase(LL.begin()+(i-1)); 1171286425Sdim } 1172286425Sdim} 1173286425Sdim 1174286425Sdimvoid HexagonGenInsert::pruneRegCopies(unsigned VR) { 1175286425Sdim IFMapType::iterator F = IFMap.find(VR); 1176286425Sdim assert(F != IFMap.end()); 1177286425Sdim IFListType &LL = F->second; 1178286425Sdim 1179286425Sdim auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { 1180286425Sdim return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); 1181286425Sdim }; 1182314564Sdim auto End = llvm::remove_if(LL, IsCopy); 1183286425Sdim if (End != LL.end()) 1184286425Sdim LL.erase(End, LL.end()); 1185286425Sdim} 1186286425Sdim 1187286425Sdimvoid HexagonGenInsert::pruneCandidates() { 1188286425Sdim // Remove candidates that are not beneficial, regardless of the final 1189286425Sdim // selection method. 1190286425Sdim // First, remove candidates whose potentially removable set is a subset 1191286425Sdim // of another candidate's set. 1192286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1193286425Sdim pruneCoveredSets(I->first); 1194286425Sdim 1195286425Sdim UnsignedMap RPO; 1196327952Sdim 1197327952Sdim using RPOTType = ReversePostOrderTraversal<const MachineFunction *>; 1198327952Sdim 1199286425Sdim RPOTType RPOT(MFN); 1200286425Sdim unsigned RPON = 0; 1201286425Sdim for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 1202286425Sdim RPO[(*I)->getNumber()] = RPON++; 1203286425Sdim 1204286425Sdim PairMapType Memo; // Memoization map for distance calculation. 1205286425Sdim // Remove candidates that would use registers defined too far away. 1206286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1207286425Sdim pruneUsesTooFar(I->first, RPO, Memo); 1208286425Sdim 1209286425Sdim pruneEmptyLists(); 1210286425Sdim 1211286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1212286425Sdim pruneRegCopies(I->first); 1213286425Sdim} 1214286425Sdim 1215314564Sdimnamespace { 1216286425Sdim 1217286425Sdim // Class for comparing IF candidates for registers that have multiple of 1218286425Sdim // them. The smaller the candidate, according to this ordering, the better. 1219286425Sdim // First, compare the number of zeros in the associated potentially remova- 1220286425Sdim // ble register sets. "Zero" indicates that the register is very likely to 1221286425Sdim // become dead after this transformation. 1222286425Sdim // Second, compare "averages", i.e. use-count per size. The lower wins. 1223286425Sdim // After that, it does not really matter which one is smaller. Resolve 1224286425Sdim // the tie in some deterministic way. 1225286425Sdim struct IFOrdering { 1226286425Sdim IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) 1227286425Sdim : UseC(UC), BaseOrd(BO) {} 1228314564Sdim 1229286425Sdim bool operator() (const IFRecordWithRegSet &A, 1230327952Sdim const IFRecordWithRegSet &B) const; 1231314564Sdim 1232286425Sdim private: 1233286425Sdim void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1234286425Sdim unsigned &Sum) const; 1235314564Sdim 1236286425Sdim const UnsignedMap &UseC; 1237286425Sdim const RegisterOrdering &BaseOrd; 1238286425Sdim }; 1239286425Sdim 1240314564Sdim} // end anonymous namespace 1241286425Sdim 1242286425Sdimbool IFOrdering::operator() (const IFRecordWithRegSet &A, 1243286425Sdim const IFRecordWithRegSet &B) const { 1244286425Sdim unsigned SizeA = 0, ZeroA = 0, SumA = 0; 1245286425Sdim unsigned SizeB = 0, ZeroB = 0, SumB = 0; 1246286425Sdim stats(A.second, SizeA, ZeroA, SumA); 1247286425Sdim stats(B.second, SizeB, ZeroB, SumB); 1248286425Sdim 1249286425Sdim // We will pick the minimum element. The more zeros, the better. 1250286425Sdim if (ZeroA != ZeroB) 1251286425Sdim return ZeroA > ZeroB; 1252286425Sdim // Compare SumA/SizeA with SumB/SizeB, lower is better. 1253286425Sdim uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; 1254286425Sdim if (AvgA != AvgB) 1255286425Sdim return AvgA < AvgB; 1256286425Sdim 1257286425Sdim // The sets compare identical so far. Resort to comparing the IF records. 1258286425Sdim // The actual values don't matter, this is only for determinism. 1259286425Sdim unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; 1260286425Sdim if (OSA != OSB) 1261286425Sdim return OSA < OSB; 1262286425Sdim unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; 1263286425Sdim if (OIA != OIB) 1264286425Sdim return OIA < OIB; 1265286425Sdim if (A.first.Wdh != B.first.Wdh) 1266286425Sdim return A.first.Wdh < B.first.Wdh; 1267286425Sdim return A.first.Off < B.first.Off; 1268286425Sdim} 1269286425Sdim 1270286425Sdimvoid IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1271286425Sdim unsigned &Sum) const { 1272286425Sdim for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { 1273286425Sdim UnsignedMap::const_iterator F = UseC.find(R); 1274286425Sdim assert(F != UseC.end()); 1275286425Sdim unsigned UC = F->second; 1276286425Sdim if (UC == 0) 1277286425Sdim Zero++; 1278286425Sdim Sum += UC; 1279286425Sdim Size++; 1280286425Sdim } 1281286425Sdim} 1282286425Sdim 1283286425Sdimvoid HexagonGenInsert::selectCandidates() { 1284286425Sdim // Some registers may have multiple valid candidates. Pick the best one 1285286425Sdim // (or decide not to use any). 1286286425Sdim 1287286425Sdim // Compute the "removability" measure of R: 1288286425Sdim // For each potentially removable register R, record the number of regis- 1289286425Sdim // ters with IF candidates, where R appears in at least one set. 1290286425Sdim RegisterSet AllRMs; 1291286425Sdim UnsignedMap UseC, RemC; 1292286425Sdim IFMapType::iterator End = IFMap.end(); 1293286425Sdim 1294286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1295286425Sdim const IFListType &LL = I->second; 1296286425Sdim RegisterSet TT; 1297286425Sdim for (unsigned i = 0, n = LL.size(); i < n; ++i) 1298286425Sdim TT.insert(LL[i].second); 1299286425Sdim for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) 1300286425Sdim RemC[R]++; 1301286425Sdim AllRMs.insert(TT); 1302286425Sdim } 1303286425Sdim 1304286425Sdim for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { 1305327952Sdim using use_iterator = MachineRegisterInfo::use_nodbg_iterator; 1306327952Sdim using InstrSet = SmallSet<const MachineInstr *, 16>; 1307327952Sdim 1308286425Sdim InstrSet UIs; 1309286425Sdim // Count as the number of instructions in which R is used, not the 1310286425Sdim // number of operands. 1311286425Sdim use_iterator E = MRI->use_nodbg_end(); 1312286425Sdim for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) 1313286425Sdim UIs.insert(I->getParent()); 1314286425Sdim unsigned C = UIs.size(); 1315286425Sdim // Calculate a measure, which is the number of instructions using R, 1316286425Sdim // minus the "removability" count computed earlier. 1317286425Sdim unsigned D = RemC[R]; 1318286425Sdim UseC[R] = (C > D) ? C-D : 0; // doz 1319286425Sdim } 1320286425Sdim 1321286425Sdim bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; 1322286425Sdim if (!SelectAll0 && !SelectHas0) 1323286425Sdim SelectAll0 = true; 1324286425Sdim 1325286425Sdim // The smaller the number UseC for a given register R, the "less used" 1326286425Sdim // R is aside from the opportunities for removal offered by generating 1327286425Sdim // "insert" instructions. 1328286425Sdim // Iterate over the IF map, and for those registers that have multiple 1329286425Sdim // candidates, pick the minimum one according to IFOrdering. 1330286425Sdim IFOrdering IFO(UseC, BaseOrd); 1331286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1332286425Sdim IFListType &LL = I->second; 1333286425Sdim if (LL.empty()) 1334286425Sdim continue; 1335286425Sdim // Get the minimum element, remember it and clear the list. If the 1336286425Sdim // element found is adequate, we will put it back on the list, other- 1337286425Sdim // wise the list will remain empty, and the entry for this register 1338286425Sdim // will be removed (i.e. this register will not be replaced by insert). 1339286425Sdim IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); 1340286425Sdim assert(MinI != LL.end()); 1341286425Sdim IFRecordWithRegSet M = *MinI; 1342286425Sdim LL.clear(); 1343286425Sdim 1344286425Sdim // We want to make sure that this replacement will have a chance to be 1345286425Sdim // beneficial, and that means that we want to have indication that some 1346286425Sdim // register will be removed. The most likely registers to be eliminated 1347286425Sdim // are the use operands in the definition of I->first. Accept/reject a 1348286425Sdim // candidate based on how many of its uses it can potentially eliminate. 1349286425Sdim 1350286425Sdim RegisterSet Us; 1351286425Sdim const MachineInstr *DefI = MRI->getVRegDef(I->first); 1352286425Sdim getInstrUses(DefI, Us); 1353286425Sdim bool Accept = false; 1354286425Sdim 1355286425Sdim if (SelectAll0) { 1356286425Sdim bool All0 = true; 1357286425Sdim for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1358286425Sdim if (UseC[R] == 0) 1359286425Sdim continue; 1360286425Sdim All0 = false; 1361286425Sdim break; 1362286425Sdim } 1363286425Sdim Accept = All0; 1364286425Sdim } else if (SelectHas0) { 1365286425Sdim bool Has0 = false; 1366286425Sdim for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1367286425Sdim if (UseC[R] != 0) 1368286425Sdim continue; 1369286425Sdim Has0 = true; 1370286425Sdim break; 1371286425Sdim } 1372286425Sdim Accept = Has0; 1373286425Sdim } 1374286425Sdim if (Accept) 1375286425Sdim LL.push_back(M); 1376286425Sdim } 1377286425Sdim 1378286425Sdim // Remove candidates that add uses of removable registers, unless the 1379286425Sdim // removable registers are among replacement candidates. 1380286425Sdim // Recompute the removable registers, since some candidates may have 1381286425Sdim // been eliminated. 1382286425Sdim AllRMs.clear(); 1383286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1384286425Sdim const IFListType &LL = I->second; 1385314564Sdim if (!LL.empty()) 1386286425Sdim AllRMs.insert(LL[0].second); 1387286425Sdim } 1388286425Sdim for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1389286425Sdim IFListType &LL = I->second; 1390314564Sdim if (LL.empty()) 1391286425Sdim continue; 1392286425Sdim unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; 1393286425Sdim if (AllRMs[SR] || AllRMs[IR]) 1394286425Sdim LL.clear(); 1395286425Sdim } 1396286425Sdim 1397286425Sdim pruneEmptyLists(); 1398286425Sdim} 1399286425Sdim 1400286425Sdimbool HexagonGenInsert::generateInserts() { 1401286425Sdim // Create a new register for each one from IFMap, and store them in the 1402286425Sdim // map. 1403286425Sdim UnsignedMap RegMap; 1404286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1405286425Sdim unsigned VR = I->first; 1406286425Sdim const TargetRegisterClass *RC = MRI->getRegClass(VR); 1407360784Sdim Register NewVR = MRI->createVirtualRegister(RC); 1408286425Sdim RegMap[VR] = NewVR; 1409286425Sdim } 1410286425Sdim 1411286425Sdim // We can generate the "insert" instructions using potentially stale re- 1412286425Sdim // gisters: SrcR and InsR for a given VR may be among other registers that 1413286425Sdim // are also replaced. This is fine, we will do the mass "rauw" a bit later. 1414286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1415286425Sdim MachineInstr *MI = MRI->getVRegDef(I->first); 1416286425Sdim MachineBasicBlock &B = *MI->getParent(); 1417286425Sdim DebugLoc DL = MI->getDebugLoc(); 1418286425Sdim unsigned NewR = RegMap[I->first]; 1419286425Sdim bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; 1420286425Sdim const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) 1421286425Sdim : HII->get(Hexagon::S2_insertp); 1422286425Sdim IFRecord IF = I->second[0].first; 1423286425Sdim unsigned Wdh = IF.Wdh, Off = IF.Off; 1424286425Sdim unsigned InsS = 0; 1425286425Sdim if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { 1426314564Sdim InsS = Hexagon::isub_lo; 1427286425Sdim if (Off >= 32) { 1428314564Sdim InsS = Hexagon::isub_hi; 1429286425Sdim Off -= 32; 1430286425Sdim } 1431286425Sdim } 1432286425Sdim // Advance to the proper location for inserting instructions. This could 1433286425Sdim // be B.end(). 1434286425Sdim MachineBasicBlock::iterator At = MI; 1435286425Sdim if (MI->isPHI()) 1436286425Sdim At = B.getFirstNonPHI(); 1437286425Sdim 1438286425Sdim BuildMI(B, At, DL, D, NewR) 1439286425Sdim .addReg(IF.SrcR) 1440286425Sdim .addReg(IF.InsR, 0, InsS) 1441286425Sdim .addImm(Wdh) 1442286425Sdim .addImm(Off); 1443286425Sdim 1444286425Sdim MRI->clearKillFlags(IF.SrcR); 1445286425Sdim MRI->clearKillFlags(IF.InsR); 1446286425Sdim } 1447286425Sdim 1448286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1449286425Sdim MachineInstr *DefI = MRI->getVRegDef(I->first); 1450286425Sdim MRI->replaceRegWith(I->first, RegMap[I->first]); 1451286425Sdim DefI->eraseFromParent(); 1452286425Sdim } 1453286425Sdim 1454286425Sdim return true; 1455286425Sdim} 1456286425Sdim 1457286425Sdimbool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { 1458286425Sdim bool Changed = false; 1459286425Sdim 1460321369Sdim for (auto *DTN : children<MachineDomTreeNode*>(N)) 1461321369Sdim Changed |= removeDeadCode(DTN); 1462321369Sdim 1463286425Sdim MachineBasicBlock *B = N->getBlock(); 1464286425Sdim std::vector<MachineInstr*> Instrs; 1465286425Sdim for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) 1466286425Sdim Instrs.push_back(&*I); 1467286425Sdim 1468286425Sdim for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { 1469286425Sdim MachineInstr *MI = *I; 1470286425Sdim unsigned Opc = MI->getOpcode(); 1471286425Sdim // Do not touch lifetime markers. This is why the target-independent DCE 1472286425Sdim // cannot be used. 1473286425Sdim if (Opc == TargetOpcode::LIFETIME_START || 1474286425Sdim Opc == TargetOpcode::LIFETIME_END) 1475286425Sdim continue; 1476286425Sdim bool Store = false; 1477286425Sdim if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) 1478286425Sdim continue; 1479286425Sdim 1480286425Sdim bool AllDead = true; 1481286425Sdim SmallVector<unsigned,2> Regs; 1482314564Sdim for (const MachineOperand &MO : MI->operands()) { 1483314564Sdim if (!MO.isReg() || !MO.isDef()) 1484286425Sdim continue; 1485360784Sdim Register R = MO.getReg(); 1486360784Sdim if (!Register::isVirtualRegister(R) || !MRI->use_nodbg_empty(R)) { 1487286425Sdim AllDead = false; 1488286425Sdim break; 1489286425Sdim } 1490286425Sdim Regs.push_back(R); 1491286425Sdim } 1492286425Sdim if (!AllDead) 1493286425Sdim continue; 1494286425Sdim 1495286425Sdim B->erase(MI); 1496286425Sdim for (unsigned I = 0, N = Regs.size(); I != N; ++I) 1497286425Sdim MRI->markUsesInDebugValueAsUndef(Regs[I]); 1498286425Sdim Changed = true; 1499286425Sdim } 1500286425Sdim 1501286425Sdim return Changed; 1502286425Sdim} 1503286425Sdim 1504286425Sdimbool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { 1505327952Sdim if (skipFunction(MF.getFunction())) 1506309124Sdim return false; 1507309124Sdim 1508286425Sdim bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; 1509286425Sdim bool Changed = false; 1510286425Sdim 1511286425Sdim // Sanity check: one, but not both. 1512286425Sdim assert(!OptSelectAll0 || !OptSelectHas0); 1513286425Sdim 1514286425Sdim IFMap.clear(); 1515286425Sdim BaseOrd.clear(); 1516286425Sdim CellOrd.clear(); 1517286425Sdim 1518286425Sdim const auto &ST = MF.getSubtarget<HexagonSubtarget>(); 1519286425Sdim HII = ST.getInstrInfo(); 1520286425Sdim HRI = ST.getRegisterInfo(); 1521286425Sdim MFN = &MF; 1522286425Sdim MRI = &MF.getRegInfo(); 1523286425Sdim MDT = &getAnalysis<MachineDominatorTree>(); 1524286425Sdim 1525286425Sdim // Clean up before any further processing, so that dead code does not 1526286425Sdim // get used in a newly generated "insert" instruction. Have a custom 1527286425Sdim // version of DCE that preserves lifetime markers. Without it, merging 1528286425Sdim // of stack objects can fail to recognize and merge disjoint objects 1529286425Sdim // leading to unnecessary stack growth. 1530296417Sdim Changed = removeDeadCode(MDT->getRootNode()); 1531286425Sdim 1532286425Sdim const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); 1533286425Sdim BitTracker BTLoc(HE, MF); 1534286425Sdim BTLoc.trace(isDebug()); 1535286425Sdim BTLoc.run(); 1536286425Sdim CellMapShadow MS(BTLoc); 1537286425Sdim CMS = &MS; 1538286425Sdim 1539286425Sdim buildOrderingMF(BaseOrd); 1540286425Sdim buildOrderingBT(BaseOrd, CellOrd); 1541286425Sdim 1542286425Sdim if (isDebug()) { 1543286425Sdim dbgs() << "Cell ordering:\n"; 1544286425Sdim for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); 1545286425Sdim I != E; ++I) { 1546286425Sdim unsigned VR = I->first, Pos = I->second; 1547327952Sdim dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n"; 1548286425Sdim } 1549286425Sdim } 1550286425Sdim 1551286425Sdim // Collect candidates for conversion into the insert forms. 1552286425Sdim MachineBasicBlock *RootB = MDT->getRoot(); 1553286425Sdim OrderedRegisterList AvailR(CellOrd); 1554286425Sdim 1555314564Sdim const char *const TGName = "hexinsert"; 1556314564Sdim const char *const TGDesc = "Generate Insert Instructions"; 1557314564Sdim 1558286425Sdim { 1559314564Sdim NamedRegionTimer _T("collection", "collection", TGName, TGDesc, 1560314564Sdim TimingDetail); 1561286425Sdim collectInBlock(RootB, AvailR); 1562286425Sdim // Complete the information gathered in IFMap. 1563286425Sdim computeRemovableRegisters(); 1564286425Sdim } 1565286425Sdim 1566286425Sdim if (isDebug()) { 1567286425Sdim dbgs() << "Candidates after collection:\n"; 1568286425Sdim dump_map(); 1569286425Sdim } 1570286425Sdim 1571286425Sdim if (IFMap.empty()) 1572296417Sdim return Changed; 1573286425Sdim 1574286425Sdim { 1575314564Sdim NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail); 1576286425Sdim pruneCandidates(); 1577286425Sdim } 1578286425Sdim 1579286425Sdim if (isDebug()) { 1580286425Sdim dbgs() << "Candidates after pruning:\n"; 1581286425Sdim dump_map(); 1582286425Sdim } 1583286425Sdim 1584286425Sdim if (IFMap.empty()) 1585296417Sdim return Changed; 1586286425Sdim 1587286425Sdim { 1588314564Sdim NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail); 1589286425Sdim selectCandidates(); 1590286425Sdim } 1591286425Sdim 1592286425Sdim if (isDebug()) { 1593286425Sdim dbgs() << "Candidates after selection:\n"; 1594286425Sdim dump_map(); 1595286425Sdim } 1596286425Sdim 1597286425Sdim // Filter out vregs beyond the cutoff. 1598286425Sdim if (VRegIndexCutoff.getPosition()) { 1599286425Sdim unsigned Cutoff = VRegIndexCutoff; 1600327952Sdim 1601327952Sdim using IterListType = SmallVector<IFMapType::iterator, 16>; 1602327952Sdim 1603286425Sdim IterListType Out; 1604286425Sdim for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1605360784Sdim unsigned Idx = Register::virtReg2Index(I->first); 1606286425Sdim if (Idx >= Cutoff) 1607286425Sdim Out.push_back(I); 1608286425Sdim } 1609286425Sdim for (unsigned i = 0, n = Out.size(); i < n; ++i) 1610286425Sdim IFMap.erase(Out[i]); 1611286425Sdim } 1612296417Sdim if (IFMap.empty()) 1613296417Sdim return Changed; 1614286425Sdim 1615286425Sdim { 1616314564Sdim NamedRegionTimer _T("generation", "generation", TGName, TGDesc, 1617314564Sdim TimingDetail); 1618296417Sdim generateInserts(); 1619286425Sdim } 1620286425Sdim 1621296417Sdim return true; 1622286425Sdim} 1623286425Sdim 1624286425SdimFunctionPass *llvm::createHexagonGenInsert() { 1625286425Sdim return new HexagonGenInsert(); 1626286425Sdim} 1627286425Sdim 1628286425Sdim//===----------------------------------------------------------------------===// 1629286425Sdim// Public Constructor Functions 1630286425Sdim//===----------------------------------------------------------------------===// 1631286425Sdim 1632286425SdimINITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", 1633286425Sdim "Hexagon generate \"insert\" instructions", false, false) 1634286425SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1635286425SdimINITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", 1636286425Sdim "Hexagon generate \"insert\" instructions", false, false) 1637