1326938Sdim//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===// 2326938Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6326938Sdim// 7326938Sdim//===----------------------------------------------------------------------===// 8326938Sdim 9326938Sdim#include "Hexagon.h" 10326938Sdim#include "HexagonISelDAGToDAG.h" 11326938Sdim#include "HexagonISelLowering.h" 12326938Sdim#include "HexagonTargetMachine.h" 13341825Sdim#include "llvm/ADT/SetVector.h" 14326938Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 15326938Sdim#include "llvm/CodeGen/SelectionDAGISel.h" 16326938Sdim#include "llvm/IR/Intrinsics.h" 17360784Sdim#include "llvm/IR/IntrinsicsHexagon.h" 18326938Sdim#include "llvm/Support/CommandLine.h" 19326938Sdim#include "llvm/Support/Debug.h" 20326938Sdim 21326938Sdim#include <deque> 22326938Sdim#include <map> 23326938Sdim#include <set> 24326938Sdim#include <utility> 25326938Sdim#include <vector> 26326938Sdim 27326938Sdim#define DEBUG_TYPE "hexagon-isel" 28326938Sdim 29326938Sdimusing namespace llvm; 30326938Sdim 31327330Sdimnamespace { 32327330Sdim 33326938Sdim// -------------------------------------------------------------------- 34326938Sdim// Implementation of permutation networks. 35326938Sdim 36326938Sdim// Implementation of the node routing through butterfly networks: 37326938Sdim// - Forward delta. 38326938Sdim// - Reverse delta. 39326938Sdim// - Benes. 40326938Sdim// 41326938Sdim// 42326938Sdim// Forward delta network consists of log(N) steps, where N is the number 43326938Sdim// of inputs. In each step, an input can stay in place, or it can get 44326938Sdim// routed to another position[1]. The step after that consists of two 45326938Sdim// networks, each half in size in terms of the number of nodes. In those 46326938Sdim// terms, in the given step, an input can go to either the upper or the 47326938Sdim// lower network in the next step. 48326938Sdim// 49326938Sdim// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both 50326938Sdim// positions as long as there is no conflict. 51326938Sdim 52326938Sdim// Here's a delta network for 8 inputs, only the switching routes are 53326938Sdim// shown: 54326938Sdim// 55326938Sdim// Steps: 56326938Sdim// |- 1 ---------------|- 2 -----|- 3 -| 57326938Sdim// 58326938Sdim// Inp[0] *** *** *** *** Out[0] 59326938Sdim// \ / \ / \ / 60326938Sdim// \ / \ / X 61326938Sdim// \ / \ / / \ 62326938Sdim// Inp[1] *** \ / *** X *** *** Out[1] 63326938Sdim// \ \ / / \ / \ / 64326938Sdim// \ \ / / X X 65326938Sdim// \ \ / / / \ / \ 66326938Sdim// Inp[2] *** \ \ / / *** X *** *** Out[2] 67326938Sdim// \ \ X / / / \ \ / 68326938Sdim// \ \ / \ / / / \ X 69326938Sdim// \ X X / / \ / \ 70326938Sdim// Inp[3] *** \ / \ / \ / *** *** *** Out[3] 71326938Sdim// \ X X X / 72326938Sdim// \ / \ / \ / \ / 73326938Sdim// X X X X 74326938Sdim// / \ / \ / \ / \ 75326938Sdim// / X X X \ 76326938Sdim// Inp[4] *** / \ / \ / \ *** *** *** Out[4] 77326938Sdim// / X X \ \ / \ / 78326938Sdim// / / \ / \ \ \ / X 79326938Sdim// / / X \ \ \ / / \ 80326938Sdim// Inp[5] *** / / \ \ *** X *** *** Out[5] 81326938Sdim// / / \ \ \ / \ / 82326938Sdim// / / \ \ X X 83326938Sdim// / / \ \ / \ / \ 84326938Sdim// Inp[6] *** / \ *** X *** *** Out[6] 85326938Sdim// / \ / \ \ / 86326938Sdim// / \ / \ X 87326938Sdim// / \ / \ / \ 88326938Sdim// Inp[7] *** *** *** *** Out[7] 89326938Sdim// 90326938Sdim// 91326938Sdim// Reverse delta network is same as delta network, with the steps in 92326938Sdim// the opposite order. 93326938Sdim// 94326938Sdim// 95326938Sdim// Benes network is a forward delta network immediately followed by 96326938Sdim// a reverse delta network. 97326938Sdim 98341825Sdimenum class ColorKind { None, Red, Black }; 99326938Sdim 100326938Sdim// Graph coloring utility used to partition nodes into two groups: 101326938Sdim// they will correspond to nodes routed to the upper and lower networks. 102326938Sdimstruct Coloring { 103326938Sdim using Node = int; 104341825Sdim using MapType = std::map<Node, ColorKind>; 105326938Sdim static constexpr Node Ignore = Node(-1); 106326938Sdim 107326938Sdim Coloring(ArrayRef<Node> Ord) : Order(Ord) { 108326938Sdim build(); 109326938Sdim if (!color()) 110326938Sdim Colors.clear(); 111326938Sdim } 112326938Sdim 113326938Sdim const MapType &colors() const { 114326938Sdim return Colors; 115326938Sdim } 116326938Sdim 117341825Sdim ColorKind other(ColorKind Color) { 118341825Sdim if (Color == ColorKind::None) 119341825Sdim return ColorKind::Red; 120341825Sdim return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red; 121326938Sdim } 122326938Sdim 123344779Sdim LLVM_DUMP_METHOD void dump() const; 124326938Sdim 125326938Sdimprivate: 126326938Sdim ArrayRef<Node> Order; 127326938Sdim MapType Colors; 128326938Sdim std::set<Node> Needed; 129326938Sdim 130326938Sdim using NodeSet = std::set<Node>; 131326938Sdim std::map<Node,NodeSet> Edges; 132326938Sdim 133326938Sdim Node conj(Node Pos) { 134326938Sdim Node Num = Order.size(); 135326938Sdim return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2; 136326938Sdim } 137326938Sdim 138341825Sdim ColorKind getColor(Node N) { 139326938Sdim auto F = Colors.find(N); 140341825Sdim return F != Colors.end() ? F->second : ColorKind::None; 141326938Sdim } 142326938Sdim 143341825Sdim std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes); 144326938Sdim 145326938Sdim void build(); 146326938Sdim bool color(); 147326938Sdim}; 148327330Sdim} // namespace 149326938Sdim 150341825Sdimstd::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) { 151341825Sdim auto Color = ColorKind::None; 152326938Sdim for (Node N : Nodes) { 153341825Sdim ColorKind ColorN = getColor(N); 154341825Sdim if (ColorN == ColorKind::None) 155326938Sdim continue; 156341825Sdim if (Color == ColorKind::None) 157326938Sdim Color = ColorN; 158341825Sdim else if (Color != ColorKind::None && Color != ColorN) 159341825Sdim return { false, ColorKind::None }; 160326938Sdim } 161326938Sdim return { true, Color }; 162326938Sdim} 163326938Sdim 164326938Sdimvoid Coloring::build() { 165326938Sdim // Add Order[P] and Order[conj(P)] to Edges. 166326938Sdim for (unsigned P = 0; P != Order.size(); ++P) { 167326938Sdim Node I = Order[P]; 168326938Sdim if (I != Ignore) { 169326938Sdim Needed.insert(I); 170326938Sdim Node PC = Order[conj(P)]; 171326938Sdim if (PC != Ignore && PC != I) 172326938Sdim Edges[I].insert(PC); 173326938Sdim } 174326938Sdim } 175326938Sdim // Add I and conj(I) to Edges. 176326938Sdim for (unsigned I = 0; I != Order.size(); ++I) { 177326938Sdim if (!Needed.count(I)) 178326938Sdim continue; 179326938Sdim Node C = conj(I); 180326938Sdim // This will create an entry in the edge table, even if I is not 181326938Sdim // connected to any other node. This is necessary, because it still 182326938Sdim // needs to be colored. 183326938Sdim NodeSet &Is = Edges[I]; 184326938Sdim if (Needed.count(C)) 185326938Sdim Is.insert(C); 186326938Sdim } 187326938Sdim} 188326938Sdim 189326938Sdimbool Coloring::color() { 190326938Sdim SetVector<Node> FirstQ; 191326938Sdim auto Enqueue = [this,&FirstQ] (Node N) { 192326938Sdim SetVector<Node> Q; 193326938Sdim Q.insert(N); 194326938Sdim for (unsigned I = 0; I != Q.size(); ++I) { 195326938Sdim NodeSet &Ns = Edges[Q[I]]; 196326938Sdim Q.insert(Ns.begin(), Ns.end()); 197326938Sdim } 198326938Sdim FirstQ.insert(Q.begin(), Q.end()); 199326938Sdim }; 200326938Sdim for (Node N : Needed) 201326938Sdim Enqueue(N); 202326938Sdim 203326938Sdim for (Node N : FirstQ) { 204326938Sdim if (Colors.count(N)) 205326938Sdim continue; 206326938Sdim NodeSet &Ns = Edges[N]; 207326938Sdim auto P = getUniqueColor(Ns); 208326938Sdim if (!P.first) 209326938Sdim return false; 210326938Sdim Colors[N] = other(P.second); 211326938Sdim } 212326938Sdim 213326938Sdim // First, color nodes that don't have any dups. 214326938Sdim for (auto E : Edges) { 215326938Sdim Node N = E.first; 216326938Sdim if (!Needed.count(conj(N)) || Colors.count(N)) 217326938Sdim continue; 218326938Sdim auto P = getUniqueColor(E.second); 219326938Sdim if (!P.first) 220326938Sdim return false; 221326938Sdim Colors[N] = other(P.second); 222326938Sdim } 223326938Sdim 224326938Sdim // Now, nodes that are still uncolored. Since the graph can be modified 225326938Sdim // in this step, create a work queue. 226326938Sdim std::vector<Node> WorkQ; 227326938Sdim for (auto E : Edges) { 228326938Sdim Node N = E.first; 229326938Sdim if (!Colors.count(N)) 230326938Sdim WorkQ.push_back(N); 231326938Sdim } 232326938Sdim 233326938Sdim for (unsigned I = 0; I < WorkQ.size(); ++I) { 234326938Sdim Node N = WorkQ[I]; 235326938Sdim NodeSet &Ns = Edges[N]; 236326938Sdim auto P = getUniqueColor(Ns); 237326938Sdim if (P.first) { 238326938Sdim Colors[N] = other(P.second); 239326938Sdim continue; 240326938Sdim } 241326938Sdim 242326938Sdim // Coloring failed. Split this node. 243326938Sdim Node C = conj(N); 244341825Sdim ColorKind ColorN = other(ColorKind::None); 245341825Sdim ColorKind ColorC = other(ColorN); 246326938Sdim NodeSet &Cs = Edges[C]; 247326938Sdim NodeSet CopyNs = Ns; 248326938Sdim for (Node M : CopyNs) { 249341825Sdim ColorKind ColorM = getColor(M); 250326938Sdim if (ColorM == ColorC) { 251326938Sdim // Connect M with C, disconnect M from N. 252326938Sdim Cs.insert(M); 253326938Sdim Edges[M].insert(C); 254326938Sdim Ns.erase(M); 255326938Sdim Edges[M].erase(N); 256326938Sdim } 257326938Sdim } 258326938Sdim Colors[N] = ColorN; 259326938Sdim Colors[C] = ColorC; 260326938Sdim } 261326938Sdim 262341825Sdim // Explicitly assign "None" to all uncolored nodes. 263326938Sdim for (unsigned I = 0; I != Order.size(); ++I) 264326938Sdim if (Colors.count(I) == 0) 265341825Sdim Colors[I] = ColorKind::None; 266326938Sdim 267326938Sdim return true; 268326938Sdim} 269326938Sdim 270344779Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 271326938Sdimvoid Coloring::dump() const { 272326938Sdim dbgs() << "{ Order: {"; 273326938Sdim for (unsigned I = 0; I != Order.size(); ++I) { 274326938Sdim Node P = Order[I]; 275326938Sdim if (P != Ignore) 276326938Sdim dbgs() << ' ' << P; 277326938Sdim else 278326938Sdim dbgs() << " -"; 279326938Sdim } 280326938Sdim dbgs() << " }\n"; 281326938Sdim dbgs() << " Needed: {"; 282326938Sdim for (Node N : Needed) 283326938Sdim dbgs() << ' ' << N; 284326938Sdim dbgs() << " }\n"; 285326938Sdim 286326938Sdim dbgs() << " Edges: {\n"; 287326938Sdim for (auto E : Edges) { 288326938Sdim dbgs() << " " << E.first << " -> {"; 289326938Sdim for (auto N : E.second) 290326938Sdim dbgs() << ' ' << N; 291326938Sdim dbgs() << " }\n"; 292326938Sdim } 293326938Sdim dbgs() << " }\n"; 294326938Sdim 295341825Sdim auto ColorKindToName = [](ColorKind C) { 296341825Sdim switch (C) { 297341825Sdim case ColorKind::None: 298341825Sdim return "None"; 299341825Sdim case ColorKind::Red: 300341825Sdim return "Red"; 301341825Sdim case ColorKind::Black: 302341825Sdim return "Black"; 303341825Sdim } 304341825Sdim llvm_unreachable("all ColorKinds should be handled by the switch above"); 305341825Sdim }; 306341825Sdim 307326938Sdim dbgs() << " Colors: {\n"; 308326938Sdim for (auto C : Colors) 309341825Sdim dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n"; 310326938Sdim dbgs() << " }\n}\n"; 311326938Sdim} 312344779Sdim#endif 313326938Sdim 314327330Sdimnamespace { 315326938Sdim// Base class of for reordering networks. They don't strictly need to be 316326938Sdim// permutations, as outputs with repeated occurrences of an input element 317326938Sdim// are allowed. 318326938Sdimstruct PermNetwork { 319326938Sdim using Controls = std::vector<uint8_t>; 320326938Sdim using ElemType = int; 321326938Sdim static constexpr ElemType Ignore = ElemType(-1); 322326938Sdim 323326938Sdim enum : uint8_t { 324326938Sdim None, 325326938Sdim Pass, 326326938Sdim Switch 327326938Sdim }; 328326938Sdim enum : uint8_t { 329326938Sdim Forward, 330326938Sdim Reverse 331326938Sdim }; 332326938Sdim 333326938Sdim PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) { 334326938Sdim Order.assign(Ord.data(), Ord.data()+Ord.size()); 335326938Sdim Log = 0; 336326938Sdim 337326938Sdim unsigned S = Order.size(); 338326938Sdim while (S >>= 1) 339326938Sdim ++Log; 340326938Sdim 341326938Sdim Table.resize(Order.size()); 342326938Sdim for (RowType &Row : Table) 343326938Sdim Row.resize(Mult*Log, None); 344326938Sdim } 345326938Sdim 346326938Sdim void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const { 347326938Sdim unsigned Size = Order.size(); 348326938Sdim V.resize(Size); 349326938Sdim for (unsigned I = 0; I != Size; ++I) { 350326938Sdim unsigned W = 0; 351326938Sdim for (unsigned L = 0; L != Log; ++L) { 352326938Sdim unsigned C = ctl(I, StartAt+L) == Switch; 353326938Sdim if (Dir == Forward) 354326938Sdim W |= C << (Log-1-L); 355326938Sdim else 356326938Sdim W |= C << L; 357326938Sdim } 358326938Sdim assert(isUInt<8>(W)); 359326938Sdim V[I] = uint8_t(W); 360326938Sdim } 361326938Sdim } 362326938Sdim 363326938Sdim uint8_t ctl(ElemType Pos, unsigned Step) const { 364326938Sdim return Table[Pos][Step]; 365326938Sdim } 366326938Sdim unsigned size() const { 367326938Sdim return Order.size(); 368326938Sdim } 369326938Sdim unsigned steps() const { 370326938Sdim return Log; 371326938Sdim } 372326938Sdim 373326938Sdimprotected: 374326938Sdim unsigned Log; 375326938Sdim std::vector<ElemType> Order; 376326938Sdim using RowType = std::vector<uint8_t>; 377326938Sdim std::vector<RowType> Table; 378326938Sdim}; 379326938Sdim 380326938Sdimstruct ForwardDeltaNetwork : public PermNetwork { 381326938Sdim ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {} 382326938Sdim 383326938Sdim bool run(Controls &V) { 384326938Sdim if (!route(Order.data(), Table.data(), size(), 0)) 385326938Sdim return false; 386326938Sdim getControls(V, 0, Forward); 387326938Sdim return true; 388326938Sdim } 389326938Sdim 390326938Sdimprivate: 391326938Sdim bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); 392326938Sdim}; 393326938Sdim 394326938Sdimstruct ReverseDeltaNetwork : public PermNetwork { 395326938Sdim ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {} 396326938Sdim 397326938Sdim bool run(Controls &V) { 398326938Sdim if (!route(Order.data(), Table.data(), size(), 0)) 399326938Sdim return false; 400326938Sdim getControls(V, 0, Reverse); 401326938Sdim return true; 402326938Sdim } 403326938Sdim 404326938Sdimprivate: 405326938Sdim bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); 406326938Sdim}; 407326938Sdim 408326938Sdimstruct BenesNetwork : public PermNetwork { 409326938Sdim BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {} 410326938Sdim 411326938Sdim bool run(Controls &F, Controls &R) { 412326938Sdim if (!route(Order.data(), Table.data(), size(), 0)) 413326938Sdim return false; 414326938Sdim 415326938Sdim getControls(F, 0, Forward); 416326938Sdim getControls(R, Log, Reverse); 417326938Sdim return true; 418326938Sdim } 419326938Sdim 420326938Sdimprivate: 421326938Sdim bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); 422326938Sdim}; 423327330Sdim} // namespace 424326938Sdim 425326938Sdimbool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, 426326938Sdim unsigned Step) { 427326938Sdim bool UseUp = false, UseDown = false; 428326938Sdim ElemType Num = Size; 429326938Sdim 430326938Sdim // Cannot use coloring here, because coloring is used to determine 431326938Sdim // the "big" switch, i.e. the one that changes halves, and in a forward 432326938Sdim // network, a color can be simultaneously routed to both halves in the 433326938Sdim // step we're working on. 434326938Sdim for (ElemType J = 0; J != Num; ++J) { 435326938Sdim ElemType I = P[J]; 436326938Sdim // I is the position in the input, 437326938Sdim // J is the position in the output. 438326938Sdim if (I == Ignore) 439326938Sdim continue; 440326938Sdim uint8_t S; 441326938Sdim if (I < Num/2) 442326938Sdim S = (J < Num/2) ? Pass : Switch; 443326938Sdim else 444326938Sdim S = (J < Num/2) ? Switch : Pass; 445326938Sdim 446326938Sdim // U is the element in the table that needs to be updated. 447326938Sdim ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2); 448326938Sdim if (U < Num/2) 449326938Sdim UseUp = true; 450326938Sdim else 451326938Sdim UseDown = true; 452326938Sdim if (T[U][Step] != S && T[U][Step] != None) 453326938Sdim return false; 454326938Sdim T[U][Step] = S; 455326938Sdim } 456326938Sdim 457326938Sdim for (ElemType J = 0; J != Num; ++J) 458326938Sdim if (P[J] != Ignore && P[J] >= Num/2) 459326938Sdim P[J] -= Num/2; 460326938Sdim 461326938Sdim if (Step+1 < Log) { 462326938Sdim if (UseUp && !route(P, T, Size/2, Step+1)) 463326938Sdim return false; 464326938Sdim if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) 465326938Sdim return false; 466326938Sdim } 467326938Sdim return true; 468326938Sdim} 469326938Sdim 470326938Sdimbool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, 471326938Sdim unsigned Step) { 472326938Sdim unsigned Pets = Log-1 - Step; 473326938Sdim bool UseUp = false, UseDown = false; 474326938Sdim ElemType Num = Size; 475326938Sdim 476326938Sdim // In this step half-switching occurs, so coloring can be used. 477326938Sdim Coloring G({P,Size}); 478326938Sdim const Coloring::MapType &M = G.colors(); 479326938Sdim if (M.empty()) 480326938Sdim return false; 481326938Sdim 482341825Sdim ColorKind ColorUp = ColorKind::None; 483326938Sdim for (ElemType J = 0; J != Num; ++J) { 484326938Sdim ElemType I = P[J]; 485326938Sdim // I is the position in the input, 486326938Sdim // J is the position in the output. 487326938Sdim if (I == Ignore) 488326938Sdim continue; 489341825Sdim ColorKind C = M.at(I); 490341825Sdim if (C == ColorKind::None) 491326938Sdim continue; 492326938Sdim // During "Step", inputs cannot switch halves, so if the "up" color 493326938Sdim // is still unknown, make sure that it is selected in such a way that 494326938Sdim // "I" will stay in the same half. 495326938Sdim bool InpUp = I < Num/2; 496341825Sdim if (ColorUp == ColorKind::None) 497326938Sdim ColorUp = InpUp ? C : G.other(C); 498326938Sdim if ((C == ColorUp) != InpUp) { 499326938Sdim // If I should go to a different half than where is it now, give up. 500326938Sdim return false; 501326938Sdim } 502326938Sdim 503326938Sdim uint8_t S; 504326938Sdim if (InpUp) { 505326938Sdim S = (J < Num/2) ? Pass : Switch; 506326938Sdim UseUp = true; 507326938Sdim } else { 508326938Sdim S = (J < Num/2) ? Switch : Pass; 509326938Sdim UseDown = true; 510326938Sdim } 511326938Sdim T[J][Pets] = S; 512326938Sdim } 513326938Sdim 514326938Sdim // Reorder the working permutation according to the computed switch table 515326938Sdim // for the last step (i.e. Pets). 516326938Sdim for (ElemType J = 0, E = Size / 2; J != E; ++J) { 517326938Sdim ElemType PJ = P[J]; // Current values of P[J] 518326938Sdim ElemType PC = P[J+Size/2]; // and P[conj(J)] 519326938Sdim ElemType QJ = PJ; // New values of P[J] 520326938Sdim ElemType QC = PC; // and P[conj(J)] 521326938Sdim if (T[J][Pets] == Switch) 522326938Sdim QC = PJ; 523326938Sdim if (T[J+Size/2][Pets] == Switch) 524326938Sdim QJ = PC; 525326938Sdim P[J] = QJ; 526326938Sdim P[J+Size/2] = QC; 527326938Sdim } 528326938Sdim 529326938Sdim for (ElemType J = 0; J != Num; ++J) 530326938Sdim if (P[J] != Ignore && P[J] >= Num/2) 531326938Sdim P[J] -= Num/2; 532326938Sdim 533326938Sdim if (Step+1 < Log) { 534326938Sdim if (UseUp && !route(P, T, Size/2, Step+1)) 535326938Sdim return false; 536326938Sdim if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) 537326938Sdim return false; 538326938Sdim } 539326938Sdim return true; 540326938Sdim} 541326938Sdim 542326938Sdimbool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size, 543326938Sdim unsigned Step) { 544326938Sdim Coloring G({P,Size}); 545326938Sdim const Coloring::MapType &M = G.colors(); 546326938Sdim if (M.empty()) 547326938Sdim return false; 548326938Sdim ElemType Num = Size; 549326938Sdim 550326938Sdim unsigned Pets = 2*Log-1 - Step; 551326938Sdim bool UseUp = false, UseDown = false; 552326938Sdim 553326938Sdim // Both assignments, i.e. Red->Up and Red->Down are valid, but they will 554326938Sdim // result in different controls. Let's pick the one where the first 555326938Sdim // control will be "Pass". 556341825Sdim ColorKind ColorUp = ColorKind::None; 557326938Sdim for (ElemType J = 0; J != Num; ++J) { 558326938Sdim ElemType I = P[J]; 559326938Sdim if (I == Ignore) 560326938Sdim continue; 561341825Sdim ColorKind C = M.at(I); 562341825Sdim if (C == ColorKind::None) 563326938Sdim continue; 564341825Sdim if (ColorUp == ColorKind::None) { 565341825Sdim ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black; 566326938Sdim } 567326938Sdim unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2; 568326938Sdim if (C == ColorUp) { 569326938Sdim if (I < Num/2) 570326938Sdim T[I][Step] = Pass; 571326938Sdim else 572326938Sdim T[CI][Step] = Switch; 573326938Sdim T[J][Pets] = (J < Num/2) ? Pass : Switch; 574326938Sdim UseUp = true; 575326938Sdim } else { // Down 576326938Sdim if (I < Num/2) 577326938Sdim T[CI][Step] = Switch; 578326938Sdim else 579326938Sdim T[I][Step] = Pass; 580326938Sdim T[J][Pets] = (J < Num/2) ? Switch : Pass; 581326938Sdim UseDown = true; 582326938Sdim } 583326938Sdim } 584326938Sdim 585326938Sdim // Reorder the working permutation according to the computed switch table 586326938Sdim // for the last step (i.e. Pets). 587326938Sdim for (ElemType J = 0; J != Num/2; ++J) { 588326938Sdim ElemType PJ = P[J]; // Current values of P[J] 589326938Sdim ElemType PC = P[J+Num/2]; // and P[conj(J)] 590326938Sdim ElemType QJ = PJ; // New values of P[J] 591326938Sdim ElemType QC = PC; // and P[conj(J)] 592326938Sdim if (T[J][Pets] == Switch) 593326938Sdim QC = PJ; 594326938Sdim if (T[J+Num/2][Pets] == Switch) 595326938Sdim QJ = PC; 596326938Sdim P[J] = QJ; 597326938Sdim P[J+Num/2] = QC; 598326938Sdim } 599326938Sdim 600326938Sdim for (ElemType J = 0; J != Num; ++J) 601326938Sdim if (P[J] != Ignore && P[J] >= Num/2) 602326938Sdim P[J] -= Num/2; 603326938Sdim 604326938Sdim if (Step+1 < Log) { 605326938Sdim if (UseUp && !route(P, T, Size/2, Step+1)) 606326938Sdim return false; 607326938Sdim if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) 608326938Sdim return false; 609326938Sdim } 610326938Sdim return true; 611326938Sdim} 612326938Sdim 613326938Sdim// -------------------------------------------------------------------- 614326938Sdim// Support for building selection results (output instructions that are 615326938Sdim// parts of the final selection). 616326938Sdim 617327330Sdimnamespace { 618326938Sdimstruct OpRef { 619326938Sdim OpRef(SDValue V) : OpV(V) {} 620326938Sdim bool isValue() const { return OpV.getNode() != nullptr; } 621326938Sdim bool isValid() const { return isValue() || !(OpN & Invalid); } 622326938Sdim static OpRef res(int N) { return OpRef(Whole | (N & Index)); } 623326938Sdim static OpRef fail() { return OpRef(Invalid); } 624326938Sdim 625326938Sdim static OpRef lo(const OpRef &R) { 626326938Sdim assert(!R.isValue()); 627326938Sdim return OpRef(R.OpN & (Undef | Index | LoHalf)); 628326938Sdim } 629326938Sdim static OpRef hi(const OpRef &R) { 630326938Sdim assert(!R.isValue()); 631326938Sdim return OpRef(R.OpN & (Undef | Index | HiHalf)); 632326938Sdim } 633326938Sdim static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); } 634326938Sdim 635326938Sdim // Direct value. 636326938Sdim SDValue OpV = SDValue(); 637326938Sdim 638326938Sdim // Reference to the operand of the input node: 639326938Sdim // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the 640326938Sdim // operand index: 641326938Sdim // If bit 30 is set, it's the high half of the operand. 642326938Sdim // If bit 29 is set, it's the low half of the operand. 643326938Sdim unsigned OpN = 0; 644326938Sdim 645326938Sdim enum : unsigned { 646326938Sdim Invalid = 0x10000000, 647326938Sdim LoHalf = 0x20000000, 648326938Sdim HiHalf = 0x40000000, 649326938Sdim Whole = LoHalf | HiHalf, 650326938Sdim Undef = 0x80000000, 651326938Sdim Index = 0x0FFFFFFF, // Mask of the index value. 652326938Sdim IndexBits = 28, 653326938Sdim }; 654326938Sdim 655344779Sdim LLVM_DUMP_METHOD 656326938Sdim void print(raw_ostream &OS, const SelectionDAG &G) const; 657326938Sdim 658326938Sdimprivate: 659326938Sdim OpRef(unsigned N) : OpN(N) {} 660326938Sdim}; 661326938Sdim 662326938Sdimstruct NodeTemplate { 663326938Sdim NodeTemplate() = default; 664326938Sdim unsigned Opc = 0; 665326938Sdim MVT Ty = MVT::Other; 666326938Sdim std::vector<OpRef> Ops; 667326938Sdim 668344779Sdim LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const; 669326938Sdim}; 670326938Sdim 671326938Sdimstruct ResultStack { 672326938Sdim ResultStack(SDNode *Inp) 673326938Sdim : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {} 674326938Sdim SDNode *InpNode; 675326938Sdim MVT InpTy; 676326938Sdim unsigned push(const NodeTemplate &Res) { 677326938Sdim List.push_back(Res); 678326938Sdim return List.size()-1; 679326938Sdim } 680326938Sdim unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) { 681326938Sdim NodeTemplate Res; 682326938Sdim Res.Opc = Opc; 683326938Sdim Res.Ty = Ty; 684326938Sdim Res.Ops = Ops; 685326938Sdim return push(Res); 686326938Sdim } 687326938Sdim bool empty() const { return List.empty(); } 688326938Sdim unsigned size() const { return List.size(); } 689326938Sdim unsigned top() const { return size()-1; } 690326938Sdim const NodeTemplate &operator[](unsigned I) const { return List[I]; } 691326938Sdim unsigned reset(unsigned NewTop) { 692326938Sdim List.resize(NewTop+1); 693326938Sdim return NewTop; 694326938Sdim } 695326938Sdim 696326938Sdim using BaseType = std::vector<NodeTemplate>; 697326938Sdim BaseType::iterator begin() { return List.begin(); } 698326938Sdim BaseType::iterator end() { return List.end(); } 699326938Sdim BaseType::const_iterator begin() const { return List.begin(); } 700326938Sdim BaseType::const_iterator end() const { return List.end(); } 701326938Sdim 702326938Sdim BaseType List; 703326938Sdim 704344779Sdim LLVM_DUMP_METHOD 705326938Sdim void print(raw_ostream &OS, const SelectionDAG &G) const; 706326938Sdim}; 707327330Sdim} // namespace 708326938Sdim 709344779Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 710326938Sdimvoid OpRef::print(raw_ostream &OS, const SelectionDAG &G) const { 711326938Sdim if (isValue()) { 712326938Sdim OpV.getNode()->print(OS, &G); 713326938Sdim return; 714326938Sdim } 715326938Sdim if (OpN & Invalid) { 716326938Sdim OS << "invalid"; 717326938Sdim return; 718326938Sdim } 719326938Sdim if (OpN & Undef) { 720326938Sdim OS << "undef"; 721326938Sdim return; 722326938Sdim } 723326938Sdim if ((OpN & Whole) != Whole) { 724326938Sdim assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf); 725326938Sdim if (OpN & LoHalf) 726326938Sdim OS << "lo "; 727326938Sdim else 728326938Sdim OS << "hi "; 729326938Sdim } 730326938Sdim OS << '#' << SignExtend32(OpN & Index, IndexBits); 731326938Sdim} 732326938Sdim 733326938Sdimvoid NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const { 734326938Sdim const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo(); 735326938Sdim OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " " 736326938Sdim << TII.getName(Opc); 737326938Sdim bool Comma = false; 738326938Sdim for (const auto &R : Ops) { 739326938Sdim if (Comma) 740326938Sdim OS << ','; 741326938Sdim Comma = true; 742326938Sdim OS << ' '; 743326938Sdim R.print(OS, G); 744326938Sdim } 745326938Sdim} 746326938Sdim 747326938Sdimvoid ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const { 748326938Sdim OS << "Input node:\n"; 749326938Sdim#ifndef NDEBUG 750326938Sdim InpNode->dumpr(&G); 751326938Sdim#endif 752326938Sdim OS << "Result templates:\n"; 753326938Sdim for (unsigned I = 0, E = List.size(); I != E; ++I) { 754326938Sdim OS << '[' << I << "] "; 755326938Sdim List[I].print(OS, G); 756326938Sdim OS << '\n'; 757326938Sdim } 758326938Sdim} 759344779Sdim#endif 760326938Sdim 761327330Sdimnamespace { 762326938Sdimstruct ShuffleMask { 763326938Sdim ShuffleMask(ArrayRef<int> M) : Mask(M) { 764326938Sdim for (unsigned I = 0, E = Mask.size(); I != E; ++I) { 765326938Sdim int M = Mask[I]; 766326938Sdim if (M == -1) 767326938Sdim continue; 768326938Sdim MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M); 769326938Sdim MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M); 770326938Sdim } 771326938Sdim } 772326938Sdim 773326938Sdim ArrayRef<int> Mask; 774326938Sdim int MinSrc = -1, MaxSrc = -1; 775326938Sdim 776326938Sdim ShuffleMask lo() const { 777326938Sdim size_t H = Mask.size()/2; 778327134Sdim return ShuffleMask(Mask.take_front(H)); 779326938Sdim } 780326938Sdim ShuffleMask hi() const { 781326938Sdim size_t H = Mask.size()/2; 782327134Sdim return ShuffleMask(Mask.take_back(H)); 783326938Sdim } 784341825Sdim 785341825Sdim void print(raw_ostream &OS) const { 786341825Sdim OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {"; 787341825Sdim for (int M : Mask) 788341825Sdim OS << ' ' << M; 789341825Sdim OS << " }"; 790341825Sdim } 791326938Sdim}; 792327330Sdim} // namespace 793326938Sdim 794326938Sdim// -------------------------------------------------------------------- 795326938Sdim// The HvxSelector class. 796326938Sdim 797326938Sdimstatic const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) { 798326938Sdim return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo()); 799326938Sdim} 800326938Sdimstatic const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) { 801326938Sdim return static_cast<const HexagonSubtarget&>(G.getSubtarget()); 802326938Sdim} 803326938Sdim 804326938Sdimnamespace llvm { 805326938Sdim struct HvxSelector { 806326938Sdim const HexagonTargetLowering &Lower; 807326938Sdim HexagonDAGToDAGISel &ISel; 808326938Sdim SelectionDAG &DAG; 809326938Sdim const HexagonSubtarget &HST; 810326938Sdim const unsigned HwLen; 811326938Sdim 812326938Sdim HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G) 813326938Sdim : Lower(getHexagonLowering(G)), ISel(HS), DAG(G), 814326938Sdim HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {} 815326938Sdim 816326938Sdim MVT getSingleVT(MVT ElemTy) const { 817326938Sdim unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8); 818326938Sdim return MVT::getVectorVT(ElemTy, NumElems); 819326938Sdim } 820326938Sdim 821326938Sdim MVT getPairVT(MVT ElemTy) const { 822326938Sdim unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8); 823326938Sdim return MVT::getVectorVT(ElemTy, NumElems); 824326938Sdim } 825326938Sdim 826326938Sdim void selectShuffle(SDNode *N); 827326938Sdim void selectRor(SDNode *N); 828341825Sdim void selectVAlign(SDNode *N); 829326938Sdim 830326938Sdim private: 831326938Sdim void materialize(const ResultStack &Results); 832326938Sdim 833326938Sdim SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl); 834326938Sdim 835326938Sdim enum : unsigned { 836326938Sdim None, 837326938Sdim PackMux, 838326938Sdim }; 839326938Sdim OpRef concat(OpRef Va, OpRef Vb, ResultStack &Results); 840326938Sdim OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, 841326938Sdim MutableArrayRef<int> NewMask, unsigned Options = None); 842326938Sdim OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, 843326938Sdim MutableArrayRef<int> NewMask); 844326938Sdim OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 845326938Sdim ResultStack &Results); 846326938Sdim OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 847326938Sdim ResultStack &Results); 848326938Sdim 849326938Sdim OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results); 850326938Sdim OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); 851326938Sdim OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results); 852326938Sdim OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); 853326938Sdim 854326938Sdim OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results); 855326938Sdim OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); 856326938Sdim OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results); 857326938Sdim OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results); 858326938Sdim 859326938Sdim bool selectVectorConstants(SDNode *N); 860326938Sdim bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy, 861326938Sdim SDValue Va, SDValue Vb, SDNode *N); 862326938Sdim 863326938Sdim }; 864326938Sdim} 865326938Sdim 866326938Sdimstatic void splitMask(ArrayRef<int> Mask, MutableArrayRef<int> MaskL, 867326938Sdim MutableArrayRef<int> MaskR) { 868326938Sdim unsigned VecLen = Mask.size(); 869326938Sdim assert(MaskL.size() == VecLen && MaskR.size() == VecLen); 870326938Sdim for (unsigned I = 0; I != VecLen; ++I) { 871326938Sdim int M = Mask[I]; 872326938Sdim if (M < 0) { 873326938Sdim MaskL[I] = MaskR[I] = -1; 874326938Sdim } else if (unsigned(M) < VecLen) { 875326938Sdim MaskL[I] = M; 876326938Sdim MaskR[I] = -1; 877326938Sdim } else { 878326938Sdim MaskL[I] = -1; 879326938Sdim MaskR[I] = M-VecLen; 880326938Sdim } 881326938Sdim } 882326938Sdim} 883326938Sdim 884326938Sdimstatic std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc, 885326938Sdim unsigned MaxLen) { 886326938Sdim assert(A.size() > 0 && A.size() >= MaxLen); 887326938Sdim int F = A[0]; 888326938Sdim int E = F; 889326938Sdim for (unsigned I = 1; I != MaxLen; ++I) { 890326938Sdim if (A[I] - E != Inc) 891326938Sdim return { F, I }; 892326938Sdim E = A[I]; 893326938Sdim } 894326938Sdim return { F, MaxLen }; 895326938Sdim} 896326938Sdim 897326938Sdimstatic bool isUndef(ArrayRef<int> Mask) { 898326938Sdim for (int Idx : Mask) 899326938Sdim if (Idx != -1) 900326938Sdim return false; 901326938Sdim return true; 902326938Sdim} 903326938Sdim 904326938Sdimstatic bool isIdentity(ArrayRef<int> Mask) { 905326938Sdim for (int I = 0, E = Mask.size(); I != E; ++I) { 906326938Sdim int M = Mask[I]; 907326938Sdim if (M >= 0 && M != I) 908326938Sdim return false; 909326938Sdim } 910326938Sdim return true; 911326938Sdim} 912326938Sdim 913326938Sdimstatic bool isPermutation(ArrayRef<int> Mask) { 914326938Sdim // Check by adding all numbers only works if there is no overflow. 915326938Sdim assert(Mask.size() < 0x00007FFF && "Sanity failure"); 916326938Sdim int Sum = 0; 917326938Sdim for (int Idx : Mask) { 918326938Sdim if (Idx == -1) 919326938Sdim return false; 920326938Sdim Sum += Idx; 921326938Sdim } 922326938Sdim int N = Mask.size(); 923326938Sdim return 2*Sum == N*(N-1); 924326938Sdim} 925326938Sdim 926326938Sdimbool HvxSelector::selectVectorConstants(SDNode *N) { 927341825Sdim // Constant vectors are generated as loads from constant pools or as 928341825Sdim // splats of a constant value. Since they are generated during the 929341825Sdim // selection process, the main selection algorithm is not aware of them. 930341825Sdim // Select them directly here. 931341825Sdim SmallVector<SDNode*,4> Nodes; 932341825Sdim SetVector<SDNode*> WorkQ; 933327134Sdim 934341825Sdim // The one-use test for VSPLATW's operand may fail due to dead nodes 935341825Sdim // left over in the DAG. 936341825Sdim DAG.RemoveDeadNodes(); 937341825Sdim 938327134Sdim // The DAG can change (due to CSE) during selection, so cache all the 939327134Sdim // unselected nodes first to avoid traversing a mutating DAG. 940327134Sdim 941341825Sdim auto IsNodeToSelect = [] (SDNode *N) { 942341825Sdim if (N->isMachineOpcode()) 943341825Sdim return false; 944341825Sdim switch (N->getOpcode()) { 945341825Sdim case HexagonISD::VZERO: 946341825Sdim case HexagonISD::VSPLATW: 947341825Sdim return true; 948341825Sdim case ISD::LOAD: { 949341825Sdim SDValue Addr = cast<LoadSDNode>(N)->getBasePtr(); 950341825Sdim unsigned AddrOpc = Addr.getOpcode(); 951341825Sdim if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) 952341825Sdim if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) 953341825Sdim return true; 954341825Sdim } 955341825Sdim break; 956326938Sdim } 957341825Sdim // Make sure to select the operand of VSPLATW. 958341825Sdim bool IsSplatOp = N->hasOneUse() && 959341825Sdim N->use_begin()->getOpcode() == HexagonISD::VSPLATW; 960341825Sdim return IsSplatOp; 961327134Sdim }; 962327134Sdim 963341825Sdim WorkQ.insert(N); 964327134Sdim for (unsigned i = 0; i != WorkQ.size(); ++i) { 965327134Sdim SDNode *W = WorkQ[i]; 966341825Sdim if (IsNodeToSelect(W)) 967341825Sdim Nodes.push_back(W); 968327134Sdim for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j) 969341825Sdim WorkQ.insert(W->getOperand(j).getNode()); 970326938Sdim } 971326938Sdim 972341825Sdim for (SDNode *L : Nodes) 973327134Sdim ISel.Select(L); 974327134Sdim 975341825Sdim return !Nodes.empty(); 976326938Sdim} 977326938Sdim 978326938Sdimvoid HvxSelector::materialize(const ResultStack &Results) { 979326938Sdim DEBUG_WITH_TYPE("isel", { 980326938Sdim dbgs() << "Materializing\n"; 981326938Sdim Results.print(dbgs(), DAG); 982326938Sdim }); 983326938Sdim if (Results.empty()) 984326938Sdim return; 985326938Sdim const SDLoc &dl(Results.InpNode); 986326938Sdim std::vector<SDValue> Output; 987326938Sdim 988326938Sdim for (unsigned I = 0, E = Results.size(); I != E; ++I) { 989326938Sdim const NodeTemplate &Node = Results[I]; 990326938Sdim std::vector<SDValue> Ops; 991326938Sdim for (const OpRef &R : Node.Ops) { 992326938Sdim assert(R.isValid()); 993326938Sdim if (R.isValue()) { 994326938Sdim Ops.push_back(R.OpV); 995326938Sdim continue; 996326938Sdim } 997326938Sdim if (R.OpN & OpRef::Undef) { 998326938Sdim MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index); 999326938Sdim Ops.push_back(ISel.selectUndef(dl, MVT(SVT))); 1000326938Sdim continue; 1001326938Sdim } 1002326938Sdim // R is an index of a result. 1003326938Sdim unsigned Part = R.OpN & OpRef::Whole; 1004326938Sdim int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits); 1005326938Sdim if (Idx < 0) 1006326938Sdim Idx += I; 1007326938Sdim assert(Idx >= 0 && unsigned(Idx) < Output.size()); 1008326938Sdim SDValue Op = Output[Idx]; 1009326938Sdim MVT OpTy = Op.getValueType().getSimpleVT(); 1010326938Sdim if (Part != OpRef::Whole) { 1011326938Sdim assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf); 1012341825Sdim MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(), 1013341825Sdim OpTy.getVectorNumElements()/2); 1014341825Sdim unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo 1015341825Sdim : Hexagon::vsub_hi; 1016341825Sdim Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op); 1017326938Sdim } 1018326938Sdim Ops.push_back(Op); 1019326938Sdim } // for (Node : Results) 1020326938Sdim 1021326938Sdim assert(Node.Ty != MVT::Other); 1022326938Sdim SDNode *ResN = (Node.Opc == TargetOpcode::COPY) 1023326938Sdim ? Ops.front().getNode() 1024326938Sdim : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops); 1025326938Sdim Output.push_back(SDValue(ResN, 0)); 1026326938Sdim } 1027326938Sdim 1028326938Sdim SDNode *OutN = Output.back().getNode(); 1029326938Sdim SDNode *InpN = Results.InpNode; 1030326938Sdim DEBUG_WITH_TYPE("isel", { 1031326938Sdim dbgs() << "Generated node:\n"; 1032326938Sdim OutN->dumpr(&DAG); 1033326938Sdim }); 1034326938Sdim 1035326938Sdim ISel.ReplaceNode(InpN, OutN); 1036326938Sdim selectVectorConstants(OutN); 1037326938Sdim DAG.RemoveDeadNodes(); 1038326938Sdim} 1039326938Sdim 1040326938SdimOpRef HvxSelector::concat(OpRef Lo, OpRef Hi, ResultStack &Results) { 1041326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1042326938Sdim const SDLoc &dl(Results.InpNode); 1043326938Sdim Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), { 1044326938Sdim DAG.getTargetConstant(Hexagon::HvxWRRegClassID, dl, MVT::i32), 1045326938Sdim Lo, DAG.getTargetConstant(Hexagon::vsub_lo, dl, MVT::i32), 1046326938Sdim Hi, DAG.getTargetConstant(Hexagon::vsub_hi, dl, MVT::i32), 1047326938Sdim }); 1048326938Sdim return OpRef::res(Results.top()); 1049326938Sdim} 1050326938Sdim 1051326938Sdim// Va, Vb are single vectors, SM can be arbitrarily long. 1052326938SdimOpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, 1053326938Sdim ResultStack &Results, MutableArrayRef<int> NewMask, 1054326938Sdim unsigned Options) { 1055326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1056326938Sdim if (!Va.isValid() || !Vb.isValid()) 1057326938Sdim return OpRef::fail(); 1058326938Sdim 1059326938Sdim int VecLen = SM.Mask.size(); 1060326938Sdim MVT Ty = getSingleVT(MVT::i8); 1061326938Sdim 1062341825Sdim auto IsExtSubvector = [] (ShuffleMask M) { 1063341825Sdim assert(M.MinSrc >= 0 && M.MaxSrc >= 0); 1064341825Sdim for (int I = 0, E = M.Mask.size(); I != E; ++I) { 1065341825Sdim if (M.Mask[I] >= 0 && M.Mask[I]-I != M.MinSrc) 1066341825Sdim return false; 1067341825Sdim } 1068341825Sdim return true; 1069341825Sdim }; 1070341825Sdim 1071326938Sdim if (SM.MaxSrc - SM.MinSrc < int(HwLen)) { 1072341825Sdim if (SM.MinSrc == 0 || SM.MinSrc == int(HwLen) || !IsExtSubvector(SM)) { 1073341825Sdim // If the mask picks elements from only one of the operands, return 1074341825Sdim // that operand, and update the mask to use index 0 to refer to the 1075341825Sdim // first element of that operand. 1076341825Sdim // If the mask extracts a subvector, it will be handled below, so 1077341825Sdim // skip it here. 1078341825Sdim if (SM.MaxSrc < int(HwLen)) { 1079341825Sdim memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen); 1080341825Sdim return Va; 1081341825Sdim } 1082341825Sdim if (SM.MinSrc >= int(HwLen)) { 1083341825Sdim for (int I = 0; I != VecLen; ++I) { 1084341825Sdim int M = SM.Mask[I]; 1085341825Sdim if (M != -1) 1086341825Sdim M -= HwLen; 1087341825Sdim NewMask[I] = M; 1088341825Sdim } 1089341825Sdim return Vb; 1090341825Sdim } 1091341825Sdim } 1092341825Sdim int MinSrc = SM.MinSrc; 1093326938Sdim if (SM.MaxSrc < int(HwLen)) { 1094341825Sdim Vb = Va; 1095341825Sdim } else if (SM.MinSrc > int(HwLen)) { 1096341825Sdim Va = Vb; 1097341825Sdim MinSrc = SM.MinSrc - HwLen; 1098326938Sdim } 1099326938Sdim const SDLoc &dl(Results.InpNode); 1100341825Sdim if (isUInt<3>(MinSrc) || isUInt<3>(HwLen-MinSrc)) { 1101341825Sdim bool IsRight = isUInt<3>(MinSrc); // Right align. 1102341825Sdim SDValue S = DAG.getTargetConstant(IsRight ? MinSrc : HwLen-MinSrc, 1103341825Sdim dl, MVT::i32); 1104341825Sdim unsigned Opc = IsRight ? Hexagon::V6_valignbi 1105341825Sdim : Hexagon::V6_vlalignbi; 1106341825Sdim Results.push(Opc, Ty, {Vb, Va, S}); 1107326938Sdim } else { 1108341825Sdim SDValue S = DAG.getTargetConstant(MinSrc, dl, MVT::i32); 1109326938Sdim Results.push(Hexagon::A2_tfrsi, MVT::i32, {S}); 1110326938Sdim unsigned Top = Results.top(); 1111326938Sdim Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)}); 1112326938Sdim } 1113326938Sdim for (int I = 0; I != VecLen; ++I) { 1114326938Sdim int M = SM.Mask[I]; 1115326938Sdim if (M != -1) 1116326938Sdim M -= SM.MinSrc; 1117326938Sdim NewMask[I] = M; 1118326938Sdim } 1119326938Sdim return OpRef::res(Results.top()); 1120326938Sdim } 1121326938Sdim 1122326938Sdim if (Options & PackMux) { 1123326938Sdim // If elements picked from Va and Vb have all different (source) indexes 1124326938Sdim // (relative to the start of the argument), do a mux, and update the mask. 1125326938Sdim BitVector Picked(HwLen); 1126326938Sdim SmallVector<uint8_t,128> MuxBytes(HwLen); 1127326938Sdim bool CanMux = true; 1128326938Sdim for (int I = 0; I != VecLen; ++I) { 1129326938Sdim int M = SM.Mask[I]; 1130326938Sdim if (M == -1) 1131326938Sdim continue; 1132326938Sdim if (M >= int(HwLen)) 1133326938Sdim M -= HwLen; 1134326938Sdim else 1135326938Sdim MuxBytes[M] = 0xFF; 1136326938Sdim if (Picked[M]) { 1137326938Sdim CanMux = false; 1138326938Sdim break; 1139326938Sdim } 1140326938Sdim NewMask[I] = M; 1141326938Sdim } 1142326938Sdim if (CanMux) 1143326938Sdim return vmuxs(MuxBytes, Va, Vb, Results); 1144326938Sdim } 1145326938Sdim 1146326938Sdim return OpRef::fail(); 1147326938Sdim} 1148326938Sdim 1149326938SdimOpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb, 1150326938Sdim ResultStack &Results, MutableArrayRef<int> NewMask) { 1151326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1152326938Sdim unsigned HalfMask = 0; 1153326938Sdim unsigned LogHw = Log2_32(HwLen); 1154326938Sdim for (int M : SM.Mask) { 1155326938Sdim if (M == -1) 1156326938Sdim continue; 1157326938Sdim HalfMask |= (1u << (M >> LogHw)); 1158326938Sdim } 1159326938Sdim 1160326938Sdim if (HalfMask == 0) 1161326938Sdim return OpRef::undef(getPairVT(MVT::i8)); 1162326938Sdim 1163326938Sdim // If more than two halves are used, bail. 1164326938Sdim // TODO: be more aggressive here? 1165326938Sdim if (countPopulation(HalfMask) > 2) 1166326938Sdim return OpRef::fail(); 1167326938Sdim 1168326938Sdim MVT HalfTy = getSingleVT(MVT::i8); 1169326938Sdim 1170326938Sdim OpRef Inp[2] = { Va, Vb }; 1171326938Sdim OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) }; 1172326938Sdim 1173326938Sdim uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; 1174326938Sdim unsigned Idx = 0; 1175326938Sdim for (unsigned I = 0; I != 4; ++I) { 1176326938Sdim if ((HalfMask & (1u << I)) == 0) 1177326938Sdim continue; 1178326938Sdim assert(Idx < 2); 1179326938Sdim OpRef Op = Inp[I/2]; 1180326938Sdim Out[Idx] = (I & 1) ? OpRef::hi(Op) : OpRef::lo(Op); 1181326938Sdim HalfIdx[I] = Idx++; 1182326938Sdim } 1183326938Sdim 1184326938Sdim int VecLen = SM.Mask.size(); 1185326938Sdim for (int I = 0; I != VecLen; ++I) { 1186326938Sdim int M = SM.Mask[I]; 1187326938Sdim if (M >= 0) { 1188326938Sdim uint8_t Idx = HalfIdx[M >> LogHw]; 1189326938Sdim assert(Idx == 0 || Idx == 1); 1190326938Sdim M = (M & (HwLen-1)) + HwLen*Idx; 1191326938Sdim } 1192326938Sdim NewMask[I] = M; 1193326938Sdim } 1194326938Sdim 1195326938Sdim return concat(Out[0], Out[1], Results); 1196326938Sdim} 1197326938Sdim 1198326938SdimOpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 1199326938Sdim ResultStack &Results) { 1200326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1201326938Sdim MVT ByteTy = getSingleVT(MVT::i8); 1202326938Sdim MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX 1203326938Sdim const SDLoc &dl(Results.InpNode); 1204326938Sdim SDValue B = getVectorConstant(Bytes, dl); 1205326938Sdim Results.push(Hexagon::V6_vd0, ByteTy, {}); 1206326938Sdim Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)}); 1207326938Sdim Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va}); 1208326938Sdim return OpRef::res(Results.top()); 1209326938Sdim} 1210326938Sdim 1211326938SdimOpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 1212326938Sdim ResultStack &Results) { 1213326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1214326938Sdim size_t S = Bytes.size() / 2; 1215327134Sdim OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results); 1216327134Sdim OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results); 1217326938Sdim return concat(L, H, Results); 1218326938Sdim} 1219326938Sdim 1220326938SdimOpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1221326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1222326938Sdim unsigned VecLen = SM.Mask.size(); 1223326938Sdim assert(HwLen == VecLen); 1224326938Sdim (void)VecLen; 1225326938Sdim assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); })); 1226326938Sdim 1227326938Sdim if (isIdentity(SM.Mask)) 1228326938Sdim return Va; 1229326938Sdim if (isUndef(SM.Mask)) 1230326938Sdim return OpRef::undef(getSingleVT(MVT::i8)); 1231326938Sdim 1232326938Sdim OpRef P = perfect(SM, Va, Results); 1233326938Sdim if (P.isValid()) 1234326938Sdim return P; 1235326938Sdim return butterfly(SM, Va, Results); 1236326938Sdim} 1237326938Sdim 1238326938SdimOpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, 1239326938Sdim ResultStack &Results) { 1240326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1241326938Sdim if (isUndef(SM.Mask)) 1242326938Sdim return OpRef::undef(getSingleVT(MVT::i8)); 1243326938Sdim 1244326938Sdim OpRef C = contracting(SM, Va, Vb, Results); 1245326938Sdim if (C.isValid()) 1246326938Sdim return C; 1247326938Sdim 1248326938Sdim int VecLen = SM.Mask.size(); 1249326938Sdim SmallVector<int,128> NewMask(VecLen); 1250326938Sdim OpRef P = packs(SM, Va, Vb, Results, NewMask); 1251326938Sdim if (P.isValid()) 1252326938Sdim return shuffs1(ShuffleMask(NewMask), P, Results); 1253326938Sdim 1254326938Sdim SmallVector<int,128> MaskL(VecLen), MaskR(VecLen); 1255326938Sdim splitMask(SM.Mask, MaskL, MaskR); 1256326938Sdim 1257326938Sdim OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results); 1258326938Sdim OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results); 1259326938Sdim if (!L.isValid() || !R.isValid()) 1260326938Sdim return OpRef::fail(); 1261326938Sdim 1262326938Sdim SmallVector<uint8_t,128> Bytes(VecLen); 1263326938Sdim for (int I = 0; I != VecLen; ++I) { 1264326938Sdim if (MaskL[I] != -1) 1265326938Sdim Bytes[I] = 0xFF; 1266326938Sdim } 1267326938Sdim return vmuxs(Bytes, L, R, Results); 1268326938Sdim} 1269326938Sdim 1270326938SdimOpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1271326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1272326938Sdim int VecLen = SM.Mask.size(); 1273326938Sdim 1274326938Sdim if (isIdentity(SM.Mask)) 1275326938Sdim return Va; 1276326938Sdim if (isUndef(SM.Mask)) 1277326938Sdim return OpRef::undef(getPairVT(MVT::i8)); 1278326938Sdim 1279326938Sdim SmallVector<int,128> PackedMask(VecLen); 1280326938Sdim OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask); 1281326938Sdim if (P.isValid()) { 1282326938Sdim ShuffleMask PM(PackedMask); 1283326938Sdim OpRef E = expanding(PM, P, Results); 1284326938Sdim if (E.isValid()) 1285326938Sdim return E; 1286326938Sdim 1287326938Sdim OpRef L = shuffs1(PM.lo(), P, Results); 1288326938Sdim OpRef H = shuffs1(PM.hi(), P, Results); 1289326938Sdim if (L.isValid() && H.isValid()) 1290326938Sdim return concat(L, H, Results); 1291326938Sdim } 1292326938Sdim 1293326938Sdim OpRef R = perfect(SM, Va, Results); 1294326938Sdim if (R.isValid()) 1295326938Sdim return R; 1296326938Sdim // TODO commute the mask and try the opposite order of the halves. 1297326938Sdim 1298326938Sdim OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results); 1299326938Sdim OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results); 1300326938Sdim if (L.isValid() && H.isValid()) 1301326938Sdim return concat(L, H, Results); 1302326938Sdim 1303326938Sdim return OpRef::fail(); 1304326938Sdim} 1305326938Sdim 1306326938SdimOpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, 1307326938Sdim ResultStack &Results) { 1308326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1309326938Sdim if (isUndef(SM.Mask)) 1310326938Sdim return OpRef::undef(getPairVT(MVT::i8)); 1311326938Sdim 1312326938Sdim int VecLen = SM.Mask.size(); 1313326938Sdim SmallVector<int,256> PackedMask(VecLen); 1314326938Sdim OpRef P = packp(SM, Va, Vb, Results, PackedMask); 1315326938Sdim if (P.isValid()) 1316326938Sdim return shuffp1(ShuffleMask(PackedMask), P, Results); 1317326938Sdim 1318326938Sdim SmallVector<int,256> MaskL(VecLen), MaskR(VecLen); 1319341825Sdim splitMask(SM.Mask, MaskL, MaskR); 1320341825Sdim 1321326938Sdim OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results); 1322326938Sdim OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results); 1323326938Sdim if (!L.isValid() || !R.isValid()) 1324326938Sdim return OpRef::fail(); 1325326938Sdim 1326326938Sdim // Mux the results. 1327326938Sdim SmallVector<uint8_t,256> Bytes(VecLen); 1328326938Sdim for (int I = 0; I != VecLen; ++I) { 1329326938Sdim if (MaskL[I] != -1) 1330326938Sdim Bytes[I] = 0xFF; 1331326938Sdim } 1332326938Sdim return vmuxp(Bytes, L, R, Results); 1333326938Sdim} 1334326938Sdim 1335344779Sdimnamespace { 1336344779Sdim struct Deleter : public SelectionDAG::DAGNodeDeletedListener { 1337344779Sdim template <typename T> 1338344779Sdim Deleter(SelectionDAG &D, T &C) 1339344779Sdim : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) { 1340344779Sdim C.erase(N); 1341344779Sdim }) {} 1342344779Sdim }; 1343344779Sdim 1344344779Sdim template <typename T> 1345344779Sdim struct NullifyingVector : public T { 1346344779Sdim DenseMap<SDNode*, SDNode**> Refs; 1347344779Sdim NullifyingVector(T &&V) : T(V) { 1348344779Sdim for (unsigned i = 0, e = T::size(); i != e; ++i) { 1349344779Sdim SDNode *&N = T::operator[](i); 1350344779Sdim Refs[N] = &N; 1351344779Sdim } 1352344779Sdim } 1353344779Sdim void erase(SDNode *N) { 1354344779Sdim auto F = Refs.find(N); 1355344779Sdim if (F != Refs.end()) 1356344779Sdim *F->second = nullptr; 1357344779Sdim } 1358344779Sdim }; 1359344779Sdim} 1360344779Sdim 1361326938Sdimbool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, 1362326938Sdim MVT ResTy, SDValue Va, SDValue Vb, 1363326938Sdim SDNode *N) { 1364326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1365326938Sdim MVT ElemTy = ResTy.getVectorElementType(); 1366326938Sdim assert(ElemTy == MVT::i8); 1367326938Sdim unsigned VecLen = Mask.size(); 1368326938Sdim bool HavePairs = (2*HwLen == VecLen); 1369326938Sdim MVT SingleTy = getSingleVT(MVT::i8); 1370326938Sdim 1371344779Sdim // The prior attempts to handle this shuffle may have left a bunch of 1372344779Sdim // dead nodes in the DAG (such as constants). These nodes will be added 1373344779Sdim // at the end of DAG's node list, which at that point had already been 1374344779Sdim // sorted topologically. In the main selection loop, the node list is 1375344779Sdim // traversed backwards from the root node, which means that any new 1376344779Sdim // nodes (from the end of the list) will not be visited. 1377344779Sdim // Scalarization will replace the shuffle node with the scalarized 1378344779Sdim // expression, and if that expression reused any if the leftoever (dead) 1379344779Sdim // nodes, these nodes would not be selected (since the "local" selection 1380344779Sdim // only visits nodes that are not in AllNodes). 1381344779Sdim // To avoid this issue, remove all dead nodes from the DAG now. 1382344779Sdim DAG.RemoveDeadNodes(); 1383344779Sdim DenseSet<SDNode*> AllNodes; 1384344779Sdim for (SDNode &S : DAG.allnodes()) 1385344779Sdim AllNodes.insert(&S); 1386344779Sdim 1387344779Sdim Deleter DUA(DAG, AllNodes); 1388344779Sdim 1389326938Sdim SmallVector<SDValue,128> Ops; 1390344779Sdim LLVMContext &Ctx = *DAG.getContext(); 1391344779Sdim MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT(); 1392326938Sdim for (int I : Mask) { 1393326938Sdim if (I < 0) { 1394344779Sdim Ops.push_back(ISel.selectUndef(dl, LegalTy)); 1395326938Sdim continue; 1396326938Sdim } 1397326938Sdim SDValue Vec; 1398326938Sdim unsigned M = I; 1399326938Sdim if (M < VecLen) { 1400326938Sdim Vec = Va; 1401326938Sdim } else { 1402326938Sdim Vec = Vb; 1403326938Sdim M -= VecLen; 1404326938Sdim } 1405326938Sdim if (HavePairs) { 1406326938Sdim if (M < HwLen) { 1407326938Sdim Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec); 1408326938Sdim } else { 1409326938Sdim Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec); 1410326938Sdim M -= HwLen; 1411326938Sdim } 1412326938Sdim } 1413326938Sdim SDValue Idx = DAG.getConstant(M, dl, MVT::i32); 1414344779Sdim SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx}); 1415326938Sdim SDValue L = Lower.LowerOperation(Ex, DAG); 1416326938Sdim assert(L.getNode()); 1417326938Sdim Ops.push_back(L); 1418326938Sdim } 1419326938Sdim 1420326938Sdim SDValue LV; 1421326938Sdim if (2*HwLen == VecLen) { 1422326938Sdim SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen}); 1423326938Sdim SDValue L0 = Lower.LowerOperation(B0, DAG); 1424326938Sdim SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen}); 1425326938Sdim SDValue L1 = Lower.LowerOperation(B1, DAG); 1426326938Sdim // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering) 1427326938Sdim // functions may expect to be called only for illegal operations, so 1428326938Sdim // make sure that they are not called for legal ones. Develop a better 1429326938Sdim // mechanism for dealing with this. 1430326938Sdim LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1}); 1431326938Sdim } else { 1432326938Sdim SDValue BV = DAG.getBuildVector(ResTy, dl, Ops); 1433326938Sdim LV = Lower.LowerOperation(BV, DAG); 1434326938Sdim } 1435326938Sdim 1436326938Sdim assert(!N->use_empty()); 1437326938Sdim ISel.ReplaceNode(N, LV.getNode()); 1438326938Sdim 1439344779Sdim if (AllNodes.count(LV.getNode())) { 1440344779Sdim DAG.RemoveDeadNodes(); 1441344779Sdim return true; 1442344779Sdim } 1443344779Sdim 1444344779Sdim // The lowered build-vector node will now need to be selected. It needs 1445344779Sdim // to be done here because this node and its submodes are not included 1446344779Sdim // in the main selection loop. 1447344779Sdim // Implement essentially the same topological ordering algorithm as is 1448344779Sdim // used in SelectionDAGISel. 1449344779Sdim 1450344779Sdim SetVector<SDNode*> SubNodes, TmpQ; 1451344779Sdim std::map<SDNode*,unsigned> NumOps; 1452344779Sdim 1453344779Sdim SubNodes.insert(LV.getNode()); 1454326938Sdim for (unsigned I = 0; I != SubNodes.size(); ++I) { 1455344779Sdim unsigned OpN = 0; 1456344779Sdim SDNode *S = SubNodes[I]; 1457344779Sdim for (SDValue Op : S->ops()) { 1458344779Sdim if (AllNodes.count(Op.getNode())) 1459344779Sdim continue; 1460344779Sdim SubNodes.insert(Op.getNode()); 1461344779Sdim ++OpN; 1462344779Sdim } 1463344779Sdim NumOps.insert({S, OpN}); 1464344779Sdim if (OpN == 0) 1465344779Sdim TmpQ.insert(S); 1466326938Sdim } 1467344779Sdim 1468344779Sdim for (unsigned I = 0; I != TmpQ.size(); ++I) { 1469344779Sdim SDNode *S = TmpQ[I]; 1470344779Sdim for (SDNode *U : S->uses()) { 1471344779Sdim if (!SubNodes.count(U)) 1472344779Sdim continue; 1473344779Sdim auto F = NumOps.find(U); 1474344779Sdim assert(F != NumOps.end()); 1475344779Sdim assert(F->second > 0); 1476344779Sdim if (!--F->second) 1477344779Sdim TmpQ.insert(F->first); 1478344779Sdim } 1479326938Sdim } 1480344779Sdim assert(SubNodes.size() == TmpQ.size()); 1481344779Sdim NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector()); 1482326938Sdim 1483344779Sdim Deleter DUQ(DAG, Queue); 1484344779Sdim for (SDNode *S : reverse(Queue)) 1485344779Sdim if (S != nullptr) 1486344779Sdim ISel.Select(S); 1487344779Sdim 1488326938Sdim DAG.RemoveDeadNodes(); 1489326938Sdim return true; 1490326938Sdim} 1491326938Sdim 1492326938SdimOpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb, 1493326938Sdim ResultStack &Results) { 1494326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1495326938Sdim if (!Va.isValid() || !Vb.isValid()) 1496326938Sdim return OpRef::fail(); 1497326938Sdim 1498326938Sdim // Contracting shuffles, i.e. instructions that always discard some bytes 1499326938Sdim // from the operand vectors. 1500326938Sdim // 1501326938Sdim // V6_vshuff{e,o}b 1502326938Sdim // V6_vdealb4w 1503326938Sdim // V6_vpack{e,o}{b,h} 1504326938Sdim 1505326938Sdim int VecLen = SM.Mask.size(); 1506326938Sdim std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen); 1507326938Sdim MVT ResTy = getSingleVT(MVT::i8); 1508326938Sdim 1509326938Sdim // The following shuffles only work for bytes and halfwords. This requires 1510326938Sdim // the strip length to be 1 or 2. 1511326938Sdim if (Strip.second != 1 && Strip.second != 2) 1512326938Sdim return OpRef::fail(); 1513326938Sdim 1514326938Sdim // The patterns for the shuffles, in terms of the starting offsets of the 1515326938Sdim // consecutive strips (L = length of the strip, N = VecLen): 1516326938Sdim // 1517326938Sdim // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2 1518326938Sdim // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2 1519326938Sdim // 1520326938Sdim // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2 1521326938Sdim // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2 1522326938Sdim // 1523326938Sdim // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ... 1524326938Sdim 1525326938Sdim // The value of the element in the mask following the strip will decide 1526326938Sdim // what kind of a shuffle this can be. 1527326938Sdim int NextInMask = SM.Mask[Strip.second]; 1528326938Sdim 1529326938Sdim // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask 1530326938Sdim // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would 1531326938Sdim // satisfy this. 1532326938Sdim if (NextInMask < VecLen) { 1533326938Sdim // vpack{e,o} or vdealb4w 1534326938Sdim if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) { 1535326938Sdim int N = VecLen; 1536326938Sdim // Check if this is vdealb4w (L=1). 1537326938Sdim for (int I = 0; I != N/4; ++I) 1538326938Sdim if (SM.Mask[I] != 4*I) 1539326938Sdim return OpRef::fail(); 1540326938Sdim for (int I = 0; I != N/4; ++I) 1541326938Sdim if (SM.Mask[I+N/4] != 2 + 4*I) 1542326938Sdim return OpRef::fail(); 1543326938Sdim for (int I = 0; I != N/4; ++I) 1544326938Sdim if (SM.Mask[I+N/2] != N + 4*I) 1545326938Sdim return OpRef::fail(); 1546326938Sdim for (int I = 0; I != N/4; ++I) 1547326938Sdim if (SM.Mask[I+3*N/4] != N+2 + 4*I) 1548326938Sdim return OpRef::fail(); 1549326938Sdim // Matched mask for vdealb4w. 1550326938Sdim Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va}); 1551326938Sdim return OpRef::res(Results.top()); 1552326938Sdim } 1553326938Sdim 1554326938Sdim // Check if this is vpack{e,o}. 1555326938Sdim int N = VecLen; 1556326938Sdim int L = Strip.second; 1557326938Sdim // Check if the first strip starts at 0 or at L. 1558326938Sdim if (Strip.first != 0 && Strip.first != L) 1559326938Sdim return OpRef::fail(); 1560326938Sdim // Examine the rest of the mask. 1561326938Sdim for (int I = L; I < N; I += L) { 1562327134Sdim auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); 1563326938Sdim // Check whether the mask element at the beginning of each strip 1564326938Sdim // increases by 2L each time. 1565326938Sdim if (S.first - Strip.first != 2*I) 1566326938Sdim return OpRef::fail(); 1567326938Sdim // Check whether each strip is of the same length. 1568326938Sdim if (S.second != unsigned(L)) 1569326938Sdim return OpRef::fail(); 1570326938Sdim } 1571326938Sdim 1572326938Sdim // Strip.first == 0 => vpacke 1573326938Sdim // Strip.first == L => vpacko 1574326938Sdim assert(Strip.first == 0 || Strip.first == L); 1575326938Sdim using namespace Hexagon; 1576326938Sdim NodeTemplate Res; 1577326938Sdim Res.Opc = Strip.second == 1 // Number of bytes. 1578326938Sdim ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob) 1579326938Sdim : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh); 1580326938Sdim Res.Ty = ResTy; 1581326938Sdim Res.Ops = { Vb, Va }; 1582326938Sdim Results.push(Res); 1583326938Sdim return OpRef::res(Results.top()); 1584326938Sdim } 1585326938Sdim 1586326938Sdim // Check if this is vshuff{e,o}. 1587326938Sdim int N = VecLen; 1588326938Sdim int L = Strip.second; 1589326938Sdim std::pair<int,unsigned> PrevS = Strip; 1590326938Sdim bool Flip = false; 1591326938Sdim for (int I = L; I < N; I += L) { 1592327134Sdim auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); 1593326938Sdim if (S.second != PrevS.second) 1594326938Sdim return OpRef::fail(); 1595326938Sdim int Diff = Flip ? PrevS.first - S.first + 2*L 1596326938Sdim : S.first - PrevS.first; 1597326938Sdim if (Diff != N) 1598326938Sdim return OpRef::fail(); 1599326938Sdim Flip ^= true; 1600326938Sdim PrevS = S; 1601326938Sdim } 1602326938Sdim // Strip.first == 0 => vshuffe 1603326938Sdim // Strip.first == L => vshuffo 1604326938Sdim assert(Strip.first == 0 || Strip.first == L); 1605326938Sdim using namespace Hexagon; 1606326938Sdim NodeTemplate Res; 1607326938Sdim Res.Opc = Strip.second == 1 // Number of bytes. 1608326938Sdim ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob) 1609326938Sdim : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh); 1610326938Sdim Res.Ty = ResTy; 1611326938Sdim Res.Ops = { Vb, Va }; 1612326938Sdim Results.push(Res); 1613326938Sdim return OpRef::res(Results.top()); 1614326938Sdim} 1615326938Sdim 1616326938SdimOpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1617326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1618326938Sdim // Expanding shuffles (using all elements and inserting into larger vector): 1619326938Sdim // 1620326938Sdim // V6_vunpacku{b,h} [*] 1621326938Sdim // 1622326938Sdim // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask. 1623326938Sdim // 1624326938Sdim // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so 1625326938Sdim // they are not shuffles. 1626326938Sdim // 1627326938Sdim // The argument is a single vector. 1628326938Sdim 1629326938Sdim int VecLen = SM.Mask.size(); 1630326938Sdim assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type"); 1631326938Sdim 1632326938Sdim std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen); 1633326938Sdim 1634326938Sdim // The patterns for the unpacks, in terms of the starting offsets of the 1635326938Sdim // consecutive strips (L = length of the strip, N = VecLen): 1636326938Sdim // 1637326938Sdim // vunpacku: 0, -1, L, -1, 2L, -1 ... 1638326938Sdim 1639326938Sdim if (Strip.first != 0) 1640326938Sdim return OpRef::fail(); 1641326938Sdim 1642326938Sdim // The vunpackus only handle byte and half-word. 1643326938Sdim if (Strip.second != 1 && Strip.second != 2) 1644326938Sdim return OpRef::fail(); 1645326938Sdim 1646326938Sdim int N = VecLen; 1647326938Sdim int L = Strip.second; 1648326938Sdim 1649326938Sdim // First, check the non-ignored strips. 1650326938Sdim for (int I = 2*L; I < 2*N; I += 2*L) { 1651327134Sdim auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); 1652326938Sdim if (S.second != unsigned(L)) 1653326938Sdim return OpRef::fail(); 1654326938Sdim if (2*S.first != I) 1655326938Sdim return OpRef::fail(); 1656326938Sdim } 1657326938Sdim // Check the -1s. 1658326938Sdim for (int I = L; I < 2*N; I += 2*L) { 1659327134Sdim auto S = findStrip(SM.Mask.drop_front(I), 0, N-I); 1660326938Sdim if (S.first != -1 || S.second != unsigned(L)) 1661326938Sdim return OpRef::fail(); 1662326938Sdim } 1663326938Sdim 1664326938Sdim unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub 1665326938Sdim : Hexagon::V6_vunpackuh; 1666326938Sdim Results.push(Opc, getPairVT(MVT::i8), {Va}); 1667326938Sdim return OpRef::res(Results.top()); 1668326938Sdim} 1669326938Sdim 1670326938SdimOpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1671326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1672326938Sdim // V6_vdeal{b,h} 1673326938Sdim // V6_vshuff{b,h} 1674326938Sdim 1675326938Sdim // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2}) 1676326938Sdim // V6_vshuffvdd (V6_vshuff) 1677326938Sdim // V6_dealvdd (V6_vdeal) 1678326938Sdim 1679326938Sdim int VecLen = SM.Mask.size(); 1680326938Sdim assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8); 1681326938Sdim unsigned LogLen = Log2_32(VecLen); 1682326938Sdim unsigned HwLog = Log2_32(HwLen); 1683326938Sdim // The result length must be the same as the length of a single vector, 1684326938Sdim // or a vector pair. 1685326938Sdim assert(LogLen == HwLog || LogLen == HwLog+1); 1686326938Sdim bool Extend = (LogLen == HwLog); 1687326938Sdim 1688326938Sdim if (!isPermutation(SM.Mask)) 1689326938Sdim return OpRef::fail(); 1690326938Sdim 1691326938Sdim SmallVector<unsigned,8> Perm(LogLen); 1692326938Sdim 1693326938Sdim // Check if this could be a perfect shuffle, or a combination of perfect 1694326938Sdim // shuffles. 1695326938Sdim // 1696326938Sdim // Consider this permutation (using hex digits to make the ASCII diagrams 1697326938Sdim // easier to read): 1698326938Sdim // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }. 1699326938Sdim // This is a "deal" operation: divide the input into two halves, and 1700326938Sdim // create the output by picking elements by alternating between these two 1701326938Sdim // halves: 1702326938Sdim // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*] 1703326938Sdim // 8 9 A B C D E F 1704326938Sdim // 1705326938Sdim // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides 1706326938Sdim // a somwehat different mechanism that could be used to perform shuffle/ 1707326938Sdim // deal operations: a 2x2 transpose. 1708326938Sdim // Consider the halves of inputs again, they can be interpreted as a 2x8 1709326938Sdim // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated 1710326938Sdim // together. Now, when considering 2 elements at a time, it will be a 2x4 1711326938Sdim // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices: 1712326938Sdim // 01 23 45 67 1713326938Sdim // 89 AB CD EF 1714326938Sdim // With groups of 4, this will become a single 2x2 matrix, and so on. 1715326938Sdim // 1716326938Sdim // The 2x2 transpose instruction works by transposing each of the 2x2 1717326938Sdim // matrices (or "sub-matrices"), given a specific group size. For example, 1718326938Sdim // if the group size is 1 (i.e. each element is its own group), there 1719326938Sdim // will be four transposes of the four 2x2 matrices that form the 2x8. 1720326938Sdim // For example, with the inputs as above, the result will be: 1721326938Sdim // 0 8 2 A 4 C 6 E 1722326938Sdim // 1 9 3 B 5 D 7 F 1723326938Sdim // Now, this result can be tranposed again, but with the group size of 2: 1724326938Sdim // 08 19 4C 5D 1725326938Sdim // 2A 3B 6E 7F 1726326938Sdim // If we then transpose that result, but with the group size of 4, we get: 1727326938Sdim // 0819 2A3B 1728326938Sdim // 4C5D 6E7F 1729326938Sdim // If we concatenate these two rows, it will be 1730326938Sdim // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F 1731326938Sdim // which is the same as the "deal" [*] above. 1732326938Sdim // 1733326938Sdim // In general, a "deal" of individual elements is a series of 2x2 transposes, 1734326938Sdim // with changing group size. HVX has two instructions: 1735326938Sdim // Vdd = V6_vdealvdd Vu, Vv, Rt 1736326938Sdim // Vdd = V6_shufvdd Vu, Vv, Rt 1737326938Sdim // that perform exactly that. The register Rt controls which transposes are 1738326938Sdim // going to happen: a bit at position n (counting from 0) indicates that a 1739326938Sdim // transpose with a group size of 2^n will take place. If multiple bits are 1740326938Sdim // set, multiple transposes will happen: vdealvdd will perform them starting 1741326938Sdim // with the largest group size, vshuffvdd will do them in the reverse order. 1742326938Sdim // 1743326938Sdim // The main observation is that each 2x2 transpose corresponds to swapping 1744326938Sdim // columns of bits in the binary representation of the values. 1745326938Sdim // 1746326938Sdim // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits 1747326938Sdim // in a given column. The * denote the columns that will be swapped. 1748326938Sdim // The transpose with the group size 2^n corresponds to swapping columns 1749326938Sdim // 3 (the highest log) and log2(n): 1750326938Sdim // 1751326938Sdim // 3 2 1 0 0 2 1 3 0 2 3 1 1752326938Sdim // * * * * * * 1753326938Sdim // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1754326938Sdim // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0 1755326938Sdim // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1 1756326938Sdim // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1 1757326938Sdim // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0 1758326938Sdim // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0 1759326938Sdim // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1 1760326938Sdim // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1 1761326938Sdim // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0 1762326938Sdim // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0 1763326938Sdim // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1 1764326938Sdim // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1 1765326938Sdim // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0 1766326938Sdim // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0 1767326938Sdim // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1 1768326938Sdim // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 1769326938Sdim 1770326938Sdim auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) { 1771326938Sdim unsigned X = Mask[0] ^ Mask[Num/2]; 1772326938Sdim // Check that the first half has the X's bits clear. 1773326938Sdim if ((Mask[0] & X) != 0) 1774326938Sdim return 0u; 1775326938Sdim for (unsigned I = 1; I != Num/2; ++I) { 1776326938Sdim if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X) 1777326938Sdim return 0u; 1778326938Sdim if ((Mask[I] & X) != 0) 1779326938Sdim return 0u; 1780326938Sdim } 1781326938Sdim return X; 1782326938Sdim }; 1783326938Sdim 1784326938Sdim // Create a vector of log2's for each column: Perm[i] corresponds to 1785326938Sdim // the i-th bit (lsb is 0). 1786326938Sdim assert(VecLen > 2); 1787326938Sdim for (unsigned I = VecLen; I >= 2; I >>= 1) { 1788326938Sdim // Examine the initial segment of Mask of size I. 1789326938Sdim unsigned X = XorPow2(SM.Mask, I); 1790326938Sdim if (!isPowerOf2_32(X)) 1791326938Sdim return OpRef::fail(); 1792326938Sdim // Check the other segments of Mask. 1793327134Sdim for (int J = I; J < VecLen; J += I) { 1794327134Sdim if (XorPow2(SM.Mask.slice(J, I), I) != X) 1795326938Sdim return OpRef::fail(); 1796326938Sdim } 1797326938Sdim Perm[Log2_32(X)] = Log2_32(I)-1; 1798326938Sdim } 1799326938Sdim 1800326938Sdim // Once we have Perm, represent it as cycles. Denote the maximum log2 1801326938Sdim // (equal to log2(VecLen)-1) as M. The cycle containing M can then be 1802326938Sdim // written as (M a1 a2 a3 ... an). That cycle can be broken up into 1803326938Sdim // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition 1804326938Sdim // order being from left to right. Any (contiguous) segment where the 1805326938Sdim // values ai, ai+1...aj are either all increasing or all decreasing, 1806326938Sdim // can be implemented via a single vshuffvdd/vdealvdd respectively. 1807326938Sdim // 1808326938Sdim // If there is a cycle (a1 a2 ... an) that does not involve M, it can 1809326938Sdim // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can 1810326938Sdim // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure 1811326938Sdim // can be used to generate a sequence of vshuffvdd/vdealvdd. 1812326938Sdim // 1813326938Sdim // Example: 1814326938Sdim // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written 1815326938Sdim // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply 1816326938Sdim // (4 0 1)(4 0)(4 2 3)(4 2). 1817326938Sdim // It can then be expanded into swaps as 1818326938Sdim // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2), 1819326938Sdim // and broken up into "increasing" segments as 1820326938Sdim // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)]. 1821326938Sdim // This is equivalent to 1822326938Sdim // (4 0 1)(4 0 2 3)(4 2), 1823326938Sdim // which can be implemented as 3 vshufvdd instructions. 1824326938Sdim 1825326938Sdim using CycleType = SmallVector<unsigned,8>; 1826326938Sdim std::set<CycleType> Cycles; 1827326938Sdim std::set<unsigned> All; 1828326938Sdim 1829326938Sdim for (unsigned I : Perm) 1830326938Sdim All.insert(I); 1831326938Sdim 1832326938Sdim // If the cycle contains LogLen-1, move it to the front of the cycle. 1833326938Sdim // Otherwise, return the cycle unchanged. 1834326938Sdim auto canonicalize = [LogLen](const CycleType &C) -> CycleType { 1835326938Sdim unsigned LogPos, N = C.size(); 1836326938Sdim for (LogPos = 0; LogPos != N; ++LogPos) 1837326938Sdim if (C[LogPos] == LogLen-1) 1838326938Sdim break; 1839326938Sdim if (LogPos == N) 1840326938Sdim return C; 1841326938Sdim 1842326938Sdim CycleType NewC(C.begin()+LogPos, C.end()); 1843326938Sdim NewC.append(C.begin(), C.begin()+LogPos); 1844326938Sdim return NewC; 1845326938Sdim }; 1846326938Sdim 1847326938Sdim auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) { 1848326938Sdim // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6), 1849326938Sdim // for bytes zero is included, for halfwords is not. 1850326938Sdim if (Cs.size() != 1) 1851326938Sdim return 0u; 1852326938Sdim const CycleType &C = *Cs.begin(); 1853326938Sdim if (C[0] != Len-1) 1854326938Sdim return 0u; 1855326938Sdim int D = Len - C.size(); 1856326938Sdim if (D != 0 && D != 1) 1857326938Sdim return 0u; 1858326938Sdim 1859326938Sdim bool IsDeal = true, IsShuff = true; 1860326938Sdim for (unsigned I = 1; I != Len-D; ++I) { 1861326938Sdim if (C[I] != Len-1-I) 1862326938Sdim IsDeal = false; 1863326938Sdim if (C[I] != I-(1-D)) // I-1, I 1864326938Sdim IsShuff = false; 1865326938Sdim } 1866326938Sdim // At most one, IsDeal or IsShuff, can be non-zero. 1867326938Sdim assert(!(IsDeal || IsShuff) || IsDeal != IsShuff); 1868326938Sdim static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh }; 1869326938Sdim static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh }; 1870326938Sdim return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0); 1871326938Sdim }; 1872326938Sdim 1873326938Sdim while (!All.empty()) { 1874326938Sdim unsigned A = *All.begin(); 1875326938Sdim All.erase(A); 1876326938Sdim CycleType C; 1877326938Sdim C.push_back(A); 1878326938Sdim for (unsigned B = Perm[A]; B != A; B = Perm[B]) { 1879326938Sdim C.push_back(B); 1880326938Sdim All.erase(B); 1881326938Sdim } 1882326938Sdim if (C.size() <= 1) 1883326938Sdim continue; 1884326938Sdim Cycles.insert(canonicalize(C)); 1885326938Sdim } 1886326938Sdim 1887326938Sdim MVT SingleTy = getSingleVT(MVT::i8); 1888326938Sdim MVT PairTy = getPairVT(MVT::i8); 1889326938Sdim 1890326938Sdim // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}. 1891326938Sdim if (unsigned(VecLen) == HwLen) { 1892326938Sdim if (unsigned SingleOpc = pfs(Cycles, LogLen)) { 1893326938Sdim Results.push(SingleOpc, SingleTy, {Va}); 1894326938Sdim return OpRef::res(Results.top()); 1895326938Sdim } 1896326938Sdim } 1897326938Sdim 1898326938Sdim SmallVector<unsigned,8> SwapElems; 1899326938Sdim if (HwLen == unsigned(VecLen)) 1900326938Sdim SwapElems.push_back(LogLen-1); 1901326938Sdim 1902326938Sdim for (const CycleType &C : Cycles) { 1903326938Sdim unsigned First = (C[0] == LogLen-1) ? 1 : 0; 1904326938Sdim SwapElems.append(C.begin()+First, C.end()); 1905326938Sdim if (First == 0) 1906326938Sdim SwapElems.push_back(C[0]); 1907326938Sdim } 1908326938Sdim 1909326938Sdim const SDLoc &dl(Results.InpNode); 1910326938Sdim OpRef Arg = !Extend ? Va 1911326938Sdim : concat(Va, OpRef::undef(SingleTy), Results); 1912326938Sdim 1913326938Sdim for (unsigned I = 0, E = SwapElems.size(); I != E; ) { 1914326938Sdim bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1]; 1915326938Sdim unsigned S = (1u << SwapElems[I]); 1916326938Sdim if (I < E-1) { 1917326938Sdim while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1])) 1918326938Sdim S |= 1u << SwapElems[I]; 1919326938Sdim // The above loop will not add a bit for the final SwapElems[I+1], 1920326938Sdim // so add it here. 1921326938Sdim S |= 1u << SwapElems[I]; 1922326938Sdim } 1923326938Sdim ++I; 1924326938Sdim 1925326938Sdim NodeTemplate Res; 1926326938Sdim Results.push(Hexagon::A2_tfrsi, MVT::i32, 1927326938Sdim { DAG.getTargetConstant(S, dl, MVT::i32) }); 1928326938Sdim Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd; 1929326938Sdim Res.Ty = PairTy; 1930326938Sdim Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) }; 1931326938Sdim Results.push(Res); 1932326938Sdim Arg = OpRef::res(Results.top()); 1933326938Sdim } 1934326938Sdim 1935326938Sdim return !Extend ? Arg : OpRef::lo(Arg); 1936326938Sdim} 1937326938Sdim 1938326938SdimOpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1939326938Sdim DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1940326938Sdim // Butterfly shuffles. 1941326938Sdim // 1942326938Sdim // V6_vdelta 1943326938Sdim // V6_vrdelta 1944326938Sdim // V6_vror 1945326938Sdim 1946326938Sdim // The assumption here is that all elements picked by Mask are in the 1947326938Sdim // first operand to the vector_shuffle. This assumption is enforced 1948326938Sdim // by the caller. 1949326938Sdim 1950326938Sdim MVT ResTy = getSingleVT(MVT::i8); 1951326938Sdim PermNetwork::Controls FC, RC; 1952326938Sdim const SDLoc &dl(Results.InpNode); 1953326938Sdim int VecLen = SM.Mask.size(); 1954326938Sdim 1955326938Sdim for (int M : SM.Mask) { 1956326938Sdim if (M != -1 && M >= VecLen) 1957326938Sdim return OpRef::fail(); 1958326938Sdim } 1959326938Sdim 1960326938Sdim // Try the deltas/benes for both single vectors and vector pairs. 1961326938Sdim ForwardDeltaNetwork FN(SM.Mask); 1962326938Sdim if (FN.run(FC)) { 1963326938Sdim SDValue Ctl = getVectorConstant(FC, dl); 1964326938Sdim Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)}); 1965326938Sdim return OpRef::res(Results.top()); 1966326938Sdim } 1967326938Sdim 1968326938Sdim // Try reverse delta. 1969326938Sdim ReverseDeltaNetwork RN(SM.Mask); 1970326938Sdim if (RN.run(RC)) { 1971326938Sdim SDValue Ctl = getVectorConstant(RC, dl); 1972326938Sdim Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)}); 1973326938Sdim return OpRef::res(Results.top()); 1974326938Sdim } 1975326938Sdim 1976326938Sdim // Do Benes. 1977326938Sdim BenesNetwork BN(SM.Mask); 1978326938Sdim if (BN.run(FC, RC)) { 1979326938Sdim SDValue CtlF = getVectorConstant(FC, dl); 1980326938Sdim SDValue CtlR = getVectorConstant(RC, dl); 1981326938Sdim Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)}); 1982326938Sdim Results.push(Hexagon::V6_vrdelta, ResTy, 1983326938Sdim {OpRef::res(-1), OpRef(CtlR)}); 1984326938Sdim return OpRef::res(Results.top()); 1985326938Sdim } 1986326938Sdim 1987326938Sdim return OpRef::fail(); 1988326938Sdim} 1989326938Sdim 1990326938SdimSDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data, 1991326938Sdim const SDLoc &dl) { 1992326938Sdim SmallVector<SDValue, 128> Elems; 1993326938Sdim for (uint8_t C : Data) 1994326938Sdim Elems.push_back(DAG.getConstant(C, dl, MVT::i8)); 1995326938Sdim MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size()); 1996326938Sdim SDValue BV = DAG.getBuildVector(VecTy, dl, Elems); 1997326938Sdim SDValue LV = Lower.LowerOperation(BV, DAG); 1998326938Sdim DAG.RemoveDeadNode(BV.getNode()); 1999326938Sdim return LV; 2000326938Sdim} 2001326938Sdim 2002326938Sdimvoid HvxSelector::selectShuffle(SDNode *N) { 2003326938Sdim DEBUG_WITH_TYPE("isel", { 2004326938Sdim dbgs() << "Starting " << __func__ << " on node:\n"; 2005326938Sdim N->dump(&DAG); 2006326938Sdim }); 2007326938Sdim MVT ResTy = N->getValueType(0).getSimpleVT(); 2008326938Sdim // Assume that vector shuffles operate on vectors of bytes. 2009326938Sdim assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8); 2010326938Sdim 2011326938Sdim auto *SN = cast<ShuffleVectorSDNode>(N); 2012326938Sdim std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end()); 2013326938Sdim // This shouldn't really be necessary. Is it? 2014326938Sdim for (int &Idx : Mask) 2015326938Sdim if (Idx != -1 && Idx < 0) 2016326938Sdim Idx = -1; 2017326938Sdim 2018326938Sdim unsigned VecLen = Mask.size(); 2019326938Sdim bool HavePairs = (2*HwLen == VecLen); 2020326938Sdim assert(ResTy.getSizeInBits() / 8 == VecLen); 2021326938Sdim 2022326938Sdim // Vd = vector_shuffle Va, Vb, Mask 2023326938Sdim // 2024326938Sdim 2025326938Sdim bool UseLeft = false, UseRight = false; 2026326938Sdim for (unsigned I = 0; I != VecLen; ++I) { 2027326938Sdim if (Mask[I] == -1) 2028326938Sdim continue; 2029326938Sdim unsigned Idx = Mask[I]; 2030326938Sdim assert(Idx < 2*VecLen); 2031326938Sdim if (Idx < VecLen) 2032326938Sdim UseLeft = true; 2033326938Sdim else 2034326938Sdim UseRight = true; 2035326938Sdim } 2036326938Sdim 2037326938Sdim DEBUG_WITH_TYPE("isel", { 2038326938Sdim dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft=" 2039326938Sdim << UseLeft << " UseRight=" << UseRight << " HavePairs=" 2040326938Sdim << HavePairs << '\n'; 2041326938Sdim }); 2042326938Sdim // If the mask is all -1's, generate "undef". 2043326938Sdim if (!UseLeft && !UseRight) { 2044326938Sdim ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); 2045326938Sdim return; 2046326938Sdim } 2047326938Sdim 2048326938Sdim SDValue Vec0 = N->getOperand(0); 2049326938Sdim SDValue Vec1 = N->getOperand(1); 2050326938Sdim ResultStack Results(SN); 2051326938Sdim Results.push(TargetOpcode::COPY, ResTy, {Vec0}); 2052326938Sdim Results.push(TargetOpcode::COPY, ResTy, {Vec1}); 2053326938Sdim OpRef Va = OpRef::res(Results.top()-1); 2054326938Sdim OpRef Vb = OpRef::res(Results.top()); 2055326938Sdim 2056326938Sdim OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results) 2057326938Sdim : shuffp2(ShuffleMask(Mask), Va, Vb, Results); 2058326938Sdim 2059326938Sdim bool Done = Res.isValid(); 2060326938Sdim if (Done) { 2061326938Sdim // Make sure that Res is on the stack before materializing. 2062326938Sdim Results.push(TargetOpcode::COPY, ResTy, {Res}); 2063326938Sdim materialize(Results); 2064326938Sdim } else { 2065326938Sdim Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N); 2066326938Sdim } 2067326938Sdim 2068326938Sdim if (!Done) { 2069326938Sdim#ifndef NDEBUG 2070326938Sdim dbgs() << "Unhandled shuffle:\n"; 2071326938Sdim SN->dumpr(&DAG); 2072326938Sdim#endif 2073326938Sdim llvm_unreachable("Failed to select vector shuffle"); 2074326938Sdim } 2075326938Sdim} 2076326938Sdim 2077326938Sdimvoid HvxSelector::selectRor(SDNode *N) { 2078326938Sdim // If this is a rotation by less than 8, use V6_valignbi. 2079326938Sdim MVT Ty = N->getValueType(0).getSimpleVT(); 2080326938Sdim const SDLoc &dl(N); 2081326938Sdim SDValue VecV = N->getOperand(0); 2082326938Sdim SDValue RotV = N->getOperand(1); 2083326938Sdim SDNode *NewN = nullptr; 2084326938Sdim 2085326938Sdim if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) { 2086341825Sdim unsigned S = CN->getZExtValue() % HST.getVectorLength(); 2087341825Sdim if (S == 0) { 2088326938Sdim NewN = VecV.getNode(); 2089326938Sdim } else if (isUInt<3>(S)) { 2090326938Sdim SDValue C = DAG.getTargetConstant(S, dl, MVT::i32); 2091326938Sdim NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty, 2092326938Sdim {VecV, VecV, C}); 2093326938Sdim } 2094326938Sdim } 2095326938Sdim 2096326938Sdim if (!NewN) 2097326938Sdim NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); 2098326938Sdim 2099326938Sdim ISel.ReplaceNode(N, NewN); 2100341825Sdim} 2101341825Sdim 2102341825Sdimvoid HvxSelector::selectVAlign(SDNode *N) { 2103341825Sdim SDValue Vv = N->getOperand(0); 2104341825Sdim SDValue Vu = N->getOperand(1); 2105341825Sdim SDValue Rt = N->getOperand(2); 2106341825Sdim SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N), 2107341825Sdim N->getValueType(0), {Vv, Vu, Rt}); 2108341825Sdim ISel.ReplaceNode(N, NewN); 2109326938Sdim DAG.RemoveDeadNode(N); 2110326938Sdim} 2111326938Sdim 2112326938Sdimvoid HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) { 2113326938Sdim HvxSelector(*this, *CurDAG).selectShuffle(N); 2114326938Sdim} 2115326938Sdim 2116326938Sdimvoid HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) { 2117326938Sdim HvxSelector(*this, *CurDAG).selectRor(N); 2118326938Sdim} 2119326938Sdim 2120341825Sdimvoid HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) { 2121341825Sdim HvxSelector(*this, *CurDAG).selectVAlign(N); 2122341825Sdim} 2123341825Sdim 2124326938Sdimvoid HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { 2125326938Sdim const SDLoc &dl(N); 2126326938Sdim SDValue Chain = N->getOperand(0); 2127326938Sdim SDValue Address = N->getOperand(2); 2128326938Sdim SDValue Predicate = N->getOperand(3); 2129326938Sdim SDValue Base = N->getOperand(4); 2130326938Sdim SDValue Modifier = N->getOperand(5); 2131326938Sdim SDValue Offset = N->getOperand(6); 2132326938Sdim 2133326938Sdim unsigned Opcode; 2134326938Sdim unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 2135326938Sdim switch (IntNo) { 2136326938Sdim default: 2137326938Sdim llvm_unreachable("Unexpected HVX gather intrinsic."); 2138326938Sdim case Intrinsic::hexagon_V6_vgathermhq: 2139326938Sdim case Intrinsic::hexagon_V6_vgathermhq_128B: 2140326938Sdim Opcode = Hexagon::V6_vgathermhq_pseudo; 2141326938Sdim break; 2142326938Sdim case Intrinsic::hexagon_V6_vgathermwq: 2143326938Sdim case Intrinsic::hexagon_V6_vgathermwq_128B: 2144326938Sdim Opcode = Hexagon::V6_vgathermwq_pseudo; 2145326938Sdim break; 2146326938Sdim case Intrinsic::hexagon_V6_vgathermhwq: 2147326938Sdim case Intrinsic::hexagon_V6_vgathermhwq_128B: 2148326938Sdim Opcode = Hexagon::V6_vgathermhwq_pseudo; 2149326938Sdim break; 2150326938Sdim } 2151326938Sdim 2152326938Sdim SDVTList VTs = CurDAG->getVTList(MVT::Other); 2153326938Sdim SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain }; 2154326938Sdim SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 2155326938Sdim 2156344779Sdim MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2157344779Sdim CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 2158326938Sdim 2159341825Sdim ReplaceNode(N, Result); 2160326938Sdim} 2161326938Sdim 2162326938Sdimvoid HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { 2163326938Sdim const SDLoc &dl(N); 2164326938Sdim SDValue Chain = N->getOperand(0); 2165326938Sdim SDValue Address = N->getOperand(2); 2166326938Sdim SDValue Base = N->getOperand(3); 2167326938Sdim SDValue Modifier = N->getOperand(4); 2168326938Sdim SDValue Offset = N->getOperand(5); 2169326938Sdim 2170326938Sdim unsigned Opcode; 2171326938Sdim unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 2172326938Sdim switch (IntNo) { 2173326938Sdim default: 2174326938Sdim llvm_unreachable("Unexpected HVX gather intrinsic."); 2175326938Sdim case Intrinsic::hexagon_V6_vgathermh: 2176326938Sdim case Intrinsic::hexagon_V6_vgathermh_128B: 2177326938Sdim Opcode = Hexagon::V6_vgathermh_pseudo; 2178326938Sdim break; 2179326938Sdim case Intrinsic::hexagon_V6_vgathermw: 2180326938Sdim case Intrinsic::hexagon_V6_vgathermw_128B: 2181326938Sdim Opcode = Hexagon::V6_vgathermw_pseudo; 2182326938Sdim break; 2183326938Sdim case Intrinsic::hexagon_V6_vgathermhw: 2184326938Sdim case Intrinsic::hexagon_V6_vgathermhw_128B: 2185326938Sdim Opcode = Hexagon::V6_vgathermhw_pseudo; 2186326938Sdim break; 2187326938Sdim } 2188326938Sdim 2189326938Sdim SDVTList VTs = CurDAG->getVTList(MVT::Other); 2190326938Sdim SDValue Ops[] = { Address, Base, Modifier, Offset, Chain }; 2191326938Sdim SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 2192326938Sdim 2193344779Sdim MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2194344779Sdim CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 2195326938Sdim 2196341825Sdim ReplaceNode(N, Result); 2197326938Sdim} 2198326938Sdim 2199326938Sdimvoid HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { 2200326938Sdim unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 2201326938Sdim SDNode *Result; 2202326938Sdim switch (IID) { 2203326938Sdim case Intrinsic::hexagon_V6_vaddcarry: { 2204326938Sdim SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2205326938Sdim N->getOperand(3) }; 2206326938Sdim SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1); 2207326938Sdim Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); 2208326938Sdim break; 2209326938Sdim } 2210326938Sdim case Intrinsic::hexagon_V6_vaddcarry_128B: { 2211326938Sdim SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2212326938Sdim N->getOperand(3) }; 2213326938Sdim SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1); 2214326938Sdim Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); 2215326938Sdim break; 2216326938Sdim } 2217326938Sdim case Intrinsic::hexagon_V6_vsubcarry: { 2218326938Sdim SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2219326938Sdim N->getOperand(3) }; 2220326938Sdim SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1); 2221326938Sdim Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); 2222326938Sdim break; 2223326938Sdim } 2224326938Sdim case Intrinsic::hexagon_V6_vsubcarry_128B: { 2225326938Sdim SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2226326938Sdim N->getOperand(3) }; 2227326938Sdim SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1); 2228326938Sdim Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); 2229326938Sdim break; 2230326938Sdim } 2231326938Sdim default: 2232326938Sdim llvm_unreachable("Unexpected HVX dual output intrinsic."); 2233326938Sdim } 2234326938Sdim ReplaceUses(N, Result); 2235326938Sdim ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); 2236326938Sdim ReplaceUses(SDValue(N, 1), SDValue(Result, 1)); 2237326938Sdim CurDAG->RemoveDeadNode(N); 2238326938Sdim} 2239