HexagonISelDAGToDAGHVX.cpp revision 355940
11556Srgrimes//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===// 250471Speter// 31556Srgrimes// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4262951Sjmmv// See https://llvm.org/LICENSE.txt for license information. 5262951Sjmmv// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61556Srgrimes// 71556Srgrimes//===----------------------------------------------------------------------===// 8262951Sjmmv 9262951Sjmmv#include "Hexagon.h" 10262951Sjmmv#include "HexagonISelDAGToDAG.h" 11262951Sjmmv#include "HexagonISelLowering.h" 121556Srgrimes#include "HexagonTargetMachine.h" 13#include "llvm/ADT/SetVector.h" 14#include "llvm/CodeGen/MachineInstrBuilder.h" 15#include "llvm/CodeGen/SelectionDAGISel.h" 16#include "llvm/IR/Intrinsics.h" 17#include "llvm/Support/CommandLine.h" 18#include "llvm/Support/Debug.h" 19 20#include <deque> 21#include <map> 22#include <set> 23#include <utility> 24#include <vector> 25 26#define DEBUG_TYPE "hexagon-isel" 27 28using namespace llvm; 29 30namespace { 31 32// -------------------------------------------------------------------- 33// Implementation of permutation networks. 34 35// Implementation of the node routing through butterfly networks: 36// - Forward delta. 37// - Reverse delta. 38// - Benes. 39// 40// 41// Forward delta network consists of log(N) steps, where N is the number 42// of inputs. In each step, an input can stay in place, or it can get 43// routed to another position[1]. The step after that consists of two 44// networks, each half in size in terms of the number of nodes. In those 45// terms, in the given step, an input can go to either the upper or the 46// lower network in the next step. 47// 48// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both 49// positions as long as there is no conflict. 50 51// Here's a delta network for 8 inputs, only the switching routes are 52// shown: 53// 54// Steps: 55// |- 1 ---------------|- 2 -----|- 3 -| 56// 57// Inp[0] *** *** *** *** Out[0] 58// \ / \ / \ / 59// \ / \ / X 60// \ / \ / / \ 61// Inp[1] *** \ / *** X *** *** Out[1] 62// \ \ / / \ / \ / 63// \ \ / / X X 64// \ \ / / / \ / \ 65// Inp[2] *** \ \ / / *** X *** *** Out[2] 66// \ \ X / / / \ \ / 67// \ \ / \ / / / \ X 68// \ X X / / \ / \ 69// Inp[3] *** \ / \ / \ / *** *** *** Out[3] 70// \ X X X / 71// \ / \ / \ / \ / 72// X X X X 73// / \ / \ / \ / \ 74// / X X X \ 75// Inp[4] *** / \ / \ / \ *** *** *** Out[4] 76// / X X \ \ / \ / 77// / / \ / \ \ \ / X 78// / / X \ \ \ / / \ 79// Inp[5] *** / / \ \ *** X *** *** Out[5] 80// / / \ \ \ / \ / 81// / / \ \ X X 82// / / \ \ / \ / \ 83// Inp[6] *** / \ *** X *** *** Out[6] 84// / \ / \ \ / 85// / \ / \ X 86// / \ / \ / \ 87// Inp[7] *** *** *** *** Out[7] 88// 89// 90// Reverse delta network is same as delta network, with the steps in 91// the opposite order. 92// 93// 94// Benes network is a forward delta network immediately followed by 95// a reverse delta network. 96 97enum class ColorKind { None, Red, Black }; 98 99// Graph coloring utility used to partition nodes into two groups: 100// they will correspond to nodes routed to the upper and lower networks. 101struct Coloring { 102 using Node = int; 103 using MapType = std::map<Node, ColorKind>; 104 static constexpr Node Ignore = Node(-1); 105 106 Coloring(ArrayRef<Node> Ord) : Order(Ord) { 107 build(); 108 if (!color()) 109 Colors.clear(); 110 } 111 112 const MapType &colors() const { 113 return Colors; 114 } 115 116 ColorKind other(ColorKind Color) { 117 if (Color == ColorKind::None) 118 return ColorKind::Red; 119 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red; 120 } 121 122 LLVM_DUMP_METHOD void dump() const; 123 124private: 125 ArrayRef<Node> Order; 126 MapType Colors; 127 std::set<Node> Needed; 128 129 using NodeSet = std::set<Node>; 130 std::map<Node,NodeSet> Edges; 131 132 Node conj(Node Pos) { 133 Node Num = Order.size(); 134 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2; 135 } 136 137 ColorKind getColor(Node N) { 138 auto F = Colors.find(N); 139 return F != Colors.end() ? F->second : ColorKind::None; 140 } 141 142 std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes); 143 144 void build(); 145 bool color(); 146}; 147} // namespace 148 149std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) { 150 auto Color = ColorKind::None; 151 for (Node N : Nodes) { 152 ColorKind ColorN = getColor(N); 153 if (ColorN == ColorKind::None) 154 continue; 155 if (Color == ColorKind::None) 156 Color = ColorN; 157 else if (Color != ColorKind::None && Color != ColorN) 158 return { false, ColorKind::None }; 159 } 160 return { true, Color }; 161} 162 163void Coloring::build() { 164 // Add Order[P] and Order[conj(P)] to Edges. 165 for (unsigned P = 0; P != Order.size(); ++P) { 166 Node I = Order[P]; 167 if (I != Ignore) { 168 Needed.insert(I); 169 Node PC = Order[conj(P)]; 170 if (PC != Ignore && PC != I) 171 Edges[I].insert(PC); 172 } 173 } 174 // Add I and conj(I) to Edges. 175 for (unsigned I = 0; I != Order.size(); ++I) { 176 if (!Needed.count(I)) 177 continue; 178 Node C = conj(I); 179 // This will create an entry in the edge table, even if I is not 180 // connected to any other node. This is necessary, because it still 181 // needs to be colored. 182 NodeSet &Is = Edges[I]; 183 if (Needed.count(C)) 184 Is.insert(C); 185 } 186} 187 188bool Coloring::color() { 189 SetVector<Node> FirstQ; 190 auto Enqueue = [this,&FirstQ] (Node N) { 191 SetVector<Node> Q; 192 Q.insert(N); 193 for (unsigned I = 0; I != Q.size(); ++I) { 194 NodeSet &Ns = Edges[Q[I]]; 195 Q.insert(Ns.begin(), Ns.end()); 196 } 197 FirstQ.insert(Q.begin(), Q.end()); 198 }; 199 for (Node N : Needed) 200 Enqueue(N); 201 202 for (Node N : FirstQ) { 203 if (Colors.count(N)) 204 continue; 205 NodeSet &Ns = Edges[N]; 206 auto P = getUniqueColor(Ns); 207 if (!P.first) 208 return false; 209 Colors[N] = other(P.second); 210 } 211 212 // First, color nodes that don't have any dups. 213 for (auto E : Edges) { 214 Node N = E.first; 215 if (!Needed.count(conj(N)) || Colors.count(N)) 216 continue; 217 auto P = getUniqueColor(E.second); 218 if (!P.first) 219 return false; 220 Colors[N] = other(P.second); 221 } 222 223 // Now, nodes that are still uncolored. Since the graph can be modified 224 // in this step, create a work queue. 225 std::vector<Node> WorkQ; 226 for (auto E : Edges) { 227 Node N = E.first; 228 if (!Colors.count(N)) 229 WorkQ.push_back(N); 230 } 231 232 for (unsigned I = 0; I < WorkQ.size(); ++I) { 233 Node N = WorkQ[I]; 234 NodeSet &Ns = Edges[N]; 235 auto P = getUniqueColor(Ns); 236 if (P.first) { 237 Colors[N] = other(P.second); 238 continue; 239 } 240 241 // Coloring failed. Split this node. 242 Node C = conj(N); 243 ColorKind ColorN = other(ColorKind::None); 244 ColorKind ColorC = other(ColorN); 245 NodeSet &Cs = Edges[C]; 246 NodeSet CopyNs = Ns; 247 for (Node M : CopyNs) { 248 ColorKind ColorM = getColor(M); 249 if (ColorM == ColorC) { 250 // Connect M with C, disconnect M from N. 251 Cs.insert(M); 252 Edges[M].insert(C); 253 Ns.erase(M); 254 Edges[M].erase(N); 255 } 256 } 257 Colors[N] = ColorN; 258 Colors[C] = ColorC; 259 } 260 261 // Explicitly assign "None" to all uncolored nodes. 262 for (unsigned I = 0; I != Order.size(); ++I) 263 if (Colors.count(I) == 0) 264 Colors[I] = ColorKind::None; 265 266 return true; 267} 268 269#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 270void Coloring::dump() const { 271 dbgs() << "{ Order: {"; 272 for (unsigned I = 0; I != Order.size(); ++I) { 273 Node P = Order[I]; 274 if (P != Ignore) 275 dbgs() << ' ' << P; 276 else 277 dbgs() << " -"; 278 } 279 dbgs() << " }\n"; 280 dbgs() << " Needed: {"; 281 for (Node N : Needed) 282 dbgs() << ' ' << N; 283 dbgs() << " }\n"; 284 285 dbgs() << " Edges: {\n"; 286 for (auto E : Edges) { 287 dbgs() << " " << E.first << " -> {"; 288 for (auto N : E.second) 289 dbgs() << ' ' << N; 290 dbgs() << " }\n"; 291 } 292 dbgs() << " }\n"; 293 294 auto ColorKindToName = [](ColorKind C) { 295 switch (C) { 296 case ColorKind::None: 297 return "None"; 298 case ColorKind::Red: 299 return "Red"; 300 case ColorKind::Black: 301 return "Black"; 302 } 303 llvm_unreachable("all ColorKinds should be handled by the switch above"); 304 }; 305 306 dbgs() << " Colors: {\n"; 307 for (auto C : Colors) 308 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n"; 309 dbgs() << " }\n}\n"; 310} 311#endif 312 313namespace { 314// Base class of for reordering networks. They don't strictly need to be 315// permutations, as outputs with repeated occurrences of an input element 316// are allowed. 317struct PermNetwork { 318 using Controls = std::vector<uint8_t>; 319 using ElemType = int; 320 static constexpr ElemType Ignore = ElemType(-1); 321 322 enum : uint8_t { 323 None, 324 Pass, 325 Switch 326 }; 327 enum : uint8_t { 328 Forward, 329 Reverse 330 }; 331 332 PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) { 333 Order.assign(Ord.data(), Ord.data()+Ord.size()); 334 Log = 0; 335 336 unsigned S = Order.size(); 337 while (S >>= 1) 338 ++Log; 339 340 Table.resize(Order.size()); 341 for (RowType &Row : Table) 342 Row.resize(Mult*Log, None); 343 } 344 345 void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const { 346 unsigned Size = Order.size(); 347 V.resize(Size); 348 for (unsigned I = 0; I != Size; ++I) { 349 unsigned W = 0; 350 for (unsigned L = 0; L != Log; ++L) { 351 unsigned C = ctl(I, StartAt+L) == Switch; 352 if (Dir == Forward) 353 W |= C << (Log-1-L); 354 else 355 W |= C << L; 356 } 357 assert(isUInt<8>(W)); 358 V[I] = uint8_t(W); 359 } 360 } 361 362 uint8_t ctl(ElemType Pos, unsigned Step) const { 363 return Table[Pos][Step]; 364 } 365 unsigned size() const { 366 return Order.size(); 367 } 368 unsigned steps() const { 369 return Log; 370 } 371 372protected: 373 unsigned Log; 374 std::vector<ElemType> Order; 375 using RowType = std::vector<uint8_t>; 376 std::vector<RowType> Table; 377}; 378 379struct ForwardDeltaNetwork : public PermNetwork { 380 ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {} 381 382 bool run(Controls &V) { 383 if (!route(Order.data(), Table.data(), size(), 0)) 384 return false; 385 getControls(V, 0, Forward); 386 return true; 387 } 388 389private: 390 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); 391}; 392 393struct ReverseDeltaNetwork : public PermNetwork { 394 ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {} 395 396 bool run(Controls &V) { 397 if (!route(Order.data(), Table.data(), size(), 0)) 398 return false; 399 getControls(V, 0, Reverse); 400 return true; 401 } 402 403private: 404 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); 405}; 406 407struct BenesNetwork : public PermNetwork { 408 BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {} 409 410 bool run(Controls &F, Controls &R) { 411 if (!route(Order.data(), Table.data(), size(), 0)) 412 return false; 413 414 getControls(F, 0, Forward); 415 getControls(R, Log, Reverse); 416 return true; 417 } 418 419private: 420 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); 421}; 422} // namespace 423 424bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, 425 unsigned Step) { 426 bool UseUp = false, UseDown = false; 427 ElemType Num = Size; 428 429 // Cannot use coloring here, because coloring is used to determine 430 // the "big" switch, i.e. the one that changes halves, and in a forward 431 // network, a color can be simultaneously routed to both halves in the 432 // step we're working on. 433 for (ElemType J = 0; J != Num; ++J) { 434 ElemType I = P[J]; 435 // I is the position in the input, 436 // J is the position in the output. 437 if (I == Ignore) 438 continue; 439 uint8_t S; 440 if (I < Num/2) 441 S = (J < Num/2) ? Pass : Switch; 442 else 443 S = (J < Num/2) ? Switch : Pass; 444 445 // U is the element in the table that needs to be updated. 446 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2); 447 if (U < Num/2) 448 UseUp = true; 449 else 450 UseDown = true; 451 if (T[U][Step] != S && T[U][Step] != None) 452 return false; 453 T[U][Step] = S; 454 } 455 456 for (ElemType J = 0; J != Num; ++J) 457 if (P[J] != Ignore && P[J] >= Num/2) 458 P[J] -= Num/2; 459 460 if (Step+1 < Log) { 461 if (UseUp && !route(P, T, Size/2, Step+1)) 462 return false; 463 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) 464 return false; 465 } 466 return true; 467} 468 469bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, 470 unsigned Step) { 471 unsigned Pets = Log-1 - Step; 472 bool UseUp = false, UseDown = false; 473 ElemType Num = Size; 474 475 // In this step half-switching occurs, so coloring can be used. 476 Coloring G({P,Size}); 477 const Coloring::MapType &M = G.colors(); 478 if (M.empty()) 479 return false; 480 481 ColorKind ColorUp = ColorKind::None; 482 for (ElemType J = 0; J != Num; ++J) { 483 ElemType I = P[J]; 484 // I is the position in the input, 485 // J is the position in the output. 486 if (I == Ignore) 487 continue; 488 ColorKind C = M.at(I); 489 if (C == ColorKind::None) 490 continue; 491 // During "Step", inputs cannot switch halves, so if the "up" color 492 // is still unknown, make sure that it is selected in such a way that 493 // "I" will stay in the same half. 494 bool InpUp = I < Num/2; 495 if (ColorUp == ColorKind::None) 496 ColorUp = InpUp ? C : G.other(C); 497 if ((C == ColorUp) != InpUp) { 498 // If I should go to a different half than where is it now, give up. 499 return false; 500 } 501 502 uint8_t S; 503 if (InpUp) { 504 S = (J < Num/2) ? Pass : Switch; 505 UseUp = true; 506 } else { 507 S = (J < Num/2) ? Switch : Pass; 508 UseDown = true; 509 } 510 T[J][Pets] = S; 511 } 512 513 // Reorder the working permutation according to the computed switch table 514 // for the last step (i.e. Pets). 515 for (ElemType J = 0, E = Size / 2; J != E; ++J) { 516 ElemType PJ = P[J]; // Current values of P[J] 517 ElemType PC = P[J+Size/2]; // and P[conj(J)] 518 ElemType QJ = PJ; // New values of P[J] 519 ElemType QC = PC; // and P[conj(J)] 520 if (T[J][Pets] == Switch) 521 QC = PJ; 522 if (T[J+Size/2][Pets] == Switch) 523 QJ = PC; 524 P[J] = QJ; 525 P[J+Size/2] = QC; 526 } 527 528 for (ElemType J = 0; J != Num; ++J) 529 if (P[J] != Ignore && P[J] >= Num/2) 530 P[J] -= Num/2; 531 532 if (Step+1 < Log) { 533 if (UseUp && !route(P, T, Size/2, Step+1)) 534 return false; 535 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) 536 return false; 537 } 538 return true; 539} 540 541bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size, 542 unsigned Step) { 543 Coloring G({P,Size}); 544 const Coloring::MapType &M = G.colors(); 545 if (M.empty()) 546 return false; 547 ElemType Num = Size; 548 549 unsigned Pets = 2*Log-1 - Step; 550 bool UseUp = false, UseDown = false; 551 552 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will 553 // result in different controls. Let's pick the one where the first 554 // control will be "Pass". 555 ColorKind ColorUp = ColorKind::None; 556 for (ElemType J = 0; J != Num; ++J) { 557 ElemType I = P[J]; 558 if (I == Ignore) 559 continue; 560 ColorKind C = M.at(I); 561 if (C == ColorKind::None) 562 continue; 563 if (ColorUp == ColorKind::None) { 564 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black; 565 } 566 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2; 567 if (C == ColorUp) { 568 if (I < Num/2) 569 T[I][Step] = Pass; 570 else 571 T[CI][Step] = Switch; 572 T[J][Pets] = (J < Num/2) ? Pass : Switch; 573 UseUp = true; 574 } else { // Down 575 if (I < Num/2) 576 T[CI][Step] = Switch; 577 else 578 T[I][Step] = Pass; 579 T[J][Pets] = (J < Num/2) ? Switch : Pass; 580 UseDown = true; 581 } 582 } 583 584 // Reorder the working permutation according to the computed switch table 585 // for the last step (i.e. Pets). 586 for (ElemType J = 0; J != Num/2; ++J) { 587 ElemType PJ = P[J]; // Current values of P[J] 588 ElemType PC = P[J+Num/2]; // and P[conj(J)] 589 ElemType QJ = PJ; // New values of P[J] 590 ElemType QC = PC; // and P[conj(J)] 591 if (T[J][Pets] == Switch) 592 QC = PJ; 593 if (T[J+Num/2][Pets] == Switch) 594 QJ = PC; 595 P[J] = QJ; 596 P[J+Num/2] = QC; 597 } 598 599 for (ElemType J = 0; J != Num; ++J) 600 if (P[J] != Ignore && P[J] >= Num/2) 601 P[J] -= Num/2; 602 603 if (Step+1 < Log) { 604 if (UseUp && !route(P, T, Size/2, Step+1)) 605 return false; 606 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) 607 return false; 608 } 609 return true; 610} 611 612// -------------------------------------------------------------------- 613// Support for building selection results (output instructions that are 614// parts of the final selection). 615 616namespace { 617struct OpRef { 618 OpRef(SDValue V) : OpV(V) {} 619 bool isValue() const { return OpV.getNode() != nullptr; } 620 bool isValid() const { return isValue() || !(OpN & Invalid); } 621 static OpRef res(int N) { return OpRef(Whole | (N & Index)); } 622 static OpRef fail() { return OpRef(Invalid); } 623 624 static OpRef lo(const OpRef &R) { 625 assert(!R.isValue()); 626 return OpRef(R.OpN & (Undef | Index | LoHalf)); 627 } 628 static OpRef hi(const OpRef &R) { 629 assert(!R.isValue()); 630 return OpRef(R.OpN & (Undef | Index | HiHalf)); 631 } 632 static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); } 633 634 // Direct value. 635 SDValue OpV = SDValue(); 636 637 // Reference to the operand of the input node: 638 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the 639 // operand index: 640 // If bit 30 is set, it's the high half of the operand. 641 // If bit 29 is set, it's the low half of the operand. 642 unsigned OpN = 0; 643 644 enum : unsigned { 645 Invalid = 0x10000000, 646 LoHalf = 0x20000000, 647 HiHalf = 0x40000000, 648 Whole = LoHalf | HiHalf, 649 Undef = 0x80000000, 650 Index = 0x0FFFFFFF, // Mask of the index value. 651 IndexBits = 28, 652 }; 653 654 LLVM_DUMP_METHOD 655 void print(raw_ostream &OS, const SelectionDAG &G) const; 656 657private: 658 OpRef(unsigned N) : OpN(N) {} 659}; 660 661struct NodeTemplate { 662 NodeTemplate() = default; 663 unsigned Opc = 0; 664 MVT Ty = MVT::Other; 665 std::vector<OpRef> Ops; 666 667 LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const; 668}; 669 670struct ResultStack { 671 ResultStack(SDNode *Inp) 672 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {} 673 SDNode *InpNode; 674 MVT InpTy; 675 unsigned push(const NodeTemplate &Res) { 676 List.push_back(Res); 677 return List.size()-1; 678 } 679 unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) { 680 NodeTemplate Res; 681 Res.Opc = Opc; 682 Res.Ty = Ty; 683 Res.Ops = Ops; 684 return push(Res); 685 } 686 bool empty() const { return List.empty(); } 687 unsigned size() const { return List.size(); } 688 unsigned top() const { return size()-1; } 689 const NodeTemplate &operator[](unsigned I) const { return List[I]; } 690 unsigned reset(unsigned NewTop) { 691 List.resize(NewTop+1); 692 return NewTop; 693 } 694 695 using BaseType = std::vector<NodeTemplate>; 696 BaseType::iterator begin() { return List.begin(); } 697 BaseType::iterator end() { return List.end(); } 698 BaseType::const_iterator begin() const { return List.begin(); } 699 BaseType::const_iterator end() const { return List.end(); } 700 701 BaseType List; 702 703 LLVM_DUMP_METHOD 704 void print(raw_ostream &OS, const SelectionDAG &G) const; 705}; 706} // namespace 707 708#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 709void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const { 710 if (isValue()) { 711 OpV.getNode()->print(OS, &G); 712 return; 713 } 714 if (OpN & Invalid) { 715 OS << "invalid"; 716 return; 717 } 718 if (OpN & Undef) { 719 OS << "undef"; 720 return; 721 } 722 if ((OpN & Whole) != Whole) { 723 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf); 724 if (OpN & LoHalf) 725 OS << "lo "; 726 else 727 OS << "hi "; 728 } 729 OS << '#' << SignExtend32(OpN & Index, IndexBits); 730} 731 732void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const { 733 const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo(); 734 OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " " 735 << TII.getName(Opc); 736 bool Comma = false; 737 for (const auto &R : Ops) { 738 if (Comma) 739 OS << ','; 740 Comma = true; 741 OS << ' '; 742 R.print(OS, G); 743 } 744} 745 746void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const { 747 OS << "Input node:\n"; 748#ifndef NDEBUG 749 InpNode->dumpr(&G); 750#endif 751 OS << "Result templates:\n"; 752 for (unsigned I = 0, E = List.size(); I != E; ++I) { 753 OS << '[' << I << "] "; 754 List[I].print(OS, G); 755 OS << '\n'; 756 } 757} 758#endif 759 760namespace { 761struct ShuffleMask { 762 ShuffleMask(ArrayRef<int> M) : Mask(M) { 763 for (unsigned I = 0, E = Mask.size(); I != E; ++I) { 764 int M = Mask[I]; 765 if (M == -1) 766 continue; 767 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M); 768 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M); 769 } 770 } 771 772 ArrayRef<int> Mask; 773 int MinSrc = -1, MaxSrc = -1; 774 775 ShuffleMask lo() const { 776 size_t H = Mask.size()/2; 777 return ShuffleMask(Mask.take_front(H)); 778 } 779 ShuffleMask hi() const { 780 size_t H = Mask.size()/2; 781 return ShuffleMask(Mask.take_back(H)); 782 } 783 784 void print(raw_ostream &OS) const { 785 OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {"; 786 for (int M : Mask) 787 OS << ' ' << M; 788 OS << " }"; 789 } 790}; 791} // namespace 792 793// -------------------------------------------------------------------- 794// The HvxSelector class. 795 796static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) { 797 return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo()); 798} 799static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) { 800 return static_cast<const HexagonSubtarget&>(G.getSubtarget()); 801} 802 803namespace llvm { 804 struct HvxSelector { 805 const HexagonTargetLowering &Lower; 806 HexagonDAGToDAGISel &ISel; 807 SelectionDAG &DAG; 808 const HexagonSubtarget &HST; 809 const unsigned HwLen; 810 811 HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G) 812 : Lower(getHexagonLowering(G)), ISel(HS), DAG(G), 813 HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {} 814 815 MVT getSingleVT(MVT ElemTy) const { 816 unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8); 817 return MVT::getVectorVT(ElemTy, NumElems); 818 } 819 820 MVT getPairVT(MVT ElemTy) const { 821 unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8); 822 return MVT::getVectorVT(ElemTy, NumElems); 823 } 824 825 void selectShuffle(SDNode *N); 826 void selectRor(SDNode *N); 827 void selectVAlign(SDNode *N); 828 829 private: 830 void materialize(const ResultStack &Results); 831 832 SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl); 833 834 enum : unsigned { 835 None, 836 PackMux, 837 }; 838 OpRef concat(OpRef Va, OpRef Vb, ResultStack &Results); 839 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, 840 MutableArrayRef<int> NewMask, unsigned Options = None); 841 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, 842 MutableArrayRef<int> NewMask); 843 OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 844 ResultStack &Results); 845 OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 846 ResultStack &Results); 847 848 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results); 849 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); 850 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results); 851 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); 852 853 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results); 854 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); 855 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results); 856 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results); 857 858 bool selectVectorConstants(SDNode *N); 859 bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy, 860 SDValue Va, SDValue Vb, SDNode *N); 861 862 }; 863} 864 865static void splitMask(ArrayRef<int> Mask, MutableArrayRef<int> MaskL, 866 MutableArrayRef<int> MaskR) { 867 unsigned VecLen = Mask.size(); 868 assert(MaskL.size() == VecLen && MaskR.size() == VecLen); 869 for (unsigned I = 0; I != VecLen; ++I) { 870 int M = Mask[I]; 871 if (M < 0) { 872 MaskL[I] = MaskR[I] = -1; 873 } else if (unsigned(M) < VecLen) { 874 MaskL[I] = M; 875 MaskR[I] = -1; 876 } else { 877 MaskL[I] = -1; 878 MaskR[I] = M-VecLen; 879 } 880 } 881} 882 883static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc, 884 unsigned MaxLen) { 885 assert(A.size() > 0 && A.size() >= MaxLen); 886 int F = A[0]; 887 int E = F; 888 for (unsigned I = 1; I != MaxLen; ++I) { 889 if (A[I] - E != Inc) 890 return { F, I }; 891 E = A[I]; 892 } 893 return { F, MaxLen }; 894} 895 896static bool isUndef(ArrayRef<int> Mask) { 897 for (int Idx : Mask) 898 if (Idx != -1) 899 return false; 900 return true; 901} 902 903static bool isIdentity(ArrayRef<int> Mask) { 904 for (int I = 0, E = Mask.size(); I != E; ++I) { 905 int M = Mask[I]; 906 if (M >= 0 && M != I) 907 return false; 908 } 909 return true; 910} 911 912static bool isPermutation(ArrayRef<int> Mask) { 913 // Check by adding all numbers only works if there is no overflow. 914 assert(Mask.size() < 0x00007FFF && "Sanity failure"); 915 int Sum = 0; 916 for (int Idx : Mask) { 917 if (Idx == -1) 918 return false; 919 Sum += Idx; 920 } 921 int N = Mask.size(); 922 return 2*Sum == N*(N-1); 923} 924 925bool HvxSelector::selectVectorConstants(SDNode *N) { 926 // Constant vectors are generated as loads from constant pools or as 927 // splats of a constant value. Since they are generated during the 928 // selection process, the main selection algorithm is not aware of them. 929 // Select them directly here. 930 SmallVector<SDNode*,4> Nodes; 931 SetVector<SDNode*> WorkQ; 932 933 // The one-use test for VSPLATW's operand may fail due to dead nodes 934 // left over in the DAG. 935 DAG.RemoveDeadNodes(); 936 937 // The DAG can change (due to CSE) during selection, so cache all the 938 // unselected nodes first to avoid traversing a mutating DAG. 939 940 auto IsNodeToSelect = [] (SDNode *N) { 941 if (N->isMachineOpcode()) 942 return false; 943 switch (N->getOpcode()) { 944 case HexagonISD::VZERO: 945 case HexagonISD::VSPLATW: 946 return true; 947 case ISD::LOAD: { 948 SDValue Addr = cast<LoadSDNode>(N)->getBasePtr(); 949 unsigned AddrOpc = Addr.getOpcode(); 950 if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) 951 if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) 952 return true; 953 } 954 break; 955 } 956 // Make sure to select the operand of VSPLATW. 957 bool IsSplatOp = N->hasOneUse() && 958 N->use_begin()->getOpcode() == HexagonISD::VSPLATW; 959 return IsSplatOp; 960 }; 961 962 WorkQ.insert(N); 963 for (unsigned i = 0; i != WorkQ.size(); ++i) { 964 SDNode *W = WorkQ[i]; 965 if (IsNodeToSelect(W)) 966 Nodes.push_back(W); 967 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j) 968 WorkQ.insert(W->getOperand(j).getNode()); 969 } 970 971 for (SDNode *L : Nodes) 972 ISel.Select(L); 973 974 return !Nodes.empty(); 975} 976 977void HvxSelector::materialize(const ResultStack &Results) { 978 DEBUG_WITH_TYPE("isel", { 979 dbgs() << "Materializing\n"; 980 Results.print(dbgs(), DAG); 981 }); 982 if (Results.empty()) 983 return; 984 const SDLoc &dl(Results.InpNode); 985 std::vector<SDValue> Output; 986 987 for (unsigned I = 0, E = Results.size(); I != E; ++I) { 988 const NodeTemplate &Node = Results[I]; 989 std::vector<SDValue> Ops; 990 for (const OpRef &R : Node.Ops) { 991 assert(R.isValid()); 992 if (R.isValue()) { 993 Ops.push_back(R.OpV); 994 continue; 995 } 996 if (R.OpN & OpRef::Undef) { 997 MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index); 998 Ops.push_back(ISel.selectUndef(dl, MVT(SVT))); 999 continue; 1000 } 1001 // R is an index of a result. 1002 unsigned Part = R.OpN & OpRef::Whole; 1003 int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits); 1004 if (Idx < 0) 1005 Idx += I; 1006 assert(Idx >= 0 && unsigned(Idx) < Output.size()); 1007 SDValue Op = Output[Idx]; 1008 MVT OpTy = Op.getValueType().getSimpleVT(); 1009 if (Part != OpRef::Whole) { 1010 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf); 1011 MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(), 1012 OpTy.getVectorNumElements()/2); 1013 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo 1014 : Hexagon::vsub_hi; 1015 Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op); 1016 } 1017 Ops.push_back(Op); 1018 } // for (Node : Results) 1019 1020 assert(Node.Ty != MVT::Other); 1021 SDNode *ResN = (Node.Opc == TargetOpcode::COPY) 1022 ? Ops.front().getNode() 1023 : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops); 1024 Output.push_back(SDValue(ResN, 0)); 1025 } 1026 1027 SDNode *OutN = Output.back().getNode(); 1028 SDNode *InpN = Results.InpNode; 1029 DEBUG_WITH_TYPE("isel", { 1030 dbgs() << "Generated node:\n"; 1031 OutN->dumpr(&DAG); 1032 }); 1033 1034 ISel.ReplaceNode(InpN, OutN); 1035 selectVectorConstants(OutN); 1036 DAG.RemoveDeadNodes(); 1037} 1038 1039OpRef HvxSelector::concat(OpRef Lo, OpRef Hi, ResultStack &Results) { 1040 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1041 const SDLoc &dl(Results.InpNode); 1042 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), { 1043 DAG.getTargetConstant(Hexagon::HvxWRRegClassID, dl, MVT::i32), 1044 Lo, DAG.getTargetConstant(Hexagon::vsub_lo, dl, MVT::i32), 1045 Hi, DAG.getTargetConstant(Hexagon::vsub_hi, dl, MVT::i32), 1046 }); 1047 return OpRef::res(Results.top()); 1048} 1049 1050// Va, Vb are single vectors, SM can be arbitrarily long. 1051OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, 1052 ResultStack &Results, MutableArrayRef<int> NewMask, 1053 unsigned Options) { 1054 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1055 if (!Va.isValid() || !Vb.isValid()) 1056 return OpRef::fail(); 1057 1058 int VecLen = SM.Mask.size(); 1059 MVT Ty = getSingleVT(MVT::i8); 1060 1061 auto IsExtSubvector = [] (ShuffleMask M) { 1062 assert(M.MinSrc >= 0 && M.MaxSrc >= 0); 1063 for (int I = 0, E = M.Mask.size(); I != E; ++I) { 1064 if (M.Mask[I] >= 0 && M.Mask[I]-I != M.MinSrc) 1065 return false; 1066 } 1067 return true; 1068 }; 1069 1070 if (SM.MaxSrc - SM.MinSrc < int(HwLen)) { 1071 if (SM.MinSrc == 0 || SM.MinSrc == int(HwLen) || !IsExtSubvector(SM)) { 1072 // If the mask picks elements from only one of the operands, return 1073 // that operand, and update the mask to use index 0 to refer to the 1074 // first element of that operand. 1075 // If the mask extracts a subvector, it will be handled below, so 1076 // skip it here. 1077 if (SM.MaxSrc < int(HwLen)) { 1078 memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen); 1079 return Va; 1080 } 1081 if (SM.MinSrc >= int(HwLen)) { 1082 for (int I = 0; I != VecLen; ++I) { 1083 int M = SM.Mask[I]; 1084 if (M != -1) 1085 M -= HwLen; 1086 NewMask[I] = M; 1087 } 1088 return Vb; 1089 } 1090 } 1091 int MinSrc = SM.MinSrc; 1092 if (SM.MaxSrc < int(HwLen)) { 1093 Vb = Va; 1094 } else if (SM.MinSrc > int(HwLen)) { 1095 Va = Vb; 1096 MinSrc = SM.MinSrc - HwLen; 1097 } 1098 const SDLoc &dl(Results.InpNode); 1099 if (isUInt<3>(MinSrc) || isUInt<3>(HwLen-MinSrc)) { 1100 bool IsRight = isUInt<3>(MinSrc); // Right align. 1101 SDValue S = DAG.getTargetConstant(IsRight ? MinSrc : HwLen-MinSrc, 1102 dl, MVT::i32); 1103 unsigned Opc = IsRight ? Hexagon::V6_valignbi 1104 : Hexagon::V6_vlalignbi; 1105 Results.push(Opc, Ty, {Vb, Va, S}); 1106 } else { 1107 SDValue S = DAG.getTargetConstant(MinSrc, dl, MVT::i32); 1108 Results.push(Hexagon::A2_tfrsi, MVT::i32, {S}); 1109 unsigned Top = Results.top(); 1110 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)}); 1111 } 1112 for (int I = 0; I != VecLen; ++I) { 1113 int M = SM.Mask[I]; 1114 if (M != -1) 1115 M -= SM.MinSrc; 1116 NewMask[I] = M; 1117 } 1118 return OpRef::res(Results.top()); 1119 } 1120 1121 if (Options & PackMux) { 1122 // If elements picked from Va and Vb have all different (source) indexes 1123 // (relative to the start of the argument), do a mux, and update the mask. 1124 BitVector Picked(HwLen); 1125 SmallVector<uint8_t,128> MuxBytes(HwLen); 1126 bool CanMux = true; 1127 for (int I = 0; I != VecLen; ++I) { 1128 int M = SM.Mask[I]; 1129 if (M == -1) 1130 continue; 1131 if (M >= int(HwLen)) 1132 M -= HwLen; 1133 else 1134 MuxBytes[M] = 0xFF; 1135 if (Picked[M]) { 1136 CanMux = false; 1137 break; 1138 } 1139 NewMask[I] = M; 1140 } 1141 if (CanMux) 1142 return vmuxs(MuxBytes, Va, Vb, Results); 1143 } 1144 1145 return OpRef::fail(); 1146} 1147 1148OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb, 1149 ResultStack &Results, MutableArrayRef<int> NewMask) { 1150 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1151 unsigned HalfMask = 0; 1152 unsigned LogHw = Log2_32(HwLen); 1153 for (int M : SM.Mask) { 1154 if (M == -1) 1155 continue; 1156 HalfMask |= (1u << (M >> LogHw)); 1157 } 1158 1159 if (HalfMask == 0) 1160 return OpRef::undef(getPairVT(MVT::i8)); 1161 1162 // If more than two halves are used, bail. 1163 // TODO: be more aggressive here? 1164 if (countPopulation(HalfMask) > 2) 1165 return OpRef::fail(); 1166 1167 MVT HalfTy = getSingleVT(MVT::i8); 1168 1169 OpRef Inp[2] = { Va, Vb }; 1170 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) }; 1171 1172 uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; 1173 unsigned Idx = 0; 1174 for (unsigned I = 0; I != 4; ++I) { 1175 if ((HalfMask & (1u << I)) == 0) 1176 continue; 1177 assert(Idx < 2); 1178 OpRef Op = Inp[I/2]; 1179 Out[Idx] = (I & 1) ? OpRef::hi(Op) : OpRef::lo(Op); 1180 HalfIdx[I] = Idx++; 1181 } 1182 1183 int VecLen = SM.Mask.size(); 1184 for (int I = 0; I != VecLen; ++I) { 1185 int M = SM.Mask[I]; 1186 if (M >= 0) { 1187 uint8_t Idx = HalfIdx[M >> LogHw]; 1188 assert(Idx == 0 || Idx == 1); 1189 M = (M & (HwLen-1)) + HwLen*Idx; 1190 } 1191 NewMask[I] = M; 1192 } 1193 1194 return concat(Out[0], Out[1], Results); 1195} 1196 1197OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 1198 ResultStack &Results) { 1199 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1200 MVT ByteTy = getSingleVT(MVT::i8); 1201 MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX 1202 const SDLoc &dl(Results.InpNode); 1203 SDValue B = getVectorConstant(Bytes, dl); 1204 Results.push(Hexagon::V6_vd0, ByteTy, {}); 1205 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)}); 1206 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va}); 1207 return OpRef::res(Results.top()); 1208} 1209 1210OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, 1211 ResultStack &Results) { 1212 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1213 size_t S = Bytes.size() / 2; 1214 OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results); 1215 OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results); 1216 return concat(L, H, Results); 1217} 1218 1219OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1220 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1221 unsigned VecLen = SM.Mask.size(); 1222 assert(HwLen == VecLen); 1223 (void)VecLen; 1224 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); })); 1225 1226 if (isIdentity(SM.Mask)) 1227 return Va; 1228 if (isUndef(SM.Mask)) 1229 return OpRef::undef(getSingleVT(MVT::i8)); 1230 1231 OpRef P = perfect(SM, Va, Results); 1232 if (P.isValid()) 1233 return P; 1234 return butterfly(SM, Va, Results); 1235} 1236 1237OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, 1238 ResultStack &Results) { 1239 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1240 if (isUndef(SM.Mask)) 1241 return OpRef::undef(getSingleVT(MVT::i8)); 1242 1243 OpRef C = contracting(SM, Va, Vb, Results); 1244 if (C.isValid()) 1245 return C; 1246 1247 int VecLen = SM.Mask.size(); 1248 SmallVector<int,128> NewMask(VecLen); 1249 OpRef P = packs(SM, Va, Vb, Results, NewMask); 1250 if (P.isValid()) 1251 return shuffs1(ShuffleMask(NewMask), P, Results); 1252 1253 SmallVector<int,128> MaskL(VecLen), MaskR(VecLen); 1254 splitMask(SM.Mask, MaskL, MaskR); 1255 1256 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results); 1257 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results); 1258 if (!L.isValid() || !R.isValid()) 1259 return OpRef::fail(); 1260 1261 SmallVector<uint8_t,128> Bytes(VecLen); 1262 for (int I = 0; I != VecLen; ++I) { 1263 if (MaskL[I] != -1) 1264 Bytes[I] = 0xFF; 1265 } 1266 return vmuxs(Bytes, L, R, Results); 1267} 1268 1269OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1270 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1271 int VecLen = SM.Mask.size(); 1272 1273 if (isIdentity(SM.Mask)) 1274 return Va; 1275 if (isUndef(SM.Mask)) 1276 return OpRef::undef(getPairVT(MVT::i8)); 1277 1278 SmallVector<int,128> PackedMask(VecLen); 1279 OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask); 1280 if (P.isValid()) { 1281 ShuffleMask PM(PackedMask); 1282 OpRef E = expanding(PM, P, Results); 1283 if (E.isValid()) 1284 return E; 1285 1286 OpRef L = shuffs1(PM.lo(), P, Results); 1287 OpRef H = shuffs1(PM.hi(), P, Results); 1288 if (L.isValid() && H.isValid()) 1289 return concat(L, H, Results); 1290 } 1291 1292 OpRef R = perfect(SM, Va, Results); 1293 if (R.isValid()) 1294 return R; 1295 // TODO commute the mask and try the opposite order of the halves. 1296 1297 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results); 1298 OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results); 1299 if (L.isValid() && H.isValid()) 1300 return concat(L, H, Results); 1301 1302 return OpRef::fail(); 1303} 1304 1305OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, 1306 ResultStack &Results) { 1307 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1308 if (isUndef(SM.Mask)) 1309 return OpRef::undef(getPairVT(MVT::i8)); 1310 1311 int VecLen = SM.Mask.size(); 1312 SmallVector<int,256> PackedMask(VecLen); 1313 OpRef P = packp(SM, Va, Vb, Results, PackedMask); 1314 if (P.isValid()) 1315 return shuffp1(ShuffleMask(PackedMask), P, Results); 1316 1317 SmallVector<int,256> MaskL(VecLen), MaskR(VecLen); 1318 splitMask(SM.Mask, MaskL, MaskR); 1319 1320 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results); 1321 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results); 1322 if (!L.isValid() || !R.isValid()) 1323 return OpRef::fail(); 1324 1325 // Mux the results. 1326 SmallVector<uint8_t,256> Bytes(VecLen); 1327 for (int I = 0; I != VecLen; ++I) { 1328 if (MaskL[I] != -1) 1329 Bytes[I] = 0xFF; 1330 } 1331 return vmuxp(Bytes, L, R, Results); 1332} 1333 1334namespace { 1335 struct Deleter : public SelectionDAG::DAGNodeDeletedListener { 1336 template <typename T> 1337 Deleter(SelectionDAG &D, T &C) 1338 : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) { 1339 C.erase(N); 1340 }) {} 1341 }; 1342 1343 template <typename T> 1344 struct NullifyingVector : public T { 1345 DenseMap<SDNode*, SDNode**> Refs; 1346 NullifyingVector(T &&V) : T(V) { 1347 for (unsigned i = 0, e = T::size(); i != e; ++i) { 1348 SDNode *&N = T::operator[](i); 1349 Refs[N] = &N; 1350 } 1351 } 1352 void erase(SDNode *N) { 1353 auto F = Refs.find(N); 1354 if (F != Refs.end()) 1355 *F->second = nullptr; 1356 } 1357 }; 1358} 1359 1360bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, 1361 MVT ResTy, SDValue Va, SDValue Vb, 1362 SDNode *N) { 1363 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1364 MVT ElemTy = ResTy.getVectorElementType(); 1365 assert(ElemTy == MVT::i8); 1366 unsigned VecLen = Mask.size(); 1367 bool HavePairs = (2*HwLen == VecLen); 1368 MVT SingleTy = getSingleVT(MVT::i8); 1369 1370 // The prior attempts to handle this shuffle may have left a bunch of 1371 // dead nodes in the DAG (such as constants). These nodes will be added 1372 // at the end of DAG's node list, which at that point had already been 1373 // sorted topologically. In the main selection loop, the node list is 1374 // traversed backwards from the root node, which means that any new 1375 // nodes (from the end of the list) will not be visited. 1376 // Scalarization will replace the shuffle node with the scalarized 1377 // expression, and if that expression reused any if the leftoever (dead) 1378 // nodes, these nodes would not be selected (since the "local" selection 1379 // only visits nodes that are not in AllNodes). 1380 // To avoid this issue, remove all dead nodes from the DAG now. 1381 DAG.RemoveDeadNodes(); 1382 DenseSet<SDNode*> AllNodes; 1383 for (SDNode &S : DAG.allnodes()) 1384 AllNodes.insert(&S); 1385 1386 Deleter DUA(DAG, AllNodes); 1387 1388 SmallVector<SDValue,128> Ops; 1389 LLVMContext &Ctx = *DAG.getContext(); 1390 MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT(); 1391 for (int I : Mask) { 1392 if (I < 0) { 1393 Ops.push_back(ISel.selectUndef(dl, LegalTy)); 1394 continue; 1395 } 1396 SDValue Vec; 1397 unsigned M = I; 1398 if (M < VecLen) { 1399 Vec = Va; 1400 } else { 1401 Vec = Vb; 1402 M -= VecLen; 1403 } 1404 if (HavePairs) { 1405 if (M < HwLen) { 1406 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec); 1407 } else { 1408 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec); 1409 M -= HwLen; 1410 } 1411 } 1412 SDValue Idx = DAG.getConstant(M, dl, MVT::i32); 1413 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx}); 1414 SDValue L = Lower.LowerOperation(Ex, DAG); 1415 assert(L.getNode()); 1416 Ops.push_back(L); 1417 } 1418 1419 SDValue LV; 1420 if (2*HwLen == VecLen) { 1421 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen}); 1422 SDValue L0 = Lower.LowerOperation(B0, DAG); 1423 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen}); 1424 SDValue L1 = Lower.LowerOperation(B1, DAG); 1425 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering) 1426 // functions may expect to be called only for illegal operations, so 1427 // make sure that they are not called for legal ones. Develop a better 1428 // mechanism for dealing with this. 1429 LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1}); 1430 } else { 1431 SDValue BV = DAG.getBuildVector(ResTy, dl, Ops); 1432 LV = Lower.LowerOperation(BV, DAG); 1433 } 1434 1435 assert(!N->use_empty()); 1436 ISel.ReplaceNode(N, LV.getNode()); 1437 1438 if (AllNodes.count(LV.getNode())) { 1439 DAG.RemoveDeadNodes(); 1440 return true; 1441 } 1442 1443 // The lowered build-vector node will now need to be selected. It needs 1444 // to be done here because this node and its submodes are not included 1445 // in the main selection loop. 1446 // Implement essentially the same topological ordering algorithm as is 1447 // used in SelectionDAGISel. 1448 1449 SetVector<SDNode*> SubNodes, TmpQ; 1450 std::map<SDNode*,unsigned> NumOps; 1451 1452 SubNodes.insert(LV.getNode()); 1453 for (unsigned I = 0; I != SubNodes.size(); ++I) { 1454 unsigned OpN = 0; 1455 SDNode *S = SubNodes[I]; 1456 for (SDValue Op : S->ops()) { 1457 if (AllNodes.count(Op.getNode())) 1458 continue; 1459 SubNodes.insert(Op.getNode()); 1460 ++OpN; 1461 } 1462 NumOps.insert({S, OpN}); 1463 if (OpN == 0) 1464 TmpQ.insert(S); 1465 } 1466 1467 for (unsigned I = 0; I != TmpQ.size(); ++I) { 1468 SDNode *S = TmpQ[I]; 1469 for (SDNode *U : S->uses()) { 1470 if (!SubNodes.count(U)) 1471 continue; 1472 auto F = NumOps.find(U); 1473 assert(F != NumOps.end()); 1474 assert(F->second > 0); 1475 if (!--F->second) 1476 TmpQ.insert(F->first); 1477 } 1478 } 1479 assert(SubNodes.size() == TmpQ.size()); 1480 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector()); 1481 1482 Deleter DUQ(DAG, Queue); 1483 for (SDNode *S : reverse(Queue)) 1484 if (S != nullptr) 1485 ISel.Select(S); 1486 1487 DAG.RemoveDeadNodes(); 1488 return true; 1489} 1490 1491OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb, 1492 ResultStack &Results) { 1493 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1494 if (!Va.isValid() || !Vb.isValid()) 1495 return OpRef::fail(); 1496 1497 // Contracting shuffles, i.e. instructions that always discard some bytes 1498 // from the operand vectors. 1499 // 1500 // V6_vshuff{e,o}b 1501 // V6_vdealb4w 1502 // V6_vpack{e,o}{b,h} 1503 1504 int VecLen = SM.Mask.size(); 1505 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen); 1506 MVT ResTy = getSingleVT(MVT::i8); 1507 1508 // The following shuffles only work for bytes and halfwords. This requires 1509 // the strip length to be 1 or 2. 1510 if (Strip.second != 1 && Strip.second != 2) 1511 return OpRef::fail(); 1512 1513 // The patterns for the shuffles, in terms of the starting offsets of the 1514 // consecutive strips (L = length of the strip, N = VecLen): 1515 // 1516 // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2 1517 // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2 1518 // 1519 // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2 1520 // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2 1521 // 1522 // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ... 1523 1524 // The value of the element in the mask following the strip will decide 1525 // what kind of a shuffle this can be. 1526 int NextInMask = SM.Mask[Strip.second]; 1527 1528 // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask 1529 // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would 1530 // satisfy this. 1531 if (NextInMask < VecLen) { 1532 // vpack{e,o} or vdealb4w 1533 if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) { 1534 int N = VecLen; 1535 // Check if this is vdealb4w (L=1). 1536 for (int I = 0; I != N/4; ++I) 1537 if (SM.Mask[I] != 4*I) 1538 return OpRef::fail(); 1539 for (int I = 0; I != N/4; ++I) 1540 if (SM.Mask[I+N/4] != 2 + 4*I) 1541 return OpRef::fail(); 1542 for (int I = 0; I != N/4; ++I) 1543 if (SM.Mask[I+N/2] != N + 4*I) 1544 return OpRef::fail(); 1545 for (int I = 0; I != N/4; ++I) 1546 if (SM.Mask[I+3*N/4] != N+2 + 4*I) 1547 return OpRef::fail(); 1548 // Matched mask for vdealb4w. 1549 Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va}); 1550 return OpRef::res(Results.top()); 1551 } 1552 1553 // Check if this is vpack{e,o}. 1554 int N = VecLen; 1555 int L = Strip.second; 1556 // Check if the first strip starts at 0 or at L. 1557 if (Strip.first != 0 && Strip.first != L) 1558 return OpRef::fail(); 1559 // Examine the rest of the mask. 1560 for (int I = L; I < N; I += L) { 1561 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); 1562 // Check whether the mask element at the beginning of each strip 1563 // increases by 2L each time. 1564 if (S.first - Strip.first != 2*I) 1565 return OpRef::fail(); 1566 // Check whether each strip is of the same length. 1567 if (S.second != unsigned(L)) 1568 return OpRef::fail(); 1569 } 1570 1571 // Strip.first == 0 => vpacke 1572 // Strip.first == L => vpacko 1573 assert(Strip.first == 0 || Strip.first == L); 1574 using namespace Hexagon; 1575 NodeTemplate Res; 1576 Res.Opc = Strip.second == 1 // Number of bytes. 1577 ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob) 1578 : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh); 1579 Res.Ty = ResTy; 1580 Res.Ops = { Vb, Va }; 1581 Results.push(Res); 1582 return OpRef::res(Results.top()); 1583 } 1584 1585 // Check if this is vshuff{e,o}. 1586 int N = VecLen; 1587 int L = Strip.second; 1588 std::pair<int,unsigned> PrevS = Strip; 1589 bool Flip = false; 1590 for (int I = L; I < N; I += L) { 1591 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); 1592 if (S.second != PrevS.second) 1593 return OpRef::fail(); 1594 int Diff = Flip ? PrevS.first - S.first + 2*L 1595 : S.first - PrevS.first; 1596 if (Diff != N) 1597 return OpRef::fail(); 1598 Flip ^= true; 1599 PrevS = S; 1600 } 1601 // Strip.first == 0 => vshuffe 1602 // Strip.first == L => vshuffo 1603 assert(Strip.first == 0 || Strip.first == L); 1604 using namespace Hexagon; 1605 NodeTemplate Res; 1606 Res.Opc = Strip.second == 1 // Number of bytes. 1607 ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob) 1608 : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh); 1609 Res.Ty = ResTy; 1610 Res.Ops = { Vb, Va }; 1611 Results.push(Res); 1612 return OpRef::res(Results.top()); 1613} 1614 1615OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1616 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1617 // Expanding shuffles (using all elements and inserting into larger vector): 1618 // 1619 // V6_vunpacku{b,h} [*] 1620 // 1621 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask. 1622 // 1623 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so 1624 // they are not shuffles. 1625 // 1626 // The argument is a single vector. 1627 1628 int VecLen = SM.Mask.size(); 1629 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type"); 1630 1631 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen); 1632 1633 // The patterns for the unpacks, in terms of the starting offsets of the 1634 // consecutive strips (L = length of the strip, N = VecLen): 1635 // 1636 // vunpacku: 0, -1, L, -1, 2L, -1 ... 1637 1638 if (Strip.first != 0) 1639 return OpRef::fail(); 1640 1641 // The vunpackus only handle byte and half-word. 1642 if (Strip.second != 1 && Strip.second != 2) 1643 return OpRef::fail(); 1644 1645 int N = VecLen; 1646 int L = Strip.second; 1647 1648 // First, check the non-ignored strips. 1649 for (int I = 2*L; I < 2*N; I += 2*L) { 1650 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); 1651 if (S.second != unsigned(L)) 1652 return OpRef::fail(); 1653 if (2*S.first != I) 1654 return OpRef::fail(); 1655 } 1656 // Check the -1s. 1657 for (int I = L; I < 2*N; I += 2*L) { 1658 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I); 1659 if (S.first != -1 || S.second != unsigned(L)) 1660 return OpRef::fail(); 1661 } 1662 1663 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub 1664 : Hexagon::V6_vunpackuh; 1665 Results.push(Opc, getPairVT(MVT::i8), {Va}); 1666 return OpRef::res(Results.top()); 1667} 1668 1669OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1670 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1671 // V6_vdeal{b,h} 1672 // V6_vshuff{b,h} 1673 1674 // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2}) 1675 // V6_vshuffvdd (V6_vshuff) 1676 // V6_dealvdd (V6_vdeal) 1677 1678 int VecLen = SM.Mask.size(); 1679 assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8); 1680 unsigned LogLen = Log2_32(VecLen); 1681 unsigned HwLog = Log2_32(HwLen); 1682 // The result length must be the same as the length of a single vector, 1683 // or a vector pair. 1684 assert(LogLen == HwLog || LogLen == HwLog+1); 1685 bool Extend = (LogLen == HwLog); 1686 1687 if (!isPermutation(SM.Mask)) 1688 return OpRef::fail(); 1689 1690 SmallVector<unsigned,8> Perm(LogLen); 1691 1692 // Check if this could be a perfect shuffle, or a combination of perfect 1693 // shuffles. 1694 // 1695 // Consider this permutation (using hex digits to make the ASCII diagrams 1696 // easier to read): 1697 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }. 1698 // This is a "deal" operation: divide the input into two halves, and 1699 // create the output by picking elements by alternating between these two 1700 // halves: 1701 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*] 1702 // 8 9 A B C D E F 1703 // 1704 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides 1705 // a somwehat different mechanism that could be used to perform shuffle/ 1706 // deal operations: a 2x2 transpose. 1707 // Consider the halves of inputs again, they can be interpreted as a 2x8 1708 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated 1709 // together. Now, when considering 2 elements at a time, it will be a 2x4 1710 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices: 1711 // 01 23 45 67 1712 // 89 AB CD EF 1713 // With groups of 4, this will become a single 2x2 matrix, and so on. 1714 // 1715 // The 2x2 transpose instruction works by transposing each of the 2x2 1716 // matrices (or "sub-matrices"), given a specific group size. For example, 1717 // if the group size is 1 (i.e. each element is its own group), there 1718 // will be four transposes of the four 2x2 matrices that form the 2x8. 1719 // For example, with the inputs as above, the result will be: 1720 // 0 8 2 A 4 C 6 E 1721 // 1 9 3 B 5 D 7 F 1722 // Now, this result can be tranposed again, but with the group size of 2: 1723 // 08 19 4C 5D 1724 // 2A 3B 6E 7F 1725 // If we then transpose that result, but with the group size of 4, we get: 1726 // 0819 2A3B 1727 // 4C5D 6E7F 1728 // If we concatenate these two rows, it will be 1729 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F 1730 // which is the same as the "deal" [*] above. 1731 // 1732 // In general, a "deal" of individual elements is a series of 2x2 transposes, 1733 // with changing group size. HVX has two instructions: 1734 // Vdd = V6_vdealvdd Vu, Vv, Rt 1735 // Vdd = V6_shufvdd Vu, Vv, Rt 1736 // that perform exactly that. The register Rt controls which transposes are 1737 // going to happen: a bit at position n (counting from 0) indicates that a 1738 // transpose with a group size of 2^n will take place. If multiple bits are 1739 // set, multiple transposes will happen: vdealvdd will perform them starting 1740 // with the largest group size, vshuffvdd will do them in the reverse order. 1741 // 1742 // The main observation is that each 2x2 transpose corresponds to swapping 1743 // columns of bits in the binary representation of the values. 1744 // 1745 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits 1746 // in a given column. The * denote the columns that will be swapped. 1747 // The transpose with the group size 2^n corresponds to swapping columns 1748 // 3 (the highest log) and log2(n): 1749 // 1750 // 3 2 1 0 0 2 1 3 0 2 3 1 1751 // * * * * * * 1752 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1753 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0 1754 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1 1755 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1 1756 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0 1757 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0 1758 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1 1759 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1 1760 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0 1761 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0 1762 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1 1763 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1 1764 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0 1765 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0 1766 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1 1767 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 1768 1769 auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) { 1770 unsigned X = Mask[0] ^ Mask[Num/2]; 1771 // Check that the first half has the X's bits clear. 1772 if ((Mask[0] & X) != 0) 1773 return 0u; 1774 for (unsigned I = 1; I != Num/2; ++I) { 1775 if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X) 1776 return 0u; 1777 if ((Mask[I] & X) != 0) 1778 return 0u; 1779 } 1780 return X; 1781 }; 1782 1783 // Create a vector of log2's for each column: Perm[i] corresponds to 1784 // the i-th bit (lsb is 0). 1785 assert(VecLen > 2); 1786 for (unsigned I = VecLen; I >= 2; I >>= 1) { 1787 // Examine the initial segment of Mask of size I. 1788 unsigned X = XorPow2(SM.Mask, I); 1789 if (!isPowerOf2_32(X)) 1790 return OpRef::fail(); 1791 // Check the other segments of Mask. 1792 for (int J = I; J < VecLen; J += I) { 1793 if (XorPow2(SM.Mask.slice(J, I), I) != X) 1794 return OpRef::fail(); 1795 } 1796 Perm[Log2_32(X)] = Log2_32(I)-1; 1797 } 1798 1799 // Once we have Perm, represent it as cycles. Denote the maximum log2 1800 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be 1801 // written as (M a1 a2 a3 ... an). That cycle can be broken up into 1802 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition 1803 // order being from left to right. Any (contiguous) segment where the 1804 // values ai, ai+1...aj are either all increasing or all decreasing, 1805 // can be implemented via a single vshuffvdd/vdealvdd respectively. 1806 // 1807 // If there is a cycle (a1 a2 ... an) that does not involve M, it can 1808 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can 1809 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure 1810 // can be used to generate a sequence of vshuffvdd/vdealvdd. 1811 // 1812 // Example: 1813 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written 1814 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply 1815 // (4 0 1)(4 0)(4 2 3)(4 2). 1816 // It can then be expanded into swaps as 1817 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2), 1818 // and broken up into "increasing" segments as 1819 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)]. 1820 // This is equivalent to 1821 // (4 0 1)(4 0 2 3)(4 2), 1822 // which can be implemented as 3 vshufvdd instructions. 1823 1824 using CycleType = SmallVector<unsigned,8>; 1825 std::set<CycleType> Cycles; 1826 std::set<unsigned> All; 1827 1828 for (unsigned I : Perm) 1829 All.insert(I); 1830 1831 // If the cycle contains LogLen-1, move it to the front of the cycle. 1832 // Otherwise, return the cycle unchanged. 1833 auto canonicalize = [LogLen](const CycleType &C) -> CycleType { 1834 unsigned LogPos, N = C.size(); 1835 for (LogPos = 0; LogPos != N; ++LogPos) 1836 if (C[LogPos] == LogLen-1) 1837 break; 1838 if (LogPos == N) 1839 return C; 1840 1841 CycleType NewC(C.begin()+LogPos, C.end()); 1842 NewC.append(C.begin(), C.begin()+LogPos); 1843 return NewC; 1844 }; 1845 1846 auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) { 1847 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6), 1848 // for bytes zero is included, for halfwords is not. 1849 if (Cs.size() != 1) 1850 return 0u; 1851 const CycleType &C = *Cs.begin(); 1852 if (C[0] != Len-1) 1853 return 0u; 1854 int D = Len - C.size(); 1855 if (D != 0 && D != 1) 1856 return 0u; 1857 1858 bool IsDeal = true, IsShuff = true; 1859 for (unsigned I = 1; I != Len-D; ++I) { 1860 if (C[I] != Len-1-I) 1861 IsDeal = false; 1862 if (C[I] != I-(1-D)) // I-1, I 1863 IsShuff = false; 1864 } 1865 // At most one, IsDeal or IsShuff, can be non-zero. 1866 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff); 1867 static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh }; 1868 static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh }; 1869 return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0); 1870 }; 1871 1872 while (!All.empty()) { 1873 unsigned A = *All.begin(); 1874 All.erase(A); 1875 CycleType C; 1876 C.push_back(A); 1877 for (unsigned B = Perm[A]; B != A; B = Perm[B]) { 1878 C.push_back(B); 1879 All.erase(B); 1880 } 1881 if (C.size() <= 1) 1882 continue; 1883 Cycles.insert(canonicalize(C)); 1884 } 1885 1886 MVT SingleTy = getSingleVT(MVT::i8); 1887 MVT PairTy = getPairVT(MVT::i8); 1888 1889 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}. 1890 if (unsigned(VecLen) == HwLen) { 1891 if (unsigned SingleOpc = pfs(Cycles, LogLen)) { 1892 Results.push(SingleOpc, SingleTy, {Va}); 1893 return OpRef::res(Results.top()); 1894 } 1895 } 1896 1897 SmallVector<unsigned,8> SwapElems; 1898 if (HwLen == unsigned(VecLen)) 1899 SwapElems.push_back(LogLen-1); 1900 1901 for (const CycleType &C : Cycles) { 1902 unsigned First = (C[0] == LogLen-1) ? 1 : 0; 1903 SwapElems.append(C.begin()+First, C.end()); 1904 if (First == 0) 1905 SwapElems.push_back(C[0]); 1906 } 1907 1908 const SDLoc &dl(Results.InpNode); 1909 OpRef Arg = !Extend ? Va 1910 : concat(Va, OpRef::undef(SingleTy), Results); 1911 1912 for (unsigned I = 0, E = SwapElems.size(); I != E; ) { 1913 bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1]; 1914 unsigned S = (1u << SwapElems[I]); 1915 if (I < E-1) { 1916 while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1])) 1917 S |= 1u << SwapElems[I]; 1918 // The above loop will not add a bit for the final SwapElems[I+1], 1919 // so add it here. 1920 S |= 1u << SwapElems[I]; 1921 } 1922 ++I; 1923 1924 NodeTemplate Res; 1925 Results.push(Hexagon::A2_tfrsi, MVT::i32, 1926 { DAG.getTargetConstant(S, dl, MVT::i32) }); 1927 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd; 1928 Res.Ty = PairTy; 1929 Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) }; 1930 Results.push(Res); 1931 Arg = OpRef::res(Results.top()); 1932 } 1933 1934 return !Extend ? Arg : OpRef::lo(Arg); 1935} 1936 1937OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) { 1938 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); 1939 // Butterfly shuffles. 1940 // 1941 // V6_vdelta 1942 // V6_vrdelta 1943 // V6_vror 1944 1945 // The assumption here is that all elements picked by Mask are in the 1946 // first operand to the vector_shuffle. This assumption is enforced 1947 // by the caller. 1948 1949 MVT ResTy = getSingleVT(MVT::i8); 1950 PermNetwork::Controls FC, RC; 1951 const SDLoc &dl(Results.InpNode); 1952 int VecLen = SM.Mask.size(); 1953 1954 for (int M : SM.Mask) { 1955 if (M != -1 && M >= VecLen) 1956 return OpRef::fail(); 1957 } 1958 1959 // Try the deltas/benes for both single vectors and vector pairs. 1960 ForwardDeltaNetwork FN(SM.Mask); 1961 if (FN.run(FC)) { 1962 SDValue Ctl = getVectorConstant(FC, dl); 1963 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)}); 1964 return OpRef::res(Results.top()); 1965 } 1966 1967 // Try reverse delta. 1968 ReverseDeltaNetwork RN(SM.Mask); 1969 if (RN.run(RC)) { 1970 SDValue Ctl = getVectorConstant(RC, dl); 1971 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)}); 1972 return OpRef::res(Results.top()); 1973 } 1974 1975 // Do Benes. 1976 BenesNetwork BN(SM.Mask); 1977 if (BN.run(FC, RC)) { 1978 SDValue CtlF = getVectorConstant(FC, dl); 1979 SDValue CtlR = getVectorConstant(RC, dl); 1980 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)}); 1981 Results.push(Hexagon::V6_vrdelta, ResTy, 1982 {OpRef::res(-1), OpRef(CtlR)}); 1983 return OpRef::res(Results.top()); 1984 } 1985 1986 return OpRef::fail(); 1987} 1988 1989SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data, 1990 const SDLoc &dl) { 1991 SmallVector<SDValue, 128> Elems; 1992 for (uint8_t C : Data) 1993 Elems.push_back(DAG.getConstant(C, dl, MVT::i8)); 1994 MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size()); 1995 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems); 1996 SDValue LV = Lower.LowerOperation(BV, DAG); 1997 DAG.RemoveDeadNode(BV.getNode()); 1998 return LV; 1999} 2000 2001void HvxSelector::selectShuffle(SDNode *N) { 2002 DEBUG_WITH_TYPE("isel", { 2003 dbgs() << "Starting " << __func__ << " on node:\n"; 2004 N->dump(&DAG); 2005 }); 2006 MVT ResTy = N->getValueType(0).getSimpleVT(); 2007 // Assume that vector shuffles operate on vectors of bytes. 2008 assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8); 2009 2010 auto *SN = cast<ShuffleVectorSDNode>(N); 2011 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end()); 2012 // This shouldn't really be necessary. Is it? 2013 for (int &Idx : Mask) 2014 if (Idx != -1 && Idx < 0) 2015 Idx = -1; 2016 2017 unsigned VecLen = Mask.size(); 2018 bool HavePairs = (2*HwLen == VecLen); 2019 assert(ResTy.getSizeInBits() / 8 == VecLen); 2020 2021 // Vd = vector_shuffle Va, Vb, Mask 2022 // 2023 2024 bool UseLeft = false, UseRight = false; 2025 for (unsigned I = 0; I != VecLen; ++I) { 2026 if (Mask[I] == -1) 2027 continue; 2028 unsigned Idx = Mask[I]; 2029 assert(Idx < 2*VecLen); 2030 if (Idx < VecLen) 2031 UseLeft = true; 2032 else 2033 UseRight = true; 2034 } 2035 2036 DEBUG_WITH_TYPE("isel", { 2037 dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft=" 2038 << UseLeft << " UseRight=" << UseRight << " HavePairs=" 2039 << HavePairs << '\n'; 2040 }); 2041 // If the mask is all -1's, generate "undef". 2042 if (!UseLeft && !UseRight) { 2043 ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); 2044 return; 2045 } 2046 2047 SDValue Vec0 = N->getOperand(0); 2048 SDValue Vec1 = N->getOperand(1); 2049 ResultStack Results(SN); 2050 Results.push(TargetOpcode::COPY, ResTy, {Vec0}); 2051 Results.push(TargetOpcode::COPY, ResTy, {Vec1}); 2052 OpRef Va = OpRef::res(Results.top()-1); 2053 OpRef Vb = OpRef::res(Results.top()); 2054 2055 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results) 2056 : shuffp2(ShuffleMask(Mask), Va, Vb, Results); 2057 2058 bool Done = Res.isValid(); 2059 if (Done) { 2060 // Make sure that Res is on the stack before materializing. 2061 Results.push(TargetOpcode::COPY, ResTy, {Res}); 2062 materialize(Results); 2063 } else { 2064 Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N); 2065 } 2066 2067 if (!Done) { 2068#ifndef NDEBUG 2069 dbgs() << "Unhandled shuffle:\n"; 2070 SN->dumpr(&DAG); 2071#endif 2072 llvm_unreachable("Failed to select vector shuffle"); 2073 } 2074} 2075 2076void HvxSelector::selectRor(SDNode *N) { 2077 // If this is a rotation by less than 8, use V6_valignbi. 2078 MVT Ty = N->getValueType(0).getSimpleVT(); 2079 const SDLoc &dl(N); 2080 SDValue VecV = N->getOperand(0); 2081 SDValue RotV = N->getOperand(1); 2082 SDNode *NewN = nullptr; 2083 2084 if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) { 2085 unsigned S = CN->getZExtValue() % HST.getVectorLength(); 2086 if (S == 0) { 2087 NewN = VecV.getNode(); 2088 } else if (isUInt<3>(S)) { 2089 SDValue C = DAG.getTargetConstant(S, dl, MVT::i32); 2090 NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty, 2091 {VecV, VecV, C}); 2092 } 2093 } 2094 2095 if (!NewN) 2096 NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); 2097 2098 ISel.ReplaceNode(N, NewN); 2099} 2100 2101void HvxSelector::selectVAlign(SDNode *N) { 2102 SDValue Vv = N->getOperand(0); 2103 SDValue Vu = N->getOperand(1); 2104 SDValue Rt = N->getOperand(2); 2105 SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N), 2106 N->getValueType(0), {Vv, Vu, Rt}); 2107 ISel.ReplaceNode(N, NewN); 2108 DAG.RemoveDeadNode(N); 2109} 2110 2111void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) { 2112 HvxSelector(*this, *CurDAG).selectShuffle(N); 2113} 2114 2115void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) { 2116 HvxSelector(*this, *CurDAG).selectRor(N); 2117} 2118 2119void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) { 2120 HvxSelector(*this, *CurDAG).selectVAlign(N); 2121} 2122 2123void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { 2124 const SDLoc &dl(N); 2125 SDValue Chain = N->getOperand(0); 2126 SDValue Address = N->getOperand(2); 2127 SDValue Predicate = N->getOperand(3); 2128 SDValue Base = N->getOperand(4); 2129 SDValue Modifier = N->getOperand(5); 2130 SDValue Offset = N->getOperand(6); 2131 2132 unsigned Opcode; 2133 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 2134 switch (IntNo) { 2135 default: 2136 llvm_unreachable("Unexpected HVX gather intrinsic."); 2137 case Intrinsic::hexagon_V6_vgathermhq: 2138 case Intrinsic::hexagon_V6_vgathermhq_128B: 2139 Opcode = Hexagon::V6_vgathermhq_pseudo; 2140 break; 2141 case Intrinsic::hexagon_V6_vgathermwq: 2142 case Intrinsic::hexagon_V6_vgathermwq_128B: 2143 Opcode = Hexagon::V6_vgathermwq_pseudo; 2144 break; 2145 case Intrinsic::hexagon_V6_vgathermhwq: 2146 case Intrinsic::hexagon_V6_vgathermhwq_128B: 2147 Opcode = Hexagon::V6_vgathermhwq_pseudo; 2148 break; 2149 } 2150 2151 SDVTList VTs = CurDAG->getVTList(MVT::Other); 2152 SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain }; 2153 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 2154 2155 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2156 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 2157 2158 ReplaceNode(N, Result); 2159} 2160 2161void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { 2162 const SDLoc &dl(N); 2163 SDValue Chain = N->getOperand(0); 2164 SDValue Address = N->getOperand(2); 2165 SDValue Base = N->getOperand(3); 2166 SDValue Modifier = N->getOperand(4); 2167 SDValue Offset = N->getOperand(5); 2168 2169 unsigned Opcode; 2170 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 2171 switch (IntNo) { 2172 default: 2173 llvm_unreachable("Unexpected HVX gather intrinsic."); 2174 case Intrinsic::hexagon_V6_vgathermh: 2175 case Intrinsic::hexagon_V6_vgathermh_128B: 2176 Opcode = Hexagon::V6_vgathermh_pseudo; 2177 break; 2178 case Intrinsic::hexagon_V6_vgathermw: 2179 case Intrinsic::hexagon_V6_vgathermw_128B: 2180 Opcode = Hexagon::V6_vgathermw_pseudo; 2181 break; 2182 case Intrinsic::hexagon_V6_vgathermhw: 2183 case Intrinsic::hexagon_V6_vgathermhw_128B: 2184 Opcode = Hexagon::V6_vgathermhw_pseudo; 2185 break; 2186 } 2187 2188 SDVTList VTs = CurDAG->getVTList(MVT::Other); 2189 SDValue Ops[] = { Address, Base, Modifier, Offset, Chain }; 2190 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 2191 2192 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2193 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 2194 2195 ReplaceNode(N, Result); 2196} 2197 2198void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { 2199 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 2200 SDNode *Result; 2201 switch (IID) { 2202 case Intrinsic::hexagon_V6_vaddcarry: { 2203 SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2204 N->getOperand(3) }; 2205 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1); 2206 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); 2207 break; 2208 } 2209 case Intrinsic::hexagon_V6_vaddcarry_128B: { 2210 SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2211 N->getOperand(3) }; 2212 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1); 2213 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); 2214 break; 2215 } 2216 case Intrinsic::hexagon_V6_vsubcarry: { 2217 SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2218 N->getOperand(3) }; 2219 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1); 2220 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); 2221 break; 2222 } 2223 case Intrinsic::hexagon_V6_vsubcarry_128B: { 2224 SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2), 2225 N->getOperand(3) }; 2226 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1); 2227 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); 2228 break; 2229 } 2230 default: 2231 llvm_unreachable("Unexpected HVX dual output intrinsic."); 2232 } 2233 ReplaceUses(N, Result); 2234 ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); 2235 ReplaceUses(SDValue(N, 1), SDValue(Result, 1)); 2236 CurDAG->RemoveDeadNode(N); 2237} 2238