cortex-a57-fma-steering.c revision 1.5
1/* FMA steering optimization pass for Cortex-A57. 2 Copyright (C) 2015-2019 Free Software Foundation, Inc. 3 Contributed by ARM Ltd. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21#define IN_TARGET_CODE 1 22 23#include "config.h" 24#define INCLUDE_LIST 25#include "system.h" 26#include "coretypes.h" 27#include "backend.h" 28#include "target.h" 29#include "rtl.h" 30#include "df.h" 31#include "insn-config.h" 32#include "regs.h" 33#include "memmodel.h" 34#include "emit-rtl.h" 35#include "recog.h" 36#include "cfganal.h" 37#include "insn-attr.h" 38#include "context.h" 39#include "tree-pass.h" 40#include "regrename.h" 41#include "aarch64-protos.h" 42 43/* For better performance, the destination of FMADD/FMSUB instructions should 44 have the same parity as their accumulator register if the accumulator 45 contains the result of a previous FMUL or FMADD/FMSUB instruction if 46 targetting Cortex-A57 processors. Performance is also increased by 47 otherwise keeping a good balance in the parity of the destination register 48 of FMUL or FMADD/FMSUB. 49 50 This pass ensure that registers are renamed so that these conditions hold. 51 We reuse the existing register renaming facility from regrename.c to build 52 dependency chains and expose candidate registers for renaming. 53 54 55 The algorithm has three steps: 56 57 First, the functions of the register renaming pass are called. These 58 analyze the instructions and produce a list of def/use chains of 59 instructions. 60 61 Next, this information is used to build trees of multiply and 62 multiply-accumulate instructions. The roots of these trees are any 63 multiply, or any multiply-accumulate whose accumulator is not dependent on 64 a multiply or multiply-accumulate instruction. A child is added to the 65 tree where a dependency chain exists between the result of the parent 66 instruction and the accumulator operand of the child, as in the diagram 67 below: 68 69 fmul s2, s0, s1 70 / \ 71 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2 72 | 73 fmadd s3, s1, s1, s0 74 75 Trees made of a single instruction are permitted. 76 77 Finally, renaming is performed. The parity of the destination register at 78 the root of a tree is checked against the current balance of multiply and 79 multiply-accumulate on each pipeline. If necessary, the root of a tree is 80 renamed, in which case the rest of the tree is then renamed to keep the same 81 parity in the destination registers of all instructions in the tree. */ 82 83 84 85/* Forward declarations. */ 86class fma_node; 87class fma_root_node; 88class func_fma_steering; 89 90/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent 91 FMADD/FMSUB instructions form a graph. This is because alternatives can 92 make a register be set by several FMUL or FMADD/FMSUB instructions in 93 different basic blocks and because of loops. For ease of browsing, the 94 connected components of this graph are broken up into forests of trees. 95 Forests are represented by fma_forest objects, contained in the fma_forests 96 list. Using a separate object for the forests allows for a better use of 97 memory as there is some information that is global to each forest, such as 98 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each 99 floating-point execution pipelines. */ 100 101class fma_forest 102{ 103public: 104 fma_forest (func_fma_steering *, fma_root_node *, int); 105 ~fma_forest (); 106 107 int get_id (); 108 std::list<fma_root_node *> *get_roots (); 109 func_fma_steering *get_globals (); 110 int get_target_parity (); 111 void fma_node_created (fma_node *); 112 void merge_forest (fma_forest *); 113 void dump_info (); 114 void dispatch (); 115 116private: 117 /* Prohibit copy construction. */ 118 fma_forest (const fma_forest &); 119 120 /* The list of roots that form this forest. */ 121 std::list<fma_root_node *> *m_roots; 122 123 /* Target parity the destination register of all FMUL and FMADD/FMSUB 124 instructions in this forest should have. */ 125 int m_target_parity; 126 127 /* Link to the instance of func_fma_steering holding data related to the 128 FMA steering of the current function (cfun). */ 129 func_fma_steering *m_globals; 130 131 /* Identifier for the forest (used for dumps). */ 132 int m_id; 133 134 /* Total number of nodes in the forest (for statistics). */ 135 int m_nb_nodes; 136}; 137 138class fma_node 139{ 140public: 141 fma_node (fma_node *parent, du_chain *chain); 142 ~fma_node (); 143 144 bool root_p (); 145 fma_forest *get_forest (); 146 std::list<fma_node *> *get_children (); 147 rtx_insn *get_insn (); 148 void add_child (fma_node *); 149 int get_parity (); 150 void set_head (du_head *); 151 void rename (fma_forest *); 152 void dump_info (fma_forest *); 153 154private: 155 /* Prohibit copy construction. */ 156 fma_node (const fma_node &); 157 158protected: 159 /* Root node that lead to this node. */ 160 fma_root_node *m_root; 161 162 /* The parent node of this node. If the node belong to a chain with several 163 parent nodes, the first one encountered in a depth-first search is chosen 164 as canonical parent. */ 165 fma_node *m_parent; 166 167 /* The list of child nodes. If a chain contains several parent nodes, one is 168 chosen as canonical parent and the others will have no children. */ 169 std::list<fma_node *> *m_children; 170 171 /* The associated DU_HEAD chain that the insn represented by this object 172 is (one of) the root of. When a chain contains several roots, the non 173 canonical ones have this field set to NULL. */ 174 struct du_head *m_head; 175 176 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */ 177 rtx_insn *m_insn; 178}; 179 180class fma_root_node : public fma_node 181{ 182public: 183 fma_root_node (func_fma_steering *, du_chain *, int); 184 185 fma_forest *get_forest (); 186 void set_forest (fma_forest *); 187 void dump_info (fma_forest *); 188 189private: 190 /* The forest this node belonged to when it was created. */ 191 fma_forest *m_forest; 192}; 193 194/* Class holding all data and methods relative to the FMA steering of a given 195 function. The FMA steering pass could then run in parallel for different 196 functions. */ 197 198class func_fma_steering 199{ 200public: 201 func_fma_steering (); 202 ~func_fma_steering (); 203 204 int get_fpu_balance (); 205 void remove_forest (fma_forest *); 206 bool put_node (fma_node *); 207 void update_balance (int); 208 fma_node *get_fma_node (rtx_insn *); 209 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p); 210 void execute_fma_steering (); 211 212private: 213 /* Prohibit copy construction. */ 214 func_fma_steering (const func_fma_steering &); 215 216 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *), 217 void (*) (fma_forest *, fma_node *), bool); 218 void analyze (); 219 void rename_fma_trees (); 220 221 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated 222 fma_node object. Used when analyzing an instruction that is a root of 223 a chain to find if such an object was created because this instruction 224 is also a use in another chain. */ 225 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map; 226 227 /* A list of all the forests in a given function. */ 228 std::list<fma_forest *> m_fma_forests; 229 230 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU 231 pipelines: 232 < 0: more instruction dispatched to the first pipeline 233 == 0: perfect balance 234 > 0: more instruction dispatched to the second pipeline. */ 235 int m_fpu_balance; 236 237 /* Identifier for the next forest created. */ 238 int m_next_forest_id; 239}; 240 241/* Rename the register HEAD->regno in all the insns in the chain HEAD to any 242 register not in the set UNAVAILABLE. Adapted from rename_chains in 243 regrename.c. */ 244 245static bool 246rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) 247{ 248 int best_new_reg; 249 int n_uses = 0; 250 struct du_chain *tmp; 251 int reg = head->regno; 252 enum reg_class super_class = NO_REGS; 253 254 if (head->cannot_rename) 255 return false; 256 257 if (fixed_regs[reg] || global_regs[reg] 258 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM)) 259 return false; 260 261 /* Iterate over elements in the chain in order to: 262 1. Count number of uses, and narrow the set of registers we can 263 use for renaming. 264 2. Compute the superunion of register classes in this chain. */ 265 for (tmp = head->first; tmp; tmp = tmp->next_use) 266 { 267 if (DEBUG_INSN_P (tmp->insn)) 268 continue; 269 n_uses++; 270 IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); 271 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; 272 } 273 274 if (n_uses < 1) 275 return false; 276 277 best_new_reg = find_rename_reg (head, super_class, unavailable, reg, 278 false); 279 280 if (dump_file) 281 { 282 fprintf (dump_file, "Register %s in insn %d", reg_names[reg], 283 INSN_UID (head->first->insn)); 284 if (head->need_caller_save_reg) 285 fprintf (dump_file, " crosses a call"); 286 } 287 288 if (best_new_reg == reg) 289 { 290 if (dump_file) 291 fprintf (dump_file, "; no available better choice\n"); 292 return false; 293 } 294 295 if (regrename_do_replace (head, best_new_reg)) 296 { 297 if (dump_file) 298 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); 299 df_set_regs_ever_live (best_new_reg, true); 300 } 301 else 302 { 303 if (dump_file) 304 fprintf (dump_file, ", renaming as %s failed\n", 305 reg_names[best_new_reg]); 306 return false; 307 } 308 return true; 309} 310 311/* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */ 312 313static bool 314is_fmac_op (enum attr_type t) 315{ 316 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S); 317} 318 319/* Return whether T is the attribute of a FMUL instruction. */ 320 321static bool 322is_fmul_op (enum attr_type t) 323{ 324 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S); 325} 326 327/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB 328 instruction. */ 329 330static bool 331is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok) 332{ 333 enum attr_type t; 334 335 if (!NONDEBUG_INSN_P (insn)) 336 return false; 337 338 if (recog_memoized (insn) < 0) 339 return false; 340 341 /* Only consider chain(s) this instruction is a root of if this is an FMUL or 342 FMADD/FMSUB instruction. This allows to avoid browsing chains of all 343 instructions for FMUL or FMADD/FMSUB in them. */ 344 t = get_attr_type (insn); 345 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t)); 346} 347 348 349/* 350 * Class fma_forest method definitions. 351 */ 352 353fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root, 354 int id) 355{ 356 memset (this, 0, sizeof (*this)); 357 this->m_globals = fma_steer; 358 this->m_roots = new std::list<fma_root_node *>; 359 this->m_roots->push_back (fma_root); 360 this->m_id = id; 361} 362 363fma_forest::~fma_forest () 364{ 365 delete this->m_roots; 366} 367 368int 369fma_forest::get_id () 370{ 371 return this->m_id; 372} 373 374std::list<fma_root_node *> * 375fma_forest::get_roots () 376{ 377 return this->m_roots; 378} 379 380func_fma_steering * 381fma_forest::get_globals () 382{ 383 return this->m_globals; 384} 385 386int 387fma_forest::get_target_parity () 388{ 389 return this->m_target_parity; 390} 391 392/* Act on the creation of NODE by updating statistics in FOREST and adding an 393 entry for it in the func_fma_steering hashmap. */ 394 395void fma_forest::fma_node_created (fma_node *node) 396{ 397 bool created = !this->m_globals->put_node (node); 398 399 gcc_assert (created); 400 this->m_nb_nodes++; 401} 402 403/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical 404 fma_forest object to represent both. */ 405 406void 407fma_forest::merge_forest (fma_forest *other_forest) 408{ 409 std::list<fma_root_node *> *other_roots; 410 std::list<fma_root_node *>::iterator other_root_iter; 411 412 if (this == other_forest) 413 return; 414 415 other_roots = other_forest->m_roots; 416 417 /* Update root nodes' pointer to forest. */ 418 for (other_root_iter = other_roots->begin (); 419 other_root_iter != other_roots->end (); ++other_root_iter) 420 (*other_root_iter)->set_forest (this); 421 422 /* Remove other_forest from the list of forests and move its tree roots in 423 the list of tree roots of ref_forest. */ 424 this->m_globals->remove_forest (other_forest); 425 this->m_roots->splice (this->m_roots->begin (), *other_roots); 426 this->m_nb_nodes += other_forest->m_nb_nodes; 427 428 delete other_forest; 429} 430 431/* Dump information about the forest FOREST. */ 432 433void 434fma_forest::dump_info () 435{ 436 gcc_assert (dump_file); 437 438 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id, 439 this->m_nb_nodes); 440} 441 442/* Wrapper around fma_forest::dump_info for use as parameter of function 443 pointer type in func_fma_steering::dfs. */ 444 445static void 446dump_forest_info (fma_forest *forest) 447{ 448 forest->dump_info (); 449} 450 451/* Dispatch forest to the least utilized pipeline. */ 452 453void 454fma_forest::dispatch () 455{ 456 this->m_target_parity = this->m_roots->front ()->get_parity (); 457 int fpu_balance = this->m_globals->get_fpu_balance (); 458 if (fpu_balance != 0) 459 this->m_target_parity = (fpu_balance < 0); 460 461 if (dump_file) 462 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id, 463 this->m_target_parity ? "odd" : "even"); 464} 465 466/* Wrapper around fma_forest::dispatch for use as parameter of function pointer 467 type in func_fma_steering::dfs. */ 468 469static void 470dispatch_forest (fma_forest *forest) 471{ 472 forest->dispatch (); 473} 474 475fma_node::fma_node (fma_node *parent, du_chain *chain) 476{ 477 memset (this, 0, sizeof (*this)); 478 this->m_parent = parent; 479 this->m_children = new std::list<fma_node *>; 480 this->m_insn = chain->insn; 481 /* root_p () cannot be used to check for root before root is set. */ 482 if (this->m_parent == this) 483 this->m_root = static_cast<fma_root_node *> (parent); 484 else 485 { 486 this->m_root = parent->m_root; 487 this->get_forest ()->fma_node_created (this); 488 } 489} 490 491fma_node::~fma_node () 492{ 493 delete this->m_children; 494} 495 496std::list<fma_node *> * 497fma_node::get_children () 498{ 499 return this->m_children; 500} 501 502rtx_insn * 503fma_node::get_insn () 504{ 505 return this->m_insn; 506} 507 508void 509fma_node::set_head (du_head *head) 510{ 511 gcc_assert (!this->m_head); 512 this->m_head = head; 513} 514 515/* Add a child to this node in the list of children. */ 516 517void 518fma_node::add_child (fma_node *child) 519{ 520 this->m_children->push_back (child); 521} 522 523/* Return the parity of the destination register of the instruction represented 524 by this node. */ 525 526int 527fma_node::get_parity () 528{ 529 return this->m_head->regno % 2; 530} 531 532/* Get the actual forest associated with a non root node as the one the node 533 points to might have been merged into another one. In that case the pointer 534 in the root nodes are updated so we return the forest pointer of a root node 535 pointed to by the initial forest. Despite being a oneliner, this method is 536 defined here as it references a method from fma_root_node. */ 537 538fma_forest * 539fma_node::get_forest () 540{ 541 return this->m_root->get_forest (); 542} 543 544/* Return whether a node is a root node. */ 545 546bool 547fma_node::root_p () 548{ 549 return this->m_root == this; 550} 551 552/* Dump information about the children of node FMA_NODE in forest FOREST. */ 553 554void 555fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest) 556{ 557 struct du_chain *chain; 558 std::list<fma_node *>::iterator fma_child; 559 560 gcc_assert (dump_file); 561 562 if (this->get_children ()->empty ()) 563 return; 564 565 fprintf (dump_file, "Instruction(s)"); 566 for (chain = this->m_head->first; chain; chain = chain->next_use) 567 { 568 if (!is_fmul_fmac_insn (chain->insn, true)) 569 continue; 570 571 if (chain->loc != &SET_DEST (PATTERN (chain->insn))) 572 continue; 573 574 fprintf (dump_file, " %d", INSN_UID (chain->insn)); 575 } 576 577 fprintf (dump_file, " is(are) accumulator dependency of instructions"); 578 for (fma_child = this->get_children ()->begin (); 579 fma_child != this->get_children ()->end (); fma_child++) 580 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn)); 581 fprintf (dump_file, "\n"); 582} 583 584/* Wrapper around fma_node::dump_info for use as parameter of function pointer 585 type in func_fma_steering::dfs. */ 586 587static void 588dump_tree_node_info (fma_forest *forest, fma_node *node) 589{ 590 node->dump_info (forest); 591} 592 593/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction 594 represented by FMA_NODE to a register that respect the target parity for 595 FOREST or with same parity of the instruction represented by its parent node 596 if it has one. */ 597 598void 599fma_node::rename (fma_forest *forest) 600{ 601 int cur_parity, target_parity; 602 603 /* This is alternate root of a chain and thus has no children. It will be 604 renamed when processing the canonical root for that chain. */ 605 if (!this->m_head) 606 return; 607 608 target_parity = forest->get_target_parity (); 609 if (this->m_parent) 610 target_parity = this->m_parent->get_parity (); 611 cur_parity = this->get_parity (); 612 613 /* Rename if parity differs. */ 614 if (cur_parity != target_parity) 615 { 616 rtx_insn *insn = this->m_insn; 617 HARD_REG_SET unavailable; 618 machine_mode mode; 619 int reg; 620 621 if (dump_file) 622 { 623 unsigned cur_dest_reg = this->m_head->regno; 624 625 fprintf (dump_file, "FMA or FMUL at insn %d but destination " 626 "register (%s) has different parity from expected to " 627 "maximize FPU pipeline utilization\n", INSN_UID (insn), 628 reg_names[cur_dest_reg]); 629 } 630 631 /* Don't clobber traceback for noreturn functions. */ 632 CLEAR_HARD_REG_SET (unavailable); 633 if (frame_pointer_needed) 634 { 635 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM); 636 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM); 637 } 638 639 /* Exclude registers with wrong parity. */ 640 mode = GET_MODE (SET_DEST (PATTERN (insn))); 641 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2) 642 add_to_hard_reg_set (&unavailable, mode, reg); 643 644 if (!rename_single_chain (this->m_head, &unavailable)) 645 { 646 if (dump_file) 647 fprintf (dump_file, "Destination register of insn %d could not be " 648 "renamed. Dependent FMA insns will use this parity from " 649 "there on.\n", INSN_UID (insn)); 650 } 651 else 652 cur_parity = target_parity; 653 } 654 655 forest->get_globals ()->update_balance (cur_parity); 656} 657 658/* Wrapper around fma_node::dump_info for use as parameter of function pointer 659 type in func_fma_steering::dfs. */ 660 661static void 662rename_fma_node (fma_forest *forest, fma_node *node) 663{ 664 node->rename (forest); 665} 666 667fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain, 668 int id) : fma_node (this, chain) 669{ 670 this->m_forest = new fma_forest (globals, this, id); 671 this->m_forest->fma_node_created (this); 672} 673 674fma_forest * 675fma_root_node::get_forest () 676{ 677 return this->m_forest; 678} 679 680void 681fma_root_node::set_forest (fma_forest *ref_forest) 682{ 683 this->m_forest = ref_forest; 684} 685 686/* Dump information about the roots of forest FOREST. */ 687 688void 689fma_root_node::dump_info (fma_forest *forest) 690{ 691 gcc_assert (dump_file); 692 693 if (this == forest->get_roots ()->front ()) 694 fprintf (dump_file, "Instruction(s) at root of forest #%d:", 695 forest->get_id ()); 696 fprintf (dump_file, " %d", INSN_UID (this->m_insn)); 697 if (this == forest->get_roots ()->back ()) 698 fprintf (dump_file, "\n"); 699} 700 701/* Wrapper around fma_root_node::dump_info for use as parameter of function 702 pointer type in func_fma_steering::dfs. */ 703 704static void 705dump_tree_root_info (fma_forest *forest, fma_root_node *node) 706{ 707 node->dump_info (forest); 708} 709 710func_fma_steering::func_fma_steering () : m_fpu_balance (0) 711{ 712 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>; 713 this->m_fma_forests.clear (); 714 this->m_next_forest_id = 0; 715} 716 717func_fma_steering::~func_fma_steering () 718{ 719 delete this->m_insn_fma_head_map; 720} 721 722int 723func_fma_steering::get_fpu_balance () 724{ 725 return this->m_fpu_balance; 726} 727 728void 729func_fma_steering::remove_forest (fma_forest *forest) 730{ 731 this->m_fma_forests.remove (forest); 732} 733 734/* Memorize the mapping of this instruction to its fma_node object and return 735 whether such a mapping existed. */ 736 737bool 738func_fma_steering::put_node (fma_node *node) 739{ 740 return this->m_insn_fma_head_map->put (node->get_insn (), node); 741} 742 743/* Update the current balance considering a node with the given PARITY. */ 744 745void 746func_fma_steering::update_balance (int parity) 747{ 748 this->m_fpu_balance = parity ? this->m_fpu_balance + 1 749 : this->m_fpu_balance - 1; 750} 751 752/* Return whether an fma_node object exists for instruction INSN and, if not, 753 allocate one in *RET. */ 754 755fma_node * 756func_fma_steering::get_fma_node (rtx_insn *insn) 757{ 758 fma_node **fma_slot; 759 760 fma_slot = this->m_insn_fma_head_map->get (insn); 761 if (fma_slot) 762 return *fma_slot; 763 return NULL; 764} 765 766/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB 767 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all 768 part of FOREST. For the children, the associated head is left untouched 769 (and thus null) as this function will be called again when considering the 770 chain where they are def. For the parent, the chain is given in HEAD. */ 771 772void 773func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest, 774 du_chain *chain, du_head_p head) 775{ 776 fma_forest *forest; 777 fma_node *node = this->get_fma_node (chain->insn); 778 779 /* This is a root node. */ 780 if (!node) 781 { 782 fma_root_node *root_node; 783 784 root_node = new fma_root_node (this, chain, this->m_next_forest_id++); 785 forest = root_node->get_forest (); 786 node = root_node; 787 788 /* Until proved otherwise, assume this root is not part of an existing 789 forest and thus add its forest to the list of forests. */ 790 this->m_fma_forests.push_back (forest); 791 } 792 else 793 forest = node->get_forest (); 794 795 node->set_head (head); 796 797 /* fma_node is part of a chain with several defs, one of them having already 798 been processed. The root of that already processed def is the canonical 799 one and the root of fma_node is added to its forest. No need to process 800 the children nodes as they were already processed when the other def was 801 processed. */ 802 if (ref_forest) 803 { 804 ref_forest->merge_forest (forest); 805 return; 806 } 807 808 for (chain = head->first; chain; chain = chain->next_use) 809 { 810 fma_node *child_fma; 811 rtx fma_rtx, *accum_rtx_p; 812 813 if (!is_fmul_fmac_insn (chain->insn, false)) 814 continue; 815 816 /* Get FMA rtx. */ 817 fma_rtx = SET_SRC (PATTERN (chain->insn)); 818 /* FMA is negated. */ 819 if (GET_CODE (fma_rtx) == NEG) 820 fma_rtx = XEXP (fma_rtx, 0); 821 /* Get accumulator rtx. */ 822 accum_rtx_p = &XEXP (fma_rtx, 2); 823 /* Accumulator is negated. */ 824 if (!REG_P (*accum_rtx_p)) 825 accum_rtx_p = &XEXP (*accum_rtx_p, 0); 826 827 /* This du_chain structure is not for the accumulator register. */ 828 if (accum_rtx_p != chain->loc) 829 continue; 830 831 /* If object already created, this is a loop carried dependency. We 832 don't include this object in the children as we want trees for 833 rename_fma_trees to not be an infinite loop. */ 834 if (this->get_fma_node (chain->insn)) 835 continue; 836 837 child_fma = new fma_node (node, chain); 838 839 /* Memorize the mapping of this instruction to its fma_node object 840 as it will be processed for the chain starting at its destination 841 register later. */ 842 843 /* Link to siblings. */ 844 node->add_child (child_fma); 845 } 846} 847 848/* Perform a depth-first search of the forests of fma_node in 849 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in 850 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and 851 PROCESS_NODE () on each node. If FREE is true, free all std::list in the 852 same dfs. */ 853 854void 855func_fma_steering::dfs (void (*process_forest) (fma_forest *), 856 void (*process_root) (fma_forest *, fma_root_node *), 857 void (*process_node) (fma_forest *, fma_node *), 858 bool free) 859{ 860 auto_vec<fma_node *> to_process; 861 auto_vec<fma_node *> to_free; 862 std::list<fma_forest *>::iterator forest_iter; 863 864 /* For each forest. */ 865 for (forest_iter = this->m_fma_forests.begin (); 866 forest_iter != this->m_fma_forests.end (); ++forest_iter) 867 { 868 std::list<fma_root_node *>::iterator root_iter; 869 870 if (process_forest) 871 process_forest (*forest_iter); 872 873 /* For each tree root in this forest. */ 874 for (root_iter = (*forest_iter)->get_roots ()->begin (); 875 root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter) 876 { 877 if (process_root) 878 process_root (*forest_iter, *root_iter); 879 to_process.safe_push (*root_iter); 880 } 881 882 /* For each tree node in this forest. */ 883 while (!to_process.is_empty ()) 884 { 885 fma_node *node; 886 std::list<fma_node *>::iterator child_iter; 887 888 node = to_process.pop (); 889 890 if (process_node) 891 process_node (*forest_iter, node); 892 893 for (child_iter = node->get_children ()->begin (); 894 child_iter != node->get_children ()->end (); ++child_iter) 895 to_process.safe_push (*child_iter); 896 897 /* Defer freeing so that the process_node callback can access the 898 parent and children of the node being processed. */ 899 if (free) 900 to_free.safe_push (node); 901 } 902 903 if (free) 904 { 905 delete *forest_iter; 906 907 while (!to_free.is_empty ()) 908 { 909 fma_node *node = to_free.pop (); 910 if (node->root_p ()) 911 delete static_cast<fma_root_node *> (node); 912 else 913 delete node; 914 } 915 } 916 } 917} 918 919/* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */ 920 921void 922func_fma_steering::analyze () 923{ 924 int i, n_blocks, *bb_dfs_preorder; 925 basic_block bb; 926 rtx_insn *insn; 927 928 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); 929 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); 930 931 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB 932 instructions. */ 933 for (i = 0; i < n_blocks; i++) 934 { 935 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); 936 FOR_BB_INSNS (bb, insn) 937 { 938 operand_rr_info *dest_op_info; 939 struct du_chain *chain = NULL; 940 unsigned dest_regno; 941 fma_forest *forest = NULL; 942 du_head_p head = NULL; 943 int i; 944 945 if (!is_fmul_fmac_insn (insn, true)) 946 continue; 947 948 /* Search the chain where this instruction is (one of) the root. */ 949 dest_op_info = insn_rr[INSN_UID (insn)].op_info; 950 dest_regno = REGNO (SET_DEST (PATTERN (insn))); 951 for (i = 0; i < dest_op_info->n_chains; i++) 952 { 953 /* The register tracked by this chain does not match the 954 destination register of insn. */ 955 if (dest_op_info->heads[i]->regno != dest_regno) 956 continue; 957 958 head = dest_op_info->heads[i]; 959 /* The chain was merged in another, find the new head. */ 960 if (!head->first) 961 head = regrename_chain_from_id (head->id); 962 963 /* Search the chain element for this instruction and, if another 964 FMUL or FMADD/FMSUB instruction was already processed, note 965 the forest of its tree. */ 966 forest = NULL; 967 for (chain = head->first; chain; chain = chain->next_use) 968 { 969 fma_node **fma_slot; 970 971 if (!is_fmul_fmac_insn (chain->insn, true)) 972 continue; 973 974 /* This is a use, continue. */ 975 if (chain->loc != &SET_DEST (PATTERN (chain->insn))) 976 continue; 977 978 if (chain->insn == insn) 979 break; 980 981 fma_slot = this->m_insn_fma_head_map->get (chain->insn); 982 if (fma_slot && (*fma_slot)->get_children ()) 983 forest = (*fma_slot)->get_forest (); 984 } 985 if (chain) 986 break; 987 } 988 989 /* Due to implementation of regrename, dest register can slip away 990 from regrename's analysis. As a result, there is no chain for 991 the destination register of insn. We simply skip the insn even 992 it is a fmul/fmac instruction. This can happen when the dest 993 register is also a source register of insn and one of the below 994 conditions is satisfied: 995 1) the source reg is setup in larger mode than this insn; 996 2) the source reg is uninitialized; 997 3) the source reg is passed in as parameter. */ 998 if (i < dest_op_info->n_chains) 999 this->analyze_fma_fmul_insn (forest, chain, head); 1000 } 1001 } 1002 free (bb_dfs_preorder); 1003 1004 if (dump_file) 1005 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info, 1006 false); 1007} 1008 1009/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with 1010 the objective of keeping FPU pipeline balanced in term of instructions and 1011 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be 1012 scheduled on the same pipeline. */ 1013 1014void 1015func_fma_steering::rename_fma_trees () 1016{ 1017 this->dfs (dispatch_forest, NULL, rename_fma_node, true); 1018 1019 if (dump_file && !this->m_fma_forests.empty ()) 1020 { 1021 fprintf (dump_file, "Function %s has ", current_function_name ()); 1022 if (this->m_fpu_balance == 0) 1023 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the " 1024 "two FPU pipelines\n"); 1025 else if (this->m_fpu_balance > 0) 1026 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second " 1027 "FPU pipeline\n", this->m_fpu_balance); 1028 else /* this->m_fpu_balance < 0 */ 1029 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first " 1030 "FPU pipeline\n", - this->m_fpu_balance); 1031 } 1032} 1033 1034/* Execute FMA steering pass. */ 1035 1036void 1037func_fma_steering::execute_fma_steering () 1038{ 1039 df_set_flags (DF_LR_RUN_DCE); 1040 df_note_add_problem (); 1041 df_analyze (); 1042 df_set_flags (DF_DEFER_INSN_RESCAN); 1043 1044 regrename_init (true); 1045 regrename_analyze (NULL); 1046 this->analyze (); 1047 this->rename_fma_trees (); 1048 regrename_finish (); 1049} 1050 1051const pass_data pass_data_fma_steering = 1052{ 1053 RTL_PASS, /* type */ 1054 "fma_steering", /* name */ 1055 OPTGROUP_NONE, /* optinfo_flags */ 1056 TV_NONE, /* tv_id */ 1057 0, /* properties_required */ 1058 0, /* properties_provided */ 1059 0, /* properties_destroyed */ 1060 0, /* todo_flags_start */ 1061 TODO_df_finish, /* todo_flags_finish */ 1062}; 1063 1064class pass_fma_steering : public rtl_opt_pass 1065{ 1066public: 1067 pass_fma_steering (gcc::context *ctxt) 1068 : rtl_opt_pass (pass_data_fma_steering, ctxt) 1069 {} 1070 1071 /* opt_pass methods: */ 1072 virtual bool gate (function *) 1073 { 1074 return (aarch64_tune_params.extra_tuning_flags 1075 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) 1076 && optimize >= 2; 1077 } 1078 1079 virtual unsigned int execute (function *) 1080 { 1081 func_fma_steering *fma_steering = new func_fma_steering; 1082 fma_steering->execute_fma_steering (); 1083 delete fma_steering; 1084 return 0; 1085 } 1086 1087}; // class pass_fma_steering 1088 1089/* Create a new fma steering pass instance. */ 1090 1091rtl_opt_pass * 1092make_pass_fma_steering (gcc::context *ctxt) 1093{ 1094 return new pass_fma_steering (ctxt); 1095} 1096