1/* FMA steering optimization pass for Cortex-A57.
2   Copyright (C) 2015-2020 Free Software Foundation, Inc.
3   Contributed by ARM Ltd.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING3.  If not see
19   <http://www.gnu.org/licenses/>.  */
20
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#define INCLUDE_LIST
25#include "system.h"
26#include "coretypes.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "df.h"
31#include "insn-config.h"
32#include "regs.h"
33#include "memmodel.h"
34#include "emit-rtl.h"
35#include "recog.h"
36#include "cfganal.h"
37#include "insn-attr.h"
38#include "context.h"
39#include "tree-pass.h"
40#include "function-abi.h"
41#include "regrename.h"
42#include "aarch64-protos.h"
43
44/* For better performance, the destination of FMADD/FMSUB instructions should
45   have the same parity as their accumulator register if the accumulator
46   contains the result of a previous FMUL or FMADD/FMSUB instruction if
47   targetting Cortex-A57 processors.  Performance is also increased by
48   otherwise keeping a good balance in the parity of the destination register
49   of FMUL or FMADD/FMSUB.
50
51   This pass ensure that registers are renamed so that these conditions hold.
52   We reuse the existing register renaming facility from regrename.c to build
53   dependency chains and expose candidate registers for renaming.
54
55
56   The algorithm has three steps:
57
58   First, the functions of the register renaming pass are called.  These
59   analyze the instructions and produce a list of def/use chains of
60   instructions.
61
62   Next, this information is used to build trees of multiply and
63   multiply-accumulate instructions.  The roots of these trees are any
64   multiply, or any multiply-accumulate whose accumulator is not dependent on
65   a multiply or multiply-accumulate instruction.  A child is added to the
66   tree where a dependency chain exists between the result of the parent
67   instruction and the accumulator operand of the child, as in the diagram
68   below:
69
70		 fmul s2, s0, s1
71		/		\
72   fmadd s0, s1, s1, s2   fmadd s4, s1, s1 s2
73	    |
74   fmadd s3, s1, s1, s0
75
76   Trees made of a single instruction are permitted.
77
78   Finally, renaming is performed.  The parity of the destination register at
79   the root of a tree is checked against the current balance of multiply and
80   multiply-accumulate on each pipeline.  If necessary, the root of a tree is
81   renamed, in which case the rest of the tree is then renamed to keep the same
82   parity in the destination registers of all instructions in the tree.  */
83
84
85
86/* Forward declarations.  */
87class fma_node;
88class fma_root_node;
89class func_fma_steering;
90
91/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
92   FMADD/FMSUB instructions form a graph.  This is because alternatives can
93   make a register be set by several FMUL or FMADD/FMSUB instructions in
94   different basic blocks and because of loops.  For ease of browsing, the
95   connected components of this graph are broken up into forests of trees.
96   Forests are represented by fma_forest objects, contained in the fma_forests
97   list.  Using a separate object for the forests allows for a better use of
98   memory as there is some information that is global to each forest, such as
99   the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
100   floating-point execution pipelines.  */
101
102class fma_forest
103{
104public:
105  fma_forest (func_fma_steering *, fma_root_node *, int);
106  ~fma_forest ();
107
108  int get_id ();
109  std::list<fma_root_node *> *get_roots ();
110  func_fma_steering *get_globals ();
111  int get_target_parity ();
112  void fma_node_created (fma_node *);
113  void merge_forest (fma_forest *);
114  void dump_info ();
115  void dispatch ();
116
117private:
118  /* Prohibit copy construction.  */
119  fma_forest (const fma_forest &);
120
121  /* The list of roots that form this forest.  */
122  std::list<fma_root_node *> *m_roots;
123
124  /* Target parity the destination register of all FMUL and FMADD/FMSUB
125     instructions in this forest should have.  */
126  int m_target_parity;
127
128  /* Link to the instance of func_fma_steering holding data related to the
129     FMA steering of the current function (cfun).  */
130  func_fma_steering *m_globals;
131
132  /* Identifier for the forest (used for dumps).  */
133  int m_id;
134
135  /* Total number of nodes in the forest (for statistics).  */
136  int m_nb_nodes;
137};
138
139class fma_node
140{
141public:
142  fma_node (fma_node *parent, du_chain *chain);
143  ~fma_node ();
144
145  bool root_p ();
146  fma_forest *get_forest ();
147  std::list<fma_node *> *get_children ();
148  rtx_insn *get_insn ();
149  void add_child (fma_node *);
150  int get_parity ();
151  void set_head (du_head *);
152  void rename (fma_forest *);
153  void dump_info (fma_forest *);
154
155private:
156  /* Prohibit copy construction.  */
157  fma_node (const fma_node &);
158
159protected:
160  /* Root node that lead to this node.  */
161  fma_root_node *m_root;
162
163  /* The parent node of this node.  If the node belong to a chain with several
164     parent nodes, the first one encountered in a depth-first search is chosen
165     as canonical parent.  */
166  fma_node *m_parent;
167
168  /* The list of child nodes.  If a chain contains several parent nodes, one is
169     chosen as canonical parent and the others will have no children.  */
170  std::list<fma_node *> *m_children;
171
172  /* The associated DU_HEAD chain that the insn represented by this object
173     is (one of) the root of.  When a chain contains several roots, the non
174     canonical ones have this field set to NULL.  */
175  struct du_head *m_head;
176
177  /* The FMUL or FMADD/FMSUB instruction this object corresponds to.  */
178  rtx_insn *m_insn;
179};
180
181class fma_root_node : public fma_node
182{
183public:
184  fma_root_node (func_fma_steering *, du_chain *, int);
185
186  fma_forest *get_forest ();
187  void set_forest (fma_forest *);
188  void dump_info (fma_forest *);
189
190private:
191  /* The forest this node belonged to when it was created.  */
192  fma_forest *m_forest;
193};
194
195/* Class holding all data and methods relative to the FMA steering of a given
196   function.  The FMA steering pass could then run in parallel for different
197   functions.  */
198
199class func_fma_steering
200{
201public:
202  func_fma_steering ();
203  ~func_fma_steering ();
204
205  int get_fpu_balance ();
206  void remove_forest (fma_forest *);
207  bool put_node (fma_node *);
208  void update_balance (int);
209  fma_node *get_fma_node (rtx_insn *);
210  void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
211  void execute_fma_steering ();
212
213private:
214  /* Prohibit copy construction.  */
215  func_fma_steering (const func_fma_steering &);
216
217  void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
218	    void (*) (fma_forest *, fma_node *), bool);
219  void analyze ();
220  void rename_fma_trees ();
221
222  /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
223     fma_node object.  Used when analyzing an instruction that is a root of
224     a chain to find if such an object was created because this instruction
225     is also a use in another chain.  */
226  hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
227
228  /* A list of all the forests in a given function.  */
229  std::list<fma_forest *> m_fma_forests;
230
231  /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
232     pipelines:
233     < 0: more instruction dispatched to the first pipeline
234     == 0: perfect balance
235     > 0: more instruction dispatched to the second pipeline.  */
236  int m_fpu_balance;
237
238  /* Identifier for the next forest created.  */
239  int m_next_forest_id;
240};
241
242/* Rename the register HEAD->regno in all the insns in the chain HEAD to any
243   register not in the set UNAVAILABLE.  Adapted from rename_chains in
244   regrename.c.  */
245
246static bool
247rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
248{
249  int best_new_reg;
250  int n_uses = 0;
251  struct du_chain *tmp;
252  int reg = head->regno;
253  enum reg_class super_class = NO_REGS;
254
255  if (head->cannot_rename)
256    return false;
257
258  if (fixed_regs[reg] || global_regs[reg]
259      || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
260    return false;
261
262  /* Iterate over elements in the chain in order to:
263     1. Count number of uses, and narrow the set of registers we can
264	use for renaming.
265     2. Compute the superunion of register classes in this chain.  */
266  for (tmp = head->first; tmp; tmp = tmp->next_use)
267    {
268      if (DEBUG_INSN_P (tmp->insn))
269	continue;
270      n_uses++;
271      *unavailable |= ~reg_class_contents[tmp->cl];
272      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
273    }
274
275  if (n_uses < 1)
276    return false;
277
278  best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
279				  false);
280
281  if (dump_file)
282    {
283      fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
284	       INSN_UID (head->first->insn));
285      if (head->call_abis)
286	fprintf (dump_file, " crosses a call");
287    }
288
289  if (best_new_reg == reg)
290    {
291      if (dump_file)
292	fprintf (dump_file, "; no available better choice\n");
293      return false;
294    }
295
296  if (regrename_do_replace (head, best_new_reg))
297    {
298      if (dump_file)
299	fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
300      df_set_regs_ever_live (best_new_reg, true);
301    }
302  else
303    {
304      if (dump_file)
305	fprintf (dump_file, ", renaming as %s failed\n",
306		 reg_names[best_new_reg]);
307      return false;
308    }
309  return true;
310}
311
312/* Return whether T is the attribute of a FMADD/FMSUB-like instruction.  */
313
314static bool
315is_fmac_op (enum attr_type t)
316{
317  return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
318}
319
320/* Return whether T is the attribute of a FMUL instruction.  */
321
322static bool
323is_fmul_op (enum attr_type t)
324{
325  return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
326}
327
328/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
329   instruction.  */
330
331static bool
332is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
333{
334  enum attr_type t;
335
336  if (!NONDEBUG_INSN_P (insn))
337    return false;
338
339  if (recog_memoized (insn) < 0)
340    return false;
341
342  /* Only consider chain(s) this instruction is a root of if this is an FMUL or
343     FMADD/FMSUB instruction.  This allows to avoid browsing chains of all
344     instructions for FMUL or FMADD/FMSUB in them.  */
345  t = get_attr_type (insn);
346  return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
347}
348
349
350/*
351 * Class fma_forest method definitions.
352 */
353
354fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
355			int id)
356{
357      memset (this, 0, sizeof (*this));
358      this->m_globals = fma_steer;
359      this->m_roots = new std::list<fma_root_node *>;
360      this->m_roots->push_back (fma_root);
361      this->m_id = id;
362}
363
364fma_forest::~fma_forest ()
365{
366  delete this->m_roots;
367}
368
369int
370fma_forest::get_id ()
371{
372  return this->m_id;
373}
374
375std::list<fma_root_node *> *
376fma_forest::get_roots ()
377{
378  return this->m_roots;
379}
380
381func_fma_steering *
382fma_forest::get_globals ()
383{
384  return this->m_globals;
385}
386
387int
388fma_forest::get_target_parity ()
389{
390  return this->m_target_parity;
391}
392
393/* Act on the creation of NODE by updating statistics in FOREST and adding an
394   entry for it in the func_fma_steering hashmap.  */
395
396void fma_forest::fma_node_created (fma_node *node)
397{
398  bool created = !this->m_globals->put_node (node);
399
400  gcc_assert (created);
401  this->m_nb_nodes++;
402}
403
404/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
405   fma_forest object to represent both.  */
406
407void
408fma_forest::merge_forest (fma_forest *other_forest)
409{
410  std::list<fma_root_node *> *other_roots;
411  std::list<fma_root_node *>::iterator other_root_iter;
412
413  if (this == other_forest)
414    return;
415
416  other_roots = other_forest->m_roots;
417
418  /* Update root nodes' pointer to forest.  */
419  for (other_root_iter = other_roots->begin ();
420       other_root_iter != other_roots->end (); ++other_root_iter)
421    (*other_root_iter)->set_forest (this);
422
423  /* Remove other_forest from the list of forests and move its tree roots in
424     the list of tree roots of ref_forest.  */
425  this->m_globals->remove_forest (other_forest);
426  this->m_roots->splice (this->m_roots->begin (), *other_roots);
427  this->m_nb_nodes += other_forest->m_nb_nodes;
428
429  delete other_forest;
430}
431
432/* Dump information about the forest FOREST.  */
433
434void
435fma_forest::dump_info ()
436{
437  gcc_assert (dump_file);
438
439  fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
440	   this->m_nb_nodes);
441}
442
443/* Wrapper around fma_forest::dump_info for use as parameter of function
444   pointer type in func_fma_steering::dfs.  */
445
446static void
447dump_forest_info (fma_forest *forest)
448{
449  forest->dump_info ();
450}
451
452/* Dispatch forest to the least utilized pipeline.  */
453
454void
455fma_forest::dispatch ()
456{
457  this->m_target_parity = this->m_roots->front ()->get_parity ();
458  int fpu_balance = this->m_globals->get_fpu_balance ();
459  if (fpu_balance != 0)
460    this->m_target_parity = (fpu_balance < 0);
461
462  if (dump_file)
463    fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
464	     this->m_target_parity ? "odd" : "even");
465}
466
467/* Wrapper around fma_forest::dispatch for use as parameter of function pointer
468   type in func_fma_steering::dfs.  */
469
470static void
471dispatch_forest (fma_forest *forest)
472{
473  forest->dispatch ();
474}
475
476fma_node::fma_node (fma_node *parent, du_chain *chain)
477{
478  memset (this, 0, sizeof (*this));
479  this->m_parent = parent;
480  this->m_children = new std::list<fma_node *>;
481  this->m_insn = chain->insn;
482  /* root_p () cannot be used to check for root before root is set.  */
483  if (this->m_parent == this)
484    this->m_root = static_cast<fma_root_node *> (parent);
485  else
486    {
487      this->m_root = parent->m_root;
488      this->get_forest ()->fma_node_created (this);
489    }
490}
491
492fma_node::~fma_node ()
493{
494  delete this->m_children;
495}
496
497std::list<fma_node *> *
498fma_node::get_children ()
499{
500  return this->m_children;
501}
502
503rtx_insn *
504fma_node::get_insn ()
505{
506  return this->m_insn;
507}
508
509void
510fma_node::set_head (du_head *head)
511{
512  gcc_assert (!this->m_head);
513  this->m_head = head;
514}
515
516/* Add a child to this node in the list of children.  */
517
518void
519fma_node::add_child (fma_node *child)
520{
521  this->m_children->push_back (child);
522}
523
524/* Return the parity of the destination register of the instruction represented
525   by this node.  */
526
527int
528fma_node::get_parity ()
529{
530  return this->m_head->regno % 2;
531}
532
533/* Get the actual forest associated with a non root node as the one the node
534   points to might have been merged into another one.  In that case the pointer
535   in the root nodes are updated so we return the forest pointer of a root node
536   pointed to by the initial forest.  Despite being a oneliner, this method is
537   defined here as it references a method from fma_root_node.  */
538
539fma_forest *
540fma_node::get_forest ()
541{
542  return this->m_root->get_forest ();
543}
544
545/* Return whether a node is a root node.  */
546
547bool
548fma_node::root_p ()
549{
550  return this->m_root == this;
551}
552
553/* Dump information about the children of node FMA_NODE in forest FOREST.  */
554
555void
556fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
557{
558  struct du_chain *chain;
559  std::list<fma_node *>::iterator fma_child;
560
561  gcc_assert (dump_file);
562
563  if (this->get_children ()->empty ())
564    return;
565
566  fprintf (dump_file, "Instruction(s)");
567  for (chain = this->m_head->first; chain; chain = chain->next_use)
568    {
569      if (!is_fmul_fmac_insn (chain->insn, true))
570	continue;
571
572      if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
573	continue;
574
575      fprintf (dump_file, " %d", INSN_UID (chain->insn));
576    }
577
578  fprintf (dump_file, " is(are) accumulator dependency of instructions");
579  for (fma_child = this->get_children ()->begin ();
580       fma_child != this->get_children ()->end (); fma_child++)
581    fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
582  fprintf (dump_file, "\n");
583}
584
585/* Wrapper around fma_node::dump_info for use as parameter of function pointer
586   type in func_fma_steering::dfs.  */
587
588static void
589dump_tree_node_info (fma_forest *forest, fma_node *node)
590{
591  node->dump_info (forest);
592}
593
594/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
595   represented by FMA_NODE to a register that respect the target parity for
596   FOREST or with same parity of the instruction represented by its parent node
597   if it has one.  */
598
599void
600fma_node::rename (fma_forest *forest)
601{
602  int cur_parity, target_parity;
603
604  /* This is alternate root of a chain and thus has no children.  It will be
605     renamed when processing the canonical root for that chain.  */
606  if (!this->m_head)
607    return;
608
609  target_parity = forest->get_target_parity ();
610  if (this->m_parent)
611    target_parity = this->m_parent->get_parity ();
612  cur_parity = this->get_parity ();
613
614  /* Rename if parity differs.  */
615  if (cur_parity != target_parity)
616    {
617      rtx_insn *insn = this->m_insn;
618      HARD_REG_SET unavailable;
619      machine_mode mode;
620      int reg;
621
622      if (dump_file)
623	{
624	  unsigned cur_dest_reg = this->m_head->regno;
625
626	  fprintf (dump_file, "FMA or FMUL at insn %d but destination "
627		   "register (%s) has different parity from expected to "
628		   "maximize FPU pipeline utilization\n", INSN_UID (insn),
629		   reg_names[cur_dest_reg]);
630	}
631
632      /* Don't clobber traceback for noreturn functions.  */
633      CLEAR_HARD_REG_SET (unavailable);
634      if (frame_pointer_needed)
635	{
636	  add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
637	  add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
638	}
639
640      /* Exclude registers with wrong parity.  */
641      mode = GET_MODE (SET_DEST (PATTERN (insn)));
642      for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
643	add_to_hard_reg_set (&unavailable, mode, reg);
644
645      if (!rename_single_chain (this->m_head, &unavailable))
646	{
647	  if (dump_file)
648	    fprintf (dump_file, "Destination register of insn %d could not be "
649		     "renamed. Dependent FMA insns will use this parity from "
650		     "there on.\n", INSN_UID (insn));
651	}
652      else
653	cur_parity = target_parity;
654    }
655
656  forest->get_globals ()->update_balance (cur_parity);
657}
658
659/* Wrapper around fma_node::dump_info for use as parameter of function pointer
660   type in func_fma_steering::dfs.  */
661
662static void
663rename_fma_node (fma_forest *forest, fma_node *node)
664{
665  node->rename (forest);
666}
667
668fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
669			      int id) : fma_node (this, chain)
670{
671  this->m_forest = new fma_forest (globals, this, id);
672  this->m_forest->fma_node_created (this);
673}
674
675fma_forest *
676fma_root_node::get_forest ()
677{
678  return this->m_forest;
679}
680
681void
682fma_root_node::set_forest (fma_forest *ref_forest)
683{
684  this->m_forest = ref_forest;
685}
686
687/* Dump information about the roots of forest FOREST.  */
688
689void
690fma_root_node::dump_info (fma_forest *forest)
691{
692  gcc_assert (dump_file);
693
694  if (this == forest->get_roots ()->front ())
695    fprintf (dump_file, "Instruction(s) at root of forest #%d:",
696	     forest->get_id ());
697  fprintf (dump_file, " %d", INSN_UID (this->m_insn));
698  if (this == forest->get_roots ()->back ())
699    fprintf (dump_file, "\n");
700}
701
702/* Wrapper around fma_root_node::dump_info for use as parameter of function
703   pointer type in func_fma_steering::dfs.  */
704
705static void
706dump_tree_root_info (fma_forest *forest, fma_root_node *node)
707{
708  node->dump_info (forest);
709}
710
711func_fma_steering::func_fma_steering () : m_fpu_balance (0)
712{
713  this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
714  this->m_fma_forests.clear ();
715  this->m_next_forest_id = 0;
716}
717
718func_fma_steering::~func_fma_steering ()
719{
720  delete this->m_insn_fma_head_map;
721}
722
723int
724func_fma_steering::get_fpu_balance ()
725{
726  return this->m_fpu_balance;
727}
728
729void
730func_fma_steering::remove_forest (fma_forest *forest)
731{
732  this->m_fma_forests.remove (forest);
733}
734
735/* Memorize the mapping of this instruction to its fma_node object and return
736   whether such a mapping existed.  */
737
738bool
739func_fma_steering::put_node (fma_node *node)
740{
741  return this->m_insn_fma_head_map->put (node->get_insn (), node);
742}
743
744/* Update the current balance considering a node with the given PARITY.  */
745
746void
747func_fma_steering::update_balance (int parity)
748{
749  this->m_fpu_balance = parity ? this->m_fpu_balance + 1
750			       : this->m_fpu_balance - 1;
751}
752
753/* Return whether an fma_node object exists for instruction INSN and, if not,
754   allocate one in *RET.  */
755
756fma_node *
757func_fma_steering::get_fma_node (rtx_insn *insn)
758{
759  fma_node **fma_slot;
760
761  fma_slot = this->m_insn_fma_head_map->get (insn);
762  if (fma_slot)
763    return *fma_slot;
764  return NULL;
765}
766
767/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
768   instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
769   part of FOREST.  For the children, the associated head is left untouched
770   (and thus null) as this function will be called again when considering the
771   chain where they are def.  For the parent, the chain is given in HEAD.  */
772
773void
774func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
775					  du_chain *chain, du_head_p head)
776{
777  fma_forest *forest;
778  fma_node *node = this->get_fma_node (chain->insn);
779
780  /* This is a root node.  */
781  if (!node)
782    {
783      fma_root_node *root_node;
784
785      root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
786      forest = root_node->get_forest ();
787      node = root_node;
788
789      /* Until proved otherwise, assume this root is not part of an existing
790	 forest and thus add its forest to the list of forests.  */
791      this->m_fma_forests.push_back (forest);
792    }
793  else
794    forest = node->get_forest ();
795
796  node->set_head (head);
797
798  /* fma_node is part of a chain with several defs, one of them having already
799     been processed.  The root of that already processed def is the canonical
800     one and the root of fma_node is added to its forest.  No need to process
801     the children nodes as they were already processed when the other def was
802     processed.  */
803  if (ref_forest)
804    {
805      ref_forest->merge_forest (forest);
806      return;
807    }
808
809  for (chain = head->first; chain; chain = chain->next_use)
810    {
811      fma_node *child_fma;
812      rtx fma_rtx, *accum_rtx_p;
813
814      if (!is_fmul_fmac_insn (chain->insn, false))
815	continue;
816
817      /* Get FMA rtx.  */
818      fma_rtx = SET_SRC (PATTERN (chain->insn));
819      /* FMA is negated.  */
820      if (GET_CODE (fma_rtx) == NEG)
821	fma_rtx = XEXP (fma_rtx, 0);
822      /* Get accumulator rtx.  */
823      accum_rtx_p = &XEXP (fma_rtx, 2);
824      /* Accumulator is negated.  */
825      if (!REG_P (*accum_rtx_p))
826	accum_rtx_p = &XEXP (*accum_rtx_p, 0);
827
828      /* This du_chain structure is not for the accumulator register.  */
829      if (accum_rtx_p != chain->loc)
830	continue;
831
832      /* If object already created, this is a loop carried dependency.  We
833	 don't include this object in the children as we want trees for
834	 rename_fma_trees to not be an infinite loop.  */
835      if (this->get_fma_node (chain->insn))
836	continue;
837
838      child_fma = new fma_node (node, chain);
839
840      /* Memorize the mapping of this instruction to its fma_node object
841	 as it will be processed for the chain starting at its destination
842	 register later.  */
843
844      /* Link to siblings.  */
845      node->add_child (child_fma);
846    }
847}
848
849/* Perform a depth-first search of the forests of fma_node in
850   THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
851   THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
852   PROCESS_NODE () on each node.  If FREE is true, free all std::list in the
853   same dfs.  */
854
855void
856func_fma_steering::dfs (void (*process_forest) (fma_forest *),
857			void (*process_root) (fma_forest *, fma_root_node *),
858			void (*process_node) (fma_forest *, fma_node *),
859			bool free)
860{
861  auto_vec<fma_node *> to_process;
862  auto_vec<fma_node *> to_free;
863  std::list<fma_forest *>::iterator forest_iter;
864
865  /* For each forest.  */
866  for (forest_iter = this->m_fma_forests.begin ();
867       forest_iter != this->m_fma_forests.end (); ++forest_iter)
868    {
869      std::list<fma_root_node *>::iterator root_iter;
870
871      if (process_forest)
872	process_forest (*forest_iter);
873
874      /* For each tree root in this forest.  */
875      for (root_iter = (*forest_iter)->get_roots ()->begin ();
876	   root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
877	{
878	  if (process_root)
879	    process_root (*forest_iter, *root_iter);
880	  to_process.safe_push (*root_iter);
881	}
882
883      /* For each tree node in this forest.  */
884      while (!to_process.is_empty ())
885	{
886	  fma_node *node;
887	  std::list<fma_node *>::iterator child_iter;
888
889	  node = to_process.pop ();
890
891	  if (process_node)
892	    process_node (*forest_iter, node);
893
894	  for (child_iter = node->get_children ()->begin ();
895	       child_iter != node->get_children ()->end (); ++child_iter)
896	    to_process.safe_push (*child_iter);
897
898	  /* Defer freeing so that the process_node callback can access the
899	     parent and children of the node being processed.  */
900	  if (free)
901	    to_free.safe_push (node);
902	}
903
904      if (free)
905	{
906	  delete *forest_iter;
907
908	  while (!to_free.is_empty ())
909	    {
910	      fma_node *node = to_free.pop ();
911	      if (node->root_p ())
912		delete static_cast<fma_root_node *> (node);
913	      else
914		delete node;
915	    }
916	}
917    }
918}
919
920/* Build the dependency trees of FMUL and FMADD/FMSUB instructions.  */
921
922void
923func_fma_steering::analyze ()
924{
925  int i, n_blocks, *bb_dfs_preorder;
926  basic_block bb;
927  rtx_insn *insn;
928
929  bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
930  n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
931
932  /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
933     instructions.  */
934  for (i = 0; i < n_blocks; i++)
935    {
936      bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
937      FOR_BB_INSNS (bb, insn)
938	{
939	  operand_rr_info *dest_op_info;
940	  struct du_chain *chain = NULL;
941	  unsigned dest_regno;
942	  fma_forest *forest = NULL;
943	  du_head_p head = NULL;
944	  int i;
945
946	  if (!is_fmul_fmac_insn (insn, true))
947	    continue;
948
949	  /* Search the chain where this instruction is (one of) the root.  */
950	  dest_op_info = insn_rr[INSN_UID (insn)].op_info;
951	  dest_regno = REGNO (SET_DEST (PATTERN (insn)));
952	  for (i = 0; i < dest_op_info->n_chains; i++)
953	    {
954	      /* The register tracked by this chain does not match the
955		 destination register of insn.  */
956	      if (dest_op_info->heads[i]->regno != dest_regno)
957		continue;
958
959	      head = dest_op_info->heads[i];
960	      /* The chain was merged in another, find the new head.  */
961	      if (!head->first)
962		head = regrename_chain_from_id (head->id);
963
964	      /* Search the chain element for this instruction and, if another
965		 FMUL or FMADD/FMSUB instruction was already processed, note
966		 the forest of its tree.  */
967	      forest = NULL;
968	      for (chain = head->first; chain; chain = chain->next_use)
969		{
970		  fma_node **fma_slot;
971
972		  if (!is_fmul_fmac_insn (chain->insn, true))
973		    continue;
974
975		  /* This is a use, continue.  */
976		  if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
977		    continue;
978
979		  if (chain->insn == insn)
980		    break;
981
982		  fma_slot = this->m_insn_fma_head_map->get (chain->insn);
983		  if (fma_slot && (*fma_slot)->get_children ())
984		    forest = (*fma_slot)->get_forest ();
985		}
986	      if (chain)
987		break;
988	    }
989
990	  /* Due to implementation of regrename, dest register can slip away
991	     from regrename's analysis.  As a result, there is no chain for
992	     the destination register of insn.  We simply skip the insn even
993	     it is a fmul/fmac instruction.  This can happen when the dest
994	     register is also a source register of insn and one of the below
995	     conditions is satisfied:
996	       1) the source reg is setup in larger mode than this insn;
997	       2) the source reg is uninitialized;
998	       3) the source reg is passed in as parameter.  */
999	  if (i < dest_op_info->n_chains)
1000	    this->analyze_fma_fmul_insn (forest, chain, head);
1001	}
1002    }
1003  free (bb_dfs_preorder);
1004
1005  if (dump_file)
1006    this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
1007	       false);
1008}
1009
1010/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
1011   the objective of keeping FPU pipeline balanced in term of instructions and
1012   having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
1013   scheduled on the same pipeline.  */
1014
1015void
1016func_fma_steering::rename_fma_trees ()
1017{
1018  this->dfs (dispatch_forest, NULL, rename_fma_node, true);
1019
1020  if (dump_file && !this->m_fma_forests.empty ())
1021    {
1022      fprintf (dump_file, "Function %s has ", current_function_name ());
1023      if (this->m_fpu_balance == 0)
1024	fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1025		 "two FPU pipelines\n");
1026      else if (this->m_fpu_balance > 0)
1027	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1028		 "FPU pipeline\n", this->m_fpu_balance);
1029      else /* this->m_fpu_balance < 0 */
1030	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1031		 "FPU pipeline\n", - this->m_fpu_balance);
1032    }
1033}
1034
1035/* Execute FMA steering pass.  */
1036
1037void
1038func_fma_steering::execute_fma_steering ()
1039{
1040  df_set_flags (DF_LR_RUN_DCE);
1041  df_note_add_problem ();
1042  df_analyze ();
1043  df_set_flags (DF_DEFER_INSN_RESCAN);
1044
1045  regrename_init (true);
1046  regrename_analyze (NULL);
1047  this->analyze ();
1048  this->rename_fma_trees ();
1049  regrename_finish ();
1050}
1051
1052const pass_data pass_data_fma_steering =
1053{
1054  RTL_PASS, /* type */
1055  "fma_steering", /* name */
1056  OPTGROUP_NONE, /* optinfo_flags */
1057  TV_NONE, /* tv_id */
1058  0, /* properties_required */
1059  0, /* properties_provided */
1060  0, /* properties_destroyed */
1061  0, /* todo_flags_start */
1062  TODO_df_finish, /* todo_flags_finish */
1063};
1064
1065class pass_fma_steering : public rtl_opt_pass
1066{
1067public:
1068  pass_fma_steering (gcc::context *ctxt)
1069    : rtl_opt_pass (pass_data_fma_steering, ctxt)
1070  {}
1071
1072  /* opt_pass methods: */
1073  virtual bool gate (function *)
1074    {
1075      return (aarch64_tune_params.extra_tuning_flags
1076	      & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1077	      && optimize >= 2;
1078    }
1079
1080  virtual unsigned int execute (function *)
1081    {
1082      func_fma_steering *fma_steering = new func_fma_steering;
1083      fma_steering->execute_fma_steering ();
1084      delete fma_steering;
1085      return 0;
1086    }
1087
1088}; // class pass_fma_steering
1089
1090/* Create a new fma steering pass instance.  */
1091
1092rtl_opt_pass *
1093make_pass_fma_steering (gcc::context *ctxt)
1094{
1095  return new pass_fma_steering (ctxt);
1096}
1097