cortex-a57-fma-steering.c revision 1.5
1/* FMA steering optimization pass for Cortex-A57.
2   Copyright (C) 2015-2019 Free Software Foundation, Inc.
3   Contributed by ARM Ltd.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING3.  If not see
19   <http://www.gnu.org/licenses/>.  */
20
21#define IN_TARGET_CODE 1
22
23#include "config.h"
24#define INCLUDE_LIST
25#include "system.h"
26#include "coretypes.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "df.h"
31#include "insn-config.h"
32#include "regs.h"
33#include "memmodel.h"
34#include "emit-rtl.h"
35#include "recog.h"
36#include "cfganal.h"
37#include "insn-attr.h"
38#include "context.h"
39#include "tree-pass.h"
40#include "regrename.h"
41#include "aarch64-protos.h"
42
43/* For better performance, the destination of FMADD/FMSUB instructions should
44   have the same parity as their accumulator register if the accumulator
45   contains the result of a previous FMUL or FMADD/FMSUB instruction if
46   targetting Cortex-A57 processors.  Performance is also increased by
47   otherwise keeping a good balance in the parity of the destination register
48   of FMUL or FMADD/FMSUB.
49
50   This pass ensure that registers are renamed so that these conditions hold.
51   We reuse the existing register renaming facility from regrename.c to build
52   dependency chains and expose candidate registers for renaming.
53
54
55   The algorithm has three steps:
56
57   First, the functions of the register renaming pass are called.  These
58   analyze the instructions and produce a list of def/use chains of
59   instructions.
60
61   Next, this information is used to build trees of multiply and
62   multiply-accumulate instructions.  The roots of these trees are any
63   multiply, or any multiply-accumulate whose accumulator is not dependent on
64   a multiply or multiply-accumulate instruction.  A child is added to the
65   tree where a dependency chain exists between the result of the parent
66   instruction and the accumulator operand of the child, as in the diagram
67   below:
68
69		 fmul s2, s0, s1
70		/		\
71   fmadd s0, s1, s1, s2   fmadd s4, s1, s1 s2
72	    |
73   fmadd s3, s1, s1, s0
74
75   Trees made of a single instruction are permitted.
76
77   Finally, renaming is performed.  The parity of the destination register at
78   the root of a tree is checked against the current balance of multiply and
79   multiply-accumulate on each pipeline.  If necessary, the root of a tree is
80   renamed, in which case the rest of the tree is then renamed to keep the same
81   parity in the destination registers of all instructions in the tree.  */
82
83
84
85/* Forward declarations.  */
86class fma_node;
87class fma_root_node;
88class func_fma_steering;
89
90/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
91   FMADD/FMSUB instructions form a graph.  This is because alternatives can
92   make a register be set by several FMUL or FMADD/FMSUB instructions in
93   different basic blocks and because of loops.  For ease of browsing, the
94   connected components of this graph are broken up into forests of trees.
95   Forests are represented by fma_forest objects, contained in the fma_forests
96   list.  Using a separate object for the forests allows for a better use of
97   memory as there is some information that is global to each forest, such as
98   the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
99   floating-point execution pipelines.  */
100
101class fma_forest
102{
103public:
104  fma_forest (func_fma_steering *, fma_root_node *, int);
105  ~fma_forest ();
106
107  int get_id ();
108  std::list<fma_root_node *> *get_roots ();
109  func_fma_steering *get_globals ();
110  int get_target_parity ();
111  void fma_node_created (fma_node *);
112  void merge_forest (fma_forest *);
113  void dump_info ();
114  void dispatch ();
115
116private:
117  /* Prohibit copy construction.  */
118  fma_forest (const fma_forest &);
119
120  /* The list of roots that form this forest.  */
121  std::list<fma_root_node *> *m_roots;
122
123  /* Target parity the destination register of all FMUL and FMADD/FMSUB
124     instructions in this forest should have.  */
125  int m_target_parity;
126
127  /* Link to the instance of func_fma_steering holding data related to the
128     FMA steering of the current function (cfun).  */
129  func_fma_steering *m_globals;
130
131  /* Identifier for the forest (used for dumps).  */
132  int m_id;
133
134  /* Total number of nodes in the forest (for statistics).  */
135  int m_nb_nodes;
136};
137
138class fma_node
139{
140public:
141  fma_node (fma_node *parent, du_chain *chain);
142  ~fma_node ();
143
144  bool root_p ();
145  fma_forest *get_forest ();
146  std::list<fma_node *> *get_children ();
147  rtx_insn *get_insn ();
148  void add_child (fma_node *);
149  int get_parity ();
150  void set_head (du_head *);
151  void rename (fma_forest *);
152  void dump_info (fma_forest *);
153
154private:
155  /* Prohibit copy construction.  */
156  fma_node (const fma_node &);
157
158protected:
159  /* Root node that lead to this node.  */
160  fma_root_node *m_root;
161
162  /* The parent node of this node.  If the node belong to a chain with several
163     parent nodes, the first one encountered in a depth-first search is chosen
164     as canonical parent.  */
165  fma_node *m_parent;
166
167  /* The list of child nodes.  If a chain contains several parent nodes, one is
168     chosen as canonical parent and the others will have no children.  */
169  std::list<fma_node *> *m_children;
170
171  /* The associated DU_HEAD chain that the insn represented by this object
172     is (one of) the root of.  When a chain contains several roots, the non
173     canonical ones have this field set to NULL.  */
174  struct du_head *m_head;
175
176  /* The FMUL or FMADD/FMSUB instruction this object corresponds to.  */
177  rtx_insn *m_insn;
178};
179
180class fma_root_node : public fma_node
181{
182public:
183  fma_root_node (func_fma_steering *, du_chain *, int);
184
185  fma_forest *get_forest ();
186  void set_forest (fma_forest *);
187  void dump_info (fma_forest *);
188
189private:
190  /* The forest this node belonged to when it was created.  */
191  fma_forest *m_forest;
192};
193
194/* Class holding all data and methods relative to the FMA steering of a given
195   function.  The FMA steering pass could then run in parallel for different
196   functions.  */
197
198class func_fma_steering
199{
200public:
201  func_fma_steering ();
202  ~func_fma_steering ();
203
204  int get_fpu_balance ();
205  void remove_forest (fma_forest *);
206  bool put_node (fma_node *);
207  void update_balance (int);
208  fma_node *get_fma_node (rtx_insn *);
209  void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
210  void execute_fma_steering ();
211
212private:
213  /* Prohibit copy construction.  */
214  func_fma_steering (const func_fma_steering &);
215
216  void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
217	    void (*) (fma_forest *, fma_node *), bool);
218  void analyze ();
219  void rename_fma_trees ();
220
221  /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
222     fma_node object.  Used when analyzing an instruction that is a root of
223     a chain to find if such an object was created because this instruction
224     is also a use in another chain.  */
225  hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
226
227  /* A list of all the forests in a given function.  */
228  std::list<fma_forest *> m_fma_forests;
229
230  /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
231     pipelines:
232     < 0: more instruction dispatched to the first pipeline
233     == 0: perfect balance
234     > 0: more instruction dispatched to the second pipeline.  */
235  int m_fpu_balance;
236
237  /* Identifier for the next forest created.  */
238  int m_next_forest_id;
239};
240
241/* Rename the register HEAD->regno in all the insns in the chain HEAD to any
242   register not in the set UNAVAILABLE.  Adapted from rename_chains in
243   regrename.c.  */
244
245static bool
246rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
247{
248  int best_new_reg;
249  int n_uses = 0;
250  struct du_chain *tmp;
251  int reg = head->regno;
252  enum reg_class super_class = NO_REGS;
253
254  if (head->cannot_rename)
255    return false;
256
257  if (fixed_regs[reg] || global_regs[reg]
258      || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
259    return false;
260
261  /* Iterate over elements in the chain in order to:
262     1. Count number of uses, and narrow the set of registers we can
263	use for renaming.
264     2. Compute the superunion of register classes in this chain.  */
265  for (tmp = head->first; tmp; tmp = tmp->next_use)
266    {
267      if (DEBUG_INSN_P (tmp->insn))
268	continue;
269      n_uses++;
270      IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
271      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
272    }
273
274  if (n_uses < 1)
275    return false;
276
277  best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
278				  false);
279
280  if (dump_file)
281    {
282      fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
283	       INSN_UID (head->first->insn));
284      if (head->need_caller_save_reg)
285	fprintf (dump_file, " crosses a call");
286    }
287
288  if (best_new_reg == reg)
289    {
290      if (dump_file)
291	fprintf (dump_file, "; no available better choice\n");
292      return false;
293    }
294
295  if (regrename_do_replace (head, best_new_reg))
296    {
297      if (dump_file)
298	fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
299      df_set_regs_ever_live (best_new_reg, true);
300    }
301  else
302    {
303      if (dump_file)
304	fprintf (dump_file, ", renaming as %s failed\n",
305		 reg_names[best_new_reg]);
306      return false;
307    }
308  return true;
309}
310
311/* Return whether T is the attribute of a FMADD/FMSUB-like instruction.  */
312
313static bool
314is_fmac_op (enum attr_type t)
315{
316  return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
317}
318
319/* Return whether T is the attribute of a FMUL instruction.  */
320
321static bool
322is_fmul_op (enum attr_type t)
323{
324  return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
325}
326
327/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
328   instruction.  */
329
330static bool
331is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
332{
333  enum attr_type t;
334
335  if (!NONDEBUG_INSN_P (insn))
336    return false;
337
338  if (recog_memoized (insn) < 0)
339    return false;
340
341  /* Only consider chain(s) this instruction is a root of if this is an FMUL or
342     FMADD/FMSUB instruction.  This allows to avoid browsing chains of all
343     instructions for FMUL or FMADD/FMSUB in them.  */
344  t = get_attr_type (insn);
345  return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
346}
347
348
349/*
350 * Class fma_forest method definitions.
351 */
352
353fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
354			int id)
355{
356      memset (this, 0, sizeof (*this));
357      this->m_globals = fma_steer;
358      this->m_roots = new std::list<fma_root_node *>;
359      this->m_roots->push_back (fma_root);
360      this->m_id = id;
361}
362
363fma_forest::~fma_forest ()
364{
365  delete this->m_roots;
366}
367
368int
369fma_forest::get_id ()
370{
371  return this->m_id;
372}
373
374std::list<fma_root_node *> *
375fma_forest::get_roots ()
376{
377  return this->m_roots;
378}
379
380func_fma_steering *
381fma_forest::get_globals ()
382{
383  return this->m_globals;
384}
385
386int
387fma_forest::get_target_parity ()
388{
389  return this->m_target_parity;
390}
391
392/* Act on the creation of NODE by updating statistics in FOREST and adding an
393   entry for it in the func_fma_steering hashmap.  */
394
395void fma_forest::fma_node_created (fma_node *node)
396{
397  bool created = !this->m_globals->put_node (node);
398
399  gcc_assert (created);
400  this->m_nb_nodes++;
401}
402
403/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
404   fma_forest object to represent both.  */
405
406void
407fma_forest::merge_forest (fma_forest *other_forest)
408{
409  std::list<fma_root_node *> *other_roots;
410  std::list<fma_root_node *>::iterator other_root_iter;
411
412  if (this == other_forest)
413    return;
414
415  other_roots = other_forest->m_roots;
416
417  /* Update root nodes' pointer to forest.  */
418  for (other_root_iter = other_roots->begin ();
419       other_root_iter != other_roots->end (); ++other_root_iter)
420    (*other_root_iter)->set_forest (this);
421
422  /* Remove other_forest from the list of forests and move its tree roots in
423     the list of tree roots of ref_forest.  */
424  this->m_globals->remove_forest (other_forest);
425  this->m_roots->splice (this->m_roots->begin (), *other_roots);
426  this->m_nb_nodes += other_forest->m_nb_nodes;
427
428  delete other_forest;
429}
430
431/* Dump information about the forest FOREST.  */
432
433void
434fma_forest::dump_info ()
435{
436  gcc_assert (dump_file);
437
438  fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
439	   this->m_nb_nodes);
440}
441
442/* Wrapper around fma_forest::dump_info for use as parameter of function
443   pointer type in func_fma_steering::dfs.  */
444
445static void
446dump_forest_info (fma_forest *forest)
447{
448  forest->dump_info ();
449}
450
451/* Dispatch forest to the least utilized pipeline.  */
452
453void
454fma_forest::dispatch ()
455{
456  this->m_target_parity = this->m_roots->front ()->get_parity ();
457  int fpu_balance = this->m_globals->get_fpu_balance ();
458  if (fpu_balance != 0)
459    this->m_target_parity = (fpu_balance < 0);
460
461  if (dump_file)
462    fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
463	     this->m_target_parity ? "odd" : "even");
464}
465
466/* Wrapper around fma_forest::dispatch for use as parameter of function pointer
467   type in func_fma_steering::dfs.  */
468
469static void
470dispatch_forest (fma_forest *forest)
471{
472  forest->dispatch ();
473}
474
475fma_node::fma_node (fma_node *parent, du_chain *chain)
476{
477  memset (this, 0, sizeof (*this));
478  this->m_parent = parent;
479  this->m_children = new std::list<fma_node *>;
480  this->m_insn = chain->insn;
481  /* root_p () cannot be used to check for root before root is set.  */
482  if (this->m_parent == this)
483    this->m_root = static_cast<fma_root_node *> (parent);
484  else
485    {
486      this->m_root = parent->m_root;
487      this->get_forest ()->fma_node_created (this);
488    }
489}
490
491fma_node::~fma_node ()
492{
493  delete this->m_children;
494}
495
496std::list<fma_node *> *
497fma_node::get_children ()
498{
499  return this->m_children;
500}
501
502rtx_insn *
503fma_node::get_insn ()
504{
505  return this->m_insn;
506}
507
508void
509fma_node::set_head (du_head *head)
510{
511  gcc_assert (!this->m_head);
512  this->m_head = head;
513}
514
515/* Add a child to this node in the list of children.  */
516
517void
518fma_node::add_child (fma_node *child)
519{
520  this->m_children->push_back (child);
521}
522
523/* Return the parity of the destination register of the instruction represented
524   by this node.  */
525
526int
527fma_node::get_parity ()
528{
529  return this->m_head->regno % 2;
530}
531
532/* Get the actual forest associated with a non root node as the one the node
533   points to might have been merged into another one.  In that case the pointer
534   in the root nodes are updated so we return the forest pointer of a root node
535   pointed to by the initial forest.  Despite being a oneliner, this method is
536   defined here as it references a method from fma_root_node.  */
537
538fma_forest *
539fma_node::get_forest ()
540{
541  return this->m_root->get_forest ();
542}
543
544/* Return whether a node is a root node.  */
545
546bool
547fma_node::root_p ()
548{
549  return this->m_root == this;
550}
551
552/* Dump information about the children of node FMA_NODE in forest FOREST.  */
553
554void
555fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
556{
557  struct du_chain *chain;
558  std::list<fma_node *>::iterator fma_child;
559
560  gcc_assert (dump_file);
561
562  if (this->get_children ()->empty ())
563    return;
564
565  fprintf (dump_file, "Instruction(s)");
566  for (chain = this->m_head->first; chain; chain = chain->next_use)
567    {
568      if (!is_fmul_fmac_insn (chain->insn, true))
569	continue;
570
571      if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
572	continue;
573
574      fprintf (dump_file, " %d", INSN_UID (chain->insn));
575    }
576
577  fprintf (dump_file, " is(are) accumulator dependency of instructions");
578  for (fma_child = this->get_children ()->begin ();
579       fma_child != this->get_children ()->end (); fma_child++)
580    fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
581  fprintf (dump_file, "\n");
582}
583
584/* Wrapper around fma_node::dump_info for use as parameter of function pointer
585   type in func_fma_steering::dfs.  */
586
587static void
588dump_tree_node_info (fma_forest *forest, fma_node *node)
589{
590  node->dump_info (forest);
591}
592
593/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
594   represented by FMA_NODE to a register that respect the target parity for
595   FOREST or with same parity of the instruction represented by its parent node
596   if it has one.  */
597
598void
599fma_node::rename (fma_forest *forest)
600{
601  int cur_parity, target_parity;
602
603  /* This is alternate root of a chain and thus has no children.  It will be
604     renamed when processing the canonical root for that chain.  */
605  if (!this->m_head)
606    return;
607
608  target_parity = forest->get_target_parity ();
609  if (this->m_parent)
610    target_parity = this->m_parent->get_parity ();
611  cur_parity = this->get_parity ();
612
613  /* Rename if parity differs.  */
614  if (cur_parity != target_parity)
615    {
616      rtx_insn *insn = this->m_insn;
617      HARD_REG_SET unavailable;
618      machine_mode mode;
619      int reg;
620
621      if (dump_file)
622	{
623	  unsigned cur_dest_reg = this->m_head->regno;
624
625	  fprintf (dump_file, "FMA or FMUL at insn %d but destination "
626		   "register (%s) has different parity from expected to "
627		   "maximize FPU pipeline utilization\n", INSN_UID (insn),
628		   reg_names[cur_dest_reg]);
629	}
630
631      /* Don't clobber traceback for noreturn functions.  */
632      CLEAR_HARD_REG_SET (unavailable);
633      if (frame_pointer_needed)
634	{
635	  add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
636	  add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
637	}
638
639      /* Exclude registers with wrong parity.  */
640      mode = GET_MODE (SET_DEST (PATTERN (insn)));
641      for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
642	add_to_hard_reg_set (&unavailable, mode, reg);
643
644      if (!rename_single_chain (this->m_head, &unavailable))
645	{
646	  if (dump_file)
647	    fprintf (dump_file, "Destination register of insn %d could not be "
648		     "renamed. Dependent FMA insns will use this parity from "
649		     "there on.\n", INSN_UID (insn));
650	}
651      else
652	cur_parity = target_parity;
653    }
654
655  forest->get_globals ()->update_balance (cur_parity);
656}
657
658/* Wrapper around fma_node::dump_info for use as parameter of function pointer
659   type in func_fma_steering::dfs.  */
660
661static void
662rename_fma_node (fma_forest *forest, fma_node *node)
663{
664  node->rename (forest);
665}
666
667fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
668			      int id) : fma_node (this, chain)
669{
670  this->m_forest = new fma_forest (globals, this, id);
671  this->m_forest->fma_node_created (this);
672}
673
674fma_forest *
675fma_root_node::get_forest ()
676{
677  return this->m_forest;
678}
679
680void
681fma_root_node::set_forest (fma_forest *ref_forest)
682{
683  this->m_forest = ref_forest;
684}
685
686/* Dump information about the roots of forest FOREST.  */
687
688void
689fma_root_node::dump_info (fma_forest *forest)
690{
691  gcc_assert (dump_file);
692
693  if (this == forest->get_roots ()->front ())
694    fprintf (dump_file, "Instruction(s) at root of forest #%d:",
695	     forest->get_id ());
696  fprintf (dump_file, " %d", INSN_UID (this->m_insn));
697  if (this == forest->get_roots ()->back ())
698    fprintf (dump_file, "\n");
699}
700
701/* Wrapper around fma_root_node::dump_info for use as parameter of function
702   pointer type in func_fma_steering::dfs.  */
703
704static void
705dump_tree_root_info (fma_forest *forest, fma_root_node *node)
706{
707  node->dump_info (forest);
708}
709
710func_fma_steering::func_fma_steering () : m_fpu_balance (0)
711{
712  this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
713  this->m_fma_forests.clear ();
714  this->m_next_forest_id = 0;
715}
716
717func_fma_steering::~func_fma_steering ()
718{
719  delete this->m_insn_fma_head_map;
720}
721
722int
723func_fma_steering::get_fpu_balance ()
724{
725  return this->m_fpu_balance;
726}
727
728void
729func_fma_steering::remove_forest (fma_forest *forest)
730{
731  this->m_fma_forests.remove (forest);
732}
733
734/* Memorize the mapping of this instruction to its fma_node object and return
735   whether such a mapping existed.  */
736
737bool
738func_fma_steering::put_node (fma_node *node)
739{
740  return this->m_insn_fma_head_map->put (node->get_insn (), node);
741}
742
743/* Update the current balance considering a node with the given PARITY.  */
744
745void
746func_fma_steering::update_balance (int parity)
747{
748  this->m_fpu_balance = parity ? this->m_fpu_balance + 1
749			       : this->m_fpu_balance - 1;
750}
751
752/* Return whether an fma_node object exists for instruction INSN and, if not,
753   allocate one in *RET.  */
754
755fma_node *
756func_fma_steering::get_fma_node (rtx_insn *insn)
757{
758  fma_node **fma_slot;
759
760  fma_slot = this->m_insn_fma_head_map->get (insn);
761  if (fma_slot)
762    return *fma_slot;
763  return NULL;
764}
765
766/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
767   instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
768   part of FOREST.  For the children, the associated head is left untouched
769   (and thus null) as this function will be called again when considering the
770   chain where they are def.  For the parent, the chain is given in HEAD.  */
771
772void
773func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
774					  du_chain *chain, du_head_p head)
775{
776  fma_forest *forest;
777  fma_node *node = this->get_fma_node (chain->insn);
778
779  /* This is a root node.  */
780  if (!node)
781    {
782      fma_root_node *root_node;
783
784      root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
785      forest = root_node->get_forest ();
786      node = root_node;
787
788      /* Until proved otherwise, assume this root is not part of an existing
789	 forest and thus add its forest to the list of forests.  */
790      this->m_fma_forests.push_back (forest);
791    }
792  else
793    forest = node->get_forest ();
794
795  node->set_head (head);
796
797  /* fma_node is part of a chain with several defs, one of them having already
798     been processed.  The root of that already processed def is the canonical
799     one and the root of fma_node is added to its forest.  No need to process
800     the children nodes as they were already processed when the other def was
801     processed.  */
802  if (ref_forest)
803    {
804      ref_forest->merge_forest (forest);
805      return;
806    }
807
808  for (chain = head->first; chain; chain = chain->next_use)
809    {
810      fma_node *child_fma;
811      rtx fma_rtx, *accum_rtx_p;
812
813      if (!is_fmul_fmac_insn (chain->insn, false))
814	continue;
815
816      /* Get FMA rtx.  */
817      fma_rtx = SET_SRC (PATTERN (chain->insn));
818      /* FMA is negated.  */
819      if (GET_CODE (fma_rtx) == NEG)
820	fma_rtx = XEXP (fma_rtx, 0);
821      /* Get accumulator rtx.  */
822      accum_rtx_p = &XEXP (fma_rtx, 2);
823      /* Accumulator is negated.  */
824      if (!REG_P (*accum_rtx_p))
825	accum_rtx_p = &XEXP (*accum_rtx_p, 0);
826
827      /* This du_chain structure is not for the accumulator register.  */
828      if (accum_rtx_p != chain->loc)
829	continue;
830
831      /* If object already created, this is a loop carried dependency.  We
832	 don't include this object in the children as we want trees for
833	 rename_fma_trees to not be an infinite loop.  */
834      if (this->get_fma_node (chain->insn))
835	continue;
836
837      child_fma = new fma_node (node, chain);
838
839      /* Memorize the mapping of this instruction to its fma_node object
840	 as it will be processed for the chain starting at its destination
841	 register later.  */
842
843      /* Link to siblings.  */
844      node->add_child (child_fma);
845    }
846}
847
848/* Perform a depth-first search of the forests of fma_node in
849   THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
850   THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
851   PROCESS_NODE () on each node.  If FREE is true, free all std::list in the
852   same dfs.  */
853
854void
855func_fma_steering::dfs (void (*process_forest) (fma_forest *),
856			void (*process_root) (fma_forest *, fma_root_node *),
857			void (*process_node) (fma_forest *, fma_node *),
858			bool free)
859{
860  auto_vec<fma_node *> to_process;
861  auto_vec<fma_node *> to_free;
862  std::list<fma_forest *>::iterator forest_iter;
863
864  /* For each forest.  */
865  for (forest_iter = this->m_fma_forests.begin ();
866       forest_iter != this->m_fma_forests.end (); ++forest_iter)
867    {
868      std::list<fma_root_node *>::iterator root_iter;
869
870      if (process_forest)
871	process_forest (*forest_iter);
872
873      /* For each tree root in this forest.  */
874      for (root_iter = (*forest_iter)->get_roots ()->begin ();
875	   root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
876	{
877	  if (process_root)
878	    process_root (*forest_iter, *root_iter);
879	  to_process.safe_push (*root_iter);
880	}
881
882      /* For each tree node in this forest.  */
883      while (!to_process.is_empty ())
884	{
885	  fma_node *node;
886	  std::list<fma_node *>::iterator child_iter;
887
888	  node = to_process.pop ();
889
890	  if (process_node)
891	    process_node (*forest_iter, node);
892
893	  for (child_iter = node->get_children ()->begin ();
894	       child_iter != node->get_children ()->end (); ++child_iter)
895	    to_process.safe_push (*child_iter);
896
897	  /* Defer freeing so that the process_node callback can access the
898	     parent and children of the node being processed.  */
899	  if (free)
900	    to_free.safe_push (node);
901	}
902
903      if (free)
904	{
905	  delete *forest_iter;
906
907	  while (!to_free.is_empty ())
908	    {
909	      fma_node *node = to_free.pop ();
910	      if (node->root_p ())
911		delete static_cast<fma_root_node *> (node);
912	      else
913		delete node;
914	    }
915	}
916    }
917}
918
919/* Build the dependency trees of FMUL and FMADD/FMSUB instructions.  */
920
921void
922func_fma_steering::analyze ()
923{
924  int i, n_blocks, *bb_dfs_preorder;
925  basic_block bb;
926  rtx_insn *insn;
927
928  bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
929  n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
930
931  /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
932     instructions.  */
933  for (i = 0; i < n_blocks; i++)
934    {
935      bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
936      FOR_BB_INSNS (bb, insn)
937	{
938	  operand_rr_info *dest_op_info;
939	  struct du_chain *chain = NULL;
940	  unsigned dest_regno;
941	  fma_forest *forest = NULL;
942	  du_head_p head = NULL;
943	  int i;
944
945	  if (!is_fmul_fmac_insn (insn, true))
946	    continue;
947
948	  /* Search the chain where this instruction is (one of) the root.  */
949	  dest_op_info = insn_rr[INSN_UID (insn)].op_info;
950	  dest_regno = REGNO (SET_DEST (PATTERN (insn)));
951	  for (i = 0; i < dest_op_info->n_chains; i++)
952	    {
953	      /* The register tracked by this chain does not match the
954		 destination register of insn.  */
955	      if (dest_op_info->heads[i]->regno != dest_regno)
956		continue;
957
958	      head = dest_op_info->heads[i];
959	      /* The chain was merged in another, find the new head.  */
960	      if (!head->first)
961		head = regrename_chain_from_id (head->id);
962
963	      /* Search the chain element for this instruction and, if another
964		 FMUL or FMADD/FMSUB instruction was already processed, note
965		 the forest of its tree.  */
966	      forest = NULL;
967	      for (chain = head->first; chain; chain = chain->next_use)
968		{
969		  fma_node **fma_slot;
970
971		  if (!is_fmul_fmac_insn (chain->insn, true))
972		    continue;
973
974		  /* This is a use, continue.  */
975		  if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
976		    continue;
977
978		  if (chain->insn == insn)
979		    break;
980
981		  fma_slot = this->m_insn_fma_head_map->get (chain->insn);
982		  if (fma_slot && (*fma_slot)->get_children ())
983		    forest = (*fma_slot)->get_forest ();
984		}
985	      if (chain)
986		break;
987	    }
988
989	  /* Due to implementation of regrename, dest register can slip away
990	     from regrename's analysis.  As a result, there is no chain for
991	     the destination register of insn.  We simply skip the insn even
992	     it is a fmul/fmac instruction.  This can happen when the dest
993	     register is also a source register of insn and one of the below
994	     conditions is satisfied:
995	       1) the source reg is setup in larger mode than this insn;
996	       2) the source reg is uninitialized;
997	       3) the source reg is passed in as parameter.  */
998	  if (i < dest_op_info->n_chains)
999	    this->analyze_fma_fmul_insn (forest, chain, head);
1000	}
1001    }
1002  free (bb_dfs_preorder);
1003
1004  if (dump_file)
1005    this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
1006	       false);
1007}
1008
1009/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
1010   the objective of keeping FPU pipeline balanced in term of instructions and
1011   having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
1012   scheduled on the same pipeline.  */
1013
1014void
1015func_fma_steering::rename_fma_trees ()
1016{
1017  this->dfs (dispatch_forest, NULL, rename_fma_node, true);
1018
1019  if (dump_file && !this->m_fma_forests.empty ())
1020    {
1021      fprintf (dump_file, "Function %s has ", current_function_name ());
1022      if (this->m_fpu_balance == 0)
1023	fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1024		 "two FPU pipelines\n");
1025      else if (this->m_fpu_balance > 0)
1026	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1027		 "FPU pipeline\n", this->m_fpu_balance);
1028      else /* this->m_fpu_balance < 0 */
1029	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1030		 "FPU pipeline\n", - this->m_fpu_balance);
1031    }
1032}
1033
1034/* Execute FMA steering pass.  */
1035
1036void
1037func_fma_steering::execute_fma_steering ()
1038{
1039  df_set_flags (DF_LR_RUN_DCE);
1040  df_note_add_problem ();
1041  df_analyze ();
1042  df_set_flags (DF_DEFER_INSN_RESCAN);
1043
1044  regrename_init (true);
1045  regrename_analyze (NULL);
1046  this->analyze ();
1047  this->rename_fma_trees ();
1048  regrename_finish ();
1049}
1050
1051const pass_data pass_data_fma_steering =
1052{
1053  RTL_PASS, /* type */
1054  "fma_steering", /* name */
1055  OPTGROUP_NONE, /* optinfo_flags */
1056  TV_NONE, /* tv_id */
1057  0, /* properties_required */
1058  0, /* properties_provided */
1059  0, /* properties_destroyed */
1060  0, /* todo_flags_start */
1061  TODO_df_finish, /* todo_flags_finish */
1062};
1063
1064class pass_fma_steering : public rtl_opt_pass
1065{
1066public:
1067  pass_fma_steering (gcc::context *ctxt)
1068    : rtl_opt_pass (pass_data_fma_steering, ctxt)
1069  {}
1070
1071  /* opt_pass methods: */
1072  virtual bool gate (function *)
1073    {
1074      return (aarch64_tune_params.extra_tuning_flags
1075	      & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1076	      && optimize >= 2;
1077    }
1078
1079  virtual unsigned int execute (function *)
1080    {
1081      func_fma_steering *fma_steering = new func_fma_steering;
1082      fma_steering->execute_fma_steering ();
1083      delete fma_steering;
1084      return 0;
1085    }
1086
1087}; // class pass_fma_steering
1088
1089/* Create a new fma steering pass instance.  */
1090
1091rtl_opt_pass *
1092make_pass_fma_steering (gcc::context *ctxt)
1093{
1094  return new pass_fma_steering (ctxt);
1095}
1096