tree-ssa-math-opts.c revision 1.14
/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Currently, the first mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop-invariant divisors and, with this pass, whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments found that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   effective to optimize two divisions by the same number.  We make
   this a param, and it shall be called N in the remainder of this
   comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst-case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.c, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
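
/* A small worked sketch of the two walks (an illustrative CFG, not
   produced by the pass; assume N == 2 and non-trapping math):

	B1:  d = ...;
	B2:  a = x / d;		dominated by B1
	B3:  b = y / d;		dominated by B2, post-dominates B2

   The post-order walk annotates B2 with both divisions, because B3
   post-dominates it; the pre-order walk then inserts t = 1.0 / d in B2
   (which already contains a division) and rewrites both divisions into
   multiplications by t.  */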

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "tree-eh.h"
#include "targhooks.h"
#include "domwalk.h"

/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};

static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;


/* Allocate and return a new struct occurrence for basic block BB, whose
   children list is headed by CHILDREN.  */
static struct occurrence *
occ_new (basic_block bb, struct occurrence *children)
{
  struct occurrence *occ;

  bb->aux = occ = occ_pool->allocate ();
  memset (occ, 0, sizeof (struct occurrence));

  occ->bb = bb;
  occ->children = children;
  return occ;
}


/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may also be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
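
/* A short trace of the dom != idom case above (hypothetical blocks):
   suppose the list under IDOM holds an occurrence for B4, and a
   division is then registered in B6, where
   nearest_common_dominator (B4, B6) == B2 and B2 != IDOM.  B4 is
   unlinked from the list and chained after B6 as its sibling, a fresh
   occurrence for B2 is created with both as its children, and scanning
   resumes with B2 as the block being inserted under IDOM.  */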

/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the division is going to be found multiple
   times use 1 (as it is with squares).  */

static inline void
register_division_in (basic_block bb, int importance)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = occ_new (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions += importance;
}
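
/* For example, a block with one plain division (registered once with
   IMPORTANCE == 2) and one division by a square (registered twice with
   IMPORTANCE == 1) ends up with num_divisions == 4, which
   insert_reciprocals later halves back to the real division count.  */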


/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}


/* Return whether USE_STMT is a floating-point division by DEF.  */
static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def
	 && !stmt_can_throw_internal (cfun, use_stmt);
}

/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
static inline bool
is_mult_by (gimple *use_stmt, tree def, tree a)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
    {
      tree op0 = gimple_assign_rhs1 (use_stmt);
      tree op1 = gimple_assign_rhs2 (use_stmt);

      return (op0 == def && op1 == a)
	      || (op0 == a && op1 == def);
    }
  return false;
}

/* Return whether USE_STMT is DEF * DEF.  */
static inline bool
is_square_of (gimple *use_stmt, tree def)
{
  return is_mult_by (use_stmt, def, def);
}

/* Return whether USE_STMT is a floating-point division by
   DEF * DEF.  */
static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
      && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
      && !stmt_can_throw_internal (cfun, use_stmt))
    {
      tree denominator = gimple_assign_rhs2 (use_stmt);
      if (TREE_CODE (denominator) == SSA_NAME)
	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
    }
  return false;
}

/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.

   If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costing loop.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}

/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   Take as argument the use for (x * x).  */
static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
      && occ->recip_def)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
      SET_USE (use_p, occ->square_recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  occ->bb->aux = NULL;
  occ_pool->remove (occ);

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}

/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = t;
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  */

static void
optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
{
  gimple *use_stmt;
  imm_use_iterator use_iter;
  gimple *stmt = gsi_stmt (*def_gsi);
  tree x = def;
  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
  tree div_rhs1 = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
      || TREE_CODE (div_rhs1) != REAL_CST
      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
    return;

  gcall *sqrt_stmt
    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));

  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
    return;

  switch (gimple_call_combined_fn (sqrt_stmt))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      break;

    default:
      return;
    }
  tree a = gimple_call_arg (sqrt_stmt, 0);

  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */

  /* Statements that use x in x * x.  */
  auto_vec<gimple *> sqr_stmts;
  /* Statements that use x in a * x.  */
  auto_vec<gimple *> mult_stmts;
  bool has_other_use = false;
  bool mult_on_main_path = false;

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
    {
      if (is_gimple_debug (use_stmt))
	continue;
      if (is_square_of (use_stmt, x))
	{
	  sqr_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else if (is_mult_by (use_stmt, x, a))
	{
	  mult_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else
	has_other_use = true;
    }

  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication so make sure we don't introduce a multiplication
     on a path where there was none.  */
  if (has_other_use && !mult_on_main_path)
    return;

  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
    return;

  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
     to be able to compose it from the sqr and mult cases.  */
  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
      fprintf (dump_file, "\n");
    }

  bool delete_div = !has_other_use;
  tree sqr_ssa_name = NULL_TREE;
  if (!sqr_stmts.is_empty ())
    {
      /* r1 = x * x.  Transform the original
	 x = 1.0 / t
	 into
	 tmp1 = 1.0 / a
	 r1 = tmp1.  */

      sqr_ssa_name
	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");

      if (dump_file)
	{
	  fprintf (dump_file, "Replacing original division\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
	  fprintf (dump_file, "with new division\n");
	}
      stmt
	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
			       gimple_assign_rhs1 (stmt), a);
      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
      gsi_remove (def_gsi, true);
      *def_gsi = gsi_for_stmt (stmt);
      fold_stmt_inplace (def_gsi);
      update_stmt (stmt);

      if (dump_file)
	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);

      delete_div = false;
      gimple *sqr_stmt;
      unsigned int i;
      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
	  update_stmt (sqr_stmt);
	}
    }
  if (!mult_stmts.is_empty ())
    {
      /* r2 = a * x.  Transform this into:
	 r2 = t (The original sqrt (a)).  */
      unsigned int i;
      gimple *mult_stmt = NULL;
      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Replacing squaring multiplication\n");
	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	      fprintf (dump_file, "with assignment\n");
	    }
	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
	  fold_stmt_inplace (&gsi2);
	  update_stmt (mult_stmt);
	  if (dump_file)
	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	}
    }

  if (has_other_use)
    {
      /* Using the two temporaries tmp1, tmp2 from above
	 the original x is now:
	 x = tmp1 * tmp2.  */
      gcc_assert (orig_sqrt_ssa_name);
      gcc_assert (sqr_ssa_name);

      gimple *new_stmt
	= gimple_build_assign (x, MULT_EXPR,
			       orig_sqrt_ssa_name, sqr_ssa_name);
      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
      update_stmt (stmt);
    }
  else if (delete_div)
    {
      /* Remove the original division.  */
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      release_defs (stmt);
    }
  else
    release_ssa_name (x);
}

/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t0 = a / def
       t1 = b / def
       t2 = c / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		     replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt, powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}

/* Return an internal function that implements the reciprocal of CALL,
   or IFN_LAST if there is no such function that the target supports.  */

internal_fn
internal_fn_reciprocal (gcall *call)
{
  internal_fn ifn;

  switch (gimple_call_combined_fn (call))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      ifn = IFN_RSQRT;
      break;

    default:
      return IFN_LAST;
    }

  tree_pair types = direct_internal_fn_types (ifn, call);
  if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
    return IFN_LAST;

  return ifn;
}
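
/* For example, for CALL == sqrt (x) this returns IFN_RSQRT when the
   target implements it directly, which lets the caller below rewrite
   a / sqrt (x) into a * rsqrt (x).  */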

/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    {
	      execute_cse_reciprocals_1 (&gsi, def);
	      stmt = gsi_stmt (gsi);
	      if (flag_unsafe_math_optimizations
		  && is_gimple_assign (stmt)
		  && gimple_assign_lhs (stmt) == def
		  && !stmt_can_throw_internal (cfun, stmt)
		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
		optimize_recip_sqrt (&gsi, def);
	    }
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      gimple_move_vops (stmt2, call);
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}

/* Records an occurrence at statement USE_STMT in the vector of
   statements STMTS if its basic block is dominated by *TOP_BB or
   dominates it, or if *TOP_BB is not yet initialized.  Returns true
   if the occurrence was pushed on the vector.  Adjusts *TOP_BB to be
   the basic block dominating all statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}
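
/* For example, recording a use in B5 and then one in B2, where B2
   dominates B5, moves *TOP_BB up to B2; a later use in a block that
   neither dominates nor is dominated by B2 is rejected, since no
   single insertion point would then be available.  */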

/* Look for sin, cos and cexpi calls with the same argument NAME and,
   when at least two are found, CSE them into a single call to cexpi.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */
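
/* For instance (a sketch of the rewrite performed below):

	c = cos (a);
	s = sin (a);

   becomes

	t = cexpi (a);
	c = REALPART_EXPR <t>;
	s = IMAGPART_EXPR <t>;  */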

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}

/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
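
/* As a worked example of the hybrid scheme (a sketch of what the code
   below computes): for n == 291 == 0b100100011, the window method
   peels off the low 3-bit window 0b011, so that
   x**291 == (x**36)**8 * x**3; the remaining exponents 36 and 3 are
   both below POWI_TABLE_SIZE and are expanded through the look-up
   table of optimal factorizations.  */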

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3
1252
1253/* The following table is an efficient representation of an
1254   "optimal power tree".  For each value, i, the corresponding
1255   value, j, in the table states than an optimal evaluation
1256   sequence for calculating pow(x,i) can be found by evaluating
1257   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1258   100 integers is given in Knuth's "Seminumerical algorithms".  */
1259
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
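
/* To read the table: powi_table[5] == 3 means that x**5 is best
   evaluated as x**3 * x**2.  Expanding recursively, x**2 == x * x and
   x**3 == x**2 * x, so the sequence

	t2 = x * x;
	t3 = t2 * x;
	t5 = t3 * t2;

   computes x**5 in three multiplications, matching powi_cost (5).  */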


/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}

/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = absu_hwi (n);

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}

/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 unsigned HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_real (type, dconst1);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}

/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  if ((n >= -1 && n <= 2)
      || (optimize_function_for_speed_p (cfun)
	  && powi_cost (n) <= POWI_MAX_MULTS))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
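
/* Note that exponents in [-1, 2] are always expanded, even when
   optimizing for size: powi (x, 2) is a single multiplication and
   powi (x, -1) a single division, so a library call cannot be
   cheaper.  */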

/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name built from NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

/* Build a gimple reference operation with the given CODE and argument
   ARG, assigning the result to a new SSA name of TYPE with NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
		      const char *name, enum tree_code code, tree arg0)
{
  tree result = make_temp_ssa_name (type, NULL, name);
  gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type);
  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

struct pow_synth_sqrt_info
{
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};

/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */

bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				 struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened, bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
	 of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}
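
/* For example, 0.625 == 0.5**1 + 0.5**3, so for C == 0.625 and N >= 3
   the function above returns true with factors == {1, 0, 1},
   deepest == 3 and num_mults == 1: pow (x, 0.625) can then be
   synthesized as sqrt (x) * sqrt (sqrt (sqrt (x))).  */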
1571
1572/* Return the tree corresponding to FN being applied
1573   to ARG N times at GSI and LOC.
1574   Look up previous results from CACHE if need be.
1575   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1576
1577static tree
1578get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1579	      tree fn, location_t loc, tree *cache)
1580{
1581  tree res = cache[n];
1582  if (!res)
1583    {
1584      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1585      res = build_and_insert_call (gsi, loc, fn, prev);
1586      cache[n] = res;
1587    }
1588
1589  return res;
1590}
1591
1592/* Print to STREAM the repeated application of function FNAME to ARG
1593   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1594   "foo (foo (x))".  */
1595
1596static void
1597print_nested_fn (FILE* stream, const char *fname, const char* arg,
1598		 unsigned int n)
1599{
1600  if (n == 0)
1601    fprintf (stream, "%s", arg);
1602  else
1603    {
1604      fprintf (stream, "%s (", fname);
1605      print_nested_fn (stream, fname, arg, n - 1);
1606      fprintf (stream, ")");
1607    }
1608}
1609
1610/* Print to STREAM the fractional sequence of sqrt chains
1611   applied to ARG, described by INFO.  Used for the dump file.  */
1612
1613static void
1614dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1615			        struct pow_synth_sqrt_info *info)
1616{
1617  for (unsigned int i = 0; i < info->deepest; i++)
1618    {
1619      bool is_set = info->factors[i];
1620      if (is_set)
1621	{
1622	  print_nested_fn (stream, "sqrt", arg, i + 1);
1623	  if (i != info->deepest - 1)
1624	    fprintf (stream, " * ");
1625	}
1626    }
1627}
1628
1629/* Print to STREAM a representation of raising ARG to an integer
1630   power N.  Used for the dump file.  */
1631
1632static void
1633dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1634{
1635  if (n > 1)
1636    fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1637  else if (n == 1)
1638    fprintf (stream, "%s", arg);
1639}
1640
1641/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1642   square roots.  Place at GSI and LOC.  Limit the maximum depth
1643   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1644   result of the expanded sequence or NULL_TREE if the expansion failed.
1645
1646   This routine assumes that ARG1 is a real number with a fractional part
1647   (the integer exponent case will have been handled earlier in
1648   gimple_expand_builtin_pow).
1649
1650   For ARG1 > 0.0:
1651   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1652     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1653                    FRAC_PART == ARG1 - WHOLE_PART:
1654     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1655     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1656     if it can be expressed as such, that is if FRAC_PART satisfies:
1657     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1658     where integer a[i] is either 0 or 1.
1659
1660     Example:
1661     POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1662       --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1663
1664   For ARG1 < 0.0 there are two approaches:
1665   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1666         is calculated as above.
1667
1668     Example:
1669     POW (x, -5.625) == 1.0 / POW (x, 5.625)
1670       -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1671
1672   * (B) : WHOLE_PART := - ceil (abs (ARG1))
1673           FRAC_PART  := ARG1 - WHOLE_PART
1674     and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1675     Example:
1676     POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1677       --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1678
1679   For ARG1 < 0.0 we choose between (A) and (B) depending on
1680   how many multiplications we'd have to do.
1681   So, for the example in (B): POW (x, -5.875), if we were to
1682   follow algorithm (A) we would produce:
1683   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1684   which contains more multiplications than approach (B).
1685
1686   Hopefully, this approach will eliminate potentially expensive POW library
1687   calls when unsafe floating point math is enabled and allow the compiler to
1688   further optimise the multiplies, square roots and divides produced by this
1689   function.  */
1690
1691static tree
1692expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1693		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1694{
1695  tree type = TREE_TYPE (arg0);
1696  machine_mode mode = TYPE_MODE (type);
1697  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1698  bool one_over = true;
1699
1700  if (!sqrtfn)
1701    return NULL_TREE;
1702
1703  if (TREE_CODE (arg1) != REAL_CST)
1704    return NULL_TREE;
1705
1706  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1707
1708  gcc_assert (max_depth > 0);
1709  tree *cache = XALLOCAVEC (tree, max_depth + 1);
1710
1711  struct pow_synth_sqrt_info synth_info;
1712  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1713  synth_info.deepest = 0;
1714  synth_info.num_mults = 0;
1715
1716  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1717  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1718
1719  /* The whole and fractional parts of exp.  */
1720  REAL_VALUE_TYPE whole_part;
1721  REAL_VALUE_TYPE frac_part;
1722
1723  real_floor (&whole_part, mode, &exp);
1724  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
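  /* For example, |ARG1| = 5.875 gives whole_part = 5.0 and
     frac_part = 0.875.  */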
1725
1727  REAL_VALUE_TYPE ceil_whole = dconst0;
1728  REAL_VALUE_TYPE ceil_fract = dconst0;
1729
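  /* For a negative exponent we may instead round the magnitude up, as in
     approach (B) above: for |ARG1| = 5.875, ceil_whole = 6.0 and
     ceil_fract = 0.125.  */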
1730  if (neg_exp)
1731    {
1732      real_ceil (&ceil_whole, mode, &exp);
1733      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1734    }
1735
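  /* E.g. frac_part = 0.625 = 0.5 + 0.125 is recorded as factors = {1, 0, 1}
     with deepest = 3, i.e. SQRT (x) * SQRT (SQRT (SQRT (x))).  */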
1736  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1737    return NULL_TREE;
1738
1739  /* Check whether it's more profitable to not use 1.0 / ...  */
1740  if (neg_exp)
1741    {
1742      struct pow_synth_sqrt_info alt_synth_info;
1743      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1744      alt_synth_info.deepest = 0;
1745      alt_synth_info.num_mults = 0;
1746
1747      if (representable_as_half_series_p (ceil_fract, max_depth,
1748					   &alt_synth_info)
1749	  && alt_synth_info.deepest <= synth_info.deepest
1750	  && alt_synth_info.num_mults < synth_info.num_mults)
1751	{
1752	  whole_part = ceil_whole;
1753	  frac_part = ceil_fract;
1754	  synth_info.deepest = alt_synth_info.deepest;
1755	  synth_info.num_mults = alt_synth_info.num_mults;
1756	  memcpy (synth_info.factors, alt_synth_info.factors,
1757		  (max_depth + 1) * sizeof (bool));
1758	  one_over = false;
1759	}
1760    }
1761
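  /* Give up unless the whole part of the exponent is exactly representable
     as a HOST_WIDE_INT.  */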
1762  HOST_WIDE_INT n = real_to_integer (&whole_part);
1763  REAL_VALUE_TYPE cint;
1764  real_from_integer (&cint, VOIDmode, n, SIGNED);
1765
1766  if (!real_identical (&whole_part, &cint))
1767    return NULL_TREE;
1768
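  /* The full expansion must not exceed POWI_MAX_MULTS multiplications
     in total.  */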
1769  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1770    return NULL_TREE;
1771
1772  memset (cache, 0, (max_depth + 1) * sizeof (tree));
1773
1774  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1775
1776  /* Calculate the integer part of the exponent.  */
1777  if (n > 1)
1778    {
1779      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1780      if (!integer_res)
1781	return NULL_TREE;
1782    }
1783
1784  if (dump_file)
1785    {
1786      char string[64];
1787
1788      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1789      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1790
1791      if (neg_exp)
1792	{
1793	  if (one_over)
1794	    {
1795	      fprintf (dump_file, "1.0 / (");
1796	      dump_integer_part (dump_file, "x", n);
1797	      if (n > 0)
1798	        fprintf (dump_file, " * ");
1799	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1800	      fprintf (dump_file, ")");
1801	    }
1802	  else
1803	    {
1804	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1805	      fprintf (dump_file, " / (");
1806	      dump_integer_part (dump_file, "x", n);
1807	      fprintf (dump_file, ")");
1808	    }
1809	}
1810      else
1811	{
1812	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1813	  if (n > 0)
1814	    fprintf (dump_file, " * ");
1815	  dump_integer_part (dump_file, "x", n);
1816	}
1817
1818      fprintf (dump_file, "\ndeepest sqrt chain: %u\n", synth_info.deepest);
1819    }
1820
1822  tree fract_res = NULL_TREE;
1823  cache[0] = arg0;
1824
1825  /* Calculate the fractional part of the exponent.  */
1826  for (unsigned i = 0; i < synth_info.deepest; i++)
1827    {
1828      if (synth_info.factors[i])
1829	{
1830	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1831
1832	  if (!fract_res)
1833	    fract_res = sqrt_chain;
1835	  else
1836	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1837					   fract_res, sqrt_chain);
1838	}
1839    }
1840
1841  tree res = NULL_TREE;
1842
1843  if (neg_exp)
1844    {
1845      if (one_over)
1846	{
1847	  if (n > 0)
1848	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1849					   fract_res, integer_res);
1850	  else
1851	    res = fract_res;
1852
1853	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1854					  build_real (type, dconst1), res);
1855	}
1856      else
1857	{
1858	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1859					 fract_res, integer_res);
1860	}
1861    }
1862  else
1863    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1864				   fract_res, integer_res);
1865  return res;
1866}
1867
1868/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1869   with location info LOC.  If possible, create an equivalent and
1870   less expensive sequence of statements prior to GSI, and return an
1871   expession holding the result.  */
1872
1873static tree
1874gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1875			   tree arg0, tree arg1)
1876{
1877  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1878  REAL_VALUE_TYPE c2, dconst3;
1879  HOST_WIDE_INT n;
1880  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1881  machine_mode mode;
1882  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1883  bool hw_sqrt_exists, c_is_int, c2_is_int;
1884
1885  dconst1_4 = dconst1;
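  /* Compose the constant 0.25 from 1.0 by subtracting two from its binary
     exponent.  */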
1886  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1887
1888  /* If the exponent isn't a constant, there's nothing of interest
1889     to be done.  */
1890  if (TREE_CODE (arg1) != REAL_CST)
1891    return NULL_TREE;
1892
1893  /* Don't perform the operation if flag_signaling_nans is on
1894     and the operand is a signaling NaN.  */
1895  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1896      && ((TREE_CODE (arg0) == REAL_CST
1897	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1898	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1899    return NULL_TREE;
1900
1901  /* If the exponent is equivalent to an integer, expand to an optimal
1902     multiplication sequence when profitable.  */
1903  c = TREE_REAL_CST (arg1);
1904  n = real_to_integer (&c);
1905  real_from_integer (&cint, VOIDmode, n, SIGNED);
1906  c_is_int = real_identical (&c, &cint);
1907
1908  if (c_is_int
1909      && ((n >= -1 && n <= 2)
1910	  || (flag_unsafe_math_optimizations
1911	      && speed_p
1912	      && powi_cost (n) <= POWI_MAX_MULTS)))
1913    return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1914
1915  /* Attempt various optimizations using sqrt and cbrt.  */
1916  type = TREE_TYPE (arg0);
1917  mode = TYPE_MODE (type);
1918  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1919
1920  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1921     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1922     sqrt(-0) = -0.  */
1923  if (sqrtfn
1924      && real_equal (&c, &dconsthalf)
1925      && !HONOR_SIGNED_ZEROS (mode))
1926    return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1927
1928  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1929
1930  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1931     optimizations since 1./3. is not exactly representable.  If x
1932     is negative and finite, the correct value of pow(x,1./3.) is
1933     a NaN with the "invalid" exception raised, because the value
1934     of 1./3. actually has an even denominator.  The correct value
1935     of cbrt(x) is a negative real value.  */
1936  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1937  dconst1_3 = real_value_truncate (mode, dconst_third ());
1938
1939  if (flag_unsafe_math_optimizations
1940      && cbrtfn
1941      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1942      && real_equal (&c, &dconst1_3))
1943    return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1944
1945  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1946     if we don't have a hardware sqrt insn.  */
1947  dconst1_6 = dconst1_3;
1948  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1949
1950  if (flag_unsafe_math_optimizations
1951      && sqrtfn
1952      && cbrtfn
1953      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1954      && speed_p
1955      && hw_sqrt_exists
1956      && real_equal (&c, &dconst1_6))
1957    {
1958      /* sqrt(x)  */
1959      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1960
1961      /* cbrt(sqrt(x))  */
1962      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1963    }
1964
1966  /* Attempt to expand the POW as a product of square root chains.
1967     Expand the 0.25 case even when optimising for size.  */
1968  if (flag_unsafe_math_optimizations
1969      && sqrtfn
1970      && hw_sqrt_exists
1971      && (speed_p || real_equal (&c, &dconst1_4))
1972      && !HONOR_SIGNED_ZEROS (mode))
1973    {
1974      unsigned int max_depth = speed_p
1975				? param_max_pow_sqrt_depth
1976				: 2;
1977
1978      tree expand_with_sqrts
1979	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1980
1981      if (expand_with_sqrts)
1982	return expand_with_sqrts;
1983    }
1984
1985  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1986  n = real_to_integer (&c2);
1987  real_from_integer (&cint, VOIDmode, n, SIGNED);
1988  c2_is_int = real_identical (&c2, &cint);
1989
1990  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1991
1992     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1993     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1994
1995     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1996     different from pow(x, 1./3.) due to rounding and behavior with
1997     negative x, we need to constrain this transformation to unsafe
1998     math and positive x or finite math.  */
1999  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2000  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2001  real_round (&c2, mode, &c2);
2002  n = real_to_integer (&c2);
2003  real_from_integer (&cint, VOIDmode, n, SIGNED);
2004  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2005  real_convert (&c2, mode, &c2);
2006
2007  if (flag_unsafe_math_optimizations
2008      && cbrtfn
2009      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2010      && real_identical (&c2, &c)
2011      && !c2_is_int
2012      && optimize_function_for_speed_p (cfun)
2013      && powi_cost (n / 3) <= POWI_MAX_MULTS)
2014    {
2015      tree powi_x_ndiv3 = NULL_TREE;
2016
2017      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2018         possible or profitable, give up.  Skip the degenerate case when
2019         abs(n) < 3, where the result is always 1.  */
2020      if (absu_hwi (n) >= 3)
2021	{
2022	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2023						     abs_hwi (n / 3));
2024	  if (!powi_x_ndiv3)
2025	    return NULL_TREE;
2026	}
2027
2028      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2029         as that creates an unnecessary variable.  Instead, just produce
2030         either cbrt(x) or cbrt(x) * cbrt(x).  */
2031      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2032
2033      if (absu_hwi (n) % 3 == 1)
2034	powi_cbrt_x = cbrt_x;
2035      else
2036	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2037					      cbrt_x, cbrt_x);
2038
2039      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2040      if (absu_hwi (n) < 3)
2041	result = powi_cbrt_x;
2042      else
2043	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2044					 powi_x_ndiv3, powi_cbrt_x);
2045
2046      /* If n is negative, reciprocate the result.  */
2047      if (n < 0)
2048	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2049					 build_real (type, dconst1), result);
2050
2051      return result;
2052    }
2053
2054  /* No optimizations succeeded.  */
2055  return NULL_TREE;
2056}
2057
2058/* ARG is the argument to a cabs builtin call in GSI with location info
2059   LOC.  Create a sequence of statements prior to GSI that calculates
2060   sqrt(R*R + I*I), where R and I are the real and imaginary components
2061   of ARG, respectively.  Return an expression holding the result.  */
2062
2063static tree
2064gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2065{
2066  tree real_part, imag_part, addend1, addend2, sum, result;
2067  tree type = TREE_TYPE (TREE_TYPE (arg));
2068  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2069  machine_mode mode = TYPE_MODE (type);
2070
2071  if (!flag_unsafe_math_optimizations
2072      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2073      || !sqrtfn
2074      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2075    return NULL_TREE;
2076
2077  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2078				    REALPART_EXPR, arg);
2079  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2080				    real_part, real_part);
2081  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2082				    IMAGPART_EXPR, arg);
2083  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2084				    imag_part, imag_part);
2085  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2086  result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2087
2088  return result;
2089}
2090
2091/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2092   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
2093   an optimal number of multiplies, when n is a constant, and expand
   pow and cabs calls into cheaper sequences where profitable.  */
2094
2095namespace {
2096
2097const pass_data pass_data_cse_sincos =
2098{
2099  GIMPLE_PASS, /* type */
2100  "sincos", /* name */
2101  OPTGROUP_NONE, /* optinfo_flags */
2102  TV_TREE_SINCOS, /* tv_id */
2103  PROP_ssa, /* properties_required */
2104  PROP_gimple_opt_math, /* properties_provided */
2105  0, /* properties_destroyed */
2106  0, /* todo_flags_start */
2107  TODO_update_ssa, /* todo_flags_finish */
2108};
2109
2110class pass_cse_sincos : public gimple_opt_pass
2111{
2112public:
2113  pass_cse_sincos (gcc::context *ctxt)
2114    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2115  {}
2116
2117  /* opt_pass methods: */
2118  virtual bool gate (function *)
2119    {
2120      /* We no longer require either sincos or cexp, since powi expansion
2121	 piggybacks on this pass.  */
2122      return optimize;
2123    }
2124
2125  virtual unsigned int execute (function *);
2126
2127}; // class pass_cse_sincos
2128
2129unsigned int
2130pass_cse_sincos::execute (function *fun)
2131{
2132  basic_block bb;
2133  bool cfg_changed = false;
2134
2135  calculate_dominance_info (CDI_DOMINATORS);
2136  memset (&sincos_stats, 0, sizeof (sincos_stats));
2137
2138  FOR_EACH_BB_FN (bb, fun)
2139    {
2140      gimple_stmt_iterator gsi;
2141      bool cleanup_eh = false;
2142
2143      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2144        {
2145	  gimple *stmt = gsi_stmt (gsi);
2146
2147	  /* Only the last stmt in a bb could throw, no need to call
2148	     gimple_purge_dead_eh_edges if we change something in the middle
2149	     of a basic block.  */
2150	  cleanup_eh = false;
2151
2152	  if (is_gimple_call (stmt)
2153	      && gimple_call_lhs (stmt))
2154	    {
2155	      tree arg, arg0, arg1, result;
2156	      HOST_WIDE_INT n;
2157	      location_t loc;
2158
2159	      switch (gimple_call_combined_fn (stmt))
2160		{
2161		CASE_CFN_COS:
2162		CASE_CFN_SIN:
2163		CASE_CFN_CEXPI:
2164		  /* Make sure we have either sincos or cexp.  */
2165		  if (!targetm.libc_has_function (function_c99_math_complex)
2166		      && !targetm.libc_has_function (function_sincos))
2167		    break;
2168
2169		  arg = gimple_call_arg (stmt, 0);
2170		  if (TREE_CODE (arg) == SSA_NAME)
2171		    cfg_changed |= execute_cse_sincos_1 (arg);
2172		  break;
2173
2174		CASE_CFN_POW:
2175		  arg0 = gimple_call_arg (stmt, 0);
2176		  arg1 = gimple_call_arg (stmt, 1);
2177
2178		  loc = gimple_location (stmt);
2179		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2180
2181		  if (result)
2182		    {
2183		      tree lhs = gimple_get_lhs (stmt);
2184		      gassign *new_stmt = gimple_build_assign (lhs, result);
2185		      gimple_set_location (new_stmt, loc);
2186		      unlink_stmt_vdef (stmt);
2187		      gsi_replace (&gsi, new_stmt, true);
2188		      cleanup_eh = true;
2189		      if (gimple_vdef (stmt))
2190			release_ssa_name (gimple_vdef (stmt));
2191		    }
2192		  break;
2193
2194		CASE_CFN_POWI:
2195		  arg0 = gimple_call_arg (stmt, 0);
2196		  arg1 = gimple_call_arg (stmt, 1);
2197		  loc = gimple_location (stmt);
2198
2199		  if (real_minus_onep (arg0))
2200		    {
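		      /* powi (-1, n) is 1 for even n and -1 for odd n, so
			 select between 1.0 and -1.0 on the lowest bit of
			 the exponent.  */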
2201		      tree t0, t1, cond, one, minus_one;
2202		      gassign *stmt;
2203
2204		      t0 = TREE_TYPE (arg0);
2205		      t1 = TREE_TYPE (arg1);
2206		      one = build_real (t0, dconst1);
2207		      minus_one = build_real (t0, dconstm1);
2208
2209		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2210		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2211						  arg1, build_int_cst (t1, 1));
2212		      gimple_set_location (stmt, loc);
2213		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2214
2215		      result = make_temp_ssa_name (t0, NULL, "powi");
2216		      stmt = gimple_build_assign (result, COND_EXPR, cond,
2217						  minus_one, one);
2218		      gimple_set_location (stmt, loc);
2219		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2220		    }
2221		  else
2222		    {
2223		      if (!tree_fits_shwi_p (arg1))
2224			break;
2225
2226		      n = tree_to_shwi (arg1);
2227		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2228		    }
2229
2230		  if (result)
2231		    {
2232		      tree lhs = gimple_get_lhs (stmt);
2233		      gassign *new_stmt = gimple_build_assign (lhs, result);
2234		      gimple_set_location (new_stmt, loc);
2235		      unlink_stmt_vdef (stmt);
2236		      gsi_replace (&gsi, new_stmt, true);
2237		      cleanup_eh = true;
2238		      if (gimple_vdef (stmt))
2239			release_ssa_name (gimple_vdef (stmt));
2240		    }
2241		  break;
2242
2243		CASE_CFN_CABS:
2244		  arg0 = gimple_call_arg (stmt, 0);
2245		  loc = gimple_location (stmt);
2246		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2247
2248		  if (result)
2249		    {
2250		      tree lhs = gimple_get_lhs (stmt);
2251		      gassign *new_stmt = gimple_build_assign (lhs, result);
2252		      gimple_set_location (new_stmt, loc);
2253		      unlink_stmt_vdef (stmt);
2254		      gsi_replace (&gsi, new_stmt, true);
2255		      cleanup_eh = true;
2256		      if (gimple_vdef (stmt))
2257			release_ssa_name (gimple_vdef (stmt));
2258		    }
2259		  break;
2260
2261		default:;
2262		}
2263	    }
2264	}
2265      if (cleanup_eh)
2266	cfg_changed |= gimple_purge_dead_eh_edges (bb);
2267    }
2268
2269  statistics_counter_event (fun, "sincos statements inserted",
2270			    sincos_stats.inserted);
2271
2272  return cfg_changed ? TODO_cleanup_cfg : 0;
2273}
2274
2275} // anon namespace
2276
2277gimple_opt_pass *
2278make_pass_cse_sincos (gcc::context *ctxt)
2279{
2280  return new pass_cse_sincos (ctxt);
2281}
2282
2283/* Return true if stmt is a type conversion operation that can be stripped
2284   when used in a widening multiply operation.  */
2285static bool
2286widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2287{
2288  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2289
2290  if (TREE_CODE (result_type) == INTEGER_TYPE)
2291    {
2292      tree op_type;
2293      tree inner_op_type;
2294
2295      if (!CONVERT_EXPR_CODE_P (rhs_code))
2296	return false;
2297
2298      op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2299
2300      /* If the type of OP has the same precision as the result, then
2301	 we can strip this conversion.  The multiply operation will be
2302	 selected to create the correct extension as a by-product.  */
2303      if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2304	return true;
2305
2306      /* We can also strip a conversion if it preserves the signed-ness of
2307	 the operation and doesn't narrow the range.  */
2308      inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2309
2310      /* If the inner-most type is unsigned, then we can strip any
2311	 intermediate widening operation.  If it's signed, then the
2312	 intermediate widening operation must also be signed.  */
2313      if ((TYPE_UNSIGNED (inner_op_type)
2314	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2315	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2316	return true;
2317
2318      return false;
2319    }
2320
2321  return rhs_code == FIXED_CONVERT_EXPR;
2322}
2323
2324/* Return true if RHS is a suitable operand for a widening multiplication,
2325   assuming a target type of TYPE.
2326   There are two cases:
2327
2328     - RHS makes some value at least twice as wide.  Store that value
2329       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2330
2331     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2332       but leave *TYPE_OUT untouched.  */
2333
2334static bool
2335is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2336			tree *new_rhs_out)
2337{
2338  gimple *stmt;
2339  tree type1, rhs1;
2340
2341  if (TREE_CODE (rhs) == SSA_NAME)
2342    {
2343      stmt = SSA_NAME_DEF_STMT (rhs);
2344      if (is_gimple_assign (stmt))
2345	{
2346	  if (! widening_mult_conversion_strippable_p (type, stmt))
2347	    rhs1 = rhs;
2348	  else
2349	    {
2350	      rhs1 = gimple_assign_rhs1 (stmt);
2351
2352	      if (TREE_CODE (rhs1) == INTEGER_CST)
2353		{
2354		  *new_rhs_out = rhs1;
2355		  *type_out = NULL;
2356		  return true;
2357		}
2358	    }
2359	}
2360      else
2361	rhs1 = rhs;
2362
2363      type1 = TREE_TYPE (rhs1);
2364
2365      if (TREE_CODE (type1) != TREE_CODE (type)
2366	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2367	return false;
2368
2369      *new_rhs_out = rhs1;
2370      *type_out = type1;
2371      return true;
2372    }
2373
2374  if (TREE_CODE (rhs) == INTEGER_CST)
2375    {
2376      *new_rhs_out = rhs;
2377      *type_out = NULL;
2378      return true;
2379    }
2380
2381  return false;
2382}
2383
2384/* Return true if STMT performs a widening multiplication, assuming the
2385   output type is TYPE.  If so, store the unwidened types of the operands
2386   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2387   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2388   and *TYPE2_OUT would give the operands of the multiplication.  */
2389
2390static bool
2391is_widening_mult_p (gimple *stmt,
2392		    tree *type1_out, tree *rhs1_out,
2393		    tree *type2_out, tree *rhs2_out)
2394{
2395  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2396
2397  if (TREE_CODE (type) == INTEGER_TYPE)
2398    {
2399      if (TYPE_OVERFLOW_TRAPS (type))
2400	return false;
2401    }
2402  else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2403    return false;
2404
2405  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2406			       rhs1_out))
2407    return false;
2408
2409  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2410			       rhs2_out))
2411    return false;
2412
2413  if (*type1_out == NULL)
2414    {
2415      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2416	return false;
2417      *type1_out = *type2_out;
2418    }
2419
2420  if (*type2_out == NULL)
2421    {
2422      if (!int_fits_type_p (*rhs2_out, *type1_out))
2423	return false;
2424      *type2_out = *type1_out;
2425    }
2426
2427  /* Ensure that the larger of the two operands comes first.  */
2428  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2429    {
2430      std::swap (*type1_out, *type2_out);
2431      std::swap (*rhs1_out, *rhs2_out);
2432    }
2433
2434  return true;
2435}
2436
2437/* Check to see if the CALL statement is an invocation of copysign
2438   with 1. being the first argument.  */
2439static bool
2440is_copysign_call_with_1 (gimple *call)
2441{
2442  gcall *c = dyn_cast <gcall *> (call);
2443  if (! c)
2444    return false;
2445
2446  enum combined_fn code = gimple_call_combined_fn (c);
2447
2448  if (code == CFN_LAST)
2449    return false;
2450
2451  if (builtin_fn_p (code))
2452    {
2453      switch (as_builtin_fn (code))
2454	{
2455	CASE_FLT_FN (BUILT_IN_COPYSIGN):
2456	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2457	  return real_onep (gimple_call_arg (c, 0));
2458	default:
2459	  return false;
2460	}
2461    }
2462
2463  if (internal_fn_p (code))
2464    {
2465      switch (as_internal_fn (code))
2466	{
2467	case IFN_COPYSIGN:
2468	  return real_onep (gimple_call_arg (c, 0));
2469	default:
2470	  return false;
2471	}
2472    }
2473
2474  return false;
2475}
2476
2477/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2478   This is only done when the xorsign optab is available.  Return
2479   FALSE if the pattern is not a xorsign pattern or if expansion
2480   fails, otherwise return TRUE.  */
2481static bool
2482convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2483{
2484  tree treeop0, treeop1, lhs, type;
2485  location_t loc = gimple_location (stmt);
2486  lhs = gimple_assign_lhs (stmt);
2487  treeop0 = gimple_assign_rhs1 (stmt);
2488  treeop1 = gimple_assign_rhs2 (stmt);
2489  type = TREE_TYPE (lhs);
2490  machine_mode mode = TYPE_MODE (type);
2491
2492  if (HONOR_SNANS (type))
2493    return false;
2494
2495  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2496    {
2497      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2498      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2499	{
2500	  call0 = SSA_NAME_DEF_STMT (treeop1);
2501	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2502	    return false;
2503
2504	  treeop1 = treeop0;
2505	}
2506      if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2507	return false;
2508
2509      gcall *c = as_a <gcall *> (call0);
2510      treeop0 = gimple_call_arg (c, 1);
2511
2512      gcall *call_stmt
2513	= gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2514      gimple_set_lhs (call_stmt, lhs);
2515      gimple_set_location (call_stmt, loc);
2516      gsi_replace (gsi, call_stmt, true);
2517      return true;
2518    }
2519
2520  return false;
2521}
2522
2523/* Process a single gimple statement STMT, which has a MULT_EXPR as
2524   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2525   value is true iff we converted the statement.  */
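
/* For example (a sketch; the exact GIMPLE spelling may differ):

     short a, b;  int c;
     _1 = (int) a;
     _2 = (int) b;
     c = _1 * _2;

   becomes c = a w* b, i.e. a WIDEN_MULT_EXPR, with the conversions
   stripped by is_widening_mult_rhs_p.  */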
2526
2527static bool
2528convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2529{
2530  tree lhs, rhs1, rhs2, type, type1, type2;
2531  enum insn_code handler;
2532  scalar_int_mode to_mode, from_mode, actual_mode;
2533  optab op;
2534  int actual_precision;
2535  location_t loc = gimple_location (stmt);
2536  bool from_unsigned1, from_unsigned2;
2537
2538  lhs = gimple_assign_lhs (stmt);
2539  type = TREE_TYPE (lhs);
2540  if (TREE_CODE (type) != INTEGER_TYPE)
2541    return false;
2542
2543  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2544    return false;
2545
2546  to_mode = SCALAR_INT_TYPE_MODE (type);
2547  from_mode = SCALAR_INT_TYPE_MODE (type1);
2548  if (to_mode == from_mode)
2549    return false;
2550
2551  from_unsigned1 = TYPE_UNSIGNED (type1);
2552  from_unsigned2 = TYPE_UNSIGNED (type2);
2553
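  /* Pick the widening multiply optab matching the signedness of the two
     operands; mixed signedness needs usmul_widen_optab.  */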
2554  if (from_unsigned1 && from_unsigned2)
2555    op = umul_widen_optab;
2556  else if (!from_unsigned1 && !from_unsigned2)
2557    op = smul_widen_optab;
2558  else
2559    op = usmul_widen_optab;
2560
2561  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2562						  &actual_mode);
2563
2564  if (handler == CODE_FOR_nothing)
2565    {
2566      if (op != smul_widen_optab)
2567	{
2568	  /* We can use a signed multiply with unsigned types as long as
2569	     there is a wider mode to use, or it is the smaller of the two
2570	     types that is unsigned.  Note that type1 >= type2, always.  */
2571	  if ((TYPE_UNSIGNED (type1)
2572	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2573	      || (TYPE_UNSIGNED (type2)
2574		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2575	    {
2576	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2577		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2578		return false;
2579	    }
2580
2581	  op = smul_widen_optab;
2582	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2583							  from_mode,
2584							  &actual_mode);
2585
2586	  if (handler == CODE_FOR_nothing)
2587	    return false;
2588
2589	  from_unsigned1 = from_unsigned2 = false;
2590	}
2591      else
2592	return false;
2593    }
2594
2595  /* Ensure that the inputs to the handler are in the correct precision
2596     for the opcode.  This will be the full mode size.  */
2597  actual_precision = GET_MODE_PRECISION (actual_mode);
2598  if (2 * actual_precision > TYPE_PRECISION (type))
2599    return false;
2600  if (actual_precision != TYPE_PRECISION (type1)
2601      || from_unsigned1 != TYPE_UNSIGNED (type1))
2602    rhs1 = build_and_insert_cast (gsi, loc,
2603				  build_nonstandard_integer_type
2604				    (actual_precision, from_unsigned1), rhs1);
2605  if (actual_precision != TYPE_PRECISION (type2)
2606      || from_unsigned2 != TYPE_UNSIGNED (type2))
2607    rhs2 = build_and_insert_cast (gsi, loc,
2608				  build_nonstandard_integer_type
2609				    (actual_precision, from_unsigned2), rhs2);
2610
2611  /* Handle constants.  */
2612  if (TREE_CODE (rhs1) == INTEGER_CST)
2613    rhs1 = fold_convert (type1, rhs1);
2614  if (TREE_CODE (rhs2) == INTEGER_CST)
2615    rhs2 = fold_convert (type2, rhs2);
2616
2617  gimple_assign_set_rhs1 (stmt, rhs1);
2618  gimple_assign_set_rhs2 (stmt, rhs2);
2619  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2620  update_stmt (stmt);
2621  widen_mul_stats.widen_mults_inserted++;
2622  return true;
2623}
2624
2625/* Process a single gimple statement STMT, which is found at the
2626   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2627   rhs (given by CODE), and try to convert it into a
2628   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2629   is true iff we converted the statement.  */
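
/* For instance (sketched; subject to the checks below):

     _1 = a w* b;
     c = _1 + d;

   can become c = WIDEN_MULT_PLUS_EXPR <a, b, d> when the target provides
   a suitable multiply-and-accumulate pattern.  */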
2630
2631static bool
2632convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2633			    enum tree_code code)
2634{
2635  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2636  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2637  tree type, type1, type2, optype;
2638  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2639  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2640  optab this_optab;
2641  enum tree_code wmult_code;
2642  enum insn_code handler;
2643  scalar_mode to_mode, from_mode, actual_mode;
2644  location_t loc = gimple_location (stmt);
2645  int actual_precision;
2646  bool from_unsigned1, from_unsigned2;
2647
2648  lhs = gimple_assign_lhs (stmt);
2649  type = TREE_TYPE (lhs);
2650  if (TREE_CODE (type) != INTEGER_TYPE
2651      && TREE_CODE (type) != FIXED_POINT_TYPE)
2652    return false;
2653
2654  if (code == MINUS_EXPR)
2655    wmult_code = WIDEN_MULT_MINUS_EXPR;
2656  else
2657    wmult_code = WIDEN_MULT_PLUS_EXPR;
2658
2659  rhs1 = gimple_assign_rhs1 (stmt);
2660  rhs2 = gimple_assign_rhs2 (stmt);
2661
2662  if (TREE_CODE (rhs1) == SSA_NAME)
2663    {
2664      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2665      if (is_gimple_assign (rhs1_stmt))
2666	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2667    }
2668
2669  if (TREE_CODE (rhs2) == SSA_NAME)
2670    {
2671      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2672      if (is_gimple_assign (rhs2_stmt))
2673	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2674    }
2675
2676  /* Allow for one conversion statement between the multiply
2677     and addition/subtraction statement.  If there is more than
2678     one conversion then we assume they would invalidate this
2679     transformation.  If that's not the case then they should have
2680     been folded before now.  */
2681  if (CONVERT_EXPR_CODE_P (rhs1_code))
2682    {
2683      conv1_stmt = rhs1_stmt;
2684      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2685      if (TREE_CODE (rhs1) == SSA_NAME)
2686	{
2687	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2688	  if (is_gimple_assign (rhs1_stmt))
2689	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2690	}
2691      else
2692	return false;
2693    }
2694  if (CONVERT_EXPR_CODE_P (rhs2_code))
2695    {
2696      conv2_stmt = rhs2_stmt;
2697      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2698      if (TREE_CODE (rhs2) == SSA_NAME)
2699	{
2700	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2701	  if (is_gimple_assign (rhs2_stmt))
2702	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2703	}
2704      else
2705	return false;
2706    }
2707
2708  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2709     is_widening_mult_p, but we still need the rhs values it returns.
2710
2711     It might also appear that it would be sufficient to use the existing
2712     operands of the widening multiply, but that would limit the choice of
2713     multiply-and-accumulate instructions.
2714
2715     If the widened-multiplication result has more than one use, it is
2716     probably wiser not to do the conversion.  Also restrict this operation
2717     to a single basic block to avoid moving the multiply to a different
2718     block with a higher execution frequency.  */
2719  if (code == PLUS_EXPR
2720      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2721    {
2722      if (!has_single_use (rhs1)
2723	  || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2724	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2725				  &type2, &mult_rhs2))
2726	return false;
2727      add_rhs = rhs2;
2728      conv_stmt = conv1_stmt;
2729    }
2730  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2731    {
2732      if (!has_single_use (rhs2)
2733	  || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2734	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2735				  &type2, &mult_rhs2))
2736	return false;
2737      add_rhs = rhs1;
2738      conv_stmt = conv2_stmt;
2739    }
2740  else
2741    return false;
2742
2743  to_mode = SCALAR_TYPE_MODE (type);
2744  from_mode = SCALAR_TYPE_MODE (type1);
2745  if (to_mode == from_mode)
2746    return false;
2747
2748  from_unsigned1 = TYPE_UNSIGNED (type1);
2749  from_unsigned2 = TYPE_UNSIGNED (type2);
2750  optype = type1;
2751
2752  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2753  if (from_unsigned1 != from_unsigned2)
2754    {
2755      if (!INTEGRAL_TYPE_P (type))
2756	return false;
2757      /* We can use a signed multiply with unsigned types as long as
2758	 there is a wider mode to use, or it is the smaller of the two
2759	 types that is unsigned.  Note that type1 >= type2, always.  */
2760      if ((from_unsigned1
2761	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2762	  || (from_unsigned2
2763	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2764	{
2765	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2766	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2767	    return false;
2768	}
2769
2770      from_unsigned1 = from_unsigned2 = false;
2771      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2772					       false);
2773    }
2774
2775  /* If there was a conversion between the multiply and addition
2776     then we need to make sure it fits a multiply-and-accumulate.
2777     There should be a single mode change which does not change the
2778     value.  */
2779  if (conv_stmt)
2780    {
2781      /* We use the original, unmodified data types for this.  */
2782      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2783      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2784      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2785      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2786
2787      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2788	{
2789	  /* Conversion is a truncate.  */
2790	  if (TYPE_PRECISION (to_type) < data_size)
2791	    return false;
2792	}
2793      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2794	{
2795	  /* Conversion is an extend.  Check it's the right sort.  */
2796	  if (TYPE_UNSIGNED (from_type) != is_unsigned
2797	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2798	    return false;
2799	}
2800      /* else convert is a no-op for our purposes.  */
2801    }
2802
2803  /* Verify that the machine can perform a widening multiply
2804     accumulate in this mode/signedness combination, otherwise
2805     this transformation is likely to pessimize code.  */
2806  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2807  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2808						  from_mode, &actual_mode);
2809
2810  if (handler == CODE_FOR_nothing)
2811    return false;
2812
2813  /* Ensure that the inputs to the handler are in the correct precision
2814     for the opcode.  This will be the full mode size.  */
2815  actual_precision = GET_MODE_PRECISION (actual_mode);
2816  if (actual_precision != TYPE_PRECISION (type1)
2817      || from_unsigned1 != TYPE_UNSIGNED (type1))
2818    mult_rhs1 = build_and_insert_cast (gsi, loc,
2819				       build_nonstandard_integer_type
2820				         (actual_precision, from_unsigned1),
2821				       mult_rhs1);
2822  if (actual_precision != TYPE_PRECISION (type2)
2823      || from_unsigned2 != TYPE_UNSIGNED (type2))
2824    mult_rhs2 = build_and_insert_cast (gsi, loc,
2825				       build_nonstandard_integer_type
2826					 (actual_precision, from_unsigned2),
2827				       mult_rhs2);
2828
2829  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2830    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2831
2832  /* Handle constants.  */
2833  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2834    mult_rhs1 = fold_convert (type1, mult_rhs1);
2835  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2836    mult_rhs2 = fold_convert (type2, mult_rhs2);
2837
2838  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2839				  add_rhs);
2840  update_stmt (gsi_stmt (*gsi));
2841  widen_mul_stats.maccs_inserted++;
2842  return true;
2843}
2844
2845/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2846   OP2 and which we know is used in statements that can be, together with the
2847   multiplication, converted to FMAs, perform the transformation.  */
2848
2849static void
2850convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2851{
2852  tree type = TREE_TYPE (mul_result);
2853  gimple *use_stmt;
2854  imm_use_iterator imm_iter;
2855  gcall *fma_stmt;
2856
2857  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2858    {
2859      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2860      tree addop, mulop1 = op1, result = mul_result;
2861      bool negate_p = false;
2862      gimple_seq seq = NULL;
2863
2864      if (is_gimple_debug (use_stmt))
2865	continue;
2866
2867      if (is_gimple_assign (use_stmt)
2868	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
2869	{
2870	  result = gimple_assign_lhs (use_stmt);
2871	  use_operand_p use_p;
2872	  gimple *neguse_stmt;
2873	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2874	  gsi_remove (&gsi, true);
2875	  release_defs (use_stmt);
2876
2877	  use_stmt = neguse_stmt;
2878	  gsi = gsi_for_stmt (use_stmt);
2879	  negate_p = true;
2880	}
2881
2882      tree cond, else_value, ops[3];
2883      tree_code code;
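      /* Every use was vetted by convert_mult_to_fma before this function
	 was called, so interpreting the use as a conditional op must
	 succeed.  */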
2884      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
2885					      ops, &else_value))
2886	gcc_unreachable ();
2887      addop = ops[0] == result ? ops[1] : ops[0];
2888
2889      if (code == MINUS_EXPR)
2890	{
2891	  if (ops[0] == result)
2892	    /* a * b - c -> a * b + (-c)  */
2893	    addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
2894	  else
2895	    /* a - b * c -> (-b) * c + a */
2896	    negate_p = !negate_p;
2897	}
2898
2899      if (negate_p)
2900	mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
2901
2902      if (seq)
2903	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2904
2905      if (cond)
2906	fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
2907					       op2, addop, else_value);
2908      else
2909	fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
2910      gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
2911      gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
2912								   use_stmt));
2913      gsi_replace (&gsi, fma_stmt, true);
2914      /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
2915	 regardless of where the negation occurs.  */
2916      gimple *orig_stmt = gsi_stmt (gsi);
2917      if (fold_stmt (&gsi, follow_all_ssa_edges))
2918	{
2919	  if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
2920	    gcc_unreachable ();
2921	  update_stmt (gsi_stmt (gsi));
2922	}
2923
2924      if (dump_file && (dump_flags & TDF_DETAILS))
2925	{
2926	  fprintf (dump_file, "Generated FMA ");
2927	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
2928	  fprintf (dump_file, "\n");
2929	}
2930
2931      widen_mul_stats.fmas_inserted++;
2932    }
2933}
2934
2935/* Data necessary to perform the actual transformation from a multiplication
2936   and an addition to an FMA after decision is taken it should be done and to
2937   then delete the multiplication statement from the function IL.  */
2938
2939struct fma_transformation_info
2940{
2941  gimple *mul_stmt;
2942  tree mul_result;
2943  tree op1;
2944  tree op2;
2945};
2946
2947/* Structure containing the current state of FMA deferring, i.e. whether we are
2948   deferring, whether to continue deferring, and all data necessary to come
2949   back and perform all deferred transformations.  */
2950
2951class fma_deferring_state
2952{
2953public:
2954  /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
2955     do any deferring.  */
2956
2957  fma_deferring_state (bool perform_deferring)
2958    : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
2959      m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
2960
2961  /* List of FMA candidates for which the transformation has been
2962     determined to be possible but which, at this point in the BB
2963     analysis, we do not yet consider beneficial.  */
2964  auto_vec<fma_transformation_info, 8> m_candidates;
2965
2966  /* Set of multiplication results that are part of already deferred FMA
2967     candidates.  */
2968  hash_set<tree> m_mul_result_set;
2969
2970  /* The PHI that supposedly feeds the result of one FMA back to another
2971     over the loop boundary.  */
2972  gphi *m_initial_phi;
2973
2974  /* Result of the last produced FMA candidate or NULL if there has not been
2975     one.  */
2976  tree m_last_result;
2977
2978  /* If true, deferring might still be profitable.  If false, transform all
2979     candidates and no longer defer.  */
2980  bool m_deferring_p;
2981};
2982
2983/* Transform all deferred FMA candidates and mark STATE as no longer
2984   deferring.  */
2985
2986static void
2987cancel_fma_deferring (fma_deferring_state *state)
2988{
2989  if (!state->m_deferring_p)
2990    return;
2991
2992  for (unsigned i = 0; i < state->m_candidates.length (); i++)
2993    {
2994      if (dump_file && (dump_flags & TDF_DETAILS))
2995	fprintf (dump_file, "Generating deferred FMA\n");
2996
2997      const fma_transformation_info &fti = state->m_candidates[i];
2998      convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
2999
3000      gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3001      gsi_remove (&gsi, true);
3002      release_defs (fti.mul_stmt);
3003    }
3004  state->m_deferring_p = false;
3005}
3006
3007/* If OP is an SSA name defined by a PHI node, return the PHI statement.
3008   Otherwise return NULL.  */
3009
3010static gphi *
3011result_of_phi (tree op)
3012{
3013  if (TREE_CODE (op) != SSA_NAME)
3014    return NULL;
3015
3016  return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3017}
3018
3019/* After processing statements of a BB and recording STATE, return true if the
3020   initial phi is fed by the last FMA candidate result or one such result from
3021   previously processed BBs marked in LAST_RESULT_SET.  */
3022
3023static bool
3024last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3025				      hash_set<tree> *last_result_set)
3026{
3027  ssa_op_iter iter;
3028  use_operand_p use;
3029  FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3030    {
3031      tree t = USE_FROM_PTR (use);
3032      if (t == state->m_last_result
3033	  || last_result_set->contains (t))
3034	return true;
3035    }
3036
3037  return false;
3038}
3039
3040/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3041   with uses in additions and subtractions to form fused multiply-add
3042   operations.  Returns true if successful and MUL_STMT should be removed.
3043   If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3044   on MUL_COND, otherwise it is unconditional.
3045
3046   If STATE indicates that we are deferring FMA transformation, that means
3047   that we do not produce FMAs for basic blocks which look like:
3048
3049    <bb 6>
3050    # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3051    _65 = _14 * _16;
3052    accumulator_66 = _65 + accumulator_111;
3053
3054  or its unrolled version, i.e. with several FMA candidates that feed the
3055  result of one into the addend of another.  Instead, we add them to a list
3056  in STATE and if we later discover an FMA candidate that is not part of
3057  such a chain, we go back and perform all deferred past candidates.  */
3058
3059static bool
3060convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3061		     fma_deferring_state *state, tree mul_cond = NULL_TREE)
3062{
3063  tree mul_result = gimple_get_lhs (mul_stmt);
3064  tree type = TREE_TYPE (mul_result);
3065  gimple *use_stmt, *neguse_stmt;
3066  use_operand_p use_p;
3067  imm_use_iterator imm_iter;
3068
3069  if (FLOAT_TYPE_P (type)
3070      && flag_fp_contract_mode == FP_CONTRACT_OFF)
3071    return false;
3072
3073  /* We don't want to do bitfield reduction ops.  */
3074  if (INTEGRAL_TYPE_P (type)
3075      && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3076    return false;
3077
3078  /* If the target doesn't support it, don't generate it.  We assume that
3079     if fma isn't available then fms, fnma or fnms are not either.  */
3080  optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3081  if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3082    return false;
3083
3084  /* If the multiplication has zero uses, it is kept around probably because
3085     of -fnon-call-exceptions.  Don't optimize it away in that case,
3086     that is DCE's job.  */
3087  if (has_zero_uses (mul_result))
3088    return false;
3089
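  /* Deferring is only ever considered for types no wider than
     param_avoid_fma_max_bits.  */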
3090  bool check_defer
3091    = (state->m_deferring_p
3092       && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3093		    param_avoid_fma_max_bits));
3094  bool defer = check_defer;
3095  bool seen_negate_p = false;
3096  /* Make sure that the multiplication statement becomes dead after
3097     the transformation, thus that all uses are transformed to FMAs.
3098     This means we assume that an FMA operation has the same cost
3099     as an addition.  */
3100  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3101    {
3102      tree result = mul_result;
3103      bool negate_p = false;
3104
3105      use_stmt = USE_STMT (use_p);
3106
3107      if (is_gimple_debug (use_stmt))
3108	continue;
3109
3110      /* For now restrict this operation to single basic blocks.  In theory
3111	 we would want to support sinking the multiplication in
3112	 m = a*b;
3113	 if ()
3114	   ma = m + c;
3115	 else
3116	   d = m;
3117	 to form a fma in the then block and sink the multiplication to the
3118	 else block.  */
3119      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3120	return false;
3121
3122      /* A negate on the multiplication leads to FNMA.  */
3123      if (is_gimple_assign (use_stmt)
3124	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3125	{
3126	  ssa_op_iter iter;
3127	  use_operand_p usep;
3128
3129	  /* If (due to earlier missed optimizations) we have two
3130	     negates of the same value, treat them as equivalent
3131	     to a single negate with multiple uses.  */
3132	  if (seen_negate_p)
3133	    return false;
3134
3135	  result = gimple_assign_lhs (use_stmt);
3136
3137	  /* Make sure the negate statement becomes dead with this
3138	     single transformation.  */
3139	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
3140			       &use_p, &neguse_stmt))
3141	    return false;
3142
3143	  /* Make sure the multiplication isn't also used on that stmt.  */
3144	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3145	    if (USE_FROM_PTR (usep) == mul_result)
3146	      return false;
3147
3148	  /* Re-validate.  */
3149	  use_stmt = neguse_stmt;
3150	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3151	    return false;
3152
3153	  negate_p = seen_negate_p = true;
3154	}
3155
3156      tree cond, else_value, ops[3];
3157      tree_code code;
3158      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3159					      &else_value))
3160	return false;
3161
3162      switch (code)
3163	{
3164	case MINUS_EXPR:
3165	  if (ops[1] == result)
3166	    negate_p = !negate_p;
3167	  break;
3168	case PLUS_EXPR:
3169	  break;
3170	default:
3171	  /* FMA can only be formed from PLUS and MINUS.  */
3172	  return false;
3173	}
3174
3175      if (mul_cond && cond != mul_cond)
3176	return false;
3177
3178      if (cond)
3179	{
3180	  if (cond == result || else_value == result)
3181	    return false;
3182	  if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3183	    return false;
3184	}
3185
3186      /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3187	 we'll visit later, we might be able to get a more profitable
3188	 match with fnma.
3189	 OTOH, if we don't, a negate / fma pair has likely lower latency
3190	 than a mult / subtract pair.  */
3191      if (code == MINUS_EXPR
3192	  && !negate_p
3193	  && ops[0] == result
3194	  && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3195	  && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3196	  && TREE_CODE (ops[1]) == SSA_NAME
3197	  && has_single_use (ops[1]))
3198	{
3199	  gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3200	  if (is_gimple_assign (stmt2)
3201	      && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3202	    return false;
3203	}
3204
3205      /* We can't handle a * b + a * b.  */
3206      if (ops[0] == ops[1])
3207	return false;
3208      /* If deferring, make sure we are not looking at an instruction that
3209	 wouldn't have existed if we were not.  */
3210      if (state->m_deferring_p
3211	  && (state->m_mul_result_set.contains (ops[0])
3212	      || state->m_mul_result_set.contains (ops[1])))
3213	return false;
3214
3215      if (check_defer)
3216	{
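	  /* Keep deferring only while the candidates form a chain: each new
	     candidate must consume the result of the previous one, or, for
	     the first one, have an addend defined by a PHI (presumably the
	     accumulator carried around the loop).  */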
3217	  tree use_lhs = gimple_get_lhs (use_stmt);
3218	  if (state->m_last_result)
3219	    {
3220	      if (ops[1] == state->m_last_result
3221		  || ops[0] == state->m_last_result)
3222		defer = true;
3223	      else
3224		defer = false;
3225	    }
3226	  else
3227	    {
3228	      gcc_checking_assert (!state->m_initial_phi);
3229	      gphi *phi;
3230	      if (ops[0] == result)
3231		phi = result_of_phi (ops[1]);
3232	      else
3233		{
3234		  gcc_assert (ops[1] == result);
3235		  phi = result_of_phi (ops[0]);
3236		}
3237
3238	      if (phi)
3239		{
3240		  state->m_initial_phi = phi;
3241		  defer = true;
3242		}
3243	      else
3244		defer = false;
3245	    }
3246
3247	  state->m_last_result = use_lhs;
3248	  check_defer = false;
3249	}
3250      else
3251	defer = false;
3252
3253      /* While it is possible to validate whether or not the exact form that
3254	 we've recognized is available in the backend, the assumption is that
3255	 if the deferring logic above did not trigger, the transformation is
3256	 never a loss.  For instance, suppose the target only has the plain FMA
3257	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3258	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
3259         -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3260	 form the two NEGs are independent and could be run in parallel.  */
3261    }
3262
3263  if (defer)
3264    {
3265      fma_transformation_info fti;
3266      fti.mul_stmt = mul_stmt;
3267      fti.mul_result = mul_result;
3268      fti.op1 = op1;
3269      fti.op2 = op2;
3270      state->m_candidates.safe_push (fti);
3271      state->m_mul_result_set.add (mul_result);
3272
3273      if (dump_file && (dump_flags & TDF_DETAILS))
3274	{
3275	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
3276	  print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3277	  fprintf (dump_file, "\n");
3278	}
3279
3280      return false;
3281    }
3282  else
3283    {
3284      if (state->m_deferring_p)
3285	cancel_fma_deferring (state);
3286      convert_mult_to_fma_1 (mul_result, op1, op2);
3287      return true;
3288    }
3289}
3290
3292/* Helper function of match_uaddsub_overflow.  Return 1
3293   if USE_STMT is an unsigned overflow check ovf != 0 for
3294   STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
3295   and 0 otherwise.  */
3296
3297static int
3298uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3299{
3300  enum tree_code ccode = ERROR_MARK;
3301  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3302  if (gimple_code (use_stmt) == GIMPLE_COND)
3303    {
3304      ccode = gimple_cond_code (use_stmt);
3305      crhs1 = gimple_cond_lhs (use_stmt);
3306      crhs2 = gimple_cond_rhs (use_stmt);
3307    }
3308  else if (is_gimple_assign (use_stmt))
3309    {
3310      if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3311	{
3312	  ccode = gimple_assign_rhs_code (use_stmt);
3313	  crhs1 = gimple_assign_rhs1 (use_stmt);
3314	  crhs2 = gimple_assign_rhs2 (use_stmt);
3315	}
3316      else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3317	{
3318	  tree cond = gimple_assign_rhs1 (use_stmt);
3319	  if (COMPARISON_CLASS_P (cond))
3320	    {
3321	      ccode = TREE_CODE (cond);
3322	      crhs1 = TREE_OPERAND (cond, 0);
3323	      crhs2 = TREE_OPERAND (cond, 1);
3324	    }
3325	  else
3326	    return 0;
3327	}
3328      else
3329	return 0;
3330    }
3331  else
3332    return 0;
3333
3334  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3335    return 0;
3336
3337  enum tree_code code = gimple_assign_rhs_code (stmt);
3338  tree lhs = gimple_assign_lhs (stmt);
3339  tree rhs1 = gimple_assign_rhs1 (stmt);
3340  tree rhs2 = gimple_assign_rhs2 (stmt);
3341
3342  switch (ccode)
3343    {
3344    case GT_EXPR:
3345    case LE_EXPR:
3346      /* r = a - b; r > a or r <= a
3347	 r = a + b; a > r or a <= r or b > r or b <= r.  */
3348      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3349	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3350	      && crhs2 == lhs))
3351	return ccode == GT_EXPR ? 1 : -1;
3352      break;
3353    case LT_EXPR:
3354    case GE_EXPR:
3355      /* r = a - b; a < r or a >= r
3356	 r = a + b; r < a or r >= a or r < b or r >= b.  */
3357      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3358	  || (code == PLUS_EXPR && crhs1 == lhs
3359	      && (crhs2 == rhs1 || crhs2 == rhs2)))
3360	return ccode == LT_EXPR ? 1 : -1;
3361      break;
3362    default:
3363      break;
3364    }
3365  return 0;
3366}
3367
3368/* Recognize for unsigned x
3369   x = y - z;
3370   if (x > y)
3371   where there are other uses of x and replace it with
3372   _7 = SUB_OVERFLOW (y, z);
3373   x = REALPART_EXPR <_7>;
3374   _8 = IMAGPART_EXPR <_7>;
3375   if (_8)
3376   and similarly for addition.  */
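
/* The addition case looks like this (an illustrative sketch, mirroring
   the subtraction example above):
   x = y + z;
   if (x < y)
   becomes
   _7 = ADD_OVERFLOW (y, z);
   x = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)  */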
3377
3378static bool
3379match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3380			enum tree_code code)
3381{
3382  tree lhs = gimple_assign_lhs (stmt);
3383  tree type = TREE_TYPE (lhs);
3384  use_operand_p use_p;
3385  imm_use_iterator iter;
3386  bool use_seen = false;
3387  bool ovf_use_seen = false;
3388  gimple *use_stmt;
3389
3390  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3391  if (!INTEGRAL_TYPE_P (type)
3392      || !TYPE_UNSIGNED (type)
3393      || has_zero_uses (lhs)
3394      || has_single_use (lhs)
3395      || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3396			TYPE_MODE (type)) == CODE_FOR_nothing)
3397    return false;
3398
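  /* Require at least one overflow-check use of LHS and at least one
     other use; otherwise the transform is not applied.  */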
3399  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3400    {
3401      use_stmt = USE_STMT (use_p);
3402      if (is_gimple_debug (use_stmt))
3403	continue;
3404
3405      if (uaddsub_overflow_check_p (stmt, use_stmt))
3406	ovf_use_seen = true;
3407      else
3408	use_seen = true;
3409      if (ovf_use_seen && use_seen)
3410	break;
3411    }
3412
3413  if (!ovf_use_seen || !use_seen)
3414    return false;
3415
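  /* Build the replacement sequence:
       ctmp = ADD_OVERFLOW/SUB_OVERFLOW (rhs1, rhs2);
       lhs = REALPART_EXPR <ctmp>;
       ovf = IMAGPART_EXPR <ctmp>;  */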
3416  tree ctype = build_complex_type (type);
3417  tree rhs1 = gimple_assign_rhs1 (stmt);
3418  tree rhs2 = gimple_assign_rhs2 (stmt);
3419  gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3420					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3421					 2, rhs1, rhs2);
3422  tree ctmp = make_ssa_name (ctype);
3423  gimple_call_set_lhs (g, ctmp);
3424  gsi_insert_before (gsi, g, GSI_SAME_STMT);
3425  gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3426				     build1 (REALPART_EXPR, type, ctmp));
3427  gsi_replace (gsi, g2, true);
3428  tree ovf = make_ssa_name (type);
3429  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3430			    build1 (IMAGPART_EXPR, type, ctmp));
3431  gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3432
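  /* Rewrite every overflow check of the original result so that it
     tests OVF instead.  */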
3433  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3434    {
3435      if (is_gimple_debug (use_stmt))
3436	continue;
3437
3438      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3439      if (ovf_use == 0)
3440	continue;
3441      if (gimple_code (use_stmt) == GIMPLE_COND)
3442	{
3443	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
3444	  gimple_cond_set_lhs (cond_stmt, ovf);
3445	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3446	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3447	}
3448      else
3449	{
3450	  gcc_checking_assert (is_gimple_assign (use_stmt));
3451	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3452	    {
3453	      gimple_assign_set_rhs1 (use_stmt, ovf);
3454	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3455	      gimple_assign_set_rhs_code (use_stmt,
3456					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3457	    }
3458	  else
3459	    {
3460	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3461				   == COND_EXPR);
3462	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3463				  boolean_type_node, ovf,
3464				  build_int_cst (type, 0));
3465	      gimple_assign_set_rhs1 (use_stmt, cond);
3466	    }
3467	}
3468      update_stmt (use_stmt);
3469    }
3470  return true;
3471}
3472
3473/* Return true if the target has support for divmod in MODE.  */
3474
3475static bool
3476target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3477{
3478  /* If the target supports a hardware divmod insn, use it.  */
3479  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3480    return true;
3481
3482  /* Check if libfunc for divmod is available.  */
3483  rtx libfunc = optab_libfunc (divmod_optab, mode);
3484  if (libfunc != NULL_RTX)
3485    {
3486      /* If an optab_handler exists for div_optab, perhaps in a wider mode,
3487	 we don't want to use the libfunc even if it exists for the given mode.  */
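      /* That is, prefer a hardware division, possibly performed in a
	 wider mode, over a divmod libcall for the requested mode.  */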
3488      machine_mode div_mode;
3489      FOR_EACH_MODE_FROM (div_mode, mode)
3490	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3491	  return false;
3492
3493      return targetm.expand_divmod_libfunc != NULL;
3494    }
3495
3496  return false;
3497}
3498
3499/* Check whether STMT is a candidate for the divmod transform.  */
3500
3501static bool
3502divmod_candidate_p (gassign *stmt)
3503{
3504  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3505  machine_mode mode = TYPE_MODE (type);
3506  optab divmod_optab, div_optab;
3507
3508  if (TYPE_UNSIGNED (type))
3509    {
3510      divmod_optab = udivmod_optab;
3511      div_optab = udiv_optab;
3512    }
3513  else
3514    {
3515      divmod_optab = sdivmod_optab;
3516      div_optab = sdiv_optab;
3517    }
3518
3519  tree op1 = gimple_assign_rhs1 (stmt);
3520  tree op2 = gimple_assign_rhs2 (stmt);
3521
3522  /* Disable the transform if either is a constant, since division-by-constant
3523     may have specialized expansion.  */
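  /* (E.g. an unsigned division by a power of two expands to a shift,
     which is preferable to a divmod call.)  */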
3524  if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3525    return false;
3526
3527  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type), as that should
3528     be expanded using the [su]divv optabs.  */
3529  if (TYPE_OVERFLOW_TRAPS (type))
3530    return false;
3531
3532  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3533    return false;
3534
3535  return true;
3536}
3537
3538/* This function looks for:
3539   t1 = a TRUNC_DIV_EXPR b;
3540   t2 = a TRUNC_MOD_EXPR b;
3541   and transforms it to the following sequence:
3542   complex_tmp = DIVMOD (a, b);
3543   t1 = REALPART_EXPR(complex_tmp);
3544   t2 = IMAGPART_EXPR(complex_tmp);
3545   For conditions enabling the transform see divmod_candidate_p().
3546
3547   The pass has three parts:
3548   1) Find top_stmt, which is a trunc_div or trunc_mod stmt and dominates
3549      all other trunc_div_expr and trunc_mod_expr stmts.
3550   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3551      to stmts vector.
3552   3) Insert DIVMOD call just before top_stmt and update entries in
3553      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
3554      IMAGPART_EXPR for mod).  */
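
/* For example (an illustrative sketch): if
	t2 = a / b;
   appears in a block dominating
	t1 = a % b;
   then part 1 makes the division the top_stmt, part 2 collects both
   statements, and part 3 emits DIVMOD (a, b) just before the division.  */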
3555
3556static bool
3557convert_to_divmod (gassign *stmt)
3558{
3559  if (stmt_can_throw_internal (cfun, stmt)
3560      || !divmod_candidate_p (stmt))
3561    return false;
3562
3563  tree op1 = gimple_assign_rhs1 (stmt);
3564  tree op2 = gimple_assign_rhs2 (stmt);
3565
3566  imm_use_iterator use_iter;
3567  gimple *use_stmt;
3568  auto_vec<gimple *> stmts;
3569
3570  gimple *top_stmt = stmt;
3571  basic_block top_bb = gimple_bb (stmt);
3572
3573  /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
3574     at least stmt and possibly other trunc_div/trunc_mod stmts
3575     having the same operands as stmt.  */
3576
3577  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3578    {
3579      if (is_gimple_assign (use_stmt)
3580	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3581	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3582	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3583	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3584	{
3585	  if (stmt_can_throw_internal (cfun, use_stmt))
3586	    continue;
3587
3588	  basic_block bb = gimple_bb (use_stmt);
3589
3590	  if (bb == top_bb)
3591	    {
3592	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3593		top_stmt = use_stmt;
3594	    }
3595	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3596	    {
3597	      top_bb = bb;
3598	      top_stmt = use_stmt;
3599	    }
3600	}
3601    }
3602
3603  tree top_op1 = gimple_assign_rhs1 (top_stmt);
3604  tree top_op2 = gimple_assign_rhs2 (top_stmt);
3605
3606  stmts.safe_push (top_stmt);
3607  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3608
3609  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3610     to the stmts vector.  The 2nd loop will always add stmt to the stmts
3611     vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3612     2nd loop ends up adding at least a single trunc_mod_expr stmt.  */
3613
3614  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3615    {
3616      if (is_gimple_assign (use_stmt)
3617	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3618	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3619	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3620	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3621	{
3622	  if (use_stmt == top_stmt
3623	      || stmt_can_throw_internal (cfun, use_stmt)
3624	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3625	    continue;
3626
3627	  stmts.safe_push (use_stmt);
3628	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3629	    div_seen = true;
3630	}
3631    }
3632
3633  if (!div_seen)
3634    return false;
3635
3636  /* Part 3: Create a call to the internal fn DIVMOD:
3637     divmod_tmp = DIVMOD (op1, op2).  */
3638
3639  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3640  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3641				 call_stmt, "divmod_tmp");
3642  gimple_call_set_lhs (call_stmt, res);
3643  /* We rejected throwing statements above.  */
3644  gimple_call_set_nothrow (call_stmt, true);
3645
3646  /* Insert the call before top_stmt.  */
3647  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3648  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3649
3650  widen_mul_stats.divmod_calls_inserted++;
3651
3652  /* Update all statements in stmts vector:
3653     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3654     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
3655
3656  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3657    {
3658      tree new_rhs;
3659
3660      switch (gimple_assign_rhs_code (use_stmt))
3661	{
3662	  case TRUNC_DIV_EXPR:
3663	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3664	    break;
3665
3666	  case TRUNC_MOD_EXPR:
3667	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3668	    break;
3669
3670	  default:
3671	    gcc_unreachable ();
3672	}
3673
3674      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3675      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3676      update_stmt (use_stmt);
3677    }
3678
3679  return true;
3680}
3681
3682/* Find integer multiplications where the operands are extended from
3683   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3684   where appropriate.  */
3685
3686namespace {
3687
3688const pass_data pass_data_optimize_widening_mul =
3689{
3690  GIMPLE_PASS, /* type */
3691  "widening_mul", /* name */
3692  OPTGROUP_NONE, /* optinfo_flags */
3693  TV_TREE_WIDEN_MUL, /* tv_id */
3694  PROP_ssa, /* properties_required */
3695  0, /* properties_provided */
3696  0, /* properties_destroyed */
3697  0, /* todo_flags_start */
3698  TODO_update_ssa, /* todo_flags_finish */
3699};
3700
3701class pass_optimize_widening_mul : public gimple_opt_pass
3702{
3703public:
3704  pass_optimize_widening_mul (gcc::context *ctxt)
3705    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3706  {}
3707
3708  /* opt_pass methods: */
3709  virtual bool gate (function *)
3710    {
3711      return flag_expensive_optimizations && optimize;
3712    }
3713
3714  virtual unsigned int execute (function *);
3715
3716}; // class pass_optimize_widening_mul
3717
3718/* Walker class to perform the transformation in reverse dominance order.  */
3719
3720class math_opts_dom_walker : public dom_walker
3721{
3722public:
3723  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
3724     if walking modifies the CFG.  */
3725
3726  math_opts_dom_walker (bool *cfg_changed_p)
3727    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
3728      m_cfg_changed_p (cfg_changed_p) {}
3729
3730  /* The actual actions performed in the walk.  */
3731
3732  virtual void after_dom_children (basic_block);
3733
3734  /* Set of results of chains of multiply and add statement combinations that
3735     were not transformed into FMAs because of active deferring.  */
3736  hash_set<tree> m_last_result_set;
3737
3738  /* Pointer to a flag of the user that needs to be set if CFG has been
3739     modified.  */
3740  bool *m_cfg_changed_p;
3741};
3742
3743void
3744math_opts_dom_walker::after_dom_children (basic_block bb)
3745{
3746  gimple_stmt_iterator gsi;
3747
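  /* FMA deferring is active only when param_avoid_fma_max_bits is
     positive.  */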
3748  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
3749
3750  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3751    {
3752      gimple *stmt = gsi_stmt (gsi);
3753      enum tree_code code;
3754
3755      if (is_gimple_assign (stmt))
3756	{
3757	  code = gimple_assign_rhs_code (stmt);
3758	  switch (code)
3759	    {
3760	    case MULT_EXPR:
3761	      if (!convert_mult_to_widen (stmt, &gsi)
3762		  && !convert_expand_mult_copysign (stmt, &gsi)
3763		  && convert_mult_to_fma (stmt,
3764					  gimple_assign_rhs1 (stmt),
3765					  gimple_assign_rhs2 (stmt),
3766					  &fma_state))
3767		{
3768		  gsi_remove (&gsi, true);
3769		  release_defs (stmt);
3770		  continue;
3771		}
3772	      break;
3773
3774	    case PLUS_EXPR:
3775	    case MINUS_EXPR:
3776	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
3777		match_uaddsub_overflow (&gsi, stmt, code);
3778	      break;
3779
3780	    case TRUNC_MOD_EXPR:
3781	      convert_to_divmod (as_a<gassign *> (stmt));
3782	      break;
3783
3784	    default:;
3785	    }
3786	}
3787      else if (is_gimple_call (stmt))
3788	{
3789	  switch (gimple_call_combined_fn (stmt))
3790	    {
3791	    CASE_CFN_POW:
3792	      if (gimple_call_lhs (stmt)
3793		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3794		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3795				 &dconst2)
3796		  && convert_mult_to_fma (stmt,
3797					  gimple_call_arg (stmt, 0),
3798					  gimple_call_arg (stmt, 0),
3799					  &fma_state))
3800		{
3801		  unlink_stmt_vdef (stmt);
3802		  if (gsi_remove (&gsi, true)
3803		      && gimple_purge_dead_eh_edges (bb))
3804		    *m_cfg_changed_p = true;
3805		  release_defs (stmt);
3806		  continue;
3807		}
3808	      break;
3809
3810	    case CFN_COND_MUL:
3811	      if (convert_mult_to_fma (stmt,
3812				       gimple_call_arg (stmt, 1),
3813				       gimple_call_arg (stmt, 2),
3814				       &fma_state,
3815				       gimple_call_arg (stmt, 0)))
3817		{
3818		  gsi_remove (&gsi, true);
3819		  release_defs (stmt);
3820		  continue;
3821		}
3822	      break;
3823
3824	    case CFN_LAST:
3825	      cancel_fma_deferring (&fma_state);
3826	      break;
3827
3828	    default:
3829	      break;
3830	    }
3831	}
3832      gsi_next (&gsi);
3833    }
3834  if (fma_state.m_deferring_p
3835      && fma_state.m_initial_phi)
3836    {
3837      gcc_checking_assert (fma_state.m_last_result);
3838      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
3839						 &m_last_result_set))
3840	cancel_fma_deferring (&fma_state);
3841      else
3842	m_last_result_set.add (fma_state.m_last_result);
3843    }
3844}
3845
3846
3847unsigned int
3848pass_optimize_widening_mul::execute (function *fun)
3849{
3850  bool cfg_changed = false;
3851
3852  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3853  calculate_dominance_info (CDI_DOMINATORS);
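  /* Statement uids are used, e.g. by convert_to_divmod, to order
     statements within a basic block.  */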
3854  renumber_gimple_stmt_uids (cfun);
3855
3856  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3857
3858  statistics_counter_event (fun, "widening multiplications inserted",
3859			    widen_mul_stats.widen_mults_inserted);
3860  statistics_counter_event (fun, "widening maccs inserted",
3861			    widen_mul_stats.maccs_inserted);
3862  statistics_counter_event (fun, "fused multiply-adds inserted",
3863			    widen_mul_stats.fmas_inserted);
3864  statistics_counter_event (fun, "divmod calls inserted",
3865			    widen_mul_stats.divmod_calls_inserted);
3866
3867  return cfg_changed ? TODO_cleanup_cfg : 0;
3868}
3869
3870} // anon namespace
3871
3872gimple_opt_pass *
3873make_pass_optimize_widening_mul (gcc::context *ctxt)
3874{
3875  return new pass_optimize_widening_mul (ctxt);
3876}
3877