1/* Global, SSA-based optimizations using mathematical identities.
2   Copyright (C) 2005-2022 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* Currently, the only mini-pass in this file tries to CSE reciprocal
21   operations.  These are common in sequences such as this one:
22
23	modulus = sqrt(x*x + y*y + z*z);
24	x = x / modulus;
25	y = y / modulus;
26	z = z / modulus;
27
28   that can be optimized to
29
30	modulus = sqrt(x*x + y*y + z*z);
31        rmodulus = 1.0 / modulus;
32	x = x * rmodulus;
33	y = y * rmodulus;
34	z = z * rmodulus;
35
36   We do this for loop invariant divisors, and with this pass whenever
37   we notice that a division has the same divisor multiple times.
38
39   Of course, like in PRE, we don't insert a division if a dominator
40   already has one.  However, this cannot be done as an extension of
41   PRE for several reasons.
42
   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   effective to optimize two divisions by the same number.  We make
   this a param, and it is called N in the remainder of this comment.
50
   Second, if trapping math is active, we have less freedom in where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, we can instead insert
   divisions elsewhere, namely in blocks that are common dominators of
   those that contain the division.)
56
   We really don't want to compute the reciprocal unless a division will
   be found.  To ensure this, we won't insert the reciprocal in a basic
   block that has fewer than N divisions *post-dominating* it.
60
   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and their common dominators, and
   walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.
68
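   As a schematic example (not actual GIMPLE), consider divisions by
   the same x in two blocks B2 and B3 that are siblings in the
   dominator tree:

	B1: if (cond) goto B2; else goto B3;
	B2: a = b / x;
	B3: c = d / x;

   The subset built by the pass holds B2 and B3 together with their
   nearest common dominator B1.  With trapping math the reciprocal can
   only be inserted in B2 and B3 themselves; otherwise B1 is also a
   candidate, but only if at least N divisions post-dominate it, which
   is not the case here since neither B2 nor B3 post-dominates B1.
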
   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however, this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76   More practically, the cost will be small because there are few
77   divisions, and they tend to be in the same basic block, so insert_bb
78   is called very few times.
79
80   If we did this using domwalk.cc, an efficient implementation would have
81   to work on all the variables in a single pass, because we could not
82   work on just a subset of the dominator tree, as we do now, and the
83   cost would also be something like O(n_statements * n_basic_blocks).
84   The data structures would be more complex in order to work on all the
85   variables in a single pass.  */
86
87#include "config.h"
88#include "system.h"
89#include "coretypes.h"
90#include "backend.h"
91#include "target.h"
92#include "rtl.h"
93#include "tree.h"
94#include "gimple.h"
95#include "predict.h"
96#include "alloc-pool.h"
97#include "tree-pass.h"
98#include "ssa.h"
99#include "optabs-tree.h"
100#include "gimple-pretty-print.h"
101#include "alias.h"
102#include "fold-const.h"
103#include "gimple-fold.h"
104#include "gimple-iterator.h"
105#include "gimplify.h"
106#include "gimplify-me.h"
107#include "stor-layout.h"
108#include "tree-cfg.h"
109#include "tree-dfa.h"
110#include "tree-ssa.h"
111#include "builtins.h"
112#include "internal-fn.h"
113#include "case-cfn-macros.h"
114#include "optabs-libfuncs.h"
115#include "tree-eh.h"
116#include "targhooks.h"
117#include "domwalk.h"
118#include "tree-ssa-math-opts.h"
119
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
123struct occurrence {
124  /* The basic block represented by this structure.  */
125  basic_block bb = basic_block();
126
127  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128     inserted in BB.  */
129  tree recip_def = tree();
130
131  /* If non-NULL, the SSA_NAME holding the definition for a squared
132     reciprocal inserted in BB.  */
133  tree square_recip_def = tree();
134
135  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136     was inserted in BB.  */
137  gimple *recip_def_stmt = nullptr;
138
139  /* Pointer to a list of "struct occurrence"s for blocks dominated
140     by BB.  */
141  struct occurrence *children = nullptr;
142
  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
145  struct occurrence *next = nullptr;
146
147  /* The number of divisions that are in BB before compute_merit.  The
148     number of divisions that are in BB or post-dominate it after
149     compute_merit.  */
150  int num_divisions = 0;
151
152  /* True if the basic block has a division, false if it is a common
153     dominator for basic blocks that do.  If it is false and trapping
154     math is active, BB is not a candidate for inserting a reciprocal.  */
155  bool bb_has_division = false;
156
  /* Construct a struct occurrence for basic block BB, whose
     children list is headed by CHILDREN.  */
159  occurrence (basic_block bb, struct occurrence *children)
160  : bb (bb), children (children)
161  {
162    bb->aux = this;
163  }
164
165  /* Destroy a struct occurrence and remove it from its basic block.  */
166  ~occurrence ()
167  {
168    bb->aux = nullptr;
169  }
170
171  /* Allocate memory for a struct occurrence from OCC_POOL.  */
172  static void* operator new (size_t);
173
174  /* Return memory for a struct occurrence to OCC_POOL.  */
175  static void operator delete (void*, size_t);
176};
177
178static struct
179{
180  /* Number of 1.0/X ops inserted.  */
181  int rdivs_inserted;
182
183  /* Number of 1.0/FUNC ops inserted.  */
184  int rfuncs_inserted;
185} reciprocal_stats;
186
187static struct
188{
189  /* Number of cexpi calls inserted.  */
190  int inserted;
191
192  /* Number of conversions removed.  */
193  int conv_removed;
194
195} sincos_stats;
196
197static struct
198{
199  /* Number of widening multiplication ops inserted.  */
200  int widen_mults_inserted;
201
202  /* Number of integer multiply-and-accumulate ops inserted.  */
203  int maccs_inserted;
204
205  /* Number of fp fused multiply-add ops inserted.  */
206  int fmas_inserted;
207
208  /* Number of divmod calls inserted.  */
209  int divmod_calls_inserted;
210
211  /* Number of highpart multiplication ops inserted.  */
212  int highpart_mults_inserted;
213} widen_mul_stats;
214
215/* The instance of "struct occurrence" representing the highest
216   interesting block in the dominator tree.  */
217static struct occurrence *occ_head;
218
219/* Allocation pool for getting instances of "struct occurrence".  */
220static object_allocator<occurrence> *occ_pool;
221
222void* occurrence::operator new (size_t n)
223{
224  gcc_assert (n == sizeof(occurrence));
225  return occ_pool->allocate_raw ();
226}
227
228void occurrence::operator delete (void *occ, size_t n)
229{
230  gcc_assert (n == sizeof(occurrence));
231  occ_pool->remove_raw (occ);
232}
233
234/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
235   list of "struct occurrence"s, one per basic block, having IDOM as
236   their common dominator.
237
238   We try to insert NEW_OCC as deep as possible in the tree, and we also
239   insert any other block that is a common dominator for BB and one
240   block already in the tree.  */
241
242static void
243insert_bb (struct occurrence *new_occ, basic_block idom,
244	   struct occurrence **p_head)
245{
246  struct occurrence *occ, **p_occ;
247
248  for (p_occ = p_head; (occ = *p_occ) != NULL; )
249    {
250      basic_block bb = new_occ->bb, occ_bb = occ->bb;
251      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252      if (dom == bb)
253	{
254	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
255	     from its list.  */
256	  *p_occ = occ->next;
257	  occ->next = new_occ->children;
258	  new_occ->children = occ;
259
260	  /* Try the next block (it may as well be dominated by BB).  */
261	}
262
263      else if (dom == occ_bb)
264	{
265	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
266	  insert_bb (new_occ, dom, &occ->children);
267	  return;
268	}
269
270      else if (dom != idom)
271	{
272	  gcc_assert (!dom->aux);
273
	  /* There is a dominator between IDOM and BB; add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
277	  *p_occ = occ->next;
278	  new_occ->next = occ;
279	  occ->next = NULL;
280
281	  /* None of the previous blocks has DOM as a dominator: if we tail
282	     recursed, we would reexamine them uselessly. Just switch BB with
283	     DOM, and go on looking for blocks dominated by DOM.  */
284	  new_occ = new occurrence (dom, new_occ);
285	}
286
287      else
288	{
289	  /* Nothing special, go on with the next element.  */
290	  p_occ = &occ->next;
291	}
292    }
293
294  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
295  new_occ->next = *p_head;
296  *p_head = new_occ;
297}
298
/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the same division will be registered multiple
   times, use 1 (as is done for squares).  */
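/* For example, a division a / x is registered once with IMPORTANCE 2,
   while a division b / (x * x) ends up registered twice, once for each
   use of x in the square, with IMPORTANCE 1 each time, so both forms
   receive the same total weight (see execute_cse_reciprocals_1 below).  */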
304
305static inline void
306register_division_in (basic_block bb, int importance)
307{
308  struct occurrence *occ;
309
310  occ = (struct occurrence *) bb->aux;
311  if (!occ)
312    {
313      occ = new occurrence (bb, NULL);
314      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
315    }
316
317  occ->bb_has_division = true;
318  occ->num_divisions += importance;
319}
320
321
322/* Compute the number of divisions that postdominate each block in OCC and
323   its children.  */
324
325static void
326compute_merit (struct occurrence *occ)
327{
328  struct occurrence *occ_child;
329  basic_block dom = occ->bb;
330
331  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
332    {
333      basic_block bb;
334      if (occ_child->children)
335        compute_merit (occ_child);
336
337      if (flag_exceptions)
338	bb = single_noncomplex_succ (dom);
339      else
340	bb = dom;
341
342      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343        occ->num_divisions += occ_child->num_divisions;
344    }
345}
346
347
348/* Return whether USE_STMT is a floating-point division by DEF.  */
349static inline bool
350is_division_by (gimple *use_stmt, tree def)
351{
352  return is_gimple_assign (use_stmt)
353	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as a valid division, as we would get
	    confused later by replacing all immediate uses of x in such
	    a stmt.  */
358	 && gimple_assign_rhs1 (use_stmt) != def
359	 && !stmt_can_throw_internal (cfun, use_stmt);
360}
361
362/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
363static inline bool
364is_mult_by (gimple *use_stmt, tree def, tree a)
365{
366  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
368    {
369      tree op0 = gimple_assign_rhs1 (use_stmt);
370      tree op1 = gimple_assign_rhs2 (use_stmt);
371
372      return (op0 == def && op1 == a)
373	      || (op0 == a && op1 == def);
374    }
  return false;
376}
377
378/* Return whether USE_STMT is DEF * DEF.  */
379static inline bool
380is_square_of (gimple *use_stmt, tree def)
381{
382  return is_mult_by (use_stmt, def, def);
383}
384
385/* Return whether USE_STMT is a floating-point division by
386   DEF * DEF.  */
387static inline bool
388is_division_by_square (gimple *use_stmt, tree def)
389{
390  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392      && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393      && !stmt_can_throw_internal (cfun, use_stmt))
394    {
395      tree denominator = gimple_assign_rhs2 (use_stmt);
396      if (TREE_CODE (denominator) == SSA_NAME)
397	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
398    }
  return false;
400}
401
402/* Walk the subset of the dominator tree rooted at OCC, setting the
403   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404   the given basic block.  The field may be left NULL, of course,
405   if it is not possible or profitable to do the optimization.
406
407   DEF_BSI is an iterator pointing at the statement defining DEF.
408   If RECIP_DEF is set, a dominator already has a computation that can
409   be used.
410
   If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */
414
415static void
416insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417		    tree def, tree recip_def, tree square_recip_def,
418		    int should_insert_square_recip, int threshold)
419{
420  tree type;
421  gassign *new_stmt, *new_square_stmt;
422  gimple_stmt_iterator gsi;
423  struct occurrence *occ_child;
424
425  if (!recip_def
426      && (occ->bb_has_division || !flag_trapping_math)
427      /* Divide by two as all divisions are counted twice in
428	 the costing loop.  */
429      && occ->num_divisions / 2 >= threshold)
430    {
431      /* Make a variable with the replacement and substitute it.  */
432      type = TREE_TYPE (def);
433      recip_def = create_tmp_reg (type, "reciptmp");
434      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435				      build_one_cst (type), def);
436
437      if (should_insert_square_recip)
438	{
439	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441						 recip_def, recip_def);
442	}
443
444      if (occ->bb_has_division)
445	{
446	  /* Case 1: insert before an existing division.  */
447	  gsi = gsi_after_labels (occ->bb);
448	  while (!gsi_end_p (gsi)
449		 && (!is_division_by (gsi_stmt (gsi), def))
450		 && (!is_division_by_square (gsi_stmt (gsi), def)))
451	    gsi_next (&gsi);
452
453	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454	  if (should_insert_square_recip)
455	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
456	}
457      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
458	{
459	  /* Case 2: insert right after the definition.  Note that this will
460	     never happen if the definition statement can throw, because in
461	     that case the sole successor of the statement's basic block will
462	     dominate all the uses as well.  */
463	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464	  if (should_insert_square_recip)
465	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
466	}
467      else
468	{
469	  /* Case 3: insert in a basic block not containing defs/uses.  */
470	  gsi = gsi_after_labels (occ->bb);
471	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472	  if (should_insert_square_recip)
473	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
474	}
475
476      reciprocal_stats.rdivs_inserted++;
477
478      occ->recip_def_stmt = new_stmt;
479    }
480
481  occ->recip_def = recip_def;
482  occ->square_recip_def = square_recip_def;
483  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484    insert_reciprocals (def_gsi, occ_child, def, recip_def,
485			square_recip_def, should_insert_square_recip,
486			threshold);
487}
488
/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   USE_P is the use of (x * x) in such a division.  */
491static inline void
492replace_reciprocal_squares (use_operand_p use_p)
493{
494  gimple *use_stmt = USE_STMT (use_p);
495  basic_block bb = gimple_bb (use_stmt);
496  struct occurrence *occ = (struct occurrence *) bb->aux;
497
498  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499      && occ->recip_def)
500    {
501      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504      SET_USE (use_p, occ->square_recip_def);
505      fold_stmt_inplace (&gsi);
506      update_stmt (use_stmt);
507    }
508}
509
510
511/* Replace the division at USE_P with a multiplication by the reciprocal, if
512   possible.  */
513
514static inline void
515replace_reciprocal (use_operand_p use_p)
516{
517  gimple *use_stmt = USE_STMT (use_p);
518  basic_block bb = gimple_bb (use_stmt);
519  struct occurrence *occ = (struct occurrence *) bb->aux;
520
521  if (optimize_bb_for_speed_p (bb)
522      && occ->recip_def && use_stmt != occ->recip_def_stmt)
523    {
524      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526      SET_USE (use_p, occ->recip_def);
527      fold_stmt_inplace (&gsi);
528      update_stmt (use_stmt);
529    }
530}
531
532
533/* Free OCC and return one more "struct occurrence" to be freed.  */
534
535static struct occurrence *
536free_bb (struct occurrence *occ)
537{
538  struct occurrence *child, *next;
539
540  /* First get the two pointers hanging off OCC.  */
541  next = occ->next;
542  child = occ->children;
543  delete occ;
544
545  /* Now ensure that we don't recurse unless it is necessary.  */
546  if (!child)
547    return next;
548  else
549    {
550      while (next)
551	next = free_bb (next);
552
553      return child;
554    }
555}
556
557/* Transform sequences like
558   t = sqrt (a)
559   x = 1.0 / t;
560   r1 = x * x;
561   r2 = a * x;
562   into:
563   t = sqrt (a)
564   r1 = 1.0 / a;
565   r2 = t;
566   x = r1 * r2;
567   depending on the uses of x, r1, r2.  This removes one multiplication and
568   allows the sqrt and division operations to execute in parallel.
569   DEF_GSI is the gsi of the initial division by sqrt that defines
570   DEF (x in the example above).  */
571
572static void
573optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
574{
575  gimple *use_stmt;
576  imm_use_iterator use_iter;
577  gimple *stmt = gsi_stmt (*def_gsi);
578  tree x = def;
579  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580  tree div_rhs1 = gimple_assign_rhs1 (stmt);
581
582  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583      || TREE_CODE (div_rhs1) != REAL_CST
584      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585    return;
586
587  gcall *sqrt_stmt
588    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
589
590  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591    return;
592
593  switch (gimple_call_combined_fn (sqrt_stmt))
594    {
595    CASE_CFN_SQRT:
596    CASE_CFN_SQRT_FN:
597      break;
598
599    default:
600      return;
601    }
602  tree a = gimple_call_arg (sqrt_stmt, 0);
603
604  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */
605
606  /* Statements that use x in x * x.  */
607  auto_vec<gimple *> sqr_stmts;
608  /* Statements that use x in a * x.  */
609  auto_vec<gimple *> mult_stmts;
610  bool has_other_use = false;
611  bool mult_on_main_path = false;
612
613  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
614    {
615      if (is_gimple_debug (use_stmt))
616	continue;
617      if (is_square_of (use_stmt, x))
618	{
619	  sqr_stmts.safe_push (use_stmt);
620	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
621	    mult_on_main_path = true;
622	}
623      else if (is_mult_by (use_stmt, x, a))
624	{
625	  mult_stmts.safe_push (use_stmt);
626	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
627	    mult_on_main_path = true;
628	}
629      else
630	has_other_use = true;
631    }
632
  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication, so make sure we don't introduce a multiplication
     on a path where there was none.  */
637  if (has_other_use && !mult_on_main_path)
638    return;
639
640  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641    return;
642
643  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644     to be able to compose it from the sqr and mult cases.  */
645  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646    return;
647
648  if (dump_file)
649    {
650      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653      fprintf (dump_file, "\n");
654    }
655
656  bool delete_div = !has_other_use;
657  tree sqr_ssa_name = NULL_TREE;
658  if (!sqr_stmts.is_empty ())
659    {
660      /* r1 = x * x.  Transform the original
661	 x = 1.0 / t
662	 into
663	 tmp1 = 1.0 / a
664	 r1 = tmp1.  */
665
666      sqr_ssa_name
667	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
668
669      if (dump_file)
670	{
671	  fprintf (dump_file, "Replacing original division\n");
672	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673	  fprintf (dump_file, "with new division\n");
674	}
675      stmt
676	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677			       gimple_assign_rhs1 (stmt), a);
678      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679      gsi_remove (def_gsi, true);
680      *def_gsi = gsi_for_stmt (stmt);
681      fold_stmt_inplace (def_gsi);
682      update_stmt (stmt);
683
684      if (dump_file)
685	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
686
687      delete_div = false;
688      gimple *sqr_stmt;
689      unsigned int i;
690      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
691	{
692	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694	  update_stmt (sqr_stmt);
695	}
696    }
697  if (!mult_stmts.is_empty ())
698    {
699      /* r2 = a * x.  Transform this into:
700	 r2 = t (The original sqrt (a)).  */
701      unsigned int i;
702      gimple *mult_stmt = NULL;
703      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
704	{
705	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
706
707	  if (dump_file)
708	    {
	      fprintf (dump_file, "Replacing multiplication\n");
710	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711	      fprintf (dump_file, "with assignment\n");
712	    }
713	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714	  fold_stmt_inplace (&gsi2);
715	  update_stmt (mult_stmt);
716	  if (dump_file)
717	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
718      }
719    }
720
721  if (has_other_use)
722    {
723      /* Using the two temporaries tmp1, tmp2 from above
724	 the original x is now:
725	 x = tmp1 * tmp2.  */
726      gcc_assert (orig_sqrt_ssa_name);
727      gcc_assert (sqr_ssa_name);
728
729      gimple *new_stmt
730	= gimple_build_assign (x, MULT_EXPR,
731			       orig_sqrt_ssa_name, sqr_ssa_name);
732      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733      update_stmt (stmt);
734    }
735  else if (delete_div)
736    {
737      /* Remove the original division.  */
738      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739      gsi_remove (&gsi2, true);
740      release_defs (stmt);
741    }
742  else
743    release_ssa_name (x);
744}
745
746/* Look for floating-point divisions among DEF's uses, and try to
747   replace them by multiplications with the reciprocal.  Add
748   as many statements computing the reciprocal as needed.
749
750   DEF must be a GIMPLE register of a floating-point type.  */
751
752static void
753execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
754{
755  use_operand_p use_p, square_use_p;
756  imm_use_iterator use_iter, square_use_iter;
757  tree square_def;
758  struct occurrence *occ;
759  int count = 0;
760  int threshold;
761  int square_recip_count = 0;
762  int sqrt_recip_count = 0;
763
764  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
766
767  /* If DEF is a square (x * x), count the number of divisions by x.
768     If there are more divisions by x than by (DEF * DEF), prefer to optimize
769     the reciprocal of x instead of DEF.  This improves cases like:
770       def = x * x
771       t0 = a / def
772       t1 = b / def
773       t2 = c / x
774     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
775  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
776
777  if (is_gimple_assign (def_stmt)
778      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
781    {
782      tree op0 = gimple_assign_rhs1 (def_stmt);
783
784      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
785	{
786	  gimple *use_stmt = USE_STMT (use_p);
787	  if (is_division_by (use_stmt, op0))
788	    sqrt_recip_count++;
789	}
790    }
791
792  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
793    {
794      gimple *use_stmt = USE_STMT (use_p);
795      if (is_division_by (use_stmt, def))
796	{
797	  register_division_in (gimple_bb (use_stmt), 2);
798	  count++;
799	}
800
801      if (is_square_of (use_stmt, def))
802	{
803	  square_def = gimple_assign_lhs (use_stmt);
804	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
805	    {
806	      gimple *square_use_stmt = USE_STMT (square_use_p);
807	      if (is_division_by (square_use_stmt, square_def))
808		{
809		  /* This is executed twice for each division by a square.  */
810		  register_division_in (gimple_bb (square_use_stmt), 1);
811		  square_recip_count++;
812		}
813	    }
814	}
815    }
816
817  /* Square reciprocals were counted twice above.  */
818  square_recip_count /= 2;
819
820  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
821  if (sqrt_recip_count > square_recip_count)
822    goto out;
823
824  /* Do the expensive part only if we can hope to optimize something.  */
825  if (count + square_recip_count >= threshold && count >= 1)
826    {
827      gimple *use_stmt;
828      for (occ = occ_head; occ; occ = occ->next)
829	{
830	  compute_merit (occ);
831	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832			      square_recip_count, threshold);
833	}
834
835      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
836	{
837	  if (is_division_by (use_stmt, def))
838	    {
839	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840		replace_reciprocal (use_p);
841	    }
842	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
843	    {
844	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
845		{
		  /* Find all uses of the square that are divisions and
		     replace them by multiplications with the inverse.  */
848		  imm_use_iterator square_iterator;
849		  gimple *powmult_use_stmt = USE_STMT (use_p);
850		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
851
852		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853					 square_iterator, powmult_def_name)
854		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
855		      {
856			gimple *powmult_use_stmt = USE_STMT (square_use_p);
857			if (is_division_by (powmult_use_stmt, powmult_def_name))
858			  replace_reciprocal_squares (square_use_p);
859		      }
860		}
861	    }
862	}
863    }
864
865out:
866  for (occ = occ_head; occ; )
867    occ = free_bb (occ);
868
869  occ_head = NULL;
870}
871
872/* Return an internal function that implements the reciprocal of CALL,
873   or IFN_LAST if there is no such function that the target supports.  */
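/* For example, for a call to sqrt or sqrtf this returns IFN_RSQRT when
   the target supports that internal function for the call's mode, which
   lets the pass below rewrite 1.0 / sqrt (x) into an rsqrt call and turn
   the divisions by it into multiplications.  */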
874
875internal_fn
876internal_fn_reciprocal (gcall *call)
877{
878  internal_fn ifn;
879
880  switch (gimple_call_combined_fn (call))
881    {
882    CASE_CFN_SQRT:
883    CASE_CFN_SQRT_FN:
884      ifn = IFN_RSQRT;
885      break;
886
887    default:
888      return IFN_LAST;
889    }
890
891  tree_pair types = direct_internal_fn_types (ifn, call);
892  if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893    return IFN_LAST;
894
895  return ifn;
896}
897
898/* Go through all the floating-point SSA_NAMEs, and call
899   execute_cse_reciprocals_1 on each of them.  */
900namespace {
901
902const pass_data pass_data_cse_reciprocals =
903{
904  GIMPLE_PASS, /* type */
905  "recip", /* name */
906  OPTGROUP_NONE, /* optinfo_flags */
907  TV_TREE_RECIP, /* tv_id */
908  PROP_ssa, /* properties_required */
909  0, /* properties_provided */
910  0, /* properties_destroyed */
911  0, /* todo_flags_start */
912  TODO_update_ssa, /* todo_flags_finish */
913};
914
915class pass_cse_reciprocals : public gimple_opt_pass
916{
917public:
918  pass_cse_reciprocals (gcc::context *ctxt)
919    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
920  {}
921
922  /* opt_pass methods: */
923  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924  virtual unsigned int execute (function *);
925
926}; // class pass_cse_reciprocals
927
928unsigned int
929pass_cse_reciprocals::execute (function *fun)
930{
931  basic_block bb;
932  tree arg;
933
934  occ_pool = new object_allocator<occurrence> ("dominators for recip");
935
936  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937  calculate_dominance_info (CDI_DOMINATORS);
938  calculate_dominance_info (CDI_POST_DOMINATORS);
939
940  if (flag_checking)
941    FOR_EACH_BB_FN (bb, fun)
942      gcc_assert (!bb->aux);
943
944  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945    if (FLOAT_TYPE_P (TREE_TYPE (arg))
946	&& is_gimple_reg (arg))
947      {
948	tree name = ssa_default_def (fun, arg);
949	if (name)
950	  execute_cse_reciprocals_1 (NULL, name);
951      }
952
953  FOR_EACH_BB_FN (bb, fun)
954    {
955      tree def;
956
957      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958	   gsi_next (&gsi))
959	{
960	  gphi *phi = gsi.phi ();
961	  def = PHI_RESULT (phi);
962	  if (! virtual_operand_p (def)
963	      && FLOAT_TYPE_P (TREE_TYPE (def)))
964	    execute_cse_reciprocals_1 (NULL, def);
965	}
966
967      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968	   gsi_next (&gsi))
969        {
970	  gimple *stmt = gsi_stmt (gsi);
971
972	  if (gimple_has_lhs (stmt)
973	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974	      && FLOAT_TYPE_P (TREE_TYPE (def))
975	      && TREE_CODE (def) == SSA_NAME)
976	    {
977	      execute_cse_reciprocals_1 (&gsi, def);
978	      stmt = gsi_stmt (gsi);
979	      if (flag_unsafe_math_optimizations
980		  && is_gimple_assign (stmt)
981		  && gimple_assign_lhs (stmt) == def
982		  && !stmt_can_throw_internal (cfun, stmt)
983		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984		optimize_recip_sqrt (&gsi, def);
985	    }
986	}
987
988      if (optimize_bb_for_size_p (bb))
989        continue;
990
991      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
992      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993	   gsi_next (&gsi))
994        {
995	  gimple *stmt = gsi_stmt (gsi);
996
997	  if (is_gimple_assign (stmt)
998	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
999	    {
1000	      tree arg1 = gimple_assign_rhs2 (stmt);
1001	      gimple *stmt1;
1002
1003	      if (TREE_CODE (arg1) != SSA_NAME)
1004		continue;
1005
1006	      stmt1 = SSA_NAME_DEF_STMT (arg1);
1007
1008	      if (is_gimple_call (stmt1)
1009		  && gimple_call_lhs (stmt1))
1010		{
1011		  bool fail;
1012		  imm_use_iterator ui;
1013		  use_operand_p use_p;
1014		  tree fndecl = NULL_TREE;
1015
1016		  gcall *call = as_a <gcall *> (stmt1);
1017		  internal_fn ifn = internal_fn_reciprocal (call);
1018		  if (ifn == IFN_LAST)
1019		    {
1020		      fndecl = gimple_call_fndecl (call);
1021		      if (!fndecl
1022			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023			continue;
1024		      fndecl = targetm.builtin_reciprocal (fndecl);
1025		      if (!fndecl)
1026			continue;
1027		    }
1028
1029		  /* Check that all uses of the SSA name are divisions,
1030		     otherwise replacing the defining statement will do
1031		     the wrong thing.  */
1032		  fail = false;
1033		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1034		    {
1035		      gimple *stmt2 = USE_STMT (use_p);
1036		      if (is_gimple_debug (stmt2))
1037			continue;
1038		      if (!is_gimple_assign (stmt2)
1039			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040			  || gimple_assign_rhs1 (stmt2) == arg1
1041			  || gimple_assign_rhs2 (stmt2) != arg1)
1042			{
1043			  fail = true;
1044			  break;
1045			}
1046		    }
1047		  if (fail)
1048		    continue;
1049
1050		  gimple_replace_ssa_lhs (call, arg1);
1051		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1052		    {
1053		      auto_vec<tree, 4> args;
1054		      for (unsigned int i = 0;
1055			   i < gimple_call_num_args (call); i++)
1056			args.safe_push (gimple_call_arg (call, i));
1057		      gcall *stmt2;
1058		      if (ifn == IFN_LAST)
1059			stmt2 = gimple_build_call_vec (fndecl, args);
1060		      else
1061			stmt2 = gimple_build_call_internal_vec (ifn, args);
1062		      gimple_call_set_lhs (stmt2, arg1);
1063		      gimple_move_vops (stmt2, call);
1064		      gimple_call_set_nothrow (stmt2,
1065					       gimple_call_nothrow_p (call));
1066		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067		      gsi_replace (&gsi2, stmt2, true);
1068		    }
1069		  else
1070		    {
1071		      if (ifn == IFN_LAST)
1072			gimple_call_set_fndecl (call, fndecl);
1073		      else
1074			gimple_call_set_internal_fn (call, ifn);
1075		      update_stmt (call);
1076		    }
1077		  reciprocal_stats.rfuncs_inserted++;
1078
1079		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1080		    {
1081		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083		      fold_stmt_inplace (&gsi);
1084		      update_stmt (stmt);
1085		    }
1086		}
1087	    }
1088	}
1089    }
1090
1091  statistics_counter_event (fun, "reciprocal divs inserted",
1092			    reciprocal_stats.rdivs_inserted);
1093  statistics_counter_event (fun, "reciprocal functions inserted",
1094			    reciprocal_stats.rfuncs_inserted);
1095
1096  free_dominance_info (CDI_DOMINATORS);
1097  free_dominance_info (CDI_POST_DOMINATORS);
1098  delete occ_pool;
1099  return 0;
1100}
1101
1102} // anon namespace
1103
1104gimple_opt_pass *
1105make_pass_cse_reciprocals (gcc::context *ctxt)
1106{
1107  return new pass_cse_reciprocals (ctxt);
1108}
1109
1110/* If NAME is the result of a type conversion, look for other
1111   equivalent dominating or dominated conversions, and replace all
1112   uses with the earliest dominating name, removing the redundant
1113   conversions.  Return the prevailing name.  */
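/* As a schematic example:

	t1_1 = (float) s_2;
	...
	t2_3 = (float) s_2;

   if the conversion defining t1_1 dominates the one defining t2_3 and
   the types are compatible, all uses of t2_3 are replaced by t1_1 and
   the second conversion is removed.  */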
1114
1115static tree
1116execute_cse_conv_1 (tree name, bool *cfg_changed)
1117{
1118  if (SSA_NAME_IS_DEFAULT_DEF (name)
1119      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120    return name;
1121
1122  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1123
1124  if (!gimple_assign_cast_p (def_stmt))
1125    return name;
1126
1127  tree src = gimple_assign_rhs1 (def_stmt);
1128
1129  if (TREE_CODE (src) != SSA_NAME)
1130    return name;
1131
1132  imm_use_iterator use_iter;
1133  gimple *use_stmt;
1134
1135  /* Find the earliest dominating def.    */
1136  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1137    {
1138      if (use_stmt == def_stmt
1139	  || !gimple_assign_cast_p (use_stmt))
1140	continue;
1141
1142      tree lhs = gimple_assign_lhs (use_stmt);
1143
1144      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145	  || (gimple_assign_rhs1 (use_stmt)
1146	      != gimple_assign_rhs1 (def_stmt))
1147	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148	continue;
1149
1150      bool use_dominates;
1151      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1152	{
1153	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155	    gsi_next (&gsi);
1156	  use_dominates = !gsi_end_p (gsi);
1157	}
1158      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159			       gimple_bb (def_stmt)))
1160	use_dominates = false;
1161      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162			       gimple_bb (use_stmt)))
1163	use_dominates = true;
1164      else
1165	continue;
1166
1167      if (use_dominates)
1168	{
1169	  std::swap (name, lhs);
1170	  std::swap (def_stmt, use_stmt);
1171	}
1172    }
1173
1174  /* Now go through all uses of SRC again, replacing the equivalent
1175     dominated conversions.  We may replace defs that were not
1176     dominated by the then-prevailing defs when we first visited
1177     them.  */
1178  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1179    {
1180      if (use_stmt == def_stmt
1181	  || !gimple_assign_cast_p (use_stmt))
1182	continue;
1183
1184      tree lhs = gimple_assign_lhs (use_stmt);
1185
1186      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187	  || (gimple_assign_rhs1 (use_stmt)
1188	      != gimple_assign_rhs1 (def_stmt))
1189	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190	continue;
1191
1192      basic_block use_bb = gimple_bb (use_stmt);
1193      if (gimple_bb (def_stmt) == use_bb
1194	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1195	{
1196	  sincos_stats.conv_removed++;
1197
1198	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199	  replace_uses_by (lhs, name);
1200	  if (gsi_remove (&gsi, true)
1201	      && gimple_purge_dead_eh_edges (use_bb))
1202	    *cfg_changed = true;
1203	  release_defs (use_stmt);
1204	}
1205    }
1206
1207  return name;
1208}
1209
/* Records an occurrence at statement USE_STMT in the vector of
   statements STMTS if it is dominated by *TOP_BB or dominates it or
   *TOP_BB is not yet initialized.  Returns true if the occurrence
   was pushed on the vector.  Adjusts *TOP_BB to be the basic block
   dominating all statements in the vector.  */
1215
1216static bool
1217maybe_record_sincos (vec<gimple *> *stmts,
1218		     basic_block *top_bb, gimple *use_stmt)
1219{
1220  basic_block use_bb = gimple_bb (use_stmt);
1221  if (*top_bb
1222      && (*top_bb == use_bb
1223	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1224    stmts->safe_push (use_stmt);
1225  else if (!*top_bb
1226	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1227    {
1228      stmts->safe_push (use_stmt);
1229      *top_bb = use_bb;
1230    }
1231  else
1232    return false;
1233
1234  return true;
1235}
1236
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi, CSEing the result in this case.
   We first walk over all immediate uses of the argument, collecting
   in a vector the statements that we can CSE; in a second pass we
   replace each statement's rhs with a REALPART_EXPR or IMAGPART_EXPR
   of the result of the cexpi call, which is inserted before the use
   statement that dominates all other candidates.  */
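/* As a schematic example:

	c_1 = cos (a_4);
	s_2 = sin (a_4);

   becomes

	cexpi_5 = __builtin_cexpi (a_4);
	c_1 = REALPART_EXPR <cexpi_5>;
	s_2 = IMAGPART_EXPR <cexpi_5>;

   assuming a cexpi built-in is available for the type of a_4.  */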
1244
1245static bool
1246execute_cse_sincos_1 (tree name)
1247{
1248  gimple_stmt_iterator gsi;
1249  imm_use_iterator use_iter;
1250  tree fndecl, res, type = NULL_TREE;
1251  gimple *def_stmt, *use_stmt, *stmt;
1252  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1253  auto_vec<gimple *> stmts;
1254  basic_block top_bb = NULL;
1255  int i;
1256  bool cfg_changed = false;
1257
1258  name = execute_cse_conv_1 (name, &cfg_changed);
1259
1260  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1261    {
1262      if (gimple_code (use_stmt) != GIMPLE_CALL
1263	  || !gimple_call_lhs (use_stmt))
1264	continue;
1265
1266      switch (gimple_call_combined_fn (use_stmt))
1267	{
1268	CASE_CFN_COS:
1269	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1270	  break;
1271
1272	CASE_CFN_SIN:
1273	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274	  break;
1275
1276	CASE_CFN_CEXPI:
1277	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278	  break;
1279
1280	default:;
1281	  continue;
1282	}
1283
1284      tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1285      if (!type)
1286	{
1287	  type = t;
1288	  t = TREE_TYPE (name);
1289	}
1290      /* This checks that NAME has the right type in the first round,
1291	 and, in subsequent rounds, that the built_in type is the same
1292	 type, or a compatible type.  */
1293      if (type != t && !types_compatible_p (type, t))
1294	return false;
1295    }
1296  if (seen_cos + seen_sin + seen_cexpi <= 1)
1297    return false;
1298
1299  /* Simply insert cexpi at the beginning of top_bb but not earlier than
1300     the name def statement.  */
1301  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1302  if (!fndecl)
1303    return false;
1304  stmt = gimple_build_call (fndecl, 1, name);
1305  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1306  gimple_call_set_lhs (stmt, res);
1307
1308  def_stmt = SSA_NAME_DEF_STMT (name);
1309  if (!SSA_NAME_IS_DEFAULT_DEF (name)
1310      && gimple_code (def_stmt) != GIMPLE_PHI
1311      && gimple_bb (def_stmt) == top_bb)
1312    {
1313      gsi = gsi_for_stmt (def_stmt);
1314      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1315    }
1316  else
1317    {
1318      gsi = gsi_after_labels (top_bb);
1319      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1320    }
1321  sincos_stats.inserted++;
1322
1323  /* And adjust the recorded old call sites.  */
1324  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1325    {
1326      tree rhs = NULL;
1327
1328      switch (gimple_call_combined_fn (use_stmt))
1329	{
1330	CASE_CFN_COS:
1331	  rhs = fold_build1 (REALPART_EXPR, type, res);
1332	  break;
1333
1334	CASE_CFN_SIN:
1335	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
1336	  break;
1337
1338	CASE_CFN_CEXPI:
1339	  rhs = res;
1340	  break;
1341
1342	default:;
1343	  gcc_unreachable ();
1344	}
1345
1346	/* Replace call with a copy.  */
1347	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1348
1349	gsi = gsi_for_stmt (use_stmt);
1350	gsi_replace (&gsi, stmt, true);
1351	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1352	  cfg_changed = true;
1353    }
1354
1355  return cfg_changed;
1356}
1357
1358/* To evaluate powi(x,n), the floating point value x raised to the
1359   constant integer exponent n, we use a hybrid algorithm that
1360   combines the "window method" with look-up tables.  For an
1361   introduction to exponentiation algorithms and "addition chains",
1362   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1363   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1364   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1365   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1366
1367/* Provide a default value for POWI_MAX_MULTS, the maximum number of
1368   multiplications to inline before calling the system library's pow
1369   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1370   so this default never requires calling pow, powf or powl.  */
1371
1372#ifndef POWI_MAX_MULTS
1373#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1374#endif
1375
1376/* The size of the "optimal power tree" lookup table.  All
1377   exponents less than this value are simply looked up in the
1378   powi_table below.  This threshold is also used to size the
1379   cache of pseudo registers that hold intermediate results.  */
1380#define POWI_TABLE_SIZE 256
1381
1382/* The size, in bits of the window, used in the "window method"
1383   exponentiation algorithm.  This is equivalent to a radix of
1384   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1385#define POWI_WINDOW_SIZE 3
1386
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical Algorithms".  */
1393
1394static const unsigned char powi_table[POWI_TABLE_SIZE] =
1395  {
1396      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1397      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1398      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1399     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1400     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1401     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1402     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1403     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1404     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1405     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1406     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1407     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1408     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1409     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1410     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1411     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1412     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1413     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1414     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1415     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1416     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1417     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1418     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1419     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1420     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1421    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1422    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1423    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1424    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1425    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1426    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1427    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1428  };
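
/* As a worked example of the table above, powi_table[5] == 3, so x**5
   is computed as pow(x,3)*pow(x,2); powi_table[3] == 2 and
   powi_table[2] == 1, so the full sequence of multiplications is

	t2 = x * x;
	t3 = x * t2;
	t5 = t2 * t3;

   i.e. three multiplications, which is also the value of powi_cost (5).  */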
1429
1430
1431/* Return the number of multiplications required to calculate
1432   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1433   subroutine of powi_cost.  CACHE is an array indicating
1434   which exponents have already been calculated.  */
1435
1436static int
1437powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1438{
1439  /* If we've already calculated this exponent, then this evaluation
1440     doesn't require any additional multiplications.  */
1441  if (cache[n])
1442    return 0;
1443
1444  cache[n] = true;
1445  return powi_lookup_cost (n - powi_table[n], cache)
1446	 + powi_lookup_cost (powi_table[n], cache) + 1;
1447}
1448
1449/* Return the number of multiplications required to calculate
1450   powi(x,n) for an arbitrary x, given the exponent N.  This
1451   function needs to be kept in sync with powi_as_mults below.  */
1452
1453static int
1454powi_cost (HOST_WIDE_INT n)
1455{
1456  bool cache[POWI_TABLE_SIZE];
1457  unsigned HOST_WIDE_INT digit;
1458  unsigned HOST_WIDE_INT val;
1459  int result;
1460
1461  if (n == 0)
1462    return 0;
1463
1464  /* Ignore the reciprocal when calculating the cost.  */
1465  val = absu_hwi (n);
1466
1467  /* Initialize the exponent cache.  */
1468  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1469  cache[1] = true;
1470
1471  result = 0;
1472
1473  while (val >= POWI_TABLE_SIZE)
1474    {
1475      if (val & 1)
1476	{
1477	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1478	  result += powi_lookup_cost (digit, cache)
1479		    + POWI_WINDOW_SIZE + 1;
1480	  val >>= POWI_WINDOW_SIZE;
1481	}
1482      else
1483	{
1484	  val >>= 1;
1485	  result++;
1486	}
1487    }
1488
1489  return result + powi_lookup_cost (val, cache);
1490}
1491
1492/* Recursive subroutine of powi_as_mults.  This function takes the
1493   array, CACHE, of already calculated exponents and an exponent N and
1494   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1495
1496static tree
1497powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1498		 unsigned HOST_WIDE_INT n, tree *cache)
1499{
1500  tree op0, op1, ssa_target;
1501  unsigned HOST_WIDE_INT digit;
1502  gassign *mult_stmt;
1503
1504  if (n < POWI_TABLE_SIZE && cache[n])
1505    return cache[n];
1506
1507  ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1508
1509  if (n < POWI_TABLE_SIZE)
1510    {
1511      cache[n] = ssa_target;
1512      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1513      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1514    }
1515  else if (n & 1)
1516    {
1517      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1518      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1519      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1520    }
1521  else
1522    {
1523      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1524      op1 = op0;
1525    }
1526
1527  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1528  gimple_set_location (mult_stmt, loc);
1529  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1530
1531  return ssa_target;
1532}
1533
1534/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1535   This function needs to be kept in sync with powi_cost above.  */
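/* For instance, powi_as_mults (gsi, loc, x, -2) emits, schematically,
   powmult_1 = x * x; powmult_2 = 1.0 / powmult_1; and returns powmult_2.  */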
1536
1537tree
1538powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1539	       tree arg0, HOST_WIDE_INT n)
1540{
1541  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1542  gassign *div_stmt;
1543  tree target;
1544
1545  if (n == 0)
1546    return build_one_cst (type);
1547
1548  memset (cache, 0, sizeof (cache));
1549  cache[1] = arg0;
1550
1551  result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1552  if (n >= 0)
1553    return result;
1554
1555  /* If the original exponent was negative, reciprocate the result.  */
1556  target = make_temp_ssa_name (type, NULL, "powmult");
1557  div_stmt = gimple_build_assign (target, RDIV_EXPR,
1558				  build_real (type, dconst1), result);
1559  gimple_set_location (div_stmt, loc);
1560  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1561
1562  return target;
1563}
1564
1565/* ARG0 and N are the two arguments to a powi builtin in GSI with
1566   location info LOC.  If the arguments are appropriate, create an
1567   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
1569   result.  */
1570
1571static tree
1572gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1573			    tree arg0, HOST_WIDE_INT n)
1574{
1575  if ((n >= -1 && n <= 2)
1576      || (optimize_function_for_speed_p (cfun)
1577	  && powi_cost (n) <= POWI_MAX_MULTS))
1578    return powi_as_mults (gsi, loc, arg0, n);
1579
1580  return NULL_TREE;
1581}
1582
1583/* Build a gimple call statement that calls FN with argument ARG.
1584   Set the lhs of the call statement to a fresh SSA name.  Insert the
1585   statement prior to GSI's current position, and return the fresh
1586   SSA name.  */
1587
1588static tree
1589build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1590		       tree fn, tree arg)
1591{
1592  gcall *call_stmt;
1593  tree ssa_target;
1594
1595  call_stmt = gimple_build_call (fn, 1, arg);
1596  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1597  gimple_set_lhs (call_stmt, ssa_target);
1598  gimple_set_location (call_stmt, loc);
1599  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1600
1601  return ssa_target;
1602}
1603
/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name with base name
   NAME.  Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */
1608
1609static tree
1610build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1611			const char *name, enum tree_code code,
1612			tree arg0, tree arg1)
1613{
1614  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1615  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1616  gimple_set_location (stmt, loc);
1617  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1618  return result;
1619}
1620
/* Build a gimple reference operation with the given CODE and argument
   ARG0, assigning the result to a new SSA name of TYPE with base name
   NAME.  Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */
1625
1626static inline tree
1627build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1628		      const char *name, enum tree_code code, tree arg0)
1629{
1630  tree result = make_temp_ssa_name (type, NULL, name);
1631  gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1632  gimple_set_location (stmt, loc);
1633  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1634  return result;
1635}
1636
1637/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1638   prior to GSI's current position, and return the fresh SSA name.  */
1639
1640static tree
1641build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1642		       tree type, tree val)
1643{
1644  tree result = make_ssa_name (type);
1645  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1646  gimple_set_location (stmt, loc);
1647  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1648  return result;
1649}
1650
1651struct pow_synth_sqrt_info
1652{
1653  bool *factors;
1654  unsigned int deepest;
1655  unsigned int num_mults;
1656};
1657
1658/* Return true iff the real value C can be represented as a
1659   sum of powers of 0.5 up to N.  That is:
1660   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1661   Record in INFO the various parameters of the synthesis algorithm such
1662   as the factors a[i], the maximum 0.5 power and the number of
1663   multiplications that will be required.  */
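/* For example, for C == 0.625 and N >= 3 this returns true with
   factors == {true, false, true}, deepest == 3 and num_mults == 1,
   since 0.625 == 0.5**1 + 0.5**3; this corresponds to the expansion
   sqrt (x) * sqrt (sqrt (sqrt (x))) used further below.  */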
1664
1665bool
1666representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1667				 struct pow_synth_sqrt_info *info)
1668{
1669  REAL_VALUE_TYPE factor = dconsthalf;
1670  REAL_VALUE_TYPE remainder = c;
1671
1672  info->deepest = 0;
1673  info->num_mults = 0;
1674  memset (info->factors, 0, n * sizeof (bool));
1675
1676  for (unsigned i = 0; i < n; i++)
1677    {
1678      REAL_VALUE_TYPE res;
1679
1680      /* If something inexact happened bail out now.  */
1681      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1682	return false;
1683
1684      /* We have hit zero.  The number is representable as a sum
1685         of powers of 0.5.  */
1686      if (real_equal (&res, &dconst0))
1687	{
1688	  info->factors[i] = true;
1689	  info->deepest = i + 1;
1690	  return true;
1691	}
1692      else if (!REAL_VALUE_NEGATIVE (res))
1693	{
1694	  remainder = res;
1695	  info->factors[i] = true;
1696	  info->num_mults++;
1697	}
1698      else
1699	info->factors[i] = false;
1700
1701      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1702    }
1703  return false;
1704}
1705
1706/* Return the tree corresponding to FN being applied
1707   to ARG N times at GSI and LOC.
1708   Look up previous results from CACHE if need be.
1709   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
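
/* For example, get_fn_chain (x, 3, gsi, sqrtfn, loc, cache) returns an
   SSA name holding sqrt (sqrt (sqrt (x))), building and caching the
   intermediate sqrt (x) and sqrt (sqrt (x)) results so that later
   requests for smaller depths can reuse them.  */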
1710
1711static tree
1712get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1713	      tree fn, location_t loc, tree *cache)
1714{
1715  tree res = cache[n];
1716  if (!res)
1717    {
1718      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1719      res = build_and_insert_call (gsi, loc, fn, prev);
1720      cache[n] = res;
1721    }
1722
1723  return res;
1724}
1725
1726/* Print to STREAM the repeated application of function FNAME to ARG
1727   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1728   "foo (foo (x))".  */
1729
1730static void
1731print_nested_fn (FILE* stream, const char *fname, const char* arg,
1732		 unsigned int n)
1733{
1734  if (n == 0)
1735    fprintf (stream, "%s", arg);
1736  else
1737    {
1738      fprintf (stream, "%s (", fname);
1739      print_nested_fn (stream, fname, arg, n - 1);
1740      fprintf (stream, ")");
1741    }
1742}
1743
1744/* Print to STREAM the fractional sequence of sqrt chains
1745   applied to ARG, described by INFO.  Used for the dump file.  */
1746
1747static void
1748dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1749			        struct pow_synth_sqrt_info *info)
1750{
1751  for (unsigned int i = 0; i < info->deepest; i++)
1752    {
1753      bool is_set = info->factors[i];
1754      if (is_set)
1755	{
1756	  print_nested_fn (stream, "sqrt", arg, i + 1);
1757	  if (i != info->deepest - 1)
1758	    fprintf (stream, " * ");
1759	}
1760    }
1761}
1762
1763/* Print to STREAM a representation of raising ARG to an integer
1764   power N.  Used for the dump file.  */
1765
1766static void
1767dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1768{
1769  if (n > 1)
1770    fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1771  else if (n == 1)
1772    fprintf (stream, "%s", arg);
1773}
1774
1775/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1776   square roots.  Place at GSI and LOC.  Limit the maximum depth
1777   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1778   result of the expanded sequence or NULL_TREE if the expansion failed.
1779
1780   This routine assumes that ARG1 is a real number with a fractional part
1781   (the integer exponent case will have been handled earlier in
1782   gimple_expand_builtin_pow).
1783
1784   For ARG1 > 0.0:
1785   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1786     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1787                    FRAC_PART == ARG1 - WHOLE_PART:
1788     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1789     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1790     if it can be expressed as such, that is if FRAC_PART satisfies:
1791     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1792     where integer a[i] is either 0 or 1.
1793
1794     Example:
1795     POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1796       --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1797
1798   For ARG1 < 0.0 there are two approaches:
1799   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1800         is calculated as above.
1801
1802     Example:
1803     POW (x, -5.625) == 1.0 / POW (x, 5.625)
1804       -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1805
1806   * (B) : WHOLE_PART := - ceil (abs (ARG1))
1807           FRAC_PART  := ARG1 - WHOLE_PART
1808     and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1809     Example:
1810     POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1811       --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1812
1813   For ARG1 < 0.0 we choose between (A) and (B) depending on
1814   how many multiplications we'd have to do.
1815   So, for the example in (B): POW (x, -5.875), if we were to
1816   follow algorithm (A) we would produce:
1817   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1818   which contains more multiplications than approach (B).
1819
1820   Hopefully, this approach will eliminate potentially expensive POW library
1821   calls when unsafe floating point math is enabled and allow the compiler to
1822   further optimise the multiplies, square roots and divides produced by this
1823   function.  */
1824
1825static tree
1826expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1827		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1828{
1829  tree type = TREE_TYPE (arg0);
1830  machine_mode mode = TYPE_MODE (type);
1831  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1832  bool one_over = true;
1833
1834  if (!sqrtfn)
1835    return NULL_TREE;
1836
1837  if (TREE_CODE (arg1) != REAL_CST)
1838    return NULL_TREE;
1839
1840  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1841
1842  gcc_assert (max_depth > 0);
1843  tree *cache = XALLOCAVEC (tree, max_depth + 1);
1844
1845  struct pow_synth_sqrt_info synth_info;
1846  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1847  synth_info.deepest = 0;
1848  synth_info.num_mults = 0;
1849
1850  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1851  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1852
1853  /* The whole and fractional parts of exp.  */
1854  REAL_VALUE_TYPE whole_part;
1855  REAL_VALUE_TYPE frac_part;
1856
1857  real_floor (&whole_part, mode, &exp);
1858  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1859
1860
1861  REAL_VALUE_TYPE ceil_whole = dconst0;
1862  REAL_VALUE_TYPE ceil_fract = dconst0;
1863
1864  if (neg_exp)
1865    {
1866      real_ceil (&ceil_whole, mode, &exp);
1867      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1868    }
1869
1870  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1871    return NULL_TREE;
1872
1873  /* Check whether it's more profitable to not use 1.0 / ...  */
1874  if (neg_exp)
1875    {
1876      struct pow_synth_sqrt_info alt_synth_info;
1877      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1878      alt_synth_info.deepest = 0;
1879      alt_synth_info.num_mults = 0;
1880
1881      if (representable_as_half_series_p (ceil_fract, max_depth,
1882					   &alt_synth_info)
1883	  && alt_synth_info.deepest <= synth_info.deepest
1884	  && alt_synth_info.num_mults < synth_info.num_mults)
1885	{
1886	  whole_part = ceil_whole;
1887	  frac_part = ceil_fract;
1888	  synth_info.deepest = alt_synth_info.deepest;
1889	  synth_info.num_mults = alt_synth_info.num_mults;
1890	  memcpy (synth_info.factors, alt_synth_info.factors,
1891		  (max_depth + 1) * sizeof (bool));
1892	  one_over = false;
1893	}
1894    }
1895
1896  HOST_WIDE_INT n = real_to_integer (&whole_part);
1897  REAL_VALUE_TYPE cint;
1898  real_from_integer (&cint, VOIDmode, n, SIGNED);
1899
1900  if (!real_identical (&whole_part, &cint))
1901    return NULL_TREE;
1902
1903  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1904    return NULL_TREE;
1905
1906  memset (cache, 0, (max_depth + 1) * sizeof (tree));
1907
1908  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1909
1910  /* Calculate the integer part of the exponent.  */
1911  if (n > 1)
1912    {
1913      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1914      if (!integer_res)
1915	return NULL_TREE;
1916    }
1917
1918  if (dump_file)
1919    {
1920      char string[64];
1921
1922      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1923      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1924
1925      if (neg_exp)
1926	{
1927	  if (one_over)
1928	    {
1929	      fprintf (dump_file, "1.0 / (");
1930	      dump_integer_part (dump_file, "x", n);
1931	      if (n > 0)
1932	        fprintf (dump_file, " * ");
1933	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934	      fprintf (dump_file, ")");
1935	    }
1936	  else
1937	    {
1938	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1939	      fprintf (dump_file, " / (");
1940	      dump_integer_part (dump_file, "x", n);
1941	      fprintf (dump_file, ")");
1942	    }
1943	}
1944      else
1945	{
1946	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1947	  if (n > 0)
1948	    fprintf (dump_file, " * ");
1949	  dump_integer_part (dump_file, "x", n);
1950	}
1951
1952      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1953    }
1954
1955
1956  tree fract_res = NULL_TREE;
1957  cache[0] = arg0;
1958
1959  /* Calculate the fractional part of the exponent.  */
1960  for (unsigned i = 0; i < synth_info.deepest; i++)
1961    {
1962      if (synth_info.factors[i])
1963	{
1964	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1965
	  if (!fract_res)
	    fract_res = sqrt_chain;
	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
1972	}
1973    }
1974
1975  tree res = NULL_TREE;
1976
1977  if (neg_exp)
1978    {
1979      if (one_over)
1980	{
1981	  if (n > 0)
1982	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1983					   fract_res, integer_res);
1984	  else
1985	    res = fract_res;
1986
1987	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1988					  build_real (type, dconst1), res);
1989	}
1990      else
1991	{
1992	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1993					 fract_res, integer_res);
1994	}
1995    }
1996  else
1997    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1998				   fract_res, integer_res);
1999  return res;
2000}
2001
2002/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2003   with location info LOC.  If possible, create an equivalent and
2004   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */
2006
2007static tree
2008gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2009			   tree arg0, tree arg1)
2010{
2011  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2012  REAL_VALUE_TYPE c2, dconst3;
2013  HOST_WIDE_INT n;
2014  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2015  machine_mode mode;
2016  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2017  bool hw_sqrt_exists, c_is_int, c2_is_int;
2018
2019  dconst1_4 = dconst1;
2020  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2021
2022  /* If the exponent isn't a constant, there's nothing of interest
2023     to be done.  */
2024  if (TREE_CODE (arg1) != REAL_CST)
2025    return NULL_TREE;
2026
2027  /* Don't perform the operation if flag_signaling_nans is on
2028     and the operand is a signaling NaN.  */
2029  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2030      && ((TREE_CODE (arg0) == REAL_CST
2031	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2032	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2033    return NULL_TREE;
2034
2035  /* If the exponent is equivalent to an integer, expand to an optimal
2036     multiplication sequence when profitable.  */
2037  c = TREE_REAL_CST (arg1);
2038  n = real_to_integer (&c);
2039  real_from_integer (&cint, VOIDmode, n, SIGNED);
2040  c_is_int = real_identical (&c, &cint);
2041
2042  if (c_is_int
2043      && ((n >= -1 && n <= 2)
2044	  || (flag_unsafe_math_optimizations
2045	      && speed_p
2046	      && powi_cost (n) <= POWI_MAX_MULTS)))
2047    return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048
2049  /* Attempt various optimizations using sqrt and cbrt.  */
2050  type = TREE_TYPE (arg0);
2051  mode = TYPE_MODE (type);
2052  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053
2054  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
2055     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
2056     sqrt(-0) = -0.  */
2057  if (sqrtfn
2058      && real_equal (&c, &dconsthalf)
2059      && !HONOR_SIGNED_ZEROS (mode))
2060    return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061
2062  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063
2064  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
2065     optimizations since 1./3. is not exactly representable.  If x
2066     is negative and finite, the correct value of pow(x,1./3.) is
2067     a NaN with the "invalid" exception raised, because the value
2068     of 1./3. actually has an even denominator.  The correct value
2069     of cbrt(x) is a negative real value.  */
2070  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2071  dconst1_3 = real_value_truncate (mode, dconst_third ());
2072
2073  if (flag_unsafe_math_optimizations
2074      && cbrtfn
2075      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2076      && real_equal (&c, &dconst1_3))
2077    return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078
2079  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
2080     if we don't have a hardware sqrt insn.  */
2081  dconst1_6 = dconst1_3;
2082  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2083
2084  if (flag_unsafe_math_optimizations
2085      && sqrtfn
2086      && cbrtfn
2087      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2088      && speed_p
2089      && hw_sqrt_exists
2090      && real_equal (&c, &dconst1_6))
2091    {
2092      /* sqrt(x)  */
2093      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094
2095      /* cbrt(sqrt(x))  */
2096      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2097    }
2098
2099
2100  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
2102  if (flag_unsafe_math_optimizations
2103      && sqrtfn
2104      && hw_sqrt_exists
2105      && (speed_p || real_equal (&c, &dconst1_4))
2106      && !HONOR_SIGNED_ZEROS (mode))
2107    {
2108      unsigned int max_depth = speed_p
2109				? param_max_pow_sqrt_depth
2110				: 2;
2111
2112      tree expand_with_sqrts
2113	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114
2115      if (expand_with_sqrts)
2116	return expand_with_sqrts;
2117    }
2118
2119  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2120  n = real_to_integer (&c2);
2121  real_from_integer (&cint, VOIDmode, n, SIGNED);
2122  c2_is_int = real_identical (&c2, &cint);
2123
2124  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125
2126     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
2127     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
2128
2129     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
2130     different from pow(x, 1./3.) due to rounding and behavior with
2131     negative x, we need to constrain this transformation to unsafe
2132     math and positive x or finite math.  */
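  /* For example, for C == 5/3 (so that n == 5) the expansion produced is
     powi (x, 1) * cbrt (x) * cbrt (x), i.e. x * cbrt (x) * cbrt (x).  */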
2133  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2134  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2135  real_round (&c2, mode, &c2);
2136  n = real_to_integer (&c2);
2137  real_from_integer (&cint, VOIDmode, n, SIGNED);
2138  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2139  real_convert (&c2, mode, &c2);
2140
2141  if (flag_unsafe_math_optimizations
2142      && cbrtfn
2143      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2144      && real_identical (&c2, &c)
2145      && !c2_is_int
2146      && optimize_function_for_speed_p (cfun)
2147      && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148    {
2149      tree powi_x_ndiv3 = NULL_TREE;
2150
2151      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2152         possible or profitable, give up.  Skip the degenerate case when
2153         abs(n) < 3, where the result is always 1.  */
2154      if (absu_hwi (n) >= 3)
2155	{
2156	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2157						     abs_hwi (n / 3));
2158	  if (!powi_x_ndiv3)
2159	    return NULL_TREE;
2160	}
2161
2162      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2163         as that creates an unnecessary variable.  Instead, just produce
2164         either cbrt(x) or cbrt(x) * cbrt(x).  */
2165      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166
2167      if (absu_hwi (n) % 3 == 1)
2168	powi_cbrt_x = cbrt_x;
2169      else
2170	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2171					      cbrt_x, cbrt_x);
2172
2173      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2174      if (absu_hwi (n) < 3)
2175	result = powi_cbrt_x;
2176      else
2177	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2178					 powi_x_ndiv3, powi_cbrt_x);
2179
2180      /* If n is negative, reciprocate the result.  */
2181      if (n < 0)
2182	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2183					 build_real (type, dconst1), result);
2184
2185      return result;
2186    }
2187
2188  /* No optimizations succeeded.  */
2189  return NULL_TREE;
2190}
2191
2192/* ARG is the argument to a cabs builtin call in GSI with location info
2193   LOC.  Create a sequence of statements prior to GSI that calculates
2194   sqrt(R*R + I*I), where R and I are the real and imaginary components
2195   of ARG, respectively.  Return an expression holding the result.  */
2196
2197static tree
2198gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2199{
2200  tree real_part, imag_part, addend1, addend2, sum, result;
2201  tree type = TREE_TYPE (TREE_TYPE (arg));
2202  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2203  machine_mode mode = TYPE_MODE (type);
2204
2205  if (!flag_unsafe_math_optimizations
2206      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2207      || !sqrtfn
2208      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2209    return NULL_TREE;
2210
2211  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212				    REALPART_EXPR, arg);
2213  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214				    real_part, real_part);
2215  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2216				    IMAGPART_EXPR, arg);
2217  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2218				    imag_part, imag_part);
2219  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2220  result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2221
2222  return result;
2223}
2224
2225/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2226   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
2227   an optimal number of multiplies, when n is a constant.  */
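
/* For instance, powi (x, 5) can be expanded into a three-multiplication
   sequence such as
     t1 = x * x;
     t2 = t1 * t1;
     result = t2 * x;  */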
2228
2229namespace {
2230
2231const pass_data pass_data_cse_sincos =
2232{
2233  GIMPLE_PASS, /* type */
2234  "sincos", /* name */
2235  OPTGROUP_NONE, /* optinfo_flags */
2236  TV_TREE_SINCOS, /* tv_id */
2237  PROP_ssa, /* properties_required */
2238  PROP_gimple_opt_math, /* properties_provided */
2239  0, /* properties_destroyed */
2240  0, /* todo_flags_start */
2241  TODO_update_ssa, /* todo_flags_finish */
2242};
2243
2244class pass_cse_sincos : public gimple_opt_pass
2245{
2246public:
2247  pass_cse_sincos (gcc::context *ctxt)
2248    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2249  {}
2250
2251  /* opt_pass methods: */
2252  virtual bool gate (function *)
2253    {
2254      /* We no longer require either sincos or cexp, since powi expansion
2255	 piggybacks on this pass.  */
2256      return optimize;
2257    }
2258
2259  virtual unsigned int execute (function *);
2260
2261}; // class pass_cse_sincos
2262
2263unsigned int
2264pass_cse_sincos::execute (function *fun)
2265{
2266  basic_block bb;
2267  bool cfg_changed = false;
2268
2269  calculate_dominance_info (CDI_DOMINATORS);
2270  memset (&sincos_stats, 0, sizeof (sincos_stats));
2271
2272  FOR_EACH_BB_FN (bb, fun)
2273    {
2274      gimple_stmt_iterator gsi;
2275      bool cleanup_eh = false;
2276
2277      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278        {
2279	  gimple *stmt = gsi_stmt (gsi);
2280
2281	  /* Only the last stmt in a bb could throw, no need to call
2282	     gimple_purge_dead_eh_edges if we change something in the middle
2283	     of a basic block.  */
2284	  cleanup_eh = false;
2285
2286	  if (is_gimple_call (stmt)
2287	      && gimple_call_lhs (stmt))
2288	    {
2289	      tree arg, arg0, arg1, result;
2290	      HOST_WIDE_INT n;
2291	      location_t loc;
2292
2293	      switch (gimple_call_combined_fn (stmt))
2294		{
2295		CASE_CFN_COS:
2296		CASE_CFN_SIN:
2297		CASE_CFN_CEXPI:
2298		  arg = gimple_call_arg (stmt, 0);
2299		  /* Make sure we have either sincos or cexp.  */
2300		  if (!targetm.libc_has_function (function_c99_math_complex,
2301						  TREE_TYPE (arg))
2302		      && !targetm.libc_has_function (function_sincos,
2303						     TREE_TYPE (arg)))
2304		    break;
2305
2306		  if (TREE_CODE (arg) == SSA_NAME)
2307		    cfg_changed |= execute_cse_sincos_1 (arg);
2308		  break;
2309
2310		CASE_CFN_POW:
2311		  arg0 = gimple_call_arg (stmt, 0);
2312		  arg1 = gimple_call_arg (stmt, 1);
2313
2314		  loc = gimple_location (stmt);
2315		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2316
2317		  if (result)
2318		    {
2319		      tree lhs = gimple_get_lhs (stmt);
2320		      gassign *new_stmt = gimple_build_assign (lhs, result);
2321		      gimple_set_location (new_stmt, loc);
2322		      unlink_stmt_vdef (stmt);
2323		      gsi_replace (&gsi, new_stmt, true);
2324		      cleanup_eh = true;
2325		      if (gimple_vdef (stmt))
2326			release_ssa_name (gimple_vdef (stmt));
2327		    }
2328		  break;
2329
2330		CASE_CFN_POWI:
2331		  arg0 = gimple_call_arg (stmt, 0);
2332		  arg1 = gimple_call_arg (stmt, 1);
2333		  loc = gimple_location (stmt);
2334
2335		  if (real_minus_onep (arg0))
2336		    {
2337                      tree t0, t1, cond, one, minus_one;
2338		      gassign *stmt;
2339
2340		      t0 = TREE_TYPE (arg0);
2341		      t1 = TREE_TYPE (arg1);
2342		      one = build_real (t0, dconst1);
2343		      minus_one = build_real (t0, dconstm1);
2344
2345		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2346		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2347						  arg1, build_int_cst (t1, 1));
2348		      gimple_set_location (stmt, loc);
2349		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2350
2351		      result = make_temp_ssa_name (t0, NULL, "powi");
2352		      stmt = gimple_build_assign (result, COND_EXPR, cond,
2353						  minus_one, one);
2354		      gimple_set_location (stmt, loc);
2355		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2356		    }
2357		  else
2358		    {
2359		      if (!tree_fits_shwi_p (arg1))
2360			break;
2361
2362		      n = tree_to_shwi (arg1);
2363		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2364		    }
2365
2366		  if (result)
2367		    {
2368		      tree lhs = gimple_get_lhs (stmt);
2369		      gassign *new_stmt = gimple_build_assign (lhs, result);
2370		      gimple_set_location (new_stmt, loc);
2371		      unlink_stmt_vdef (stmt);
2372		      gsi_replace (&gsi, new_stmt, true);
2373		      cleanup_eh = true;
2374		      if (gimple_vdef (stmt))
2375			release_ssa_name (gimple_vdef (stmt));
2376		    }
2377		  break;
2378
2379		CASE_CFN_CABS:
2380		  arg0 = gimple_call_arg (stmt, 0);
2381		  loc = gimple_location (stmt);
2382		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2383
2384		  if (result)
2385		    {
2386		      tree lhs = gimple_get_lhs (stmt);
2387		      gassign *new_stmt = gimple_build_assign (lhs, result);
2388		      gimple_set_location (new_stmt, loc);
2389		      unlink_stmt_vdef (stmt);
2390		      gsi_replace (&gsi, new_stmt, true);
2391		      cleanup_eh = true;
2392		      if (gimple_vdef (stmt))
2393			release_ssa_name (gimple_vdef (stmt));
2394		    }
2395		  break;
2396
2397		default:;
2398		}
2399	    }
2400	}
2401      if (cleanup_eh)
2402	cfg_changed |= gimple_purge_dead_eh_edges (bb);
2403    }
2404
2405  statistics_counter_event (fun, "sincos statements inserted",
2406			    sincos_stats.inserted);
2407  statistics_counter_event (fun, "conv statements removed",
2408			    sincos_stats.conv_removed);
2409
2410  return cfg_changed ? TODO_cleanup_cfg : 0;
2411}
2412
2413} // anon namespace
2414
2415gimple_opt_pass *
2416make_pass_cse_sincos (gcc::context *ctxt)
2417{
2418  return new pass_cse_sincos (ctxt);
2419}
2420
2421/* Return true if stmt is a type conversion operation that can be stripped
2422   when used in a widening multiply operation.  */
2423static bool
2424widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2425{
2426  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2427
2428  if (TREE_CODE (result_type) == INTEGER_TYPE)
2429    {
2430      tree op_type;
2431      tree inner_op_type;
2432
2433      if (!CONVERT_EXPR_CODE_P (rhs_code))
2434	return false;
2435
2436      op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2437
2438      /* If the type of OP has the same precision as the result, then
2439	 we can strip this conversion.  The multiply operation will be
2440	 selected to create the correct extension as a by-product.  */
2441      if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2442	return true;
2443
      /* We can also strip a conversion if it preserves the signedness of
2445	 the operation and doesn't narrow the range.  */
2446      inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2447
2448      /* If the inner-most type is unsigned, then we can strip any
2449	 intermediate widening operation.  If it's signed, then the
2450	 intermediate widening operation must also be signed.  */
2451      if ((TYPE_UNSIGNED (inner_op_type)
2452	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2453	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2454	return true;
2455
2456      return false;
2457    }
2458
2459  return rhs_code == FIXED_CONVERT_EXPR;
2460}
2461
2462/* Return true if RHS is a suitable operand for a widening multiplication,
2463   assuming a target type of TYPE.
2464   There are two cases:
2465
2466     - RHS makes some value at least twice as wide.  Store that value
2467       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2468
2469     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2470       but leave *TYPE_OUT untouched.  */
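
/* For example, with a 64-bit integer TYPE, an RHS that is the sign or
   zero extension to 64 bits of a 32-bit SSA name is suitable:
   *NEW_RHS_OUT is set to the 32-bit name and *TYPE_OUT to its 32-bit
   type.  */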
2471
2472static bool
2473is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2474			tree *new_rhs_out)
2475{
2476  gimple *stmt;
2477  tree type1, rhs1;
2478
2479  if (TREE_CODE (rhs) == SSA_NAME)
2480    {
2481      stmt = SSA_NAME_DEF_STMT (rhs);
2482      if (is_gimple_assign (stmt))
2483	{
2484	  if (! widening_mult_conversion_strippable_p (type, stmt))
2485	    rhs1 = rhs;
2486	  else
2487	    {
2488	      rhs1 = gimple_assign_rhs1 (stmt);
2489
2490	      if (TREE_CODE (rhs1) == INTEGER_CST)
2491		{
2492		  *new_rhs_out = rhs1;
2493		  *type_out = NULL;
2494		  return true;
2495		}
2496	    }
2497	}
2498      else
2499	rhs1 = rhs;
2500
2501      type1 = TREE_TYPE (rhs1);
2502
2503      if (TREE_CODE (type1) != TREE_CODE (type)
2504	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2505	return false;
2506
2507      *new_rhs_out = rhs1;
2508      *type_out = type1;
2509      return true;
2510    }
2511
2512  if (TREE_CODE (rhs) == INTEGER_CST)
2513    {
2514      *new_rhs_out = rhs;
2515      *type_out = NULL;
2516      return true;
2517    }
2518
2519  return false;
2520}
2521
2522/* Return true if STMT performs a widening multiplication, assuming the
2523   output type is TYPE.  If so, store the unwidened types of the operands
2524   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2525   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2526   and *TYPE2_OUT would give the operands of the multiplication.  */
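
/* For example, on a target where int is 32 bits and long is 64 bits,
   given
     _1 = (long int) a_2;
     _3 = (long int) b_4;
     c_5 = _1 * _3;
   this returns true for the multiplication statement, with *TYPE1_OUT
   and *TYPE2_OUT set to int and *RHS1_OUT and *RHS2_OUT set to a_2
   and b_4.  */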
2527
2528static bool
2529is_widening_mult_p (gimple *stmt,
2530		    tree *type1_out, tree *rhs1_out,
2531		    tree *type2_out, tree *rhs2_out)
2532{
2533  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2534
2535  if (TREE_CODE (type) == INTEGER_TYPE)
2536    {
2537      if (TYPE_OVERFLOW_TRAPS (type))
2538	return false;
2539    }
2540  else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2541    return false;
2542
2543  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2544			       rhs1_out))
2545    return false;
2546
2547  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2548			       rhs2_out))
2549    return false;
2550
2551  if (*type1_out == NULL)
2552    {
2553      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2554	return false;
2555      *type1_out = *type2_out;
2556    }
2557
2558  if (*type2_out == NULL)
2559    {
2560      if (!int_fits_type_p (*rhs2_out, *type1_out))
2561	return false;
2562      *type2_out = *type1_out;
2563    }
2564
2565  /* Ensure that the larger of the two operands comes first. */
2566  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2567    {
2568      std::swap (*type1_out, *type2_out);
2569      std::swap (*rhs1_out, *rhs2_out);
2570    }
2571
2572  return true;
2573}
2574
/* Check to see if the CALL statement is an invocation of copysign
   with 1.0 as its first argument.  */
2577static bool
2578is_copysign_call_with_1 (gimple *call)
2579{
2580  gcall *c = dyn_cast <gcall *> (call);
2581  if (! c)
2582    return false;
2583
2584  enum combined_fn code = gimple_call_combined_fn (c);
2585
2586  if (code == CFN_LAST)
2587    return false;
2588
2589  if (builtin_fn_p (code))
2590    {
2591      switch (as_builtin_fn (code))
2592	{
2593	CASE_FLT_FN (BUILT_IN_COPYSIGN):
2594	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2595	  return real_onep (gimple_call_arg (c, 0));
2596	default:
2597	  return false;
2598	}
2599    }
2600
2601  if (internal_fn_p (code))
2602    {
2603      switch (as_internal_fn (code))
2604	{
2605	case IFN_COPYSIGN:
2606	  return real_onep (gimple_call_arg (c, 0));
2607	default:
2608	  return false;
2609	}
2610    }
2611
2612   return false;
2613}
2614
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined.  If the pattern
   is not an xorsign pattern or if expansion fails, FALSE is returned;
   otherwise TRUE is returned.  */
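
/* For example,
     _1 = __builtin_copysign (1.0, y_2);
     z_3 = x_4 * _1;
   is rewritten into z_3 = .XORSIGN (x_4, y_2), leaving the copysign
   call for DCE to remove.  */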
2619static bool
2620convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2621{
2622  tree treeop0, treeop1, lhs, type;
2623  location_t loc = gimple_location (stmt);
2624  lhs = gimple_assign_lhs (stmt);
2625  treeop0 = gimple_assign_rhs1 (stmt);
2626  treeop1 = gimple_assign_rhs2 (stmt);
2627  type = TREE_TYPE (lhs);
2628  machine_mode mode = TYPE_MODE (type);
2629
2630  if (HONOR_SNANS (type))
2631    return false;
2632
2633  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2634    {
2635      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2636      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2637	{
2638	  call0 = SSA_NAME_DEF_STMT (treeop1);
2639	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2640	    return false;
2641
2642	  treeop1 = treeop0;
2643	}
2644	if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2645	  return false;
2646
2647	gcall *c = as_a<gcall*> (call0);
2648	treeop0 = gimple_call_arg (c, 1);
2649
2650	gcall *call_stmt
2651	  = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2652	gimple_set_lhs (call_stmt, lhs);
2653	gimple_set_location (call_stmt, loc);
2654	gsi_replace (gsi, call_stmt, true);
2655	return true;
2656    }
2657
2658  return false;
2659}
2660
2661/* Process a single gimple statement STMT, which has a MULT_EXPR as
2662   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2663   value is true iff we converted the statement.  */
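
/* For example, on a target providing a 32x32->64 bit widening multiply
   pattern, a statement c_5 = _1 * _3 whose operands are 64-bit
   extensions of 32-bit values a_2 and b_4 is rewritten in place into
   c_5 = a_2 w* b_4, i.e. a WIDEN_MULT_EXPR on the narrow operands.  */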
2664
2665static bool
2666convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2667{
2668  tree lhs, rhs1, rhs2, type, type1, type2;
2669  enum insn_code handler;
2670  scalar_int_mode to_mode, from_mode, actual_mode;
2671  optab op;
2672  int actual_precision;
2673  location_t loc = gimple_location (stmt);
2674  bool from_unsigned1, from_unsigned2;
2675
2676  lhs = gimple_assign_lhs (stmt);
2677  type = TREE_TYPE (lhs);
2678  if (TREE_CODE (type) != INTEGER_TYPE)
2679    return false;
2680
2681  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2682    return false;
2683
2684  to_mode = SCALAR_INT_TYPE_MODE (type);
2685  from_mode = SCALAR_INT_TYPE_MODE (type1);
2686  if (to_mode == from_mode)
2687    return false;
2688
2689  from_unsigned1 = TYPE_UNSIGNED (type1);
2690  from_unsigned2 = TYPE_UNSIGNED (type2);
2691
2692  if (from_unsigned1 && from_unsigned2)
2693    op = umul_widen_optab;
2694  else if (!from_unsigned1 && !from_unsigned2)
2695    op = smul_widen_optab;
2696  else
2697    op = usmul_widen_optab;
2698
2699  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2700						  &actual_mode);
2701
2702  if (handler == CODE_FOR_nothing)
2703    {
2704      if (op != smul_widen_optab)
2705	{
2706	  /* We can use a signed multiply with unsigned types as long as
2707	     there is a wider mode to use, or it is the smaller of the two
2708	     types that is unsigned.  Note that type1 >= type2, always.  */
2709	  if ((TYPE_UNSIGNED (type1)
2710	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2711	      || (TYPE_UNSIGNED (type2)
2712		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2713	    {
2714	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2715		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2716		return false;
2717	    }
2718
2719	  op = smul_widen_optab;
2720	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2721							  from_mode,
2722							  &actual_mode);
2723
2724	  if (handler == CODE_FOR_nothing)
2725	    return false;
2726
2727	  from_unsigned1 = from_unsigned2 = false;
2728	}
2729      else
2730	{
2731	  /* Expand can synthesize smul_widen_optab if the target
2732	     supports umul_widen_optab.  */
2733	  op = umul_widen_optab;
2734	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2735							  from_mode,
2736							  &actual_mode);
2737	  if (handler == CODE_FOR_nothing)
2738	    return false;
2739	}
2740    }
2741
  /* Ensure that the inputs to the handler are in the correct precision
2743     for the opcode.  This will be the full mode size.  */
2744  actual_precision = GET_MODE_PRECISION (actual_mode);
2745  if (2 * actual_precision > TYPE_PRECISION (type))
2746    return false;
2747  if (actual_precision != TYPE_PRECISION (type1)
2748      || from_unsigned1 != TYPE_UNSIGNED (type1))
2749    rhs1 = build_and_insert_cast (gsi, loc,
2750				  build_nonstandard_integer_type
2751				    (actual_precision, from_unsigned1), rhs1);
2752  if (actual_precision != TYPE_PRECISION (type2)
2753      || from_unsigned2 != TYPE_UNSIGNED (type2))
2754    rhs2 = build_and_insert_cast (gsi, loc,
2755				  build_nonstandard_integer_type
2756				    (actual_precision, from_unsigned2), rhs2);
2757
2758  /* Handle constants.  */
2759  if (TREE_CODE (rhs1) == INTEGER_CST)
2760    rhs1 = fold_convert (type1, rhs1);
2761  if (TREE_CODE (rhs2) == INTEGER_CST)
2762    rhs2 = fold_convert (type2, rhs2);
2763
2764  gimple_assign_set_rhs1 (stmt, rhs1);
2765  gimple_assign_set_rhs2 (stmt, rhs2);
2766  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2767  update_stmt (stmt);
2768  widen_mul_stats.widen_mults_inserted++;
2769  return true;
2770}
2771
2772/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2774   rhs (given by CODE), and try to convert it into a
2775   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2776   is true iff we converted the statement.  */
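
/* For example, if the 64-bit result c_5 of such a widening multiplication
   of 32-bit values a_2 and b_4 has a single use
     r_6 = c_5 + acc_7;
   in the same block and the target provides a widening multiply-accumulate
   pattern, the addition is rewritten into
     r_6 = WIDEN_MULT_PLUS_EXPR <a_2, b_4, acc_7>;
   leaving the original multiplication unused.  */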
2777
2778static bool
2779convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2780			    enum tree_code code)
2781{
2782  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2783  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2784  tree type, type1, type2, optype;
2785  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2786  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2787  optab this_optab;
2788  enum tree_code wmult_code;
2789  enum insn_code handler;
2790  scalar_mode to_mode, from_mode, actual_mode;
2791  location_t loc = gimple_location (stmt);
2792  int actual_precision;
2793  bool from_unsigned1, from_unsigned2;
2794
2795  lhs = gimple_assign_lhs (stmt);
2796  type = TREE_TYPE (lhs);
2797  if (TREE_CODE (type) != INTEGER_TYPE
2798      && TREE_CODE (type) != FIXED_POINT_TYPE)
2799    return false;
2800
2801  if (code == MINUS_EXPR)
2802    wmult_code = WIDEN_MULT_MINUS_EXPR;
2803  else
2804    wmult_code = WIDEN_MULT_PLUS_EXPR;
2805
2806  rhs1 = gimple_assign_rhs1 (stmt);
2807  rhs2 = gimple_assign_rhs2 (stmt);
2808
2809  if (TREE_CODE (rhs1) == SSA_NAME)
2810    {
2811      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2812      if (is_gimple_assign (rhs1_stmt))
2813	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2814    }
2815
2816  if (TREE_CODE (rhs2) == SSA_NAME)
2817    {
2818      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2819      if (is_gimple_assign (rhs2_stmt))
2820	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2821    }
2822
2823  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there is more than
     one conversion then we assume they would invalidate this
2826     transformation.  If that's not the case then they should have
2827     been folded before now.  */
2828  if (CONVERT_EXPR_CODE_P (rhs1_code))
2829    {
2830      conv1_stmt = rhs1_stmt;
2831      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2832      if (TREE_CODE (rhs1) == SSA_NAME)
2833	{
2834	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2835	  if (is_gimple_assign (rhs1_stmt))
2836	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2837	}
2838      else
2839	return false;
2840    }
2841  if (CONVERT_EXPR_CODE_P (rhs2_code))
2842    {
2843      conv2_stmt = rhs2_stmt;
2844      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2845      if (TREE_CODE (rhs2) == SSA_NAME)
2846	{
2847	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2848	  if (is_gimple_assign (rhs2_stmt))
2849	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2850	}
2851      else
2852	return false;
2853    }
2854
  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the operands it returns.
2857
2858     It might also appear that it would be sufficient to use the existing
2859     operands of the widening multiply, but that would limit the choice of
2860     multiply-and-accumulate instructions.
2861
     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  Also restrict this operation
     to a single basic block to avoid moving the multiply to a different block
2865     with a higher execution frequency.  */
2866  if (code == PLUS_EXPR
2867      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2868    {
2869      if (!has_single_use (rhs1)
2870	  || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2871	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2872				  &type2, &mult_rhs2))
2873	return false;
2874      add_rhs = rhs2;
2875      conv_stmt = conv1_stmt;
2876    }
2877  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2878    {
2879      if (!has_single_use (rhs2)
2880	  || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2881	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2882				  &type2, &mult_rhs2))
2883	return false;
2884      add_rhs = rhs1;
2885      conv_stmt = conv2_stmt;
2886    }
2887  else
2888    return false;
2889
2890  to_mode = SCALAR_TYPE_MODE (type);
2891  from_mode = SCALAR_TYPE_MODE (type1);
2892  if (to_mode == from_mode)
2893    return false;
2894
2895  from_unsigned1 = TYPE_UNSIGNED (type1);
2896  from_unsigned2 = TYPE_UNSIGNED (type2);
2897  optype = type1;
2898
2899  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2900  if (from_unsigned1 != from_unsigned2)
2901    {
2902      if (!INTEGRAL_TYPE_P (type))
2903	return false;
2904      /* We can use a signed multiply with unsigned types as long as
2905	 there is a wider mode to use, or it is the smaller of the two
2906	 types that is unsigned.  Note that type1 >= type2, always.  */
2907      if ((from_unsigned1
2908	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2909	  || (from_unsigned2
2910	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2911	{
2912	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2913	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2914	    return false;
2915	}
2916
2917      from_unsigned1 = from_unsigned2 = false;
2918      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2919					       false);
2920    }
2921
2922  /* If there was a conversion between the multiply and addition
2923     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
2925     value.  */
2926  if (conv_stmt)
2927    {
2928      /* We use the original, unmodified data types for this.  */
2929      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2930      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2931      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2932      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2933
2934      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2935	{
2936	  /* Conversion is a truncate.  */
2937	  if (TYPE_PRECISION (to_type) < data_size)
2938	    return false;
2939	}
2940      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2941	{
2942	  /* Conversion is an extend.  Check it's the right sort.  */
2943	  if (TYPE_UNSIGNED (from_type) != is_unsigned
2944	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2945	    return false;
2946	}
2947      /* else convert is a no-op for our purposes.  */
2948    }
2949
2950  /* Verify that the machine can perform a widening multiply
2951     accumulate in this mode/signedness combination, otherwise
2952     this transformation is likely to pessimize code.  */
2953  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2954  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2955						  from_mode, &actual_mode);
2956
2957  if (handler == CODE_FOR_nothing)
2958    return false;
2959
  /* Ensure that the inputs to the handler are in the correct precision
2961     for the opcode.  This will be the full mode size.  */
2962  actual_precision = GET_MODE_PRECISION (actual_mode);
2963  if (actual_precision != TYPE_PRECISION (type1)
2964      || from_unsigned1 != TYPE_UNSIGNED (type1))
2965    mult_rhs1 = build_and_insert_cast (gsi, loc,
2966				       build_nonstandard_integer_type
2967				         (actual_precision, from_unsigned1),
2968				       mult_rhs1);
2969  if (actual_precision != TYPE_PRECISION (type2)
2970      || from_unsigned2 != TYPE_UNSIGNED (type2))
2971    mult_rhs2 = build_and_insert_cast (gsi, loc,
2972				       build_nonstandard_integer_type
2973					 (actual_precision, from_unsigned2),
2974				       mult_rhs2);
2975
2976  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2977    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2978
2979  /* Handle constants.  */
2980  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2981    mult_rhs1 = fold_convert (type1, mult_rhs1);
2982  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2983    mult_rhs2 = fold_convert (type2, mult_rhs2);
2984
2985  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2986				  add_rhs);
2987  update_stmt (gsi_stmt (*gsi));
2988  widen_mul_stats.maccs_inserted++;
2989  return true;
2990}
2991
2992/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2993   OP2 and which we know is used in statements that can be, together with the
2994   multiplication, converted to FMAs, perform the transformation.  */
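
/* For example, for
     t_1 = a_2 * b_3;
     r_4 = t_1 + c_5;
   the addition is replaced by r_4 = .FMA (a_2, b_3, c_5); a use of the
   form r_4 = c_5 - t_1 gets its multiplicand negated so that subsequent
   folding can form the FNMA/FNMS variants.  */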
2995
2996static void
2997convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2998{
2999  tree type = TREE_TYPE (mul_result);
3000  gimple *use_stmt;
3001  imm_use_iterator imm_iter;
3002  gcall *fma_stmt;
3003
3004  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3005    {
3006      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3007      tree addop, mulop1 = op1, result = mul_result;
3008      bool negate_p = false;
3009      gimple_seq seq = NULL;
3010
3011      if (is_gimple_debug (use_stmt))
3012	continue;
3013
3014      if (is_gimple_assign (use_stmt)
3015	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3016	{
3017	  result = gimple_assign_lhs (use_stmt);
3018	  use_operand_p use_p;
3019	  gimple *neguse_stmt;
3020	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3021	  gsi_remove (&gsi, true);
3022	  release_defs (use_stmt);
3023
3024	  use_stmt = neguse_stmt;
3025	  gsi = gsi_for_stmt (use_stmt);
3026	  negate_p = true;
3027	}
3028
3029      tree cond, else_value, ops[3];
3030      tree_code code;
3031      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3032					      ops, &else_value))
3033	gcc_unreachable ();
3034      addop = ops[0] == result ? ops[1] : ops[0];
3035
3036      if (code == MINUS_EXPR)
3037	{
3038	  if (ops[0] == result)
3039	    /* a * b - c -> a * b + (-c)  */
3040	    addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3041	  else
3042	    /* a - b * c -> (-b) * c + a */
3043	    negate_p = !negate_p;
3044	}
3045
3046      if (negate_p)
3047	mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3048
3049      if (seq)
3050	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3051
3052      if (cond)
3053	fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3054					       op2, addop, else_value);
3055      else
3056	fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3057      gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3058      gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3059								   use_stmt));
3060      gsi_replace (&gsi, fma_stmt, true);
3061      /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3062	 regardless of where the negation occurs.  */
3063      gimple *orig_stmt = gsi_stmt (gsi);
3064      if (fold_stmt (&gsi, follow_all_ssa_edges))
3065	{
3066	  if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3067	    gcc_unreachable ();
3068	  update_stmt (gsi_stmt (gsi));
3069	}
3070
3071      if (dump_file && (dump_flags & TDF_DETAILS))
3072	{
3073	  fprintf (dump_file, "Generated FMA ");
3074	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3075	  fprintf (dump_file, "\n");
3076	}
3077
3078      /* If the FMA result is negated in a single use, fold the negation
3079	 too.  */
3080      orig_stmt = gsi_stmt (gsi);
3081      use_operand_p use_p;
3082      gimple *neg_stmt;
3083      if (is_gimple_call (orig_stmt)
3084	  && gimple_call_internal_p (orig_stmt)
3085	  && gimple_call_lhs (orig_stmt)
3086	  && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3087	  && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3088	  && is_gimple_assign (neg_stmt)
3089	  && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3090	  && !stmt_could_throw_p (cfun, neg_stmt))
3091	{
3092	  gsi = gsi_for_stmt (neg_stmt);
3093	  if (fold_stmt (&gsi, follow_all_ssa_edges))
3094	    {
3095	      if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3096		gcc_unreachable ();
3097	      update_stmt (gsi_stmt (gsi));
3098	      if (dump_file && (dump_flags & TDF_DETAILS))
3099		{
3100		  fprintf (dump_file, "Folded FMA negation ");
3101		  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3102		  fprintf (dump_file, "\n");
3103		}
3104	    }
3105	}
3106
3107      widen_mul_stats.fmas_inserted++;
3108    }
3109}
3110
/* Data necessary to perform the actual transformation from a multiplication
   and an addition to an FMA once the decision to do so has been taken, and
   to then delete the multiplication statement from the function IL.  */
3114
3115struct fma_transformation_info
3116{
3117  gimple *mul_stmt;
3118  tree mul_result;
3119  tree op1;
3120  tree op2;
3121};
3122
3123/* Structure containing the current state of FMA deferring, i.e. whether we are
3124   deferring, whether to continue deferring, and all data necessary to come
3125   back and perform all deferred transformations.  */
3126
3127class fma_deferring_state
3128{
3129public:
3130  /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
3131     do any deferring.  */
3132
3133  fma_deferring_state (bool perform_deferring)
3134    : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3135      m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3136
  /* List of FMA candidates for which the transformation has been determined
     to be possible but which, at this point in the BB analysis, we do not
     yet consider beneficial.  */
3140  auto_vec<fma_transformation_info, 8> m_candidates;
3141
  /* Set of results of multiplication that are part of already deferred FMA
     candidates.  */
3144  hash_set<tree> m_mul_result_set;
3145
  /* The PHI that supposedly feeds the result of one FMA back into another
     over a loop boundary.  */
3148  gphi *m_initial_phi;
3149
3150  /* Result of the last produced FMA candidate or NULL if there has not been
3151     one.  */
3152  tree m_last_result;
3153
3154  /* If true, deferring might still be profitable.  If false, transform all
3155     candidates and no longer defer.  */
3156  bool m_deferring_p;
3157};
3158
3159/* Transform all deferred FMA candidates and mark STATE as no longer
3160   deferring.  */
3161
3162static void
3163cancel_fma_deferring (fma_deferring_state *state)
3164{
3165  if (!state->m_deferring_p)
3166    return;
3167
3168  for (unsigned i = 0; i < state->m_candidates.length (); i++)
3169    {
3170      if (dump_file && (dump_flags & TDF_DETAILS))
3171	fprintf (dump_file, "Generating deferred FMA\n");
3172
3173      const fma_transformation_info &fti = state->m_candidates[i];
3174      convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3175
3176      gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3177      gsi_remove (&gsi, true);
3178      release_defs (fti.mul_stmt);
3179    }
3180  state->m_deferring_p = false;
3181}
3182
3183/* If OP is an SSA name defined by a PHI node, return the PHI statement.
3184   Otherwise return NULL.  */
3185
3186static gphi *
3187result_of_phi (tree op)
3188{
3189  if (TREE_CODE (op) != SSA_NAME)
3190    return NULL;
3191
3192  return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3193}
3194
3195/* After processing statements of a BB and recording STATE, return true if the
   initial phi is fed by the last FMA candidate result or one such result from
3197   previously processed BBs marked in LAST_RESULT_SET.  */
3198
3199static bool
3200last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3201				      hash_set<tree> *last_result_set)
3202{
3203  ssa_op_iter iter;
3204  use_operand_p use;
3205  FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3206    {
3207      tree t = USE_FROM_PTR (use);
3208      if (t == state->m_last_result
3209	  || last_result_set->contains (t))
3210	return true;
3211    }
3212
3213  return false;
3214}
3215
3216/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3217   with uses in additions and subtractions to form fused multiply-add
3218   operations.  Returns true if successful and MUL_STMT should be removed.
3219   If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3220   on MUL_COND, otherwise it is unconditional.
3221
3222   If STATE indicates that we are deferring FMA transformation, that means
3223   that we do not produce FMAs for basic blocks which look like:
3224
3225    <bb 6>
3226    # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3227    _65 = _14 * _16;
3228    accumulator_66 = _65 + accumulator_111;
3229
3230  or its unrolled version, i.e. with several FMA candidates that feed result
3231  of one into the addend of another.  Instead, we add them to a list in STATE
3232  and if we later discover an FMA candidate that is not part of such a chain,
3233  we go back and perform all deferred past candidates.  */
3234
3235static bool
3236convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3237		     fma_deferring_state *state, tree mul_cond = NULL_TREE)
3238{
3239  tree mul_result = gimple_get_lhs (mul_stmt);
3240  /* If there isn't a LHS then this can't be an FMA.  There can be no LHS
3241     if the statement was left just for the side-effects.  */
3242  if (!mul_result)
3243    return false;
3244  tree type = TREE_TYPE (mul_result);
3245  gimple *use_stmt, *neguse_stmt;
3246  use_operand_p use_p;
3247  imm_use_iterator imm_iter;
3248
3249  if (FLOAT_TYPE_P (type)
3250      && flag_fp_contract_mode == FP_CONTRACT_OFF)
3251    return false;
3252
3253  /* We don't want to do bitfield reduction ops.  */
3254  if (INTEGRAL_TYPE_P (type)
3255      && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3256    return false;
3257
3258  /* If the target doesn't support it, don't generate it.  We assume that
3259     if fma isn't available then fms, fnma or fnms are not either.  */
3260  optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3261  if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3262    return false;
3263
3264  /* If the multiplication has zero uses, it is kept around probably because
     of -fnon-call-exceptions.  Don't optimize it away in that case;
     that is DCE's job.  */
3267  if (has_zero_uses (mul_result))
3268    return false;
3269
3270  bool check_defer
3271    = (state->m_deferring_p
3272       && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3273		    param_avoid_fma_max_bits));
3274  bool defer = check_defer;
3275  bool seen_negate_p = false;
3276  /* Make sure that the multiplication statement becomes dead after
3277     the transformation, thus that all uses are transformed to FMAs.
3278     This means we assume that an FMA operation has the same cost
3279     as an addition.  */
3280  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3281    {
3282      tree result = mul_result;
3283      bool negate_p = false;
3284
3285      use_stmt = USE_STMT (use_p);
3286
3287      if (is_gimple_debug (use_stmt))
3288	continue;
3289
      /* For now restrict this operation to single basic blocks.  In theory
3291	 we would want to support sinking the multiplication in
3292	 m = a*b;
3293	 if ()
3294	   ma = m + c;
3295	 else
3296	   d = m;
3297	 to form a fma in the then block and sink the multiplication to the
3298	 else block.  */
3299      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3300	return false;
3301
3302      /* A negate on the multiplication leads to FNMA.  */
3303      if (is_gimple_assign (use_stmt)
3304	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3305	{
3306	  ssa_op_iter iter;
3307	  use_operand_p usep;
3308
3309	  /* If (due to earlier missed optimizations) we have two
3310	     negates of the same value, treat them as equivalent
3311	     to a single negate with multiple uses.  */
3312	  if (seen_negate_p)
3313	    return false;
3314
3315	  result = gimple_assign_lhs (use_stmt);
3316
3317	  /* Make sure the negate statement becomes dead with this
3318	     single transformation.  */
3319	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
3320			       &use_p, &neguse_stmt))
3321	    return false;
3322
3323	  /* Make sure the multiplication isn't also used on that stmt.  */
3324	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3325	    if (USE_FROM_PTR (usep) == mul_result)
3326	      return false;
3327
3328	  /* Re-validate.  */
3329	  use_stmt = neguse_stmt;
3330	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3331	    return false;
3332
3333	  negate_p = seen_negate_p = true;
3334	}
3335
3336      tree cond, else_value, ops[3];
3337      tree_code code;
3338      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3339					      &else_value))
3340	return false;
3341
3342      switch (code)
3343	{
3344	case MINUS_EXPR:
3345	  if (ops[1] == result)
3346	    negate_p = !negate_p;
3347	  break;
3348	case PLUS_EXPR:
3349	  break;
3350	default:
3351	  /* FMA can only be formed from PLUS and MINUS.  */
3352	  return false;
3353	}
3354
3355      if (mul_cond && cond != mul_cond)
3356	return false;
3357
3358      if (cond)
3359	{
3360	  if (cond == result || else_value == result)
3361	    return false;
3362	  if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3363	    return false;
3364	}
3365
3366      /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3367	 we'll visit later, we might be able to get a more profitable
3368	 match with fnma.
3369	 OTOH, if we don't, a negate / fma pair likely has lower latency
3370	 than a mult / subtract pair.  */
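      /* E.g. (an illustrative sketch, SSA names are arbitrary) for
	   t1_3 = a_1 * b_2;
	   t2_6 = c_4 * d_5;
	   r_7 = t1_3 - t2_6;
	 giving up on t1_3 here lets the later visit of t2_6 form
	 r_7 = .FNMA (c_4, d_5, t1_3) instead of a separate negate
	 plus .FMA.  */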
3371      if (code == MINUS_EXPR
3372	  && !negate_p
3373	  && ops[0] == result
3374	  && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3375	  && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3376	  && TREE_CODE (ops[1]) == SSA_NAME
3377	  && has_single_use (ops[1]))
3378	{
3379	  gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3380	  if (is_gimple_assign (stmt2)
3381	      && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3382	    return false;
3383	}
3384
3385      /* We can't handle a * b + a * b.  */
3386      if (ops[0] == ops[1])
3387	return false;
3388      /* If deferring, make sure we are not looking at an instruction that
3389	 wouldn't have existed if we were not.  */
3390      if (state->m_deferring_p
3391	  && (state->m_mul_result_set.contains (ops[0])
3392	      || state->m_mul_result_set.contains (ops[1])))
3393	return false;
3394
3395      if (check_defer)
3396	{
3397	  tree use_lhs = gimple_get_lhs (use_stmt);
3398	  if (state->m_last_result)
3399	    {
3400	      if (ops[1] == state->m_last_result
3401		  || ops[0] == state->m_last_result)
3402		defer = true;
3403	      else
3404		defer = false;
3405	    }
3406	  else
3407	    {
3408	      gcc_checking_assert (!state->m_initial_phi);
3409	      gphi *phi;
3410	      if (ops[0] == result)
3411		phi = result_of_phi (ops[1]);
3412	      else
3413		{
3414		  gcc_assert (ops[1] == result);
3415		  phi = result_of_phi (ops[0]);
3416		}
3417
3418	      if (phi)
3419		{
3420		  state->m_initial_phi = phi;
3421		  defer = true;
3422		}
3423	      else
3424		defer = false;
3425	    }
3426
3427	  state->m_last_result = use_lhs;
3428	  check_defer = false;
3429	}
3430      else
3431	defer = false;
3432
3433      /* While it is possible to validate whether or not the exact form that
3434	 we've recognized is available in the backend, the assumption is that
3435	 if the deferring logic above did not trigger, the transformation is
3436	 never a loss.  For instance, suppose the target only has the plain FMA
3437	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3438	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
3439         -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3440	 form the two NEGs are independent and could be run in parallel.  */
3441    }
3442
3443  if (defer)
3444    {
3445      fma_transformation_info fti;
3446      fti.mul_stmt = mul_stmt;
3447      fti.mul_result = mul_result;
3448      fti.op1 = op1;
3449      fti.op2 = op2;
3450      state->m_candidates.safe_push (fti);
3451      state->m_mul_result_set.add (mul_result);
3452
3453      if (dump_file && (dump_flags & TDF_DETAILS))
3454	{
3455	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
3456	  print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3457	  fprintf (dump_file, "\n");
3458	}
3459
3460      return false;
3461    }
3462  else
3463    {
3464      if (state->m_deferring_p)
3465	cancel_fma_deferring (state);
3466      convert_mult_to_fma_1 (mul_result, op1, op2);
3467      return true;
3468    }
3469}
3470
3471
3472/* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
3473   a check for non-zero like:
3474   _1 = x_4(D) * y_5(D);
3475   *res_7(D) = _1;
3476   if (x_4(D) != 0)
3477     goto <bb 3>; [50.00%]
3478   else
3479     goto <bb 4>; [50.00%]
3480
3481   <bb 3> [local count: 536870913]:
3482   _2 = _1 / x_4(D);
3483   _9 = _2 != y_5(D);
3484   _10 = (int) _9;
3485
3486   <bb 4> [local count: 1073741824]:
3487   # iftmp.0_3 = PHI <_10(3), 0(2)>
3488   then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3489   optimize the x_4(D) != 0 condition to 1.  */
3490
3491static void
3492maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3493			       gimple *div_stmt, bool *cfg_changed)
3494{
3495  basic_block bb = gimple_bb (cond_stmt);
3496  if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3497    return;
3498  edge pred_edge = single_pred_edge (bb);
3499  basic_block pred_bb = pred_edge->src;
3500  if (EDGE_COUNT (pred_bb->succs) != 2)
3501    return;
3502  edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3503  edge other_succ_edge = NULL;
3504  if (gimple_code (cond_stmt) == GIMPLE_COND)
3505    {
3506      if (EDGE_COUNT (bb->succs) != 2)
3507	return;
3508      other_succ_edge = EDGE_SUCC (bb, 0);
3509      if (gimple_cond_code (cond_stmt) == NE_EXPR)
3510	{
3511	  if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3512	    other_succ_edge = EDGE_SUCC (bb, 1);
3513	}
3514      else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3515	other_succ_edge = EDGE_SUCC (bb, 1);
3516      if (other_edge->dest != other_succ_edge->dest)
3517	return;
3518    }
3519  else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3520    return;
3521  gimple *zero_cond = last_stmt (pred_bb);
3522  if (zero_cond == NULL
3523      || gimple_code (zero_cond) != GIMPLE_COND
3524      || (gimple_cond_code (zero_cond)
3525	  != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3526      || !integer_zerop (gimple_cond_rhs (zero_cond)))
3527    return;
3528  tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3529  if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3530    return;
3531  if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3532    {
3533      /* Allow the divisor to be the result of a same-precision cast
3534	 from zero_cond_lhs.  */
3535      tree rhs2 = gimple_assign_rhs2 (div_stmt);
3536      if (TREE_CODE (rhs2) != SSA_NAME)
3537	return;
3538      gimple *g = SSA_NAME_DEF_STMT (rhs2);
3539      if (!gimple_assign_cast_p (g)
3540	  || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3541	  || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3542	  || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3543	      != TYPE_PRECISION (TREE_TYPE (rhs2))))
3544	return;
3545    }
3546  gimple_stmt_iterator gsi = gsi_after_labels (bb);
3547  mul_stmts.quick_push (div_stmt);
3548  if (is_gimple_debug (gsi_stmt (gsi)))
3549    gsi_next_nondebug (&gsi);
3550  unsigned cast_count = 0;
3551  while (gsi_stmt (gsi) != cond_stmt)
3552    {
3553      /* If the original mul_stmt has a single use, allow it in the same bb;
3554	 we are then looking just at __builtin_mul_overflow_p.
3555	 In that case the original mul_stmt will be replaced
3556	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3557      gimple *mul_stmt;
3558      unsigned int i;
3559      bool ok = false;
3560      FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3561	{
3562	  if (gsi_stmt (gsi) == mul_stmt)
3563	    {
3564	      ok = true;
3565	      break;
3566	    }
3567	}
3568      if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3569	ok = true;
3570      if (!ok)
3571	return;
3572      gsi_next_nondebug (&gsi);
3573    }
3574  if (gimple_code (cond_stmt) == GIMPLE_COND)
3575    {
3576      basic_block succ_bb = other_edge->dest;
3577      for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3578	   gsi_next (&gpi))
3579	{
3580	  gphi *phi = gpi.phi ();
3581	  tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3582	  tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3583	  if (!operand_equal_p (v1, v2, 0))
3584	    return;
3585	}
3586    }
3587  else
3588    {
3589      tree lhs = gimple_assign_lhs (cond_stmt);
3590      if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3591	return;
3592      gsi_next_nondebug (&gsi);
3593      if (!gsi_end_p (gsi))
3594	{
3595	  if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3596	    return;
3597	  gimple *cast_stmt = gsi_stmt (gsi);
3598	  if (!gimple_assign_cast_p (cast_stmt))
3599	    return;
3600	  tree new_lhs = gimple_assign_lhs (cast_stmt);
3601	  gsi_next_nondebug (&gsi);
3602	  if (!gsi_end_p (gsi)
3603	      || !new_lhs
3604	      || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3605	      || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3606	    return;
3607	  lhs = new_lhs;
3608	}
3609      edge succ_edge = single_succ_edge (bb);
3610      basic_block succ_bb = succ_edge->dest;
3611      gsi = gsi_start_phis (succ_bb);
3612      if (gsi_end_p (gsi))
3613	return;
3614      gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3615      gsi_next (&gsi);
3616      if (!gsi_end_p (gsi))
3617	return;
3618      if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3619	return;
3620      tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3621      if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3622	{
3623	  tree cond = gimple_assign_rhs1 (cond_stmt);
3624	  if (TREE_CODE (cond) == NE_EXPR)
3625	    {
3626	      if (!operand_equal_p (other_val,
3627				    gimple_assign_rhs3 (cond_stmt), 0))
3628		return;
3629	    }
3630	  else if (!operand_equal_p (other_val,
3631				     gimple_assign_rhs2 (cond_stmt), 0))
3632	    return;
3633	}
3634      else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3635	{
3636	  if (!integer_zerop (other_val))
3637	    return;
3638	}
3639      else if (!integer_onep (other_val))
3640	return;
3641    }
3642  gcond *zero_gcond = as_a <gcond *> (zero_cond);
3643  if (pred_edge->flags & EDGE_TRUE_VALUE)
3644    gimple_cond_make_true (zero_gcond);
3645  else
3646    gimple_cond_make_false (zero_gcond);
3647  update_stmt (zero_cond);
3648  *cfg_changed = true;
3649}
3650
3651/* Helper function for arith_overflow_check_p.  Return true
3652   if VAL1 is equal to VAL2 cast to the corresponding integral type
3653   with the other signedness, or vice versa.  */
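/* For example (illustrative only), if the IL contains
     iv_2 = (int) uv_1;
   then both arith_cast_equal_p (iv_2, uv_1) and arith_cast_equal_p (uv_1, iv_2)
   return true, as do two INTEGER_CSTs with the same wide_int value.  */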
3654
3655static bool
3656arith_cast_equal_p (tree val1, tree val2)
3657{
3658  if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3659    return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3660  else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3661    return false;
3662  if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3663      && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3664    return true;
3665  if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3666      && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3667    return true;
3668  return false;
3669}
3670
3671/* Helper function of match_arith_overflow.  Return 1
3672   if USE_STMT is an unsigned overflow check ovf != 0 for
3673   STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3674   and 0 otherwise.  */
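/* For instance (an illustrative sketch mirroring the cases handled below),
   for the canonical unsigned addition overflow idiom
     r_3 = x_1(D) + y_2(D);
     if (r_3 < x_1(D))
   this returns 1 (overflow exactly when the condition is true), while for
     if (r_3 >= x_1(D))
   it returns -1.  */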
3675
3676static int
3677arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3678			tree maxval, tree *other)
3679{
3680  enum tree_code ccode = ERROR_MARK;
3681  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3682  enum tree_code code = gimple_assign_rhs_code (stmt);
3683  tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3684  tree rhs1 = gimple_assign_rhs1 (stmt);
3685  tree rhs2 = gimple_assign_rhs2 (stmt);
3686  tree multop = NULL_TREE, divlhs = NULL_TREE;
3687  gimple *cur_use_stmt = use_stmt;
3688
3689  if (code == MULT_EXPR)
3690    {
3691      if (!is_gimple_assign (use_stmt))
3692	return 0;
3693      if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3694	return 0;
3695      if (gimple_assign_rhs1 (use_stmt) != lhs)
3696	return 0;
3697      if (cast_stmt)
3698	{
3699	  if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3700	    multop = rhs2;
3701	  else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3702	    multop = rhs1;
3703	  else
3704	    return 0;
3705	}
3706      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3707	multop = rhs2;
3708      else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3709	multop = rhs1;
3710      else
3711	return 0;
3712      if (stmt_ends_bb_p (use_stmt))
3713	return 0;
3714      divlhs = gimple_assign_lhs (use_stmt);
3715      if (!divlhs)
3716	return 0;
3717      use_operand_p use;
3718      if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3719	return 0;
3720    }
3721  if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3722    {
3723      ccode = gimple_cond_code (cur_use_stmt);
3724      crhs1 = gimple_cond_lhs (cur_use_stmt);
3725      crhs2 = gimple_cond_rhs (cur_use_stmt);
3726    }
3727  else if (is_gimple_assign (cur_use_stmt))
3728    {
3729      if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3730	{
3731	  ccode = gimple_assign_rhs_code (cur_use_stmt);
3732	  crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3733	  crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3734	}
3735      else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3736	{
3737	  tree cond = gimple_assign_rhs1 (cur_use_stmt);
3738	  if (COMPARISON_CLASS_P (cond))
3739	    {
3740	      ccode = TREE_CODE (cond);
3741	      crhs1 = TREE_OPERAND (cond, 0);
3742	      crhs2 = TREE_OPERAND (cond, 1);
3743	    }
3744	  else
3745	    return 0;
3746	}
3747      else
3748	return 0;
3749    }
3750  else
3751    return 0;
3752
3753  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3754    return 0;
3755
3756  switch (ccode)
3757    {
3758    case GT_EXPR:
3759    case LE_EXPR:
3760      if (maxval)
3761	{
3762	  /* r = a + b; r > maxval or r <= maxval  */
3763	  if (crhs1 == lhs
3764	      && TREE_CODE (crhs2) == INTEGER_CST
3765	      && tree_int_cst_equal (crhs2, maxval))
3766	    return ccode == GT_EXPR ? 1 : -1;
3767	  break;
3768	}
3769      /* r = a - b; r > a or r <= a
3770	 r = a + b; a > r or a <= r or b > r or b <= r.  */
3771      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3772	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3773	      && crhs2 == lhs))
3774	return ccode == GT_EXPR ? 1 : -1;
3775      /* r = ~a; b > r or b <= r.  */
3776      if (code == BIT_NOT_EXPR && crhs2 == lhs)
3777	{
3778	  if (other)
3779	    *other = crhs1;
3780	  return ccode == GT_EXPR ? 1 : -1;
3781	}
3782      break;
3783    case LT_EXPR:
3784    case GE_EXPR:
3785      if (maxval)
3786	break;
3787      /* r = a - b; a < r or a >= r
3788	 r = a + b; r < a or r >= a or r < b or r >= b.  */
3789      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3790	  || (code == PLUS_EXPR && crhs1 == lhs
3791	      && (crhs2 == rhs1 || crhs2 == rhs2)))
3792	return ccode == LT_EXPR ? 1 : -1;
3793      /* r = ~a; r < b or r >= b.  */
3794      if (code == BIT_NOT_EXPR && crhs1 == lhs)
3795	{
3796	  if (other)
3797	    *other = crhs2;
3798	  return ccode == LT_EXPR ? 1 : -1;
3799	}
3800      break;
3801    case EQ_EXPR:
3802    case NE_EXPR:
3803      /* r = a * b; _1 = r / a; _1 == b
3804	 r = a * b; _1 = r / b; _1 == a
3805	 r = a * b; _1 = r / a; _1 != b
3806	 r = a * b; _1 = r / b; _1 != a.  */
3807      if (code == MULT_EXPR)
3808	{
3809	  if (cast_stmt)
3810	    {
3811	      if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3812		  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3813		{
3814		  use_stmt = cur_use_stmt;
3815		  return ccode == NE_EXPR ? 1 : -1;
3816		}
3817	    }
3818	  else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3819		   || (crhs2 == divlhs && crhs1 == multop))
3820	    {
3821	      use_stmt = cur_use_stmt;
3822	      return ccode == NE_EXPR ? 1 : -1;
3823	    }
3824	}
3825      break;
3826    default:
3827      break;
3828    }
3829  return 0;
3830}
3831
3832/* Recognize for unsigned x
3833   x = y - z;
3834   if (x > y)
3835   where there are other uses of x and replace it with
3836   _7 = .SUB_OVERFLOW (y, z);
3837   x = REALPART_EXPR <_7>;
3838   _8 = IMAGPART_EXPR <_7>;
3839   if (_8)
3840   and similarly for addition.
3841
3842   Also recognize:
3843   yc = (type) y;
3844   zc = (type) z;
3845   x = yc + zc;
3846   if (x > max)
3847   where y and z have unsigned types with maximum max
3848   and there are other uses of x and all of those cast x
3849   back to that unsigned type and again replace it with
3850   _7 = .ADD_OVERFLOW (y, z);
3851   _9 = REALPART_EXPR <_7>;
3852   _8 = IMAGPART_EXPR <_7>;
3853   if (_8)
3854   and replace (utype) x with _9.
3855
3856   Also recognize:
3857   x = ~z;
3858   if (y > x)
3859   and replace it with
3860   _7 = .ADD_OVERFLOW (y, z);
3861   _8 = IMAGPART_EXPR <_7>;
3862   if (_8)
3863
3864   And also recognize:
3865   z = x * y;
3866   if (x != 0)
3867     goto <bb 3>; [50.00%]
3868   else
3869     goto <bb 4>; [50.00%]
3870
3871   <bb 3> [local count: 536870913]:
3872   _2 = z / x;
3873   _9 = _2 != y;
3874   _10 = (int) _9;
3875
3876   <bb 4> [local count: 1073741824]:
3877   # iftmp.0_3 = PHI <_10(3), 0(2)>
3878   and replace it with
3879   _7 = .MUL_OVERFLOW (x, y);
3880   z = REALPART_EXPR <_7>;
3881   _8 = IMAGPART_EXPR <_7>;
3882   _9 = _8 != 0;
3883   iftmp.0_3 = (int) _9;  */
3884
3885static bool
3886match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3887		      enum tree_code code, bool *cfg_changed)
3888{
3889  tree lhs = gimple_assign_lhs (stmt);
3890  tree type = TREE_TYPE (lhs);
3891  use_operand_p use_p;
3892  imm_use_iterator iter;
3893  bool use_seen = false;
3894  bool ovf_use_seen = false;
3895  gimple *use_stmt;
3896  gimple *add_stmt = NULL;
3897  bool add_first = false;
3898  gimple *cond_stmt = NULL;
3899  gimple *cast_stmt = NULL;
3900  tree cast_lhs = NULL_TREE;
3901
3902  gcc_checking_assert (code == PLUS_EXPR
3903		       || code == MINUS_EXPR
3904		       || code == MULT_EXPR
3905		       || code == BIT_NOT_EXPR);
3906  if (!INTEGRAL_TYPE_P (type)
3907      || !TYPE_UNSIGNED (type)
3908      || has_zero_uses (lhs)
3909      || (code != PLUS_EXPR
3910	  && code != MULT_EXPR
3911	  && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3912			    TYPE_MODE (type)) == CODE_FOR_nothing))
3913    return false;
3914
3915  tree rhs1 = gimple_assign_rhs1 (stmt);
3916  tree rhs2 = gimple_assign_rhs2 (stmt);
3917  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3918    {
3919      use_stmt = USE_STMT (use_p);
3920      if (is_gimple_debug (use_stmt))
3921	continue;
3922
3923      tree other = NULL_TREE;
3924      if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3925	{
3926	  if (code == BIT_NOT_EXPR)
3927	    {
3928	      gcc_assert (other);
3929	      if (TREE_CODE (other) != SSA_NAME)
3930		return false;
3931	      if (rhs2 == NULL)
3932		rhs2 = other;
3933	      else
3934		return false;
3935	      cond_stmt = use_stmt;
3936	    }
3937	  ovf_use_seen = true;
3938	}
3939      else
3940	{
3941	  use_seen = true;
3942	  if (code == MULT_EXPR
3943	      && cast_stmt == NULL
3944	      && gimple_assign_cast_p (use_stmt))
3945	    {
3946	      cast_lhs = gimple_assign_lhs (use_stmt);
3947	      if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3948		  && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3949		  && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3950		      == TYPE_PRECISION (TREE_TYPE (lhs))))
3951		cast_stmt = use_stmt;
3952	      else
3953		cast_lhs = NULL_TREE;
3954	    }
3955	}
3956      if (ovf_use_seen && use_seen)
3957	break;
3958    }
3959
3960  if (!ovf_use_seen
3961      && code == MULT_EXPR
3962      && cast_stmt)
3963    {
3964      if (TREE_CODE (rhs1) != SSA_NAME
3965	  || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3966	return false;
3967      FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3968	{
3969	  use_stmt = USE_STMT (use_p);
3970	  if (is_gimple_debug (use_stmt))
3971	    continue;
3972
3973	  if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3974				      NULL_TREE, NULL))
3975	    ovf_use_seen = true;
3976	}
3977    }
3978  else
3979    {
3980      cast_stmt = NULL;
3981      cast_lhs = NULL_TREE;
3982    }
3983
3984  tree maxval = NULL_TREE;
3985  if (!ovf_use_seen
3986      || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3987      || (code == PLUS_EXPR
3988	  && optab_handler (uaddv4_optab,
3989			    TYPE_MODE (type)) == CODE_FOR_nothing)
3990      || (code == MULT_EXPR
3991	  && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3992			    TYPE_MODE (type)) == CODE_FOR_nothing))
3993    {
3994      if (code != PLUS_EXPR)
3995	return false;
3996      if (TREE_CODE (rhs1) != SSA_NAME
3997	  || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3998	return false;
3999      rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4000      tree type1 = TREE_TYPE (rhs1);
4001      if (!INTEGRAL_TYPE_P (type1)
4002	  || !TYPE_UNSIGNED (type1)
4003	  || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4004	  || (TYPE_PRECISION (type1)
4005	      != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4006	return false;
4007      if (TREE_CODE (rhs2) == INTEGER_CST)
4008	{
4009	  if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4010	  			    TYPE_PRECISION (type1),
4011				    UNSIGNED), 0))
4012	    return false;
4013	  rhs2 = fold_convert (type1, rhs2);
4014	}
4015      else
4016	{
4017	  if (TREE_CODE (rhs2) != SSA_NAME
4018	      || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4019	    return false;
4020	  rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4021	  tree type2 = TREE_TYPE (rhs2);
4022	  if (!INTEGRAL_TYPE_P (type2)
4023	      || !TYPE_UNSIGNED (type2)
4024	      || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4025	      || (TYPE_PRECISION (type2)
4026		  != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4027	    return false;
4028	}
4029      if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4030	type = type1;
4031      else
4032	type = TREE_TYPE (rhs2);
4033
4034      if (TREE_CODE (type) != INTEGER_TYPE
4035	  || optab_handler (uaddv4_optab,
4036			    TYPE_MODE (type)) == CODE_FOR_nothing)
4037	return false;
4038
4039      maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4040						      UNSIGNED));
4041      ovf_use_seen = false;
4042      use_seen = false;
4043      basic_block use_bb = NULL;
4044      FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4045	{
4046	  use_stmt = USE_STMT (use_p);
4047	  if (is_gimple_debug (use_stmt))
4048	    continue;
4049
4050	  if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4051	    {
4052	      ovf_use_seen = true;
4053	      use_bb = gimple_bb (use_stmt);
4054	    }
4055	  else
4056	    {
4057	      if (!gimple_assign_cast_p (use_stmt)
4058		  || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4059		return false;
4060	      tree use_lhs = gimple_assign_lhs (use_stmt);
4061	      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4062		  || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4063		      > TYPE_PRECISION (type)))
4064		return false;
4065	      use_seen = true;
4066	    }
4067	}
4068      if (!ovf_use_seen)
4069	return false;
4070      if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4071	{
4072	  if (!use_seen)
4073	    return false;
4074	  tree new_rhs1 = make_ssa_name (type);
4075	  gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4076	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4077	  rhs1 = new_rhs1;
4078	}
4079      else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4080	{
4081	  if (!use_seen)
4082	    return false;
4083	  tree new_rhs2 = make_ssa_name (type);
4084	  gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4085	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4086	  rhs2 = new_rhs2;
4087	}
4088      else if (!use_seen)
4089	{
4090	  /* If there are no uses of the wider addition, check whether
4091	     forwprop has instead created a narrower addition.
4092	     Require it to be in the same bb as the overflow check.  */
4093	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4094	    {
4095	      use_stmt = USE_STMT (use_p);
4096	      if (is_gimple_debug (use_stmt))
4097		continue;
4098
4099	      if (use_stmt == stmt)
4100		continue;
4101
4102	      if (!is_gimple_assign (use_stmt)
4103		  || gimple_bb (use_stmt) != use_bb
4104		  || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4105		continue;
4106
4107	      if (gimple_assign_rhs1 (use_stmt) == rhs1)
4108		{
4109		  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4110					rhs2, 0))
4111		    continue;
4112		}
4113	      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4114		{
4115		  if (gimple_assign_rhs1 (use_stmt) != rhs2)
4116		    continue;
4117		}
4118	      else
4119		continue;
4120
4121	      add_stmt = use_stmt;
4122	      break;
4123	    }
4124	  if (add_stmt == NULL)
4125	    return false;
4126
4127	  /* If stmt and add_stmt are in the same bb, we need to find out
4128	     which one is earlier.  If they are in different bbs, we've
4129	     checked add_stmt is in the same bb as one of the uses of the
4130	     stmt lhs, so stmt needs to dominate add_stmt too.  */
4131	  if (gimple_bb (stmt) == gimple_bb (add_stmt))
4132	    {
4133	      gimple_stmt_iterator gsif = *gsi;
4134	      gimple_stmt_iterator gsib = *gsi;
4135	      int i;
4136	      /* Search both forward and backward from stmt and have a small
4137		 upper bound.  */
4138	      for (i = 0; i < 128; i++)
4139		{
4140		  if (!gsi_end_p (gsib))
4141		    {
4142		      gsi_prev_nondebug (&gsib);
4143		      if (gsi_stmt (gsib) == add_stmt)
4144			{
4145			  add_first = true;
4146			  break;
4147			}
4148		    }
4149		  else if (gsi_end_p (gsif))
4150		    break;
4151		  if (!gsi_end_p (gsif))
4152		    {
4153		      gsi_next_nondebug (&gsif);
4154		      if (gsi_stmt (gsif) == add_stmt)
4155			break;
4156		    }
4157		}
4158	      if (i == 128)
4159		return false;
4160	      if (add_first)
4161		*gsi = gsi_for_stmt (add_stmt);
4162	    }
4163	}
4164    }
4165
4166  if (code == BIT_NOT_EXPR)
4167    *gsi = gsi_for_stmt (cond_stmt);
4168
4169  auto_vec<gimple *, 8> mul_stmts;
4170  if (code == MULT_EXPR && cast_stmt)
4171    {
4172      type = TREE_TYPE (cast_lhs);
4173      gimple *g = SSA_NAME_DEF_STMT (rhs1);
4174      if (gimple_assign_cast_p (g)
4175	  && useless_type_conversion_p (type,
4176					TREE_TYPE (gimple_assign_rhs1 (g)))
4177	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4178	rhs1 = gimple_assign_rhs1 (g);
4179      else
4180	{
4181	  g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4182	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4183	  rhs1 = gimple_assign_lhs (g);
4184	  mul_stmts.quick_push (g);
4185	}
4186      if (TREE_CODE (rhs2) == INTEGER_CST)
4187	rhs2 = fold_convert (type, rhs2);
4188      else
4189	{
4190	  g = SSA_NAME_DEF_STMT (rhs2);
4191	  if (gimple_assign_cast_p (g)
4192	      && useless_type_conversion_p (type,
4193					    TREE_TYPE (gimple_assign_rhs1 (g)))
4194	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4195	    rhs2 = gimple_assign_rhs1 (g);
4196	  else
4197	    {
4198	      g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4199	      gsi_insert_before (gsi, g, GSI_SAME_STMT);
4200	      rhs2 = gimple_assign_lhs (g);
4201	      mul_stmts.quick_push (g);
4202	    }
4203	}
4204    }
4205  tree ctype = build_complex_type (type);
4206  gcall *g = gimple_build_call_internal (code == MULT_EXPR
4207					 ? IFN_MUL_OVERFLOW
4208					 : code != MINUS_EXPR
4209					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4210					 2, rhs1, rhs2);
4211  tree ctmp = make_ssa_name (ctype);
4212  gimple_call_set_lhs (g, ctmp);
4213  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4214  tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4215  gassign *g2;
4216  if (code != BIT_NOT_EXPR)
4217    {
4218      g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4219				build1 (REALPART_EXPR, type, ctmp));
4220      if (maxval || cast_stmt)
4221	{
4222	  gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4223	  if (add_first)
4224	    *gsi = gsi_for_stmt (stmt);
4225	}
4226      else
4227	gsi_replace (gsi, g2, true);
4228      if (code == MULT_EXPR)
4229	{
4230	  mul_stmts.quick_push (g);
4231	  mul_stmts.quick_push (g2);
4232	  if (cast_stmt)
4233	    {
4234	      g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4235	      gsi_replace (gsi, g2, true);
4236	      mul_stmts.quick_push (g2);
4237	    }
4238	}
4239    }
4240  tree ovf = make_ssa_name (type);
4241  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4242			    build1 (IMAGPART_EXPR, type, ctmp));
4243  if (code != BIT_NOT_EXPR)
4244    gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4245  else
4246    gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4247  if (code == MULT_EXPR)
4248    mul_stmts.quick_push (g2);
4249
4250  FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4251    {
4252      if (is_gimple_debug (use_stmt))
4253	continue;
4254
4255      gimple *orig_use_stmt = use_stmt;
4256      int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4257					    maxval, NULL);
4258      if (ovf_use == 0)
4259	{
4260	  gcc_assert (code != BIT_NOT_EXPR);
4261	  if (maxval)
4262	    {
4263	      tree use_lhs = gimple_assign_lhs (use_stmt);
4264	      gimple_assign_set_rhs1 (use_stmt, new_lhs);
4265	      if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4266					     TREE_TYPE (new_lhs)))
4267		gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4268	      update_stmt (use_stmt);
4269	    }
4270	  continue;
4271	}
4272      if (gimple_code (use_stmt) == GIMPLE_COND)
4273	{
4274	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
4275	  gimple_cond_set_lhs (cond_stmt, ovf);
4276	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4277	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4278	}
4279      else
4280	{
4281	  gcc_checking_assert (is_gimple_assign (use_stmt));
4282	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4283	    {
4284	      gimple_assign_set_rhs1 (use_stmt, ovf);
4285	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4286	      gimple_assign_set_rhs_code (use_stmt,
4287					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4288	    }
4289	  else
4290	    {
4291	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4292				   == COND_EXPR);
4293	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4294				  boolean_type_node, ovf,
4295				  build_int_cst (type, 0));
4296	      gimple_assign_set_rhs1 (use_stmt, cond);
4297	    }
4298	}
4299      update_stmt (use_stmt);
4300      if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4301	{
4302	  gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4303	  maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4304					 cfg_changed);
4305	  gsi_remove (&gsi2, true);
4306	  release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4307	}
4308    }
4309  if (maxval)
4310    {
4311      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4312      gsi_remove (&gsi2, true);
4313      if (add_stmt)
4314	{
4315	  gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4316					   new_lhs);
4317	  gsi2 = gsi_for_stmt (add_stmt);
4318	  gsi_replace (&gsi2, g, true);
4319	}
4320    }
4321  else if (code == BIT_NOT_EXPR)
4322    {
4323      *gsi = gsi_for_stmt (stmt);
4324      gsi_remove (gsi, true);
4325      release_ssa_name (lhs);
4326      return true;
4327    }
4328  return false;
4329}
4330
4331/* Return true if the target has support for divmod.  */
4332
4333static bool
4334target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4335{
4336  /* If target supports hardware divmod insn, use it for divmod.  */
4337  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4338    return true;
4339
4340  /* Check if libfunc for divmod is available.  */
4341  rtx libfunc = optab_libfunc (divmod_optab, mode);
4342  if (libfunc != NULL_RTX)
4343    {
4344      /* If optab_handler exists for div_optab, perhaps in a wider mode,
4345	 we don't want to use the libfunc even if it exists for the given mode.  */
4346      machine_mode div_mode;
4347      FOR_EACH_MODE_FROM (div_mode, mode)
4348	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4349	  return false;
4350
4351      return targetm.expand_divmod_libfunc != NULL;
4352    }
4353
4354  return false;
4355}
4356
4357/* Check if STMT is a candidate for the divmod transform.  */
4358
4359static bool
4360divmod_candidate_p (gassign *stmt)
4361{
4362  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4363  machine_mode mode = TYPE_MODE (type);
4364  optab divmod_optab, div_optab;
4365
4366  if (TYPE_UNSIGNED (type))
4367    {
4368      divmod_optab = udivmod_optab;
4369      div_optab = udiv_optab;
4370    }
4371  else
4372    {
4373      divmod_optab = sdivmod_optab;
4374      div_optab = sdiv_optab;
4375    }
4376
4377  tree op1 = gimple_assign_rhs1 (stmt);
4378  tree op2 = gimple_assign_rhs2 (stmt);
4379
4380  /* Disable the transform if either operand is a constant, since
4381     division-by-constant may have specialized expansion.  */
4382  if (CONSTANT_CLASS_P (op1))
4383    return false;
4384
4385  if (CONSTANT_CLASS_P (op2))
4386    {
4387      if (integer_pow2p (op2))
4388	return false;
4389
4390      if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4391	  && TYPE_PRECISION (type) <= BITS_PER_WORD)
4392	return false;
4393
4394      /* If the divisor is not a power of 2 and the precision is wider than
4395	 HWI, expand_divmod punts on that, so in that case it is better
4396	 to use the divmod optab or libfunc.  Similarly if choose_multiplier
4397	 might need pre/post shifts of BITS_PER_WORD or more.  */
4398    }
4399
4400  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4401     expand using the [su]divv optabs.  */
4402  if (TYPE_OVERFLOW_TRAPS (type))
4403    return false;
4404
4405  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4406    return false;
4407
4408  return true;
4409}
4410
4411/* This function looks for:
4412   t1 = a TRUNC_DIV_EXPR b;
4413   t2 = a TRUNC_MOD_EXPR b;
4414   and transforms it to the following sequence:
4415   complex_tmp = DIVMOD (a, b);
4416   t1 = REALPART_EXPR(complex_tmp);
4417   t2 = IMAGPART_EXPR(complex_tmp);
4418   For conditions enabling the transform see divmod_candidate_p().
4419
4420   The pass has three parts:
4421   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4422      other trunc_div_expr and trunc_mod_expr stmts.
4423   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4424      to stmts vector.
4425   3) Insert DIVMOD call just before top_stmt and update entries in
4426      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4427      IMAGPART_EXPR for mod).  */
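/* As a concrete illustration (arbitrary names), for

     <bb 2>:
     q_5 = x_2(D) / y_3(D);
     ...
     <bb 4>:   (dominated by bb 2)
     r_8 = x_2(D) % y_3(D);

   a single call
     divmod_tmp_9 = DIVMOD (x_2(D), y_3(D));
   is inserted just before the division; the division is then rewritten to
   REALPART_EXPR <divmod_tmp_9> and the modulo to
   IMAGPART_EXPR <divmod_tmp_9>.  */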
4428
4429static bool
4430convert_to_divmod (gassign *stmt)
4431{
4432  if (stmt_can_throw_internal (cfun, stmt)
4433      || !divmod_candidate_p (stmt))
4434    return false;
4435
4436  tree op1 = gimple_assign_rhs1 (stmt);
4437  tree op2 = gimple_assign_rhs2 (stmt);
4438
4439  imm_use_iterator use_iter;
4440  gimple *use_stmt;
4441  auto_vec<gimple *> stmts;
4442
4443  gimple *top_stmt = stmt;
4444  basic_block top_bb = gimple_bb (stmt);
4445
4446  /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4447     at least stmt and possibly other trunc_div/trunc_mod stmts
4448     having the same operands as stmt.  */
4449
4450  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4451    {
4452      if (is_gimple_assign (use_stmt)
4453	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4454	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4455	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4456	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4457	{
4458	  if (stmt_can_throw_internal (cfun, use_stmt))
4459	    continue;
4460
4461	  basic_block bb = gimple_bb (use_stmt);
4462
4463	  if (bb == top_bb)
4464	    {
4465	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4466		top_stmt = use_stmt;
4467	    }
4468	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4469	    {
4470	      top_bb = bb;
4471	      top_stmt = use_stmt;
4472	    }
4473	}
4474    }
4475
4476  tree top_op1 = gimple_assign_rhs1 (top_stmt);
4477  tree top_op2 = gimple_assign_rhs2 (top_stmt);
4478
4479  stmts.safe_push (top_stmt);
4480  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4481
4482  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4483     to the stmts vector.  The 2nd loop will always add stmt to the stmts
4484     vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4485     2nd loop ends up adding at least the single trunc_mod_expr stmt.  */
4486
4487  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4488    {
4489      if (is_gimple_assign (use_stmt)
4490	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4491	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4492	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4493	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4494	{
4495	  if (use_stmt == top_stmt
4496	      || stmt_can_throw_internal (cfun, use_stmt)
4497	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4498	    continue;
4499
4500	  stmts.safe_push (use_stmt);
4501	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4502	    div_seen = true;
4503	}
4504    }
4505
4506  if (!div_seen)
4507    return false;
4508
4509  /* Part 3: Create a call to the internal fn DIVMOD:
4510     divmod_tmp = DIVMOD (op1, op2).  */
4511
4512  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4513  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4514				 call_stmt, "divmod_tmp");
4515  gimple_call_set_lhs (call_stmt, res);
4516  /* We rejected throwing statements above.  */
4517  gimple_call_set_nothrow (call_stmt, true);
4518
4519  /* Insert the call before top_stmt.  */
4520  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4521  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4522
4523  widen_mul_stats.divmod_calls_inserted++;
4524
4525  /* Update all statements in stmts vector:
4526     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4527     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
4528
4529  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4530    {
4531      tree new_rhs;
4532
4533      switch (gimple_assign_rhs_code (use_stmt))
4534	{
4535	  case TRUNC_DIV_EXPR:
4536	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4537	    break;
4538
4539	  case TRUNC_MOD_EXPR:
4540	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4541	    break;
4542
4543	  default:
4544	    gcc_unreachable ();
4545	}
4546
4547      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4548      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4549      update_stmt (use_stmt);
4550    }
4551
4552  return true;
4553}
4554
4555/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4556   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
4557   value is true iff we converted the statement.  */
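/* For example (an illustrative sketch), with 32-bit operands the matched
   shape is roughly

     prod_3 = a_1(D) w* b_2(D);   (WIDEN_MULT_EXPR in a 64-bit type)
     hi_4 = prod_3 >> 32;         (the RSHIFT_EXPR statement handled here)

   which is rewritten to use MULT_HIGHPART_EXPR when the target provides a
   [us]mul_highpart pattern for the 32-bit mode; shift counts larger than
   the operand precision keep an additional shift of the high part.  */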
4558
4559static bool
4560convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4561{
4562  tree lhs = gimple_assign_lhs (stmt);
4563  tree stype = TREE_TYPE (lhs);
4564  tree sarg0 = gimple_assign_rhs1 (stmt);
4565  tree sarg1 = gimple_assign_rhs2 (stmt);
4566
4567  if (TREE_CODE (stype) != INTEGER_TYPE
4568      || TREE_CODE (sarg1) != INTEGER_CST
4569      || TREE_CODE (sarg0) != SSA_NAME
4570      || !tree_fits_uhwi_p (sarg1)
4571      || !has_single_use (sarg0))
4572    return false;
4573
4574  gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4575  if (!def)
4576    return false;
4577
4578  enum tree_code mcode = gimple_assign_rhs_code (def);
4579  if (mcode == NOP_EXPR)
4580    {
4581      tree tmp = gimple_assign_rhs1 (def);
4582      if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4583	return false;
4584      def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4585      if (!def)
4586	return false;
4587      mcode = gimple_assign_rhs_code (def);
4588    }
4589
4590  if (mcode != WIDEN_MULT_EXPR
4591      || gimple_bb (def) != gimple_bb (stmt))
4592    return false;
4593  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4594  if (TREE_CODE (mtype) != INTEGER_TYPE
4595      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4596    return false;
4597
4598  tree mop1 = gimple_assign_rhs1 (def);
4599  tree mop2 = gimple_assign_rhs2 (def);
4600  tree optype = TREE_TYPE (mop1);
4601  bool unsignedp = TYPE_UNSIGNED (optype);
4602  unsigned int prec = TYPE_PRECISION (optype);
4603
4604  if (unsignedp != TYPE_UNSIGNED (mtype)
4605      || TYPE_PRECISION (mtype) != 2 * prec)
4606    return false;
4607
4608  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4609  if (bits < prec || bits >= 2 * prec)
4610    return false;
4611
4612  /* For the time being, require operands to have the same signedness.  */
4613  if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4614    return false;
4615
4616  machine_mode mode = TYPE_MODE (optype);
4617  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4618  if (optab_handler (tab, mode) == CODE_FOR_nothing)
4619    return false;
4620
4621  location_t loc = gimple_location (stmt);
4622  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4623					   MULT_HIGHPART_EXPR, mop1, mop2);
4624  tree highpart2 = highpart1;
4625  tree ntype = optype;
4626
4627  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4628    {
4629      ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4630				    : signed_type_for (optype);
4631      highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4632    }
4633  if (bits > prec)
4634    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4635					RSHIFT_EXPR, highpart2,
4636					build_int_cst (ntype, bits - prec));
4637
4638  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4639  gsi_replace (gsi, new_stmt, true);
4640
4641  widen_mul_stats.highpart_mults_inserted++;
4642  return true;
4643}
4644
4645/* If target has spaceship<MODE>3 expander, pattern recognize
4646   <bb 2> [local count: 1073741824]:
4647   if (a_2(D) == b_3(D))
4648     goto <bb 6>; [34.00%]
4649   else
4650     goto <bb 3>; [66.00%]
4651
4652   <bb 3> [local count: 708669601]:
4653   if (a_2(D) < b_3(D))
4654     goto <bb 6>; [1.04%]
4655   else
4656     goto <bb 4>; [98.96%]
4657
4658   <bb 4> [local count: 701299439]:
4659   if (a_2(D) > b_3(D))
4660     goto <bb 5>; [48.89%]
4661   else
4662     goto <bb 6>; [51.11%]
4663
4664   <bb 5> [local count: 342865295]:
4665
4666   <bb 6> [local count: 1073741824]:
4667   and turn it into:
4668   <bb 2> [local count: 1073741824]:
4669   _1 = .SPACESHIP (a_2(D), b_3(D));
4670   if (_1 == 0)
4671     goto <bb 6>; [34.00%]
4672   else
4673     goto <bb 3>; [66.00%]
4674
4675   <bb 3> [local count: 708669601]:
4676   if (_1 == -1)
4677     goto <bb 6>; [1.04%]
4678   else
4679     goto <bb 4>; [98.96%]
4680
4681   <bb 4> [local count: 701299439]:
4682   if (_1 == 1)
4683     goto <bb 5>; [48.89%]
4684   else
4685     goto <bb 6>; [51.11%]
4686
4687   <bb 5> [local count: 342865295]:
4688
4689   <bb 6> [local count: 1073741824]:
4690   so that the backend can emit optimal comparison and
4691   conditional jump sequence.  */
4692
4693static void
4694optimize_spaceship (gimple *stmt)
4695{
4696  enum tree_code code = gimple_cond_code (stmt);
4697  if (code != EQ_EXPR && code != NE_EXPR)
4698    return;
4699  tree arg1 = gimple_cond_lhs (stmt);
4700  tree arg2 = gimple_cond_rhs (stmt);
4701  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
4702      || optab_handler (spaceship_optab,
4703			TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
4704      || operand_equal_p (arg1, arg2, 0))
4705    return;
4706
4707  basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
4708  edge em1 = NULL, e1 = NULL, e2 = NULL;
4709  bb1 = EDGE_SUCC (bb0, 1)->dest;
4710  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
4711    bb1 = EDGE_SUCC (bb0, 0)->dest;
4712
4713  gimple *g = last_stmt (bb1);
4714  if (g == NULL
4715      || gimple_code (g) != GIMPLE_COND
4716      || !single_pred_p (bb1)
4717      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4718	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4719	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4720	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4721      || !cond_only_block_p (bb1))
4722    return;
4723
4724  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4725			  ? LT_EXPR : GT_EXPR);
4726  switch (gimple_cond_code (g))
4727    {
4728    case LT_EXPR:
4729    case LE_EXPR:
4730      break;
4731    case GT_EXPR:
4732    case GE_EXPR:
4733      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
4734      break;
4735    default:
4736      return;
4737    }
4738
4739  for (int i = 0; i < 2; ++i)
4740    {
4741      /* With NaNs, </<=/>/>= are false, so we need to look for the
4742	 third comparison on the false edge from whatever non-equality
4743	 comparison the second comparison is.  */
4744      if (HONOR_NANS (TREE_TYPE (arg1))
4745	  && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
4746	continue;
4747
4748      bb2 = EDGE_SUCC (bb1, i)->dest;
4749      g = last_stmt (bb2);
4750      if (g == NULL
4751	  || gimple_code (g) != GIMPLE_COND
4752	  || !single_pred_p (bb2)
4753	  || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4754	      ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4755	      : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4756		 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4757	  || !cond_only_block_p (bb2)
4758	  || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
4759	continue;
4760
4761      enum tree_code ccode2
4762	= (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
4763      switch (gimple_cond_code (g))
4764	{
4765	case LT_EXPR:
4766	case LE_EXPR:
4767	  break;
4768	case GT_EXPR:
4769	case GE_EXPR:
4770	  ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
4771	  break;
4772	default:
4773	  continue;
4774	}
4775      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
4776	continue;
4777
4778      if ((ccode == LT_EXPR)
4779	  ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
4780	{
4781	  em1 = EDGE_SUCC (bb1, 1 - i);
4782	  e1 = EDGE_SUCC (bb2, 0);
4783	  e2 = EDGE_SUCC (bb2, 1);
4784	  if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
4785	    std::swap (e1, e2);
4786	}
4787      else
4788	{
4789	  e1 = EDGE_SUCC (bb1, 1 - i);
4790	  em1 = EDGE_SUCC (bb2, 0);
4791	  e2 = EDGE_SUCC (bb2, 1);
4792	  if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
4793	    std::swap (em1, e2);
4794	}
4795      break;
4796    }
4797
4798  if (em1 == NULL)
4799    {
4800      if ((ccode == LT_EXPR)
4801	  ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
4802	{
4803	  em1 = EDGE_SUCC (bb1, 1);
4804	  e1 = EDGE_SUCC (bb1, 0);
4805	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4806	}
4807      else
4808	{
4809	  em1 = EDGE_SUCC (bb1, 0);
4810	  e1 = EDGE_SUCC (bb1, 1);
4811	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4812	}
4813    }
4814
4815  g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
4816  tree lhs = make_ssa_name (integer_type_node);
4817  gimple_call_set_lhs (g, lhs);
4818  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4819  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4820
4821  gcond *cond = as_a <gcond *> (stmt);
4822  gimple_cond_set_lhs (cond, lhs);
4823  gimple_cond_set_rhs (cond, integer_zero_node);
4824  update_stmt (stmt);
4825
4826  g = last_stmt (bb1);
4827  cond = as_a <gcond *> (g);
4828  gimple_cond_set_lhs (cond, lhs);
4829  if (em1->src == bb1 && e2 != em1)
4830    {
4831      gimple_cond_set_rhs (cond, integer_minus_one_node);
4832      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
4833				  ? EQ_EXPR : NE_EXPR);
4834    }
4835  else
4836    {
4837      gcc_assert (e1->src == bb1 && e2 != e1);
4838      gimple_cond_set_rhs (cond, integer_one_node);
4839      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
4840				  ? EQ_EXPR : NE_EXPR);
4841    }
4842  update_stmt (g);
4843
4844  if (e2 != e1 && e2 != em1)
4845    {
4846      g = last_stmt (bb2);
4847      cond = as_a <gcond *> (g);
4848      gimple_cond_set_lhs (cond, lhs);
4849      if (em1->src == bb2)
4850	gimple_cond_set_rhs (cond, integer_minus_one_node);
4851      else
4852	{
4853	  gcc_assert (e1->src == bb2);
4854	  gimple_cond_set_rhs (cond, integer_one_node);
4855	}
4856      gimple_cond_set_code (cond,
4857			    (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
4858      update_stmt (g);
4859    }
4860
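  /* The .SPACESHIP result is -1, 0 or 1 for ordered operands, with 2
     reserved for the unordered case when NaNs are possible, hence the
     [-1, 2] value range recorded for LHS.  */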
4861  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
4862  wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
4863  set_range_info (lhs, VR_RANGE, wm1, w2);
4864}
4865
4866
4867/* Find integer multiplications where the operands are extended from
4868   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4869   or MULT_HIGHPART_EXPR where appropriate.  */
4870
4871namespace {
4872
4873const pass_data pass_data_optimize_widening_mul =
4874{
4875  GIMPLE_PASS, /* type */
4876  "widening_mul", /* name */
4877  OPTGROUP_NONE, /* optinfo_flags */
4878  TV_TREE_WIDEN_MUL, /* tv_id */
4879  PROP_ssa, /* properties_required */
4880  0, /* properties_provided */
4881  0, /* properties_destroyed */
4882  0, /* todo_flags_start */
4883  TODO_update_ssa, /* todo_flags_finish */
4884};
4885
4886class pass_optimize_widening_mul : public gimple_opt_pass
4887{
4888public:
4889  pass_optimize_widening_mul (gcc::context *ctxt)
4890    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4891  {}
4892
4893  /* opt_pass methods: */
4894  virtual bool gate (function *)
4895    {
4896      return flag_expensive_optimizations && optimize;
4897    }
4898
4899  virtual unsigned int execute (function *);
4900
4901}; // class pass_optimize_widening_mul
4902
4903/* Walker class to perform the transformation in a post-order dominator walk.  */
4904
4905class math_opts_dom_walker : public dom_walker
4906{
4907public:
4908  /* Constructor, CFG_CHANGED_P is a pointer to a boolean flag that will be
4909     set if walking modifies the CFG.  */
4910
4911  math_opts_dom_walker (bool *cfg_changed_p)
4912    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4913      m_cfg_changed_p (cfg_changed_p) {}
4914
4915  /* The actual actions performed in the walk.  */
4916
4917  virtual void after_dom_children (basic_block);
4918
4919  /* Set of results of chains of multiply and add statement combinations that
4920     were not transformed into FMAs because of active deferring.  */
4921  hash_set<tree> m_last_result_set;
4922
4923  /* Pointer to a flag of the user that needs to be set if CFG has been
4924     modified.  */
4925  bool *m_cfg_changed_p;
4926};
4927
4928void
4929math_opts_dom_walker::after_dom_children (basic_block bb)
4930{
4931  gimple_stmt_iterator gsi;
4932
4933  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4934
4935  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4936    {
4937      gimple *stmt = gsi_stmt (gsi);
4938      enum tree_code code;
4939
4940      if (is_gimple_assign (stmt))
4941	{
4942	  code = gimple_assign_rhs_code (stmt);
4943	  switch (code)
4944	    {
4945	    case MULT_EXPR:
4946	      if (!convert_mult_to_widen (stmt, &gsi)
4947		  && !convert_expand_mult_copysign (stmt, &gsi)
4948		  && convert_mult_to_fma (stmt,
4949					  gimple_assign_rhs1 (stmt),
4950					  gimple_assign_rhs2 (stmt),
4951					  &fma_state))
4952		{
4953		  gsi_remove (&gsi, true);
4954		  release_defs (stmt);
4955		  continue;
4956		}
4957	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4958	      break;
4959
4960	    case PLUS_EXPR:
4961	    case MINUS_EXPR:
4962	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
4963		match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4964	      break;
4965
4966	    case BIT_NOT_EXPR:
4967	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4968		continue;
4969	      break;
4970
4971	    case TRUNC_MOD_EXPR:
4972	      convert_to_divmod (as_a<gassign *> (stmt));
4973	      break;
4974
4975	    case RSHIFT_EXPR:
4976	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4977	      break;
4978
4979	    default:;
4980	    }
4981	}
4982      else if (is_gimple_call (stmt))
4983	{
4984	  switch (gimple_call_combined_fn (stmt))
4985	    {
4986	    CASE_CFN_POW:
4987	      if (gimple_call_lhs (stmt)
4988		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4989		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4990				 &dconst2)
4991		  && convert_mult_to_fma (stmt,
4992					  gimple_call_arg (stmt, 0),
4993					  gimple_call_arg (stmt, 0),
4994					  &fma_state))
4995		{
4996		  unlink_stmt_vdef (stmt);
4997		  if (gsi_remove (&gsi, true)
4998		      && gimple_purge_dead_eh_edges (bb))
4999		    *m_cfg_changed_p = true;
5000		  release_defs (stmt);
5001		  continue;
5002		}
5003	      break;
5004
5005	    case CFN_COND_MUL:
5006	      if (convert_mult_to_fma (stmt,
5007				       gimple_call_arg (stmt, 1),
5008				       gimple_call_arg (stmt, 2),
5009				       &fma_state,
5010				       gimple_call_arg (stmt, 0)))
5011
5012		{
5013		  gsi_remove (&gsi, true);
5014		  release_defs (stmt);
5015		  continue;
5016		}
5017	      break;
5018
5019	    case CFN_LAST:
5020	      cancel_fma_deferring (&fma_state);
5021	      break;
5022
5023	    default:
5024	      break;
5025	    }
5026	}
5027      else if (gimple_code (stmt) == GIMPLE_COND)
5028	optimize_spaceship (stmt);
5029      gsi_next (&gsi);
5030    }
5031  if (fma_state.m_deferring_p
5032      && fma_state.m_initial_phi)
5033    {
5034      gcc_checking_assert (fma_state.m_last_result);
5035      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5036						 &m_last_result_set))
5037	cancel_fma_deferring (&fma_state);
5038      else
5039	m_last_result_set.add (fma_state.m_last_result);
5040    }
5041}
5042
5043
5044unsigned int
5045pass_optimize_widening_mul::execute (function *fun)
5046{
5047  bool cfg_changed = false;
5048
5049  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5050  calculate_dominance_info (CDI_DOMINATORS);
5051  renumber_gimple_stmt_uids (cfun);
5052
5053  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5054
5055  statistics_counter_event (fun, "widening multiplications inserted",
5056			    widen_mul_stats.widen_mults_inserted);
5057  statistics_counter_event (fun, "widening maccs inserted",
5058			    widen_mul_stats.maccs_inserted);
5059  statistics_counter_event (fun, "fused multiply-adds inserted",
5060			    widen_mul_stats.fmas_inserted);
5061  statistics_counter_event (fun, "divmod calls inserted",
5062			    widen_mul_stats.divmod_calls_inserted);
5063  statistics_counter_event (fun, "highpart multiplications inserted",
5064			    widen_mul_stats.highpart_mults_inserted);
5065
5066  return cfg_changed ? TODO_cleanup_cfg : 0;
5067}
5068
5069} // anon namespace
5070
5071gimple_opt_pass *
5072make_pass_optimize_widening_mul (gcc::context *ctxt)
5073{
5074  return new pass_optimize_widening_mul (ctxt);
5075}
5076