tree-ssa-sink.c revision 1.10
/* Code sinking for trees
   Copyright (C) 2001-2018 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "params.h"

/* TODO:
   1. Sinking store only using scalar promotion (IE without moving the RHS):

   *q = p;
   p = p + 1;
   if (something)
     *q = <not p>;
   else
     y = *q;


   should become
   sinktemp = p;
   p = p + 1;
   if (something)
     *q = <not p>;
   else
   {
     *q = sinktemp;
     y = *q;
   }
   Store copy propagation will take care of the store elimination above.


   2. Sinking using Partial Dead Code Elimination.  */


static struct
{
  /* The number of statements sunk down the flowgraph by code sinking.  */
  int sunk;

} sink_stats;


/* Given a PHI and one of its arguments (DEF), find the source block of
   the edge corresponding to that argument, and return it.  If the argument
   occurs more than once in the PHI node, we return NULL.  */
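/* For example, given the (hypothetical) PHI node

     x_3 = PHI <x_1(2), x_2(4)>

   and DEF x_1, the result is bb 2, the source of the edge carrying x_1.
   Had x_1 appeared on both incoming edges, the result would be NULL.  */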

static basic_block
find_bb_for_arg (gphi *phi, tree def)
{
  size_t i;
  bool foundone = false;
  basic_block result = NULL;
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    if (PHI_ARG_DEF (phi, i) == def)
      {
	if (foundone)
	  return NULL;
	foundone = true;
	result = gimple_phi_arg_edge (phi, i)->src;
      }
  return result;
}

/* When the first immediate use is in a statement, then return true if all
   immediate uses of the definition DEF_P are in the same statement.
   We could also handle the case where the first immediate use is in a phi
   node, and all the other uses are in phis in the same basic block, but this
   requires some expensive checking later (you have to make sure no def/vdef
   in the statement occurs for multiple edges in the various phi nodes it's
   used in, so that you only have one place you can sink it to).  */
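/* For instance (hypothetical SSA names): if x_1 is used only by
   "y_2 = x_1 + x_1", all non-debug uses are in one statement, so we
   return true; an additional use in a different statement would make
   the result false.  */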

static bool
all_immediate_uses_same_place (def_operand_p def_p)
{
  tree var = DEF_FROM_PTR (def_p);
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  gimple *firstuse = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;
      if (firstuse == NULL)
	firstuse = USE_STMT (use_p);
      else
	if (firstuse != USE_STMT (use_p))
	  return false;
    }

  return true;
}

/* Find the nearest common dominator of all of the immediate uses of the
   definition DEF_P, and set *DEBUG_STMTS to true if any of those uses
   occurs in a debug statement.  */
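/* For instance (hypothetical block numbers): with uses of x_1 in bb 4 and
   bb 6, whose nearest common dominator is bb 2, the result is bb 2.  A use
   in a PHI argument is attributed to the corresponding edge's source block,
   since that is where an insertion would have to take place.  */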

static basic_block
nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
{
  tree var = DEF_FROM_PTR (def_p);
  auto_bitmap blocks;
  basic_block commondom;
  unsigned int j;
  bitmap_iterator bi;
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
    {
      gimple *usestmt = USE_STMT (use_p);
      basic_block useblock;

      if (gphi *phi = dyn_cast <gphi *> (usestmt))
	{
	  int idx = PHI_ARG_INDEX_FROM_USE (use_p);

	  useblock = gimple_phi_arg_edge (phi, idx)->src;
	}
      else if (is_gimple_debug (usestmt))
	{
	  *debug_stmts = true;
	  continue;
	}
      else
	{
	  useblock = gimple_bb (usestmt);
	}

      /* Short circuit.  Nothing dominates the entry block.  */
      if (useblock == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	return NULL;

      bitmap_set_bit (blocks, useblock->index);
    }
  commondom = BASIC_BLOCK_FOR_FN (cfun, bitmap_first_set_bit (blocks));
  EXECUTE_IF_SET_IN_BITMAP (blocks, 0, j, bi)
    commondom = nearest_common_dominator (CDI_DOMINATORS, commondom,
					  BASIC_BLOCK_FOR_FN (cfun, j));
  return commondom;
}

/* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
   tree, return the best basic block between them (inclusive) to place
   statements.

   We want the most control dependent block in the shallowest loop nest.

   If the resulting block is in a shallower loop nest, then use it.  Else
   only use the resulting block if it has significantly lower execution
   frequency than EARLY_BB to avoid gratuitous statement movement.  We
   consider statements with VOPS more desirable to move.

   This pass would obviously benefit from PDO as it utilizes block
   frequencies.  It would also benefit from recomputing frequencies
   if profile data is not available since frequencies often get out
   of sync with reality.  */
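/* As an illustration (loop depths hypothetical): if EARLY_BB sits in an
   inner loop at depth 2 and LATE_BB follows that loop at depth 1, the
   dominator walk below finds the shallower nest and returns LATE_BB, since
   sinking out of a loop is always considered a win.  When the depths are
   equal, the candidate is used only if its execution count is sufficiently
   below EARLY_BB's, per the threshold computed below.  */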

static basic_block
select_best_block (basic_block early_bb,
		   basic_block late_bb,
		   gimple *stmt)
{
  basic_block best_bb = late_bb;
  basic_block temp_bb = late_bb;
  int threshold;

  while (temp_bb != early_bb)
    {
      /* If we've moved into a shallower loop nest, then that becomes
	 our best block.  */
      if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
	best_bb = temp_bb;

      /* Walk up the dominator tree, hopefully we'll find a shallower
	 loop nest.  */
      temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
    }

  /* If we found a shallower loop nest, then we always consider that
     a win.  This will always give us the most control dependent block
     within that loop nest.  */
  if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
    return best_bb;

  /* Get the sinking threshold.  If the statement to be moved has memory
     operands, then increase the threshold by 7% as those are even more
     profitable to avoid, clamping at 100%.  */
  threshold = PARAM_VALUE (PARAM_SINK_FREQUENCY_THRESHOLD);
  if (gimple_vuse (stmt) || gimple_vdef (stmt))
    {
      threshold += 7;
      if (threshold > 100)
	threshold = 100;
    }
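
  /* For example, if PARAM_SINK_FREQUENCY_THRESHOLD is 75, a statement
     with virtual operands may be sunk to a block executing less than 82%
     as often as EARLY_BB, while other statements require less than 75%.  */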

  /* If BEST_BB is at the same nesting level, then require it to have
     significantly lower execution frequency to avoid gratuitous movement.  */
  if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
      /* If the result of the comparison is unknown, prefer EARLY_BB.
	 Thus use !(...>=..) rather than (...<...).  */
      && !(best_bb->count.apply_scale (100, 1)
	   >= early_bb->count.apply_scale (threshold, 1)))
    return best_bb;

  /* No better block found, so return EARLY_BB, which happens to be the
     statement's original block.  */
  return early_bb;
}

/* Given a statement (STMT) and the basic block it is currently in (FROMBB),
   determine the location to sink the statement to, if any.
   Returns true if there is such a location; in that case, TOGSI points to
   the statement before which STMT should be moved.  *ZERO_USES_P is set to
   true when STMT's definition has no uses.  */

static bool
statement_sink_location (gimple *stmt, basic_block frombb,
			 gimple_stmt_iterator *togsi, bool *zero_uses_p)
{
  gimple *use;
  use_operand_p one_use = NULL_USE_OPERAND_P;
  basic_block sinkbb;
  use_operand_p use_p;
  def_operand_p def_p;
  ssa_op_iter iter;
  imm_use_iterator imm_iter;

  *zero_uses_p = false;

  /* We can only sink assignments and non-looping const/pure calls.  */
  int cf;
  if (!is_gimple_assign (stmt)
      && (!is_gimple_call (stmt)
	  || !((cf = gimple_call_flags (stmt)) & (ECF_CONST|ECF_PURE))
	  || (cf & ECF_LOOPING_CONST_OR_PURE)))
    return false;

  /* We can only sink stmts with a single definition.  */
  def_p = single_ssa_def_operand (stmt, SSA_OP_ALL_DEFS);
  if (def_p == NULL_DEF_OPERAND_P)
    return false;

  /* There are a few classes of things we can't or don't move, some because we
     don't have code to handle it, some because it's not profitable and some
     because it's not legal.

     We can't sink things that may be global stores, at least not without
     calculating a lot more information, because we may cause it to no longer
     be seen by an external routine that needs it depending on where it gets
     moved to.

     We can't sink statements that end basic blocks without splitting the
     incoming edge for the sink location to place it there.

     We can't sink statements that have volatile operands.

     We don't want to sink dead code, so anything with 0 immediate uses is not
     sunk.

     Don't sink BLKmode assignments if current function has any local explicit
     register variables, as BLKmode assignments may involve memcpy or memset
     calls or, on some targets, inline expansion thereof that sometimes need
     to use specific hard registers.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_side_effects (stmt)
      || (cfun->has_local_explicit_reg_vars
	  && TYPE_MODE (TREE_TYPE (gimple_get_lhs (stmt))) == BLKmode))
    return false;

  /* Return if there are no immediate uses of this stmt.  */
  if (has_zero_uses (DEF_FROM_PTR (def_p)))
    {
      *zero_uses_p = true;
      return false;
    }

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (DEF_FROM_PTR (def_p)))
    return false;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    {
      tree use = USE_FROM_PTR (use_p);
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use))
	return false;
    }

  use = NULL;

  /* If stmt is a store, the one and only use needs to be the VOP
     merging PHI node.  */
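  /* For example (hypothetical virtual operands): the store

	 # .MEM_4 = VDEF <.MEM_3>
	 *p_1 = x_2;

     remains a candidate when .MEM_4 is consumed only by a virtual PHI such
     as .MEM_5 = PHI <.MEM_4(3), .MEM_6(4)>; later killing definitions of
     the same lhs are skipped below, as they do not count as uses.  */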
  if (virtual_operand_p (DEF_FROM_PTR (def_p)))
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  gimple *use_stmt = USE_STMT (use_p);

	  /* A killing definition is not a use.  */
	  if ((gimple_has_lhs (use_stmt)
	       && operand_equal_p (gimple_get_lhs (stmt),
				   gimple_get_lhs (use_stmt), 0))
	      || stmt_kills_ref_p (use_stmt, gimple_get_lhs (stmt)))
	    {
	      /* If use_stmt is or might be a nop assignment then USE_STMT
	         acts as a use as well as a definition.  */
	      if (stmt != use_stmt
		  && ref_maybe_used_by_stmt_p (use_stmt,
					       gimple_get_lhs (stmt)))
		return false;
	      continue;
	    }

	  if (gimple_code (use_stmt) != GIMPLE_PHI)
	    return false;

	  if (use
	      && use != use_stmt)
	    return false;

	  use = use_stmt;
	}
      if (!use)
	return false;
    }
  /* If the immediate uses are not all in the same place, find the nearest
     common dominator of all the immediate uses.  For PHI nodes, we have to
     find the nearest common dominator of all of the predecessor blocks, since
     that is where insertion would have to take place.  */
  else if (gimple_vuse (stmt)
	   || !all_immediate_uses_same_place (def_p))
    {
      bool debug_stmts = false;
      basic_block commondom = nearest_common_dominator_of_uses (def_p,
								&debug_stmts);

      if (commondom == frombb)
	return false;

      /* If this is a load then do not sink past any stores.
	 ???  This is overly simple but cheap.  We basically look
	 for an existing load with the same VUSE in the path to one
	 of the sink candidate blocks, and adjust commondom to the
	 candidate block nearest to the original commondom.  */
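      /* E.g. (hypothetical): for a load with VUSE .MEM_3, another statement
	 using .MEM_3 on the path to a sink candidate shows that memory is
	 unclobbered up to that point, so the load may be sunk that far but
	 no further.  */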
      if (gimple_vuse (stmt))
	{
	  /* Do not sink loads from hard registers.  */
	  if (gimple_assign_single_p (stmt)
	      && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL
	      && DECL_HARD_REGISTER (gimple_assign_rhs1 (stmt)))
	    return false;

	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  basic_block found = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt))
	    {
	      gimple *use_stmt = USE_STMT (use_p);
	      basic_block bb = gimple_bb (use_stmt);
	      /* For PHI nodes the block we know something about
		 is the incoming block with the use.  */
	      if (gimple_code (use_stmt) == GIMPLE_PHI)
		bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
	      /* Any dominator of commondom would be ok with
	         adjusting commondom to that block.  */
	      bb = nearest_common_dominator (CDI_DOMINATORS, bb, commondom);
	      if (!found)
		found = bb;
	      else if (dominated_by_p (CDI_DOMINATORS, bb, found))
		found = bb;
	      /* If we can't improve, stop.  */
	      if (found == commondom)
		break;
	    }
	  commondom = found;
	  if (commondom == frombb)
	    return false;
	}

      /* Our common dominator has to be dominated by frombb in order to be a
	 trivially safe place to put this statement, since it has multiple
	 uses.  */
      if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
	return false;

      commondom = select_best_block (frombb, commondom, stmt);

      if (commondom == frombb)
	return false;

      *togsi = gsi_after_labels (commondom);

      return true;
    }
  else
    {
      FOR_EACH_IMM_USE_FAST (one_use, imm_iter, DEF_FROM_PTR (def_p))
	{
	  if (is_gimple_debug (USE_STMT (one_use)))
	    continue;
	  break;
	}
      use = USE_STMT (one_use);

      if (gimple_code (use) != GIMPLE_PHI)
	{
	  sinkbb = gimple_bb (use);
	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);

	  if (sinkbb == frombb)
	    return false;

	  if (sinkbb == gimple_bb (use))
	    *togsi = gsi_for_stmt (use);
	  else
	    *togsi = gsi_after_labels (sinkbb);

	  return true;
	}
    }

  sinkbb = find_bb_for_arg (as_a <gphi *> (use), DEF_FROM_PTR (def_p));

  /* This can happen if there are multiple uses in a PHI.  */
  if (!sinkbb)
    return false;

  sinkbb = select_best_block (frombb, sinkbb, stmt);
  if (!sinkbb || sinkbb == frombb)
    return false;

  /* If the latch block is empty, don't make it non-empty by sinking
     something into it.  */
  if (sinkbb == frombb->loop_father->latch
      && empty_block_p (sinkbb))
    return false;

  *togsi = gsi_after_labels (sinkbb);

  return true;
}

/* Perform code sinking on BB.  */

static void
sink_code_in_bb (basic_block bb)
{
  basic_block son;
  gimple_stmt_iterator gsi;
  edge_iterator ei;
  edge e;
  bool last = true;

  /* If this block doesn't dominate anything, there can't be any place to sink
     the statements to.  */
  if (first_dom_son (CDI_DOMINATORS, bb) == NULL)
    goto earlyout;

  /* We can't move things across abnormal edges, so don't try.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_ABNORMAL)
      goto earlyout;

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_stmt_iterator togsi;
      bool zero_uses_p;

      if (!statement_sink_location (stmt, bb, &togsi, &zero_uses_p))
	{
	  gimple_stmt_iterator saved = gsi;
	  if (!gsi_end_p (gsi))
	    gsi_prev (&gsi);
	  /* If we face a dead stmt, remove it as it possibly blocks
	     sinking of uses.  */
	  if (zero_uses_p
	      && ! gimple_vdef (stmt))
	    {
	      gsi_remove (&saved, true);
	      release_defs (stmt);
	    }
	  else
	    last = false;
	  continue;
	}
      if (dump_file)
	{
	  fprintf (dump_file, "Sinking ");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
	  fprintf (dump_file, " from bb %d to bb %d\n",
		   bb->index, (gsi_bb (togsi))->index);
	}

      /* Update virtual operands of statements in the path we
         do not sink to.  */
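      /* E.g. (hypothetical virtual operands): when the store defining
	 .MEM_4 = VDEF <.MEM_3> is sunk away from this path, statements
	 left here that used .MEM_4 are rewritten below to use .MEM_3,
	 the memory state before the store.  */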
      if (gimple_vdef (stmt))
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *vuse_stmt;

	  FOR_EACH_IMM_USE_STMT (vuse_stmt, iter, gimple_vdef (stmt))
	    if (gimple_code (vuse_stmt) != GIMPLE_PHI)
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, gimple_vuse (stmt));
	}

      /* If the sink location is at the end of its basic block, we need to
         insert at the end of that basic block.  */
      if (gsi_end_p (togsi))
	gsi_move_to_bb_end (&gsi, gsi_bb (togsi));
      else
	gsi_move_before (&gsi, &togsi);

      sink_stats.sunk++;

      /* If we've just removed the last statement of the BB, the
	 gsi_end_p() test below would fail, but gsi_prev() would have
	 succeeded, and we want it to succeed.  So we keep track of
	 whether we're at the last statement and pick up the new last
	 statement.  */
      if (last)
	{
	  gsi = gsi_last_bb (bb);
	  continue;
	}

      last = false;
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
    }
 earlyout:
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    {
      sink_code_in_bb (son);
    }
}

/* Perform code sinking.
   This moves code down the flowgraph when we know it would be
   profitable to do so, or it wouldn't increase the number of
   executions of the statement.

   IE given

   a_1 = b + c;
   if (<something>)
   {
   }
   else
   {
     foo (&b, &c);
     a_5 = b + c;
   }
   a_6 = PHI (a_5, a_1);
   USE a_6.

   we'll transform this into:

   if (<something>)
   {
      a_1 = b + c;
   }
   else
   {
      foo (&b, &c);
      a_5 = b + c;
   }
   a_6 = PHI (a_5, a_1);
   USE a_6.

   Note that this reduces the number of computations of a = b + c to 1
   when we take the else edge, instead of 2.
*/
namespace {

const pass_data pass_data_sink_code =
{
  GIMPLE_PASS, /* type */
  "sink", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINK, /* tv_id */
  /* PROP_no_crit_edges is ensured by running split_critical_edges in
     pass_sink_code::execute ().  */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_sink_code : public gimple_opt_pass
{
public:
  pass_sink_code (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_sink_code, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_sink != 0; }
  virtual unsigned int execute (function *);

}; // class pass_sink_code

unsigned int
pass_sink_code::execute (function *fun)
{
  loop_optimizer_init (LOOPS_NORMAL);
  split_critical_edges ();
  connect_infinite_loops_to_exit ();
  memset (&sink_stats, 0, sizeof (sink_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  sink_code_in_bb (EXIT_BLOCK_PTR_FOR_FN (fun));
  statistics_counter_event (fun, "Sunk statements", sink_stats.sunk);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_sink_code (gcc::context *ctxt)
{
  return new pass_sink_code (ctxt);
}