1/* Post reload partially redundant load elimination
2   Copyright (C) 2004-2013 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
24#include "diagnostic-core.h"
25
26#include "rtl.h"
27#include "tree.h"
28#include "tm_p.h"
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "flags.h"
32#include "insn-config.h"
33#include "recog.h"
34#include "basic-block.h"
35#include "function.h"
36#include "expr.h"
37#include "except.h"
38#include "intl.h"
39#include "obstack.h"
40#include "hashtab.h"
41#include "params.h"
42#include "target.h"
43#include "tree-pass.h"
44#include "dbgcnt.h"
45
/* The following code implements gcse after reload.  The purpose of this
   pass is to clean up redundant loads generated by reload and other
   optimizations that come after gcse.  It searches for simple inter-block
   redundancies and tries to eliminate them by adding moves and loads
   in cold places.

   Perform partially redundant load elimination: try to eliminate redundant
   loads created by the reload pass.  We look for fully or partially
   redundant loads fed by one or more loads/stores in predecessor BBs,
   and try adding loads to make them fully redundant.  We also check
   whether it is worth adding loads to be able to delete the redundant
   load.

   Algorithm:
   1. Build the available expressions hash table:
       For each load/store instruction, if the loaded/stored memory doesn't
       change until the end of the basic block, add this memory expression
       to the hash table.
   2. Perform redundancy elimination:
      For each load instruction do the following:
	 Perform partial redundancy elimination; check whether it is worth
	 adding loads to make the load fully redundant.  If so, add the
	 loads and register copies and delete the load.
   3. Delete instructions made redundant in step 2.

   Future enhancement:
     If the loaded register is used/defined between the load and some
     store, look for some other free register between the load and all
     its stores, and replace the load with a copy from this register to
     the loaded register.
*/
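
/* As an illustrative sketch (block and register numbers here are made up,
   not taken from real compiler output), consider a block B3 with two
   predecessors B1 and B2, where B1 contains

       (set (reg:SI 1) (mem:SI (reg:SI 4)))

   and neither reg 4 nor the loaded memory location is modified later in
   B1, so the MEM is available out of B1 but not out of B2, and B3 starts
   with the partially redundant load

       (set (reg:SI 0) (mem:SI (reg:SI 4)))

   If the profile counts make it worthwhile, the pass inserts the move
   (set (reg:SI 0) (reg:SI 1)) on the edge B1->B3, inserts a copy of the
   load on the edge B2->B3, and deletes the load in B3, which has become
   fully redundant.  */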
76
77
78/* Keep statistics of this pass.  */
79static struct
80{
81  int moves_inserted;
82  int copies_inserted;
83  int insns_deleted;
84} stats;
85
/* We need to keep a hash table of expressions.  The table entries are of
   type 'struct expr', and for each expression there is a singly-linked
   list of occurrences.  */
89
90/* The table itself.  */
91static htab_t expr_table;
92
93/* Expression elements in the hash table.  */
94struct expr
95{
96  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
97  rtx expr;
98
  /* The cached hash value for this entry.  */
100  hashval_t hash;
101
  /* List of available occurrences in basic blocks in the function.  */
103  struct occr *avail_occr;
104};
105
106static struct obstack expr_obstack;
107
108/* Occurrence of an expression.
109   There is at most one occurrence per basic block.  If a pattern appears
110   more than once, the last appearance is used.  */
111
112struct occr
113{
114  /* Next occurrence of this expression.  */
115  struct occr *next;
116  /* The insn that computes the expression.  */
117  rtx insn;
  /* Nonzero if this occurrence has been deleted.  */
119  char deleted_p;
120};
121
122static struct obstack occr_obstack;
123
/* The following structure records, for one predecessor edge, an occurrence
   (or the absence of an occurrence) of the redundant expression.  */
126struct unoccr
127{
128  struct unoccr *next;
129  edge pred;
130  rtx insn;
131};
132
133static struct obstack unoccr_obstack;
134
/* Array indexed by hard register number.  Each element is the CUID of the
   insn that last set that register, counted since the start of the current
   basic block.

   This array is used during the building of the hash table (step 1) to
   determine if a reg is killed before the end of a basic block.

   It is also used when eliminating partial redundancies (step 2) to see
   if a reg was modified since the start of a basic block.  */
144static int *reg_avail_info;
145
146/* A list of insns that may modify memory within the current basic block.  */
147struct modifies_mem
148{
149  rtx insn;
150  struct modifies_mem *next;
151};
152static struct modifies_mem *modifies_mem_list;
153
/* The modifies_mem structs also go on an obstack, but this obstack is
   freed each time after completing the analysis or transformations on
   a basic block.  So we allocate a dummy modifies_mem_obstack_bottom
   object on the obstack to keep track of the bottom of the obstack.  */
158static struct obstack modifies_mem_obstack;
159static struct modifies_mem  *modifies_mem_obstack_bottom;
160
161/* Mapping of insn UIDs to CUIDs.
162   CUIDs are like UIDs except they increase monotonically in each basic
163   block, have no gaps, and only apply to real insns.  */
164static int *uid_cuid;
165#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
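
/* For example (UID values here are illustrative only): if the real insns
   of a block have UIDs 23, 27 and 30, they are assigned consecutive CUIDs
   such as 4, 5 and 6, while a NOTE between them shares the CUID of the
   following real insn.  */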
166
167
168/* Helpers for memory allocation/freeing.  */
169static void alloc_mem (void);
170static void free_mem (void);
171
172/* Support for hash table construction and transformations.  */
173static bool oprs_unchanged_p (rtx, rtx, bool);
174static void record_last_reg_set_info (rtx, rtx);
175static void record_last_reg_set_info_regno (rtx, int);
176static void record_last_mem_set_info (rtx);
177static void record_last_set_info (rtx, const_rtx, void *);
178static void record_opr_changes (rtx);
179
180static void find_mem_conflicts (rtx, const_rtx, void *);
181static int load_killed_in_block_p (int, rtx, bool);
182static void reset_opr_set_tables (void);
183
184/* Hash table support.  */
185static hashval_t hash_expr (rtx, int *);
186static hashval_t hash_expr_for_htab (const void *);
187static int expr_equiv_p (const void *, const void *);
188static void insert_expr_in_table (rtx, rtx);
189static struct expr *lookup_expr_in_table (rtx);
190static int dump_hash_table_entry (void **, void *);
191static void dump_hash_table (FILE *);
192
193/* Helpers for eliminate_partially_redundant_load.  */
194static bool reg_killed_on_edge (rtx, edge);
195static bool reg_used_on_edge (rtx, edge);
196
197static rtx get_avail_load_store_reg (rtx);
198
199static bool bb_has_well_behaved_predecessors (basic_block);
200static struct occr* get_bb_avail_insn (basic_block, struct occr *);
201static void hash_scan_set (rtx);
202static void compute_hash_table (void);
203
/* The workhorses of this pass.  */
205static void eliminate_partially_redundant_load (basic_block,
206						rtx,
207						struct expr *);
208static void eliminate_partially_redundant_loads (void);
209
210
211/* Allocate memory for the CUID mapping array and register/memory
212   tracking tables.  */
213
214static void
215alloc_mem (void)
216{
217  int i;
218  basic_block bb;
219  rtx insn;
220
221  /* Find the largest UID and create a mapping from UIDs to CUIDs.  */
222  uid_cuid = XCNEWVEC (int, get_max_uid () + 1);
223  i = 1;
224  FOR_EACH_BB (bb)
225    FOR_BB_INSNS (bb, insn)
226      {
227        if (INSN_P (insn))
228	  uid_cuid[INSN_UID (insn)] = i++;
229	else
230	  uid_cuid[INSN_UID (insn)] = i;
231      }
232
233  /* Allocate the available expressions hash table.  We don't want to
234     make the hash table too small, but unnecessarily making it too large
235     also doesn't help.  The i/4 is a gcse.c relic, and seems like a
236     reasonable choice.  */
237  expr_table = htab_create (MAX (i / 4, 13),
238			    hash_expr_for_htab, expr_equiv_p, NULL);
239
240  /* We allocate everything on obstacks because we often can roll back
241     the whole obstack to some point.  Freeing obstacks is very fast.  */
242  gcc_obstack_init (&expr_obstack);
243  gcc_obstack_init (&occr_obstack);
244  gcc_obstack_init (&unoccr_obstack);
245  gcc_obstack_init (&modifies_mem_obstack);
246
247  /* Working array used to track the last set for each register
248     in the current block.  */
249  reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int));
250
251  /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
252     can roll it back in reset_opr_set_tables.  */
253  modifies_mem_obstack_bottom =
254    (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
255					   sizeof (struct modifies_mem));
256}
257
258/* Free memory allocated by alloc_mem.  */
259
260static void
261free_mem (void)
262{
263  free (uid_cuid);
264
265  htab_delete (expr_table);
266
267  obstack_free (&expr_obstack, NULL);
268  obstack_free (&occr_obstack, NULL);
269  obstack_free (&unoccr_obstack, NULL);
270  obstack_free (&modifies_mem_obstack, NULL);
271
272  free (reg_avail_info);
273}
274
275
/* Hash expression X.
   DO_NOT_RECORD_P is set nonzero if a volatile operand is found or if
   the expression contains something we don't want to insert in the
   table.  */
280
281static hashval_t
282hash_expr (rtx x, int *do_not_record_p)
283{
284  *do_not_record_p = 0;
285  return hash_rtx (x, GET_MODE (x), do_not_record_p,
286		   NULL,  /*have_reg_qty=*/false);
287}
288
289/* Callback for hashtab.
290   Return the hash value for expression EXP.  We don't actually hash
   here; we just return the cached hash value.  */
292
293static hashval_t
294hash_expr_for_htab (const void *expp)
295{
296  const struct expr *const exp = (const struct expr *) expp;
297  return exp->hash;
298}
299
300/* Callback for hashtab.
301   Return nonzero if exp1 is equivalent to exp2.  */
302
303static int
304expr_equiv_p (const void *exp1p, const void *exp2p)
305{
306  const struct expr *const exp1 = (const struct expr *) exp1p;
307  const struct expr *const exp2 = (const struct expr *) exp2p;
308  int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);
309
310  gcc_assert (!equiv_p || exp1->hash == exp2->hash);
311  return equiv_p;
312}
313
314
/* Insert expression X from INSN into the expression hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.  */
318
319static void
320insert_expr_in_table (rtx x, rtx insn)
321{
322  int do_not_record_p;
323  hashval_t hash;
324  struct expr *cur_expr, **slot;
325  struct occr *avail_occr, *last_occr = NULL;
326
327  hash = hash_expr (x, &do_not_record_p);
328
329  /* Do not insert expression in the table if it contains volatile operands,
330     or if hash_expr determines the expression is something we don't want
331     to or can't handle.  */
332  if (do_not_record_p)
333    return;
334
335  /* We anticipate that redundant expressions are rare, so for convenience
336     allocate a new hash table element here already and set its fields.
337     If we don't do this, we need a hack with a static struct expr.  Anyway,
338     obstack_free is really fast and one more obstack_alloc doesn't hurt if
339     we're going to see more expressions later on.  */
340  cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
341					    sizeof (struct expr));
342  cur_expr->expr = x;
343  cur_expr->hash = hash;
344  cur_expr->avail_occr = NULL;
345
346  slot = (struct expr **) htab_find_slot_with_hash (expr_table, cur_expr,
347						    hash, INSERT);
348
349  if (! (*slot))
350    /* The expression isn't found, so insert it.  */
351    *slot = cur_expr;
352  else
353    {
354      /* The expression is already in the table, so roll back the
355	 obstack and use the existing table entry.  */
356      obstack_free (&expr_obstack, cur_expr);
357      cur_expr = *slot;
358    }
359
360  /* Search for another occurrence in the same basic block.  */
361  avail_occr = cur_expr->avail_occr;
362  while (avail_occr
363	 && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
364    {
365      /* If an occurrence isn't found, save a pointer to the end of
366	 the list.  */
367      last_occr = avail_occr;
368      avail_occr = avail_occr->next;
369    }
370
371  if (avail_occr)
372    /* Found another instance of the expression in the same basic block.
373       Prefer this occurrence to the currently recorded one.  We want
374       the last one in the block and the block is scanned from start
375       to end.  */
376    avail_occr->insn = insn;
377  else
378    {
379      /* First occurrence of this expression in this basic block.  */
380      avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
381						  sizeof (struct occr));
382
383      /* First occurrence of this expression in any block?  */
384      if (cur_expr->avail_occr == NULL)
385        cur_expr->avail_occr = avail_occr;
386      else
387        last_occr->next = avail_occr;
388
389      avail_occr->insn = insn;
390      avail_occr->next = NULL;
391      avail_occr->deleted_p = 0;
392    }
393}
394
395
/* Look up pattern PAT in the expression hash table.
   The result is a pointer to the table entry, or NULL if not found.  */
398
399static struct expr *
400lookup_expr_in_table (rtx pat)
401{
402  int do_not_record_p;
403  struct expr **slot, *tmp_expr;
404  hashval_t hash = hash_expr (pat, &do_not_record_p);
405
406  if (do_not_record_p)
407    return NULL;
408
409  tmp_expr = (struct expr *) obstack_alloc (&expr_obstack,
410					    sizeof (struct expr));
411  tmp_expr->expr = pat;
412  tmp_expr->hash = hash;
413  tmp_expr->avail_occr = NULL;
414
415  slot = (struct expr **) htab_find_slot_with_hash (expr_table, tmp_expr,
416                                                    hash, INSERT);
417  obstack_free (&expr_obstack, tmp_expr);
418
419  if (!slot)
420    return NULL;
421  else
422    return (*slot);
423}
424
425
426/* Dump all expressions and occurrences that are currently in the
427   expression hash table to FILE.  */
428
429/* This helper is called via htab_traverse.  */
430static int
431dump_hash_table_entry (void **slot, void *filep)
432{
433  struct expr *expr = (struct expr *) *slot;
434  FILE *file = (FILE *) filep;
435  struct occr *occr;
436
437  fprintf (file, "expr: ");
438  print_rtl (file, expr->expr);
439  fprintf (file,"\nhashcode: %u\n", expr->hash);
440  fprintf (file,"list of occurrences:\n");
441  occr = expr->avail_occr;
442  while (occr)
443    {
444      rtx insn = occr->insn;
445      print_rtl_single (file, insn);
446      fprintf (file, "\n");
447      occr = occr->next;
448    }
449  fprintf (file, "\n");
450  return 1;
451}
452
453static void
454dump_hash_table (FILE *file)
455{
456  fprintf (file, "\n\nexpression hash table\n");
457  fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
458           (long) htab_size (expr_table),
459           (long) htab_elements (expr_table),
460           htab_collisions (expr_table));
461  if (htab_elements (expr_table) > 0)
462    {
463      fprintf (file, "\n\ntable entries:\n");
464      htab_traverse (expr_table, dump_hash_table_entry, file);
465    }
466  fprintf (file, "\n");
467}
468
469/* Return true if register X is recorded as being set by an instruction
470   whose CUID is greater than the one given.  */
471
472static bool
473reg_changed_after_insn_p (rtx x, int cuid)
474{
475  unsigned int regno, end_regno;
476
477  regno = REGNO (x);
478  end_regno = END_HARD_REGNO (x);
479  do
480    if (reg_avail_info[regno] > cuid)
481      return true;
482  while (++regno < end_regno);
483  return false;
484}
485
486/* Return nonzero if the operands of expression X are unchanged
487   1) from the start of INSN's basic block up to but not including INSN
488      if AFTER_INSN is false, or
489   2) from INSN to the end of INSN's basic block if AFTER_INSN is true.  */
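
/* For instance, hash table construction (hash_scan_set) calls this with
   AFTER_INSN true to check that a load's operands survive to the end of
   its block, while eliminate_partially_redundant_loads calls it with
   AFTER_INSN false to check that the operands are unchanged since the
   start of the block.  */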
490
491static bool
492oprs_unchanged_p (rtx x, rtx insn, bool after_insn)
493{
494  int i, j;
495  enum rtx_code code;
496  const char *fmt;
497
498  if (x == 0)
499    return 1;
500
501  code = GET_CODE (x);
502  switch (code)
503    {
504    case REG:
505      /* We are called after register allocation.  */
506      gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER);
507      if (after_insn)
508	return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1);
509      else
510	return !reg_changed_after_insn_p (x, 0);
511
512    case MEM:
513      if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn))
514	return 0;
515      else
516	return oprs_unchanged_p (XEXP (x, 0), insn, after_insn);
517
518    case PC:
519    case CC0: /*FIXME*/
520    case CONST:
521    CASE_CONST_ANY:
522    case SYMBOL_REF:
523    case LABEL_REF:
524    case ADDR_VEC:
525    case ADDR_DIFF_VEC:
526      return 1;
527
528    case PRE_DEC:
529    case PRE_INC:
530    case POST_DEC:
531    case POST_INC:
532    case PRE_MODIFY:
533    case POST_MODIFY:
534      if (after_insn)
535	return 0;
536      break;
537
538    default:
539      break;
540    }
541
542  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
543    {
544      if (fmt[i] == 'e')
545	{
546	  if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn))
547	    return 0;
548	}
549      else if (fmt[i] == 'E')
550	for (j = 0; j < XVECLEN (x, i); j++)
551	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn))
552	    return 0;
553    }
554
555  return 1;
556}
557
558
559/* Used for communication between find_mem_conflicts and
560   load_killed_in_block_p.  Nonzero if find_mem_conflicts finds a
561   conflict between two memory references.
562   This is a bit of a hack to work around the limitations of note_stores.  */
563static int mems_conflict_p;
564
565/* DEST is the output of an instruction.  If it is a memory reference, and
566   possibly conflicts with the load found in DATA, then set mems_conflict_p
567   to a nonzero value.  */
568
569static void
570find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
571		    void *data)
572{
573  rtx mem_op = (rtx) data;
574
575  while (GET_CODE (dest) == SUBREG
576	 || GET_CODE (dest) == ZERO_EXTRACT
577	 || GET_CODE (dest) == STRICT_LOW_PART)
578    dest = XEXP (dest, 0);
579
580  /* If DEST is not a MEM, then it will not conflict with the load.  Note
581     that function calls are assumed to clobber memory, but are handled
582     elsewhere.  */
583  if (! MEM_P (dest))
584    return;
585
586  if (true_dependence (dest, GET_MODE (dest), mem_op))
587    mems_conflict_p = 1;
588}
589
590
591/* Return nonzero if the expression in X (a memory reference) is killed
592   in the current basic block before (if AFTER_INSN is false) or after
593   (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.
594
595   This function assumes that the modifies_mem table is flushed when
596   the hash table construction or redundancy elimination phases start
597   processing a new basic block.  */
598
599static int
600load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
601{
602  struct modifies_mem *list_entry = modifies_mem_list;
603
604  while (list_entry)
605    {
606      rtx setter = list_entry->insn;
607
608      /* Ignore entries in the list that do not apply.  */
609      if ((after_insn
610	   && INSN_CUID (setter) < uid_limit)
611	  || (! after_insn
612	      && INSN_CUID (setter) > uid_limit))
613	{
614	  list_entry = list_entry->next;
615	  continue;
616	}
617
618      /* If SETTER is a call everything is clobbered.  Note that calls
619	 to pure functions are never put on the list, so we need not
620	 worry about them.  */
621      if (CALL_P (setter))
622	return 1;
623
624      /* SETTER must be an insn of some kind that sets memory.  Call
625	 note_stores to examine each hunk of memory that is modified.
626	 It will set mems_conflict_p to nonzero if there may be a
627	 conflict between X and SETTER.  */
628      mems_conflict_p = 0;
629      note_stores (PATTERN (setter), find_mem_conflicts, x);
630      if (mems_conflict_p)
631	return 1;
632
633      list_entry = list_entry->next;
634    }
635  return 0;
636}
637
638
/* Record that register REG (or, in the _regno variant, hard register
   REGNO) was last set by INSN.  */
640
641static inline void
642record_last_reg_set_info (rtx insn, rtx reg)
643{
644  unsigned int regno, end_regno;
645
646  regno = REGNO (reg);
647  end_regno = END_HARD_REGNO (reg);
648  do
649    reg_avail_info[regno] = INSN_CUID (insn);
650  while (++regno < end_regno);
651}
652
653static inline void
654record_last_reg_set_info_regno (rtx insn, int regno)
655{
656  reg_avail_info[regno] = INSN_CUID (insn);
657}
658
659
660/* Record memory modification information for INSN.  We do not actually care
661   about the memory location(s) that are set, or even how they are set (consider
662   a CALL_INSN).  We merely need to record which insns modify memory.  */
663
664static void
665record_last_mem_set_info (rtx insn)
666{
667  struct modifies_mem *list_entry;
668
669  list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
670						      sizeof (struct modifies_mem));
671  list_entry->insn = insn;
672  list_entry->next = modifies_mem_list;
673  modifies_mem_list = list_entry;
674}
675
/* Called via note_stores from record_opr_changes to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */
679
680static void
681record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
682{
683  rtx last_set_insn = (rtx) data;
684
685  if (GET_CODE (dest) == SUBREG)
686    dest = SUBREG_REG (dest);
687
688  if (REG_P (dest))
689    record_last_reg_set_info (last_set_insn, dest);
690  else if (MEM_P (dest))
691    {
      /* Ignore pushes; they don't clobber memory.  They may still
693	 clobber the stack pointer though.  Some targets do argument
694	 pushes without adding REG_INC notes.  See e.g. PR25196,
695	 where a pushsi2 on i386 doesn't have REG_INC notes.  Note
696	 such changes here too.  */
697      if (! push_operand (dest, GET_MODE (dest)))
698	record_last_mem_set_info (last_set_insn);
699      else
700	record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM);
701    }
702}
703
704
705/* Reset tables used to keep track of what's still available since the
706   start of the block.  */
707
708static void
709reset_opr_set_tables (void)
710{
711  memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int));
712  obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom);
713  modifies_mem_list = NULL;
714}
715
716
717/* Record things set by INSN.
718   This data is used by oprs_unchanged_p.  */
719
720static void
721record_opr_changes (rtx insn)
722{
723  rtx note;
724
725  /* Find all stores and record them.  */
726  note_stores (PATTERN (insn), record_last_set_info, insn);
727
728  /* Also record autoincremented REGs for this insn as changed.  */
729  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
730    if (REG_NOTE_KIND (note) == REG_INC)
731      record_last_reg_set_info (insn, XEXP (note, 0));
732
733  /* Finally, if this is a call, record all call clobbers.  */
734  if (CALL_P (insn))
735    {
736      unsigned int regno;
737      rtx link, x;
738      hard_reg_set_iterator hrsi;
739      EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi)
740	record_last_reg_set_info_regno (insn, regno);
741
742      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
743	if (GET_CODE (XEXP (link, 0)) == CLOBBER)
744	  {
745	    x = XEXP (XEXP (link, 0), 0);
746	    if (REG_P (x))
747	      {
748		gcc_assert (HARD_REGISTER_P (x));
749		record_last_reg_set_info (insn, x);
750	      }
751	  }
752
753      if (! RTL_CONST_OR_PURE_CALL_P (insn))
754	record_last_mem_set_info (insn);
755    }
756}
757
758
/* Scan the pattern of INSN and add an entry to the expression hash table.
   After reload we are interested in loads/stores only.  */
761
762static void
763hash_scan_set (rtx insn)
764{
765  rtx pat = PATTERN (insn);
766  rtx src = SET_SRC (pat);
767  rtx dest = SET_DEST (pat);
768
769  /* We are only interested in loads and stores.  */
770  if (! MEM_P (src) && ! MEM_P (dest))
771    return;
772
773  /* Don't mess with jumps and nops.  */
774  if (JUMP_P (insn) || set_noop_p (pat))
775    return;
776
777  if (REG_P (dest))
778    {
779      if (/* Don't CSE something if we can't do a reg/reg copy.  */
780	  can_copy_p (GET_MODE (dest))
781	  /* Is SET_SRC something we want to gcse?  */
782	  && general_operand (src, GET_MODE (src))
783#ifdef STACK_REGS
784	  /* Never consider insns touching the register stack.  It may
785	     create situations that reg-stack cannot handle (e.g. a stack
786	     register live across an abnormal edge).  */
787	  && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG)
788#endif
789	  /* An expression is not available if its operands are
790	     subsequently modified, including this insn.  */
791	  && oprs_unchanged_p (src, insn, true))
792	{
793	  insert_expr_in_table (src, insn);
794	}
795    }
796  else if (REG_P (src))
797    {
      /* Record stores of a register into memory.  */
799      if (/* Don't CSE something if we can't do a reg/reg copy.  */
800	  can_copy_p (GET_MODE (src))
801	  /* Is SET_DEST something we want to gcse?  */
802	  && general_operand (dest, GET_MODE (dest))
803#ifdef STACK_REGS
804	  /* As above for STACK_REGS.  */
805	  && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG)
806#endif
807	  && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
808	  /* Check if the memory expression is killed after insn.  */
809	  && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
810	  && oprs_unchanged_p (XEXP (dest, 0), insn, true))
811	{
812	  insert_expr_in_table (dest, insn);
813	}
814    }
815}
816
817
818/* Create hash table of memory expressions available at end of basic
819   blocks.  Basically you should think of this hash table as the
820   representation of AVAIL_OUT.  This is the set of expressions that
821   is generated in a basic block and not killed before the end of the
822   same basic block.  Notice that this is really a local computation.  */
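
/* For example (illustrative registers): if a block contains
   (set (reg:SI 0) (mem:SI (reg:SI 6))) and neither reg 6 nor the loaded
   memory location is modified later in the block, the MEM expression is
   in the block's AVAIL_OUT and is entered in the hash table with this
   insn recorded as its available occurrence.  */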
823
824static void
825compute_hash_table (void)
826{
827  basic_block bb;
828
829  FOR_EACH_BB (bb)
830    {
831      rtx insn;
832
833      /* First pass over the instructions records information used to
834	 determine when registers and memory are last set.
835	 Since we compute a "local" AVAIL_OUT, reset the tables that
836	 help us keep track of what has been modified since the start
837	 of the block.  */
838      reset_opr_set_tables ();
839      FOR_BB_INSNS (bb, insn)
840	{
841	  if (INSN_P (insn))
842            record_opr_changes (insn);
843	}
844
845      /* The next pass actually builds the hash table.  */
846      FOR_BB_INSNS (bb, insn)
847	if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
848	  hash_scan_set (insn);
849    }
850}
851
852
853/* Check if register REG is killed in any insn waiting to be inserted on
854   edge E.  This function is required to check that our data flow analysis
855   is still valid prior to commit_edge_insertions.  */
856
857static bool
858reg_killed_on_edge (rtx reg, edge e)
859{
860  rtx insn;
861
862  for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
863    if (INSN_P (insn) && reg_set_p (reg, insn))
864      return true;
865
866  return false;
867}
868
/* Similar to above: check if register REG is used in any insn waiting
   to be inserted on edge E.
   Assumes no such insn can be a CALL_INSN; if that assumption is ever
   violated, call reg_used_between_p with PREV (insn), NEXT (insn)
   instead of reg_overlap_mentioned_p.  */
873
874static bool
875reg_used_on_edge (rtx reg, edge e)
876{
877  rtx insn;
878
879  for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
880    if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
881      return true;
882
883  return false;
884}
885
886/* Return the loaded/stored register of a load/store instruction.  */
887
888static rtx
889get_avail_load_store_reg (rtx insn)
890{
891  if (REG_P (SET_DEST (PATTERN (insn))))
892    /* A load.  */
    return SET_DEST (PATTERN (insn));
894  else
895    {
896      /* A store.  */
897      gcc_assert (REG_P (SET_SRC (PATTERN (insn))));
898      return SET_SRC (PATTERN (insn));
899    }
900}
901
902/* Return nonzero if the predecessors of BB are "well behaved".  */
903
904static bool
905bb_has_well_behaved_predecessors (basic_block bb)
906{
907  edge pred;
908  edge_iterator ei;
909
910  if (EDGE_COUNT (bb->preds) == 0)
911    return false;
912
913  FOR_EACH_EDGE (pred, ei, bb->preds)
914    {
915      if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
916	return false;
917
918      if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
919	return false;
920
921      if (JUMP_TABLE_DATA_P (BB_END (pred->src)))
922	return false;
923    }
924  return true;
925}
926
927
/* Search the list of occurrences OCCR for one in basic block BB;
   return it if found, or NULL otherwise.  */
929
930static struct occr*
931get_bb_avail_insn (basic_block bb, struct occr *occr)
932{
933  for (; occr != NULL; occr = occr->next)
934    if (BLOCK_FOR_INSN (occr->insn) == bb)
935      return occr;
936  return NULL;
937}
938
939
/* This handles the case where several stores feed a partially redundant
   load.  It checks if the redundancy elimination is possible and if it's
   worth it.

   Redundancy elimination is possible if:
   1) None of the operands of the load have been modified since the start
      of the current basic block.
   2) The same expression is generated in at least one predecessor of the
      current basic block.

   See the function body for the heuristics that determine whether
   eliminating a redundancy is also worth doing, assuming it is possible.  */
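
/* A rough summary of the profitability test used below (see the code for
   the exact conditions): the execution counts of the predecessor edges
   where the value is already available in a register (ok_count) must
   sufficiently outweigh both the counts of the edges where a new load
   would have to be inserted (not_ok_count) and the counts flowing over
   critical edges, as controlled by the GCSE_AFTER_RELOAD_PARTIAL_FRACTION
   and GCSE_AFTER_RELOAD_CRITICAL_FRACTION parameters.  */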
952
953static void
954eliminate_partially_redundant_load (basic_block bb, rtx insn,
955				    struct expr *expr)
956{
957  edge pred;
958  rtx avail_insn = NULL_RTX;
959  rtx avail_reg;
960  rtx dest, pat;
961  struct occr *a_occr;
962  struct unoccr *occr, *avail_occrs = NULL;
963  struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
964  int npred_ok = 0;
965  gcov_type ok_count = 0; /* Redundant load execution count.  */
966  gcov_type critical_count = 0; /* Execution count of critical edges.  */
967  edge_iterator ei;
968  bool critical_edge_split = false;
969
970  /* The execution count of the loads to be added to make the
971     load fully redundant.  */
972  gcov_type not_ok_count = 0;
973  basic_block pred_bb;
974
975  pat = PATTERN (insn);
976  dest = SET_DEST (pat);
977
978  /* Check that the loaded register is not used, set, or killed from the
979     beginning of the block.  */
980  if (reg_changed_after_insn_p (dest, 0)
981      || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn))
982    return;
983
984  /* Check potential for replacing load with copy for predecessors.  */
985  FOR_EACH_EDGE (pred, ei, bb->preds)
986    {
987      rtx next_pred_bb_end;
988
989      avail_insn = NULL_RTX;
990      avail_reg = NULL_RTX;
991      pred_bb = pred->src;
992      next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
993      for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
994	   a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
995	{
996	  /* Check if the loaded register is not used.  */
997	  avail_insn = a_occr->insn;
998	  avail_reg = get_avail_load_store_reg (avail_insn);
999	  gcc_assert (avail_reg);
1000
1001	  /* Make sure we can generate a move from register avail_reg to
1002	     dest.  */
1003	  extract_insn (gen_move_insn (copy_rtx (dest),
1004				       copy_rtx (avail_reg)));
1005	  if (! constrain_operands (1)
1006	      || reg_killed_on_edge (avail_reg, pred)
1007	      || reg_used_on_edge (dest, pred))
1008	    {
1009	      avail_insn = NULL;
1010	      continue;
1011	    }
1012	  if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end))
1013	    /* AVAIL_INSN remains non-null.  */
1014	    break;
1015	  else
1016	    avail_insn = NULL;
1017	}
1018
1019      if (EDGE_CRITICAL_P (pred))
1020	critical_count += pred->count;
1021
1022      if (avail_insn != NULL_RTX)
1023	{
1024	  npred_ok++;
1025	  ok_count += pred->count;
1026	  if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
1027						    copy_rtx (avail_reg)))))
1028	    {
1029	      /* Check if there is going to be a split.  */
1030	      if (EDGE_CRITICAL_P (pred))
1031		critical_edge_split = true;
1032	    }
	  else /* It's a dead move; no need to generate it.  */
1034	    continue;
1035	  occr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1036						  sizeof (struct unoccr));
1037	  occr->insn = avail_insn;
1038	  occr->pred = pred;
1039	  occr->next = avail_occrs;
1040	  avail_occrs = occr;
1041	  if (! rollback_unoccr)
1042	    rollback_unoccr = occr;
1043	}
1044      else
1045	{
1046	  /* Adding a load on a critical edge will cause a split.  */
1047	  if (EDGE_CRITICAL_P (pred))
1048	    critical_edge_split = true;
1049	  not_ok_count += pred->count;
1050	  unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1051						    sizeof (struct unoccr));
1052	  unoccr->insn = NULL_RTX;
1053	  unoccr->pred = pred;
1054	  unoccr->next = unavail_occrs;
1055	  unavail_occrs = unoccr;
1056	  if (! rollback_unoccr)
1057	    rollback_unoccr = unoccr;
1058	}
1059    }
1060
1061  if (/* No load can be replaced by copy.  */
1062      npred_ok == 0
1063      /* Prevent exploding the code.  */
1064      || (optimize_bb_for_size_p (bb) && npred_ok > 1)
1065      /* If we don't have profile information we cannot tell if splitting
1066         a critical edge is profitable or not so don't do it.  */
1067      || ((! profile_info || ! flag_branch_probabilities
1068	   || targetm.cannot_modify_jumps_p ())
1069	  && critical_edge_split))
1070    goto cleanup;
1071
1072  /* Check if it's worth applying the partial redundancy elimination.  */
1073  if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
1074    goto cleanup;
1075  if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
1076    goto cleanup;
1077
1078  /* Generate moves to the loaded register from where
1079     the memory is available.  */
1080  for (occr = avail_occrs; occr; occr = occr->next)
1081    {
1082      avail_insn = occr->insn;
1083      pred = occr->pred;
1084      /* Set avail_reg to be the register having the value of the
1085	 memory.  */
1086      avail_reg = get_avail_load_store_reg (avail_insn);
1087      gcc_assert (avail_reg);
1088
1089      insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
1090					  copy_rtx (avail_reg)),
1091			   pred);
1092      stats.moves_inserted++;
1093
1094      if (dump_file)
1095	fprintf (dump_file,
1096		 "generating move from %d to %d on edge from %d to %d\n",
1097		 REGNO (avail_reg),
1098		 REGNO (dest),
1099		 pred->src->index,
1100		 pred->dest->index);
1101    }
1102
1103  /* Regenerate loads where the memory is unavailable.  */
1104  for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
1105    {
1106      pred = unoccr->pred;
1107      insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
1108      stats.copies_inserted++;
1109
1110      if (dump_file)
1111	{
1112	  fprintf (dump_file,
1113		   "generating on edge from %d to %d a copy of load: ",
1114		   pred->src->index,
1115		   pred->dest->index);
1116	  print_rtl (dump_file, PATTERN (insn));
1117	  fprintf (dump_file, "\n");
1118	}
1119    }
1120
  /* If the load is not recorded as available in this block, delete it now.
     If it is available, it may help discover additional redundancies, so
     only mark it for later deletion.  */
1124  for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
1125       a_occr && (a_occr->insn != insn);
1126       a_occr = get_bb_avail_insn (bb, a_occr->next))
1127    ;
1128
1129  if (!a_occr)
1130    {
1131      stats.insns_deleted++;
1132
1133      if (dump_file)
1134	{
1135	  fprintf (dump_file, "deleting insn:\n");
1136          print_rtl_single (dump_file, insn);
1137          fprintf (dump_file, "\n");
1138	}
1139      delete_insn (insn);
1140    }
1141  else
1142    a_occr->deleted_p = 1;
1143
1144cleanup:
1145  if (rollback_unoccr)
1146    obstack_free (&unoccr_obstack, rollback_unoccr);
1147}
1148
/* Perform the redundancy elimination as described above.  */
1150
1151static void
1152eliminate_partially_redundant_loads (void)
1153{
1154  rtx insn;
1155  basic_block bb;
1156
1157  /* Note we start at block 1.  */
1158
1159  if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
1160    return;
1161
1162  FOR_BB_BETWEEN (bb,
1163		  ENTRY_BLOCK_PTR->next_bb->next_bb,
1164		  EXIT_BLOCK_PTR,
1165		  next_bb)
1166    {
1167      /* Don't try anything on basic blocks with strange predecessors.  */
1168      if (! bb_has_well_behaved_predecessors (bb))
1169	continue;
1170
1171      /* Do not try anything on cold basic blocks.  */
1172      if (optimize_bb_for_size_p (bb))
1173	continue;
1174
1175      /* Reset the table of things changed since the start of the current
1176	 basic block.  */
1177      reset_opr_set_tables ();
1178
1179      /* Look at all insns in the current basic block and see if there are
1180	 any loads in it that we can record.  */
1181      FOR_BB_INSNS (bb, insn)
1182	{
1183	  /* Is it a load - of the form (set (reg) (mem))?  */
1184	  if (NONJUMP_INSN_P (insn)
1185              && GET_CODE (PATTERN (insn)) == SET
1186	      && REG_P (SET_DEST (PATTERN (insn)))
1187	      && MEM_P (SET_SRC (PATTERN (insn))))
1188	    {
1189	      rtx pat = PATTERN (insn);
1190	      rtx src = SET_SRC (pat);
1191	      struct expr *expr;
1192
1193	      if (!MEM_VOLATILE_P (src)
1194		  && GET_MODE (src) != BLKmode
1195		  && general_operand (src, GET_MODE (src))
1196		  /* Are the operands unchanged since the start of the
1197		     block?  */
1198		  && oprs_unchanged_p (src, insn, false)
1199		  && !(cfun->can_throw_non_call_exceptions && may_trap_p (src))
1200		  && !side_effects_p (src)
1201		  /* Is the expression recorded?  */
1202		  && (expr = lookup_expr_in_table (src)) != NULL)
1203		{
		  /* We now have a load (insn) and a matching expression in
		     the hash table (expr).  Try to eliminate the load if it
		     is redundant.  */
1207		  eliminate_partially_redundant_load (bb, insn, expr);
1208		}
1209	    }
1210
1211	  /* Keep track of everything modified by this insn, so that we
1212	     know what has been modified since the start of the current
1213	     basic block.  */
1214	  if (INSN_P (insn))
1215	    record_opr_changes (insn);
1216	}
1217    }
1218
1219  commit_edge_insertions ();
1220}
1221
1222/* Go over the expression hash table and delete insns that were
1223   marked for later deletion.  */
1224
1225/* This helper is called via htab_traverse.  */
1226static int
1227delete_redundant_insns_1 (void **slot, void *data ATTRIBUTE_UNUSED)
1228{
1229  struct expr *expr = (struct expr *) *slot;
1230  struct occr *occr;
1231
1232  for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1233    {
1234      if (occr->deleted_p && dbg_cnt (gcse2_delete))
1235	{
1236	  delete_insn (occr->insn);
1237	  stats.insns_deleted++;
1238
1239	  if (dump_file)
1240	    {
1241	      fprintf (dump_file, "deleting insn:\n");
1242	      print_rtl_single (dump_file, occr->insn);
1243	      fprintf (dump_file, "\n");
1244	    }
1245	}
1246    }
1247
1248  return 1;
1249}
1250
1251static void
1252delete_redundant_insns (void)
1253{
1254  htab_traverse (expr_table, delete_redundant_insns_1, NULL);
1255  if (dump_file)
1256    fprintf (dump_file, "\n");
1257}
1258
/* Main entry point of the GCSE-after-reload pass: clean up some of the
   redundant loads created by spilling.  */
1261
1262static void
1263gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
1264{
1266  memset (&stats, 0, sizeof (stats));
1267
1268  /* Allocate memory for this pass.
1269     Also computes and initializes the insns' CUIDs.  */
1270  alloc_mem ();
1271
1272  /* We need alias analysis.  */
1273  init_alias_analysis ();
1274
1275  compute_hash_table ();
1276
1277  if (dump_file)
1278    dump_hash_table (dump_file);
1279
1280  if (htab_elements (expr_table) > 0)
1281    {
1282      eliminate_partially_redundant_loads ();
1283      delete_redundant_insns ();
1284
1285      if (dump_file)
1286	{
1287	  fprintf (dump_file, "GCSE AFTER RELOAD stats:\n");
1288	  fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted);
1289	  fprintf (dump_file, "moves inserted:  %d\n", stats.moves_inserted);
1290	  fprintf (dump_file, "insns deleted:   %d\n", stats.insns_deleted);
1291	  fprintf (dump_file, "\n\n");
1292	}
1293
1294      statistics_counter_event (cfun, "copies inserted",
1295				stats.copies_inserted);
1296      statistics_counter_event (cfun, "moves inserted",
1297				stats.moves_inserted);
1298      statistics_counter_event (cfun, "insns deleted",
1299				stats.insns_deleted);
1300    }
1301
  /* We are finished with alias analysis.  */
1303  end_alias_analysis ();
1304
1305  free_mem ();
1306}
1307
1308
1309static bool
1310gate_handle_gcse2 (void)
1311{
1312  return (optimize > 0 && flag_gcse_after_reload
1313	  && optimize_function_for_speed_p (cfun));
1314}
1315
1316
1317static unsigned int
1318rest_of_handle_gcse2 (void)
1319{
1320  gcse_after_reload_main (get_insns ());
1321  rebuild_jump_labels (get_insns ());
1322  return 0;
1323}
1324
1325struct rtl_opt_pass pass_gcse2 =
1326{
1327 {
1328  RTL_PASS,
1329  "gcse2",                              /* name */
1330  OPTGROUP_NONE,                        /* optinfo_flags */
1331  gate_handle_gcse2,                    /* gate */
1332  rest_of_handle_gcse2,                 /* execute */
1333  NULL,                                 /* sub */
1334  NULL,                                 /* next */
1335  0,                                    /* static_pass_number */
1336  TV_GCSE_AFTER_RELOAD,                 /* tv_id */
1337  0,                                    /* properties_required */
1338  0,                                    /* properties_provided */
1339  0,                                    /* properties_destroyed */
1340  0,                                    /* todo_flags_start */
1341  TODO_verify_rtl_sharing
1342  | TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
1343 }
1344};
1345