1/* Early (pre-RA) rematerialization
2   Copyright (C) 2017-2020 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "rtl.h"
25#include "df.h"
26#include "tree-pass.h"
27#include "memmodel.h"
28#include "emit-rtl.h"
29#include "insn-config.h"
30#include "recog.h"
31/* FIXME: The next two are only needed for gen_move_insn.  */
32#include "tree.h"
33#include "expr.h"
34#include "target.h"
35#include "inchash.h"
36#include "rtlhash.h"
37#include "print-rtl.h"
38#include "rtl-iter.h"
39#include "regs.h"
40#include "function-abi.h"
41
42/* This pass runs before register allocation and implements an aggressive
43   form of rematerialization.  It looks for pseudo registers R of mode M
44   for which:
45
46     (a) there are no call-preserved registers of mode M; and
47     (b) spilling R to the stack is expensive.
48
49   The assumption is that it's better to recompute R after each call instead
50   of spilling it, even if this extends the live ranges of other registers.
51
   The motivating examples for which these conditions hold are AArch64 SVE
53   vectors and predicates.  Spilling them to the stack makes the frame
54   variable-sized, which we'd like to avoid if possible.  It's also very
55   rare for SVE values to be "naturally" live across a call: usually this
56   happens as a result of CSE or other code motion.
57
58   The pass is split into the following phases:
59
60   Collection phase
61   ================
62
63   First we go through all pseudo registers looking for any that meet
64   the conditions above.  For each such register R, we go through each
65   instruction that defines R to see whether any of them are suitable
66   rematerialization candidates.  If at least one is, we treat all the
67   instructions that define R as candidates, but record which ones are
68   not in fact suitable.  These unsuitable candidates exist only for the
69   sake of calculating reaching definitions (see below).
70
71   A "candidate" is a single instruction that we want to rematerialize
72   and a "candidate register" is a register that is set by at least one
73   candidate.
74
75   Candidate sorting
76   =================
77
78   Next we sort the candidates based on the cfg postorder, so that if
79   candidate C1 uses candidate C2, C1 has a lower index than C2.
80   This is useful when iterating through candidate bitmaps.
81
82   Reaching definition calculation
83   ===============================
84
85   We then compute standard reaching-definition sets for each candidate.
86   Each set specifies which candidates might provide the current definition
87   of a live candidate register.
88
89   From here on, a candidate C is "live" at a point P if the candidate
90   register defined by C is live at P and if C's definition reaches P.
91   An instruction I "uses" a candidate C if I takes the register defined by
92   C as input and if C is one of the reaching definitions of that register.
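
   Concretely, the per-block sets satisfy the usual reaching-definition
   equations (see rd_confluence_n and rd_transfer below):

     rd_out (B) = rd_gen (B) | (rd_in (B) & ~rd_kill (B))
     rd_in (B) = union over predecessors P of rd_out (P), restricted to
		 candidates whose registers are live on entry to B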
93
94   Candidate validation and value numbering
95   ========================================
96
97   Next we simultaneously decide which candidates are valid and look
98   for candidates that are equivalent to each other, assigning numbers
99   to each unique candidate value.  A candidate C is invalid if:
100
101     (a) C uses an invalid candidate;
102
103     (b) there is a cycle of candidate uses involving C; or
104
105     (c) C takes a candidate register R as input and the reaching
106         definitions of R do not have the same value number.
107
   We assign a "representative" candidate C to each value number and from
   here on replace references to other candidates that have that value
   number with references to C.  It is then only possible to rematerialize a
111   register R at point P if (after this replacement) there is a single
112   reaching definition of R at P.
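
   For example, if a register R2 is set by two candidates C2a and C2b on
   paths that join before a candidate C1 that uses R2, C1 remains valid
   provided that C2a and C2b received the same value number; C1's use is
   then recorded against a single member of their equivalence class.
   If the two definitions have different values, C1 is invalid by rule (c).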
113
114   Local phase
115   ===========
116
117   During this phase we go through each block and look for cases in which:
118
119     (a) an instruction I comes between two call instructions CI1 and CI2;
120
121     (b) I uses a candidate register R;
122
123     (c) a candidate C provides the only reaching definition of R; and
124
125     (d) C does not come between CI1 and I.
126
127   We then emit a copy of C after CI1, as well as the transitive closure
128   TC of the candidates used by C.  The copies of TC might use the original
129   candidate registers or new temporary registers, depending on circumstances.
130
131   For example, if elsewhere we have:
132
133       C3: R3 <- f3 (...)
134	   ...
135       C2: R2 <- f2 (...)
136	   ...
137       C1: R1 <- f1 (R2, R3, ...)  // uses C2 and C3
138
139   then for a block containing:
140
141      CI1: call
142	   ...
143	I: use R1  // uses C1
144	   ...
145      CI2: call
146
147   we would emit:
148
149      CI1: call
150      C3': R3' <- f3 (...)
151      C2': R2' <- f2 (...)
152      C1': R1 <- f1 (R2', R3', ...)
153	   ...
154	I: use R1
155	   ...
156      CI2: call
157
158   where R2' and R3' might be fresh registers.  If instead we had:
159
160      CI1: call
161	   ...
162       I1: use R1  // uses C1
163	   ...
164       I2: use R3  // uses C3
165	   ...
166      CI2: call
167
168   we would keep the original R3:
169
170      CI1: call
171      C3': R3 <- f3 (...)
172      C2': R2' <- f2 (...)
173      C1': R1 <- f1 (R2', R3, ...)
174	   ...
175       I1: use R1  // uses C1
176	   ...
177       I2: use R3  // uses C3
178	   ...
179      CI2: call
180
181   We also record the last call in each block (if any) and compute:
182
183     rd_after_call:
184       The set of candidates that either (a) are defined outside the block
185       and are live after the last call or (b) are defined within the block
       and reach the instruction after the last call.  (We don't track whether the
187       latter values are live or not.)
188
189     required_after_call:
190       The set of candidates that need to be rematerialized after the
191       last call in order to satisfy uses in the block itself.
192
193     required_in:
194       The set of candidates that are live on entry to the block and are
195       used without an intervening call.
196
197   In addition, we compute the initial values of the sets required by
198   the global phase below.
199
200   Global phase
201   ============
202
203   We next compute a maximal solution to the following availability
204   problem:
205
206     available_in:
207       The set of candidates that are live on entry to a block and can
208       be used at that point without rematerialization.
209
210     available_out:
211       The set of candidates that are live on exit from a block and can
212       be used at that point without rematerialization.
213
214     available_locally:
215       The subset of available_out that is due to code in the block itself.
216       It contains candidates that are defined or used in the block and
217       not invalidated by a later call.
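
   Viewed as a dataflow problem, available_in for a block is the
   intersection of available_out over its predecessors, and (roughly
   speaking) available_out is available_locally plus whatever part of
   available_in survives the block without being redefined or
   invalidated by a later call.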
218
219   We then go through each block B and look for an appropriate place
220   to insert copies of required_in - available_in.  Conceptually we
221   start by placing the copies at the head of B, but then move the
222   copy of a candidate C to predecessors if:
223
224     (a) that seems cheaper;
225
226     (b) there is more than one reaching definition of C's register at
227	 the head of B; or
228
229     (c) copying C would clobber a hard register that is live on entry to B.
230
231   Moving a copy of C to a predecessor block PB involves:
232
233     (1) adding C to PB's required_after_call, if PB contains a call; or
234
     (2) adding C to PB's required_in otherwise.
236
237   C is then available on output from each PB and on input to B.
238
239   Once all this is done, we emit instructions for the final required_in
240   and required_after_call sets.  */
241
242namespace {
243
244/* An invalid candidate index, used to indicate that there is more than
245   one reaching definition.  */
246const unsigned int MULTIPLE_CANDIDATES = -1U;
247
248/* Pass-specific information about one basic block.  */
249struct remat_block_info {
250  /* The last call instruction in the block.  */
251  rtx_insn *last_call;
252
253  /* The set of candidates that are live on entry to the block.  NULL is
254     equivalent to an empty set.  */
255  bitmap rd_in;
256
257  /* The set of candidates that are live on exit from the block.  This might
258     reuse rd_in.  NULL is equivalent to an empty set.  */
259  bitmap rd_out;
260
261  /* The subset of RD_OUT that comes from local definitions.  NULL is
262     equivalent to an empty set.  */
263  bitmap rd_gen;
264
265  /* The set of candidates that the block invalidates (because it defines
266     the register to something else, or because the register's value is
267     no longer important).  NULL is equivalent to an empty set.  */
268  bitmap rd_kill;
269
270  /* The set of candidates that either (a) are defined outside the block
271     and are live after LAST_CALL or (b) are defined within the block
272     and reach the instruction after LAST_CALL.  (We don't track whether
273     the latter values are live or not.)
274
275     Only used if LAST_CALL is nonnull.  NULL is equivalent to an
276     empty set.  */
277  bitmap rd_after_call;
278
279  /* Candidates that are live and available without rematerialization
280     on entry to the block.  NULL is equivalent to an empty set.  */
281  bitmap available_in;
282
283  /* Candidates that become available without rematerialization within the
284     block, and remain so on exit.  NULL is equivalent to an empty set.  */
285  bitmap available_locally;
286
287  /* Candidates that are available without rematerialization on exit from
288     the block.  This might reuse available_in or available_locally.  */
289  bitmap available_out;
290
291  /* Candidates that need to be rematerialized either at the start of the
292     block or before entering the block.  */
293  bitmap required_in;
294
295  /* Candidates that need to be rematerialized after LAST_CALL.
296     Only used if LAST_CALL is nonnull.  */
297  bitmap required_after_call;
298
299  /* The number of candidates in the block.  */
300  unsigned int num_candidates;
301
302  /* The earliest candidate in the block (i.e. the one with the
303     highest index).  Only valid if NUM_CANDIDATES is nonzero.  */
304  unsigned int first_candidate;
305
306  /* The best (lowest) execution frequency for rematerializing REQUIRED_IN.
307     This is the execution frequency of the block if LOCAL_REMAT_CHEAPER_P,
308     otherwise it is the sum of the execution frequencies of whichever
309     predecessor blocks would do the rematerialization.  */
310  int remat_frequency;
311
312  /* True if the block ends with an abnormal call.  */
313  unsigned int abnormal_call_p : 1;
314
315  /* Used to record whether a graph traversal has visited this block.  */
316  unsigned int visited_p : 1;
317
318  /* True if we have calculated REMAT_FREQUENCY.  */
319  unsigned int remat_frequency_valid_p : 1;
320
321  /* True if it is cheaper to rematerialize candidates at the start of
322     the block, rather than moving them to predecessor blocks.  */
323  unsigned int local_remat_cheaper_p : 1;
324};
325
326/* Information about a group of candidates with the same value number.  */
327struct remat_equiv_class {
328  /* The candidates that have the same value number.  */
329  bitmap members;
330
331  /* The candidate that was first added to MEMBERS.  */
332  unsigned int earliest;
333
334  /* The candidate that represents the others.  This is always the one
335     with the highest index.  */
336  unsigned int representative;
337};
338
339/* Information about an instruction that we might want to rematerialize.  */
340struct remat_candidate {
341  /* The pseudo register that the instruction sets.  */
342  unsigned int regno;
343
344  /* A temporary register used when rematerializing uses of this candidate,
345     if REGNO doesn't have the right value or isn't worth using.  */
346  unsigned int copy_regno;
347
348  /* True if we intend to rematerialize this instruction by emitting
349     a move of a constant into REGNO, false if we intend to emit a
350     copy of the original instruction.  */
351  unsigned int constant_p : 1;
352
353  /* True if we still think it's possible to rematerialize INSN.  */
354  unsigned int can_copy_p : 1;
355
356  /* Used to record whether a graph traversal has visited this candidate.  */
357  unsigned int visited_p : 1;
358
359  /* True if we have verified that it's possible to rematerialize INSN.
360     Once this is true, both it and CAN_COPY_P remain true.  */
361  unsigned int validated_p : 1;
362
363  /* True if we have "stabilized" INSN, i.e. ensured that all non-candidate
364     registers read by INSN will have the same value when rematerializing INSN.
365     Only ever true if CAN_COPY_P.  */
366  unsigned int stabilized_p : 1;
367
368  /* Hash value used for value numbering.  */
369  hashval_t hash;
370
371  /* The instruction that sets REGNO.  */
372  rtx_insn *insn;
373
374  /* If CONSTANT_P, the value that should be moved into REGNO when
375     rematerializing, otherwise the pattern of the instruction that
376     should be used.  */
377  rtx remat_rtx;
378
379  /* The set of candidates that INSN takes as input.  NULL is equivalent
380     to the empty set.  All candidates in this set have a higher index
381     than the current candidate.  */
382  bitmap uses;
383
384  /* The set of hard registers that would be clobbered by rematerializing
385     the candidate, including (transitively) all those that would be
386     clobbered by rematerializing USES.  */
387  bitmap clobbers;
388
389  /* The equivalence class to which the candidate belongs, or null if none.  */
390  remat_equiv_class *equiv_class;
391};
392
393/* Hash functions used for value numbering.  */
394struct remat_candidate_hasher : nofree_ptr_hash <remat_candidate>
395{
396  typedef value_type compare_type;
397  static hashval_t hash (const remat_candidate *);
398  static bool equal (const remat_candidate *, const remat_candidate *);
399};
400
401/* Main class for this pass.  */
402class early_remat {
403public:
404  early_remat (function *, sbitmap);
405  ~early_remat ();
406
407  void run (void);
408
409private:
410  bitmap alloc_bitmap (void);
411  bitmap get_bitmap (bitmap *);
412  void init_temp_bitmap (bitmap *);
413  void copy_temp_bitmap (bitmap *, bitmap *);
414
415  void dump_insn_id (rtx_insn *);
416  void dump_candidate_bitmap (bitmap);
417  void dump_all_candidates (void);
418  void dump_edge_list (basic_block, bool);
419  void dump_block_info (basic_block);
420  void dump_all_blocks (void);
421
422  bool interesting_regno_p (unsigned int);
423  remat_candidate *add_candidate (rtx_insn *, unsigned int, bool);
424  bool maybe_add_candidate (rtx_insn *, unsigned int);
425  bool collect_candidates (void);
426  void init_block_info (void);
427  void sort_candidates (void);
428  void finalize_candidate_indices (void);
429  void record_equiv_candidates (unsigned int, unsigned int);
430  static bool rd_confluence_n (edge);
431  static bool rd_transfer (int);
432  void compute_rd (void);
433  unsigned int canon_candidate (unsigned int);
434  void canon_bitmap (bitmap *);
435  unsigned int resolve_reaching_def (bitmap);
436  bool check_candidate_uses (unsigned int);
437  void compute_clobbers (unsigned int);
438  void assign_value_number (unsigned int);
439  void decide_candidate_validity (void);
440  void restrict_remat_for_unavail_regs (bitmap, const_bitmap);
441  void restrict_remat_for_call (bitmap, rtx_insn *);
442  bool stable_use_p (unsigned int);
443  void emit_copy_before (unsigned int, rtx, rtx);
444  void stabilize_pattern (unsigned int);
445  void replace_dest_with_copy (unsigned int);
446  void stabilize_candidate_uses (unsigned int, bitmap, bitmap, bitmap,
447				 bitmap);
448  void emit_remat_insns (bitmap, bitmap, bitmap, rtx_insn *);
449  bool set_available_out (remat_block_info *);
450  void process_block (basic_block);
451  void local_phase (void);
452  static bool avail_confluence_n (edge);
453  static bool avail_transfer (int);
454  void compute_availability (void);
455  void unshare_available_sets (remat_block_info *);
456  bool can_move_across_edge_p (edge);
457  bool local_remat_cheaper_p (unsigned int);
458  bool need_to_move_candidate_p (unsigned int, unsigned int);
459  void compute_minimum_move_set (unsigned int, bitmap);
460  void move_to_predecessors (unsigned int, bitmap, bitmap);
461  void choose_rematerialization_points (void);
462  void emit_remat_insns_for_block (basic_block);
463  void global_phase (void);
464
465  /* The function that we're optimizing.  */
466  function *m_fn;
467
468  /* The modes that we want to rematerialize.  */
469  sbitmap m_selected_modes;
470
471  /* All rematerialization candidates, identified by their index into the
472     vector.  */
473  auto_vec<remat_candidate> m_candidates;
474
475  /* The set of candidate registers.  */
476  bitmap_head m_candidate_regnos;
477
478  /* Temporary sets.  */
479  bitmap_head m_tmp_bitmap;
480  bitmap m_available;
481  bitmap m_required;
482
483  /* Information about each basic block.  */
484  auto_vec<remat_block_info> m_block_info;
485
486  /* A mapping from register numbers to the set of associated candidates.
487     Only valid for registers in M_CANDIDATE_REGNOS.  */
488  auto_vec<bitmap> m_regno_to_candidates;
489
490  /* An obstack used for allocating bitmaps, so that we can free them all
491     in one go.  */
492  bitmap_obstack m_obstack;
493
494  /* A hash table of candidates used for value numbering.  If a candidate
495     in the table is in an equivalence class, the candidate is marked as
496     the earliest member of the class.  */
497  hash_table<remat_candidate_hasher> m_value_table;
498
499  /* Used temporarily by callback functions.  */
500  static early_remat *er;
501};
502
503}
504
505early_remat *early_remat::er;
506
507/* rtx_equal_p_cb callback that treats any two SCRATCHes as equal.
508   This allows us to compare two copies of a pattern, even though their
509   SCRATCHes are always distinct.  */
510
511static int
512scratch_equal (const_rtx *x, const_rtx *y, rtx *nx, rtx *ny)
513{
514  if (GET_CODE (*x) == SCRATCH && GET_CODE (*y) == SCRATCH)
515    {
516      *nx = const0_rtx;
517      *ny = const0_rtx;
518      return 1;
519    }
520  return 0;
521}
522
523/* Hash callback functions for remat_candidate.  */
524
525hashval_t
526remat_candidate_hasher::hash (const remat_candidate *cand)
527{
528  return cand->hash;
529}
530
531bool
532remat_candidate_hasher::equal (const remat_candidate *cand1,
533			       const remat_candidate *cand2)
534{
535  return (cand1->regno == cand2->regno
536	  && cand1->constant_p == cand2->constant_p
537	  && (cand1->constant_p
538	      ? rtx_equal_p (cand1->remat_rtx, cand2->remat_rtx)
539	      : rtx_equal_p_cb (cand1->remat_rtx, cand2->remat_rtx,
540				scratch_equal))
541	  && (!cand1->uses || bitmap_equal_p (cand1->uses, cand2->uses)));
542}
543
544/* Return true if B is null or empty.  */
545
546inline bool
547empty_p (bitmap b)
548{
549  return !b || bitmap_empty_p (b);
550}
551
552/* Allocate a new bitmap.  It will be automatically freed at the end of
553   the pass.  */
554
555inline bitmap
556early_remat::alloc_bitmap (void)
557{
558  return bitmap_alloc (&m_obstack);
559}
560
/* Return *PTR, initializing it to an empty bitmap if it is currently null.  */
562
563inline bitmap
564early_remat::get_bitmap (bitmap *ptr)
565{
566  if (!*ptr)
567    *ptr = alloc_bitmap ();
568  return *ptr;
569}
570
571/* *PTR is either null or empty.  If it is null, initialize it to an
572   empty bitmap.  */
573
574inline void
575early_remat::init_temp_bitmap (bitmap *ptr)
576{
577  if (!*ptr)
578    *ptr = alloc_bitmap ();
579  else
580    gcc_checking_assert (bitmap_empty_p (*ptr));
581}
582
583/* Move *SRC to *DEST and leave *SRC empty.  */
584
585inline void
586early_remat::copy_temp_bitmap (bitmap *dest, bitmap *src)
587{
588  if (!empty_p (*src))
589    {
590      *dest = *src;
591      *src = NULL;
592    }
593  else
594    *dest = NULL;
595}
596
597/* Print INSN's identifier to the dump file.  */
598
599void
600early_remat::dump_insn_id (rtx_insn *insn)
601{
602  fprintf (dump_file, "%d[bb:%d]", INSN_UID (insn),
603	   BLOCK_FOR_INSN (insn)->index);
604}
605
606/* Print candidate set CANDIDATES to the dump file, with a leading space.  */
607
608void
609early_remat::dump_candidate_bitmap (bitmap candidates)
610{
611  if (empty_p (candidates))
612    {
613      fprintf (dump_file, " none");
614      return;
615    }
616
617  unsigned int cand_index;
618  bitmap_iterator bi;
619  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
620    fprintf (dump_file, " %d", cand_index);
621}
622
623/* Print information about all candidates to the dump file.  */
624
625void
626early_remat::dump_all_candidates (void)
627{
628  fprintf (dump_file, "\n;; Candidates:\n;;\n");
629  fprintf (dump_file, ";; %5s %5s %8s %s\n", "#", "reg", "mode", "insn");
630  fprintf (dump_file, ";; %5s %5s %8s %s\n", "=", "===", "====", "====");
631  unsigned int cand_index;
632  remat_candidate *cand;
633  FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
634    {
635      fprintf (dump_file, ";; %5d %5d %8s ", cand_index, cand->regno,
636	       GET_MODE_NAME (GET_MODE (regno_reg_rtx[cand->regno])));
637      dump_insn_id (cand->insn);
638      if (!cand->can_copy_p)
639	fprintf (dump_file, "   -- can't copy");
640      fprintf (dump_file, "\n");
641    }
642
643  fprintf (dump_file, "\n;; Register-to-candidate mapping:\n;;\n");
644  unsigned int regno;
645  bitmap_iterator bi;
646  EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
647    {
648      fprintf (dump_file, ";; %5d:", regno);
649      dump_candidate_bitmap (m_regno_to_candidates[regno]);
650      fprintf (dump_file, "\n");
651    }
652}
653
654/* Print the predecessors or successors of BB to the dump file, with a
655   leading space.  DO_SUCC is true to print successors and false to print
656   predecessors.  */
657
658void
659early_remat::dump_edge_list (basic_block bb, bool do_succ)
660{
661  edge e;
662  edge_iterator ei;
663  FOR_EACH_EDGE (e, ei, do_succ ? bb->succs : bb->preds)
664    dump_edge_info (dump_file, e, TDF_NONE, do_succ);
665}
666
667/* Print information about basic block BB to the dump file.  */
668
669void
670early_remat::dump_block_info (basic_block bb)
671{
672  remat_block_info *info = &m_block_info[bb->index];
673  fprintf (dump_file, ";;\n;; Block %d:", bb->index);
674  int width = 25;
675
676  fprintf (dump_file, "\n;;%*s:", width, "predecessors");
677  dump_edge_list (bb, false);
678
679  fprintf (dump_file, "\n;;%*s:", width, "successors");
680  dump_edge_list (bb, true);
681
682  fprintf (dump_file, "\n;;%*s: %d", width, "frequency",
683	   bb->count.to_frequency (m_fn));
684
685  if (info->last_call)
686    fprintf (dump_file, "\n;;%*s: %d", width, "last call",
687	     INSN_UID (info->last_call));
688
689  if (!empty_p (info->rd_in))
690    {
691      fprintf (dump_file, "\n;;%*s:", width, "RD in");
692      dump_candidate_bitmap (info->rd_in);
693    }
694  if (!empty_p (info->rd_kill))
695    {
696      fprintf (dump_file, "\n;;%*s:", width, "RD kill");
697      dump_candidate_bitmap (info->rd_kill);
698    }
699  if (!empty_p (info->rd_gen))
700    {
701      fprintf (dump_file, "\n;;%*s:", width, "RD gen");
702      dump_candidate_bitmap (info->rd_gen);
703    }
704  if (!empty_p (info->rd_after_call))
705    {
706      fprintf (dump_file, "\n;;%*s:", width, "RD after call");
707      dump_candidate_bitmap (info->rd_after_call);
708    }
709  if (!empty_p (info->rd_out))
710    {
711      fprintf (dump_file, "\n;;%*s:", width, "RD out");
712      if (info->rd_in == info->rd_out)
713	fprintf (dump_file, " RD in");
714      else
715	dump_candidate_bitmap (info->rd_out);
716    }
717  if (!empty_p (info->available_in))
718    {
719      fprintf (dump_file, "\n;;%*s:", width, "available in");
720      dump_candidate_bitmap (info->available_in);
721    }
722  if (!empty_p (info->available_locally))
723    {
724      fprintf (dump_file, "\n;;%*s:", width, "available locally");
725      dump_candidate_bitmap (info->available_locally);
726    }
727  if (!empty_p (info->available_out))
728    {
729      fprintf (dump_file, "\n;;%*s:", width, "available out");
730      if (info->available_in == info->available_out)
731	fprintf (dump_file, " available in");
732      else if (info->available_locally == info->available_out)
733	fprintf (dump_file, " available locally");
734      else
735	dump_candidate_bitmap (info->available_out);
736    }
737  if (!empty_p (info->required_in))
738    {
739      fprintf (dump_file, "\n;;%*s:", width, "required in");
740      dump_candidate_bitmap (info->required_in);
741    }
742  if (!empty_p (info->required_after_call))
743    {
744      fprintf (dump_file, "\n;;%*s:", width, "required after call");
745      dump_candidate_bitmap (info->required_after_call);
746    }
747  fprintf (dump_file, "\n");
748}
749
750/* Print information about all basic blocks to the dump file.  */
751
752void
753early_remat::dump_all_blocks (void)
754{
755  basic_block bb;
756  FOR_EACH_BB_FN (bb, m_fn)
757    dump_block_info (bb);
758}
759
760/* Return true if REGNO is worth rematerializing.  */
761
762bool
763early_remat::interesting_regno_p (unsigned int regno)
764{
765  /* Ignore unused registers.  */
766  rtx reg = regno_reg_rtx[regno];
767  if (!reg || DF_REG_DEF_COUNT (regno) == 0)
768    return false;
769
770  /* Make sure the register has a mode that we want to rematerialize.  */
771  if (!bitmap_bit_p (m_selected_modes, GET_MODE (reg)))
772    return false;
773
774  /* Ignore values that might sometimes be used uninitialized.  We could
775     instead add dummy candidates for the entry block definition, and so
776     handle uses that are definitely not uninitialized, but the combination
777     of the two should be rare in practice.  */
778  if (bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
779    return false;
780
781  return true;
782}
783
784/* Record the set of register REGNO in instruction INSN as a
785   rematerialization candidate.  CAN_COPY_P is true unless we already
786   know that rematerialization is impossible (in which case the candidate
787   only exists for the reaching definition calculation).
788
789   The candidate's index is not fixed at this stage.  */
790
791remat_candidate *
792early_remat::add_candidate (rtx_insn *insn, unsigned int regno,
793			    bool can_copy_p)
794{
795  remat_candidate cand;
796  memset (&cand, 0, sizeof (cand));
797  cand.regno = regno;
798  cand.insn = insn;
799  cand.remat_rtx = PATTERN (insn);
800  cand.can_copy_p = can_copy_p;
801  m_candidates.safe_push (cand);
802
803  bitmap_set_bit (&m_candidate_regnos, regno);
804
805  return &m_candidates.last ();
806}
807
808/* Return true if we can rematerialize the set of register REGNO in
809   instruction INSN, and add it as a candidate if so.  When returning
810   false, print the reason to the dump file.  */
811
812bool
813early_remat::maybe_add_candidate (rtx_insn *insn, unsigned int regno)
814{
815#define FAILURE_FORMAT ";; Can't rematerialize set of reg %d in %d[bb:%d]: "
816#define FAILURE_ARGS regno, INSN_UID (insn), BLOCK_FOR_INSN (insn)->index
817
818  /* The definition must come from an ordinary instruction.  */
819  basic_block bb = BLOCK_FOR_INSN (insn);
820  if (!NONJUMP_INSN_P (insn)
821      || (insn == BB_END (bb)
822	  && has_abnormal_or_eh_outgoing_edge_p (bb)))
823    {
824      if (dump_file)
825	fprintf (dump_file, FAILURE_FORMAT "insn alters control flow\n",
826		 FAILURE_ARGS);
827      return false;
828    }
829
830  /* Prefer to rematerialize constants directly -- it's much easier.  */
831  machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
832  if (rtx note = find_reg_equal_equiv_note (insn))
833    {
834      rtx val = XEXP (note, 0);
835      if (CONSTANT_P (val)
836	  && targetm.legitimate_constant_p (mode, val))
837	{
838	  remat_candidate *cand = add_candidate (insn, regno, true);
839	  cand->constant_p = true;
840	  cand->remat_rtx = val;
841	  return true;
842	}
843    }
844
845  /* See whether the target has reasons to prevent a copy.  */
846  if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (insn))
847    {
848      if (dump_file)
849	fprintf (dump_file, FAILURE_FORMAT "target forbids copying\n",
850		 FAILURE_ARGS);
851      return false;
852    }
853
854  /* We can't copy trapping instructions.  */
855  rtx pat = PATTERN (insn);
856  if (may_trap_p (pat))
857    {
858      if (dump_file)
859	fprintf (dump_file, FAILURE_FORMAT "insn might trap\n", FAILURE_ARGS);
860      return false;
861    }
862
863  /* We can't copy instructions that read memory, unless we know that
864     the contents never change.  */
865  subrtx_iterator::array_type array;
866  FOR_EACH_SUBRTX (iter, array, pat, ALL)
867    if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
868      {
869	if (dump_file)
870	  fprintf (dump_file, FAILURE_FORMAT "insn references non-constant"
871		   " memory\n", FAILURE_ARGS);
872	return false;
873      }
874
875  /* Check each defined register.  */
876  df_ref ref;
877  FOR_EACH_INSN_DEF (ref, insn)
878    {
879      unsigned int def_regno = DF_REF_REGNO (ref);
880      if (def_regno == regno)
881	{
882	  /* Make sure the definition is write-only.  (Partial definitions,
883	     such as setting the low part and clobbering the high part,
884	     are otherwise OK.)  */
885	  if (DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE))
886	    {
887	      if (dump_file)
888		fprintf (dump_file, FAILURE_FORMAT "destination is"
889			 " read-modify-write\n", FAILURE_ARGS);
890	      return false;
891	    }
892	}
893      else
894	{
895	  /* The instruction can set additional registers, provided that
896	     they're hard registers.  This is useful for instructions
897	     that alter the condition codes.  */
898	  if (!HARD_REGISTER_NUM_P (def_regno))
899	    {
900	      if (dump_file)
901		fprintf (dump_file, FAILURE_FORMAT "insn also sets"
902			 " pseudo reg %d\n", FAILURE_ARGS, def_regno);
903	      return false;
904	    }
905	}
906    }
907
908  /* If the instruction uses fixed hard registers, check that those
909     registers have the same value throughout the function.  If the
910     instruction uses non-fixed hard registers, check that we can
911     replace them with pseudos.  */
912  FOR_EACH_INSN_USE (ref, insn)
913    {
914      unsigned int use_regno = DF_REF_REGNO (ref);
915      if (HARD_REGISTER_NUM_P (use_regno) && fixed_regs[use_regno])
916	{
917	  if (rtx_unstable_p (DF_REF_REAL_REG (ref)))
918	    {
919	      if (dump_file)
920		fprintf (dump_file, FAILURE_FORMAT "insn uses fixed hard reg"
921			 " %d\n", FAILURE_ARGS, use_regno);
922	      return false;
923	    }
924	}
925      else if (HARD_REGISTER_NUM_P (use_regno))
926	{
927	  /* Allocate a dummy pseudo register and temporarily install it.
928	     Make the register number depend on the mode, which should
929	     provide enough sharing for match_dup while also weeding
930	     out cases in which operands with different modes are
931	     explicitly tied.  */
932	  rtx *loc = DF_REF_REAL_LOC (ref);
933	  unsigned int size = RTX_CODE_SIZE (REG);
934	  rtx new_reg = (rtx) alloca (size);
935	  memset (new_reg, 0, size);
936	  PUT_CODE (new_reg, REG);
937	  set_mode_and_regno (new_reg, GET_MODE (*loc),
938			      LAST_VIRTUAL_REGISTER + 1 + GET_MODE (*loc));
939	  validate_change (insn, loc, new_reg, 1);
940	}
941    }
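  /* See whether the pattern accepts the temporary replacements made above,
     then cancel them: we only wanted to know whether a copy of INSN could
     use new pseudos in place of its hard register inputs.  */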
942  bool ok_p = verify_changes (0);
943  cancel_changes (0);
944  if (!ok_p)
945    {
946      if (dump_file)
947	fprintf (dump_file, FAILURE_FORMAT "insn does not allow hard"
948		 " register inputs to be replaced\n", FAILURE_ARGS);
949      return false;
950    }
951
952#undef FAILURE_ARGS
953#undef FAILURE_FORMAT
954
955  add_candidate (insn, regno, true);
956  return true;
957}
958
959/* Calculate the set of rematerialization candidates.  Return true if
960   we find at least one.  */
961
962bool
963early_remat::collect_candidates (void)
964{
965  unsigned int nregs = DF_REG_SIZE (df);
966  for (unsigned int regno = FIRST_PSEUDO_REGISTER; regno < nregs; ++regno)
967    if (interesting_regno_p (regno))
968      {
969	/* Create candidates for all suitable definitions.  */
970	bitmap_clear (&m_tmp_bitmap);
971	unsigned int bad = 0;
972	unsigned int id = 0;
973	for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
974	     ref = DF_REF_NEXT_REG (ref))
975	  {
976	    rtx_insn *insn = DF_REF_INSN (ref);
977	    if (maybe_add_candidate (insn, regno))
978	      bitmap_set_bit (&m_tmp_bitmap, id);
979	    else
980	      bad += 1;
981	    id += 1;
982	  }
983
984	/* If we found at least one suitable definition, add dummy
985	   candidates for the rest, so that we can see which definitions
986	   are live where.  */
987	if (!bitmap_empty_p (&m_tmp_bitmap) && bad)
988	  {
989	    id = 0;
990	    for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
991		 ref = DF_REF_NEXT_REG (ref))
992	      {
993		if (!bitmap_bit_p (&m_tmp_bitmap, id))
994		  add_candidate (DF_REF_INSN (ref), regno, false);
995		id += 1;
996	      }
997	  }
998      }
999
1001  return !m_candidates.is_empty ();
1002}
1003
1004/* Initialize the m_block_info array.  */
1005
1006void
1007early_remat::init_block_info (void)
1008{
1009  unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1010  m_block_info.safe_grow_cleared (n_blocks);
1011}
1012
1013/* Maps basic block indices to their position in the post order.  */
1014static unsigned int *postorder_index;
1015
1016/* Order remat_candidates X_IN and Y_IN according to the cfg postorder.  */
1017
1018static int
1019compare_candidates (const void *x_in, const void *y_in)
1020{
1021  const remat_candidate *x = (const remat_candidate *) x_in;
1022  const remat_candidate *y = (const remat_candidate *) y_in;
1023  basic_block x_bb = BLOCK_FOR_INSN (x->insn);
1024  basic_block y_bb = BLOCK_FOR_INSN (y->insn);
1025  if (x_bb != y_bb)
1026    /* Make X and Y follow block postorder.  */
1027    return postorder_index[x_bb->index] - postorder_index[y_bb->index];
1028
1029  /* Make X and Y follow a backward traversal of the containing block.  */
1030  return DF_INSN_LUID (y->insn) - DF_INSN_LUID (x->insn);
1031}
1032
1033/* Sort the collected rematerialization candidates so that they follow
1034   cfg postorder.  */
1035
1036void
1037early_remat::sort_candidates (void)
1038{
1039  /* Make sure the DF LUIDs are up-to-date for all the blocks we
1040     care about.  */
1041  bitmap_clear (&m_tmp_bitmap);
1042  unsigned int cand_index;
1043  remat_candidate *cand;
1044  FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1045    {
1046      basic_block bb = BLOCK_FOR_INSN (cand->insn);
1047      if (bitmap_set_bit (&m_tmp_bitmap, bb->index))
1048	df_recompute_luids (bb);
1049    }
1050
1051  /* Create a mapping from block numbers to their position in the
1052     postorder.  */
1053  unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1054  int *postorder = df_get_postorder (DF_BACKWARD);
1055  unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
1056  postorder_index = new unsigned int[n_blocks];
1057  for (unsigned int i = 0; i < postorder_len; ++i)
1058    postorder_index[postorder[i]] = i;
1059
1060  m_candidates.qsort (compare_candidates);
1061
1062  delete[] postorder_index;
1063}
1064
1065/* Commit to the current candidate indices and initialize cross-references.  */
1066
1067void
1068early_remat::finalize_candidate_indices (void)
1069{
1070  /* Create a bitmap for each candidate register.  */
1071  m_regno_to_candidates.safe_grow (max_reg_num ());
1072  unsigned int regno;
1073  bitmap_iterator bi;
1074  EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
1075    m_regno_to_candidates[regno] = alloc_bitmap ();
1076
1077  /* Go through each candidate and record its index.  */
1078  unsigned int cand_index;
1079  remat_candidate *cand;
1080  FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1081    {
1082      basic_block bb = BLOCK_FOR_INSN (cand->insn);
1083      remat_block_info *info = &m_block_info[bb->index];
1084      info->num_candidates += 1;
1085      info->first_candidate = cand_index;
1086      bitmap_set_bit (m_regno_to_candidates[cand->regno], cand_index);
1087    }
1088}
1089
1090/* Record that candidates CAND1_INDEX and CAND2_INDEX are equivalent.
1091   CAND1_INDEX might already have an equivalence class, but CAND2_INDEX
1092   doesn't.  */
1093
1094void
1095early_remat::record_equiv_candidates (unsigned int cand1_index,
1096				      unsigned int cand2_index)
1097{
1098  if (dump_file)
1099    fprintf (dump_file, ";; Candidate %d is equivalent to candidate %d\n",
1100	     cand2_index, cand1_index);
1101
1102  remat_candidate *cand1 = &m_candidates[cand1_index];
1103  remat_candidate *cand2 = &m_candidates[cand2_index];
1104  gcc_checking_assert (!cand2->equiv_class);
1105
1106  remat_equiv_class *ec = cand1->equiv_class;
1107  if (!ec)
1108    {
1109      ec = XOBNEW (&m_obstack.obstack, remat_equiv_class);
1110      ec->members = alloc_bitmap ();
1111      bitmap_set_bit (ec->members, cand1_index);
1112      ec->earliest = cand1_index;
1113      ec->representative = cand1_index;
1114      cand1->equiv_class = ec;
1115    }
1116  cand2->equiv_class = ec;
1117  bitmap_set_bit (ec->members, cand2_index);
1118  if (cand2_index > ec->representative)
1119    ec->representative = cand2_index;
1120}
1121
1122/* Propagate information from the rd_out set of E->src to the rd_in set
1123   of E->dest, when computing global reaching definitions.  Return true
1124   if something changed.  */
1125
1126bool
1127early_remat::rd_confluence_n (edge e)
1128{
1129  remat_block_info *src = &er->m_block_info[e->src->index];
1130  remat_block_info *dest = &er->m_block_info[e->dest->index];
1131
1132  /* available_in temporarily contains the set of candidates whose
1133     registers are live on entry.  */
1134  if (empty_p (src->rd_out) || empty_p (dest->available_in))
1135    return false;
1136
1137  return bitmap_ior_and_into (er->get_bitmap (&dest->rd_in),
1138			      src->rd_out, dest->available_in);
1139}
1140
1141/* Propagate information from the rd_in set of block BB_INDEX to rd_out.
1142   Return true if something changed.  */
1143
1144bool
1145early_remat::rd_transfer (int bb_index)
1146{
1147  remat_block_info *info = &er->m_block_info[bb_index];
1148
1149  if (empty_p (info->rd_in))
1150    return false;
1151
1152  if (empty_p (info->rd_kill))
1153    {
1154      gcc_checking_assert (empty_p (info->rd_gen));
1155      if (!info->rd_out)
1156	info->rd_out = info->rd_in;
1157      else
1158	gcc_checking_assert (info->rd_out == info->rd_in);
1159      /* Assume that we only get called if something changed.  */
1160      return true;
1161    }
1162
1163  if (empty_p (info->rd_gen))
1164    return bitmap_and_compl (er->get_bitmap (&info->rd_out),
1165			     info->rd_in, info->rd_kill);
1166
1167  return bitmap_ior_and_compl (er->get_bitmap (&info->rd_out), info->rd_gen,
1168			       info->rd_in, info->rd_kill);
1169}
1170
1171/* Calculate the rd_* sets for each block.  */
1172
1173void
1174early_remat::compute_rd (void)
1175{
1176  /* First calculate the rd_kill and rd_gen sets, using the fact
1177     that m_candidates is sorted in order of decreasing LUID.  */
1178  unsigned int cand_index;
1179  remat_candidate *cand;
1180  FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1181    {
1182      rtx_insn *insn = cand->insn;
1183      basic_block bb = BLOCK_FOR_INSN (insn);
1184      remat_block_info *info = &m_block_info[bb->index];
1185      bitmap kill = m_regno_to_candidates[cand->regno];
1186      bitmap_ior_into (get_bitmap (&info->rd_kill), kill);
1187      if (bitmap_bit_p (DF_LR_OUT (bb), cand->regno))
1188	{
1189	  bitmap_and_compl_into (get_bitmap (&info->rd_gen), kill);
1190	  bitmap_set_bit (info->rd_gen, cand_index);
1191	}
1192    }
1193
1194  /* Set up the initial values of the other sets.  */
1195  basic_block bb;
1196  FOR_EACH_BB_FN (bb, m_fn)
1197    {
1198      remat_block_info *info = &m_block_info[bb->index];
1199      unsigned int regno;
1200      bitmap_iterator bi;
1201      EXECUTE_IF_AND_IN_BITMAP (DF_LR_IN (bb), &m_candidate_regnos,
1202				0, regno, bi)
1203	{
1204	  /* Use available_in to record the set of candidates whose
	     registers are live on entry (i.e. an upper bound on rd_in).  */
1206	  bitmap_ior_into (get_bitmap (&info->available_in),
1207			   m_regno_to_candidates[regno]);
1208
1209	  /* Add registers that die in a block to the block's kill set,
1210	     so that we don't needlessly propagate them through the rest
1211	     of the function.  */
1212	  if (!bitmap_bit_p (DF_LR_OUT (bb), regno))
1213	    bitmap_ior_into (get_bitmap (&info->rd_kill),
1214			     m_regno_to_candidates[regno]);
1215	}
1216
1217      /* Initialize each block's rd_out to the minimal set (the set of
1218	 local definitions).  */
1219      if (!empty_p (info->rd_gen))
1220	bitmap_copy (get_bitmap (&info->rd_out), info->rd_gen);
1221    }
1222
1223  /* Iterate until we reach a fixed point.  */
1224  er = this;
1225  bitmap_clear (&m_tmp_bitmap);
1226  bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
1227  df_simple_dataflow (DF_FORWARD, NULL, NULL, rd_confluence_n, rd_transfer,
1228		      &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
1229		      df_get_n_blocks (DF_FORWARD));
1230  er = 0;
1231
1232  /* Work out which definitions reach which candidates, again taking
1233     advantage of the candidate order.  */
1234  bitmap_head reaching;
1235  bitmap_initialize (&reaching, &m_obstack);
1236  basic_block old_bb = NULL;
1237  FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1238    {
1239      bb = BLOCK_FOR_INSN (cand->insn);
1240      if (bb != old_bb)
1241	{
1242	  /* Get the definitions that reach the start of the new block.  */
1243	  remat_block_info *info = &m_block_info[bb->index];
1244	  if (info->rd_in)
1245	    bitmap_copy (&reaching, info->rd_in);
1246	  else
1247	    bitmap_clear (&reaching);
1248	  old_bb = bb;
1249	}
1250      else
1251	{
	  /* Process the definition made by the previously-visited candidate
	     (the one at CAND_INDEX + 1): kill the other definitions of its
	     register and add it to the reaching set.  */
1253	  bitmap kill = m_regno_to_candidates[cand[1].regno];
1254	  bitmap_and_compl_into (&reaching, kill);
1255	  bitmap_set_bit (&reaching, cand_index + 1);
1256	}
1257
1258      if (cand->can_copy_p && !cand->constant_p)
1259	{
1260	  df_ref ref;
1261	  FOR_EACH_INSN_USE (ref, cand->insn)
1262	    {
1263	      unsigned int regno = DF_REF_REGNO (ref);
1264	      if (bitmap_bit_p (&m_candidate_regnos, regno))
1265		{
1266		  bitmap defs = m_regno_to_candidates[regno];
1267		  bitmap_and (&m_tmp_bitmap, defs, &reaching);
1268		  bitmap_ior_into (get_bitmap (&cand->uses), &m_tmp_bitmap);
1269		}
1270	    }
1271	}
1272    }
1273  bitmap_clear (&reaching);
1274}
1275
1276/* If CAND_INDEX is in an equivalence class, return the representative
1277   of the class, otherwise return CAND_INDEX.  */
1278
1279inline unsigned int
1280early_remat::canon_candidate (unsigned int cand_index)
1281{
1282  if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1283    return ec->representative;
1284  return cand_index;
1285}
1286
1287/* Make candidate set *PTR refer to candidates using the representative
1288   of each equivalence class.  */
1289
1290void
1291early_remat::canon_bitmap (bitmap *ptr)
1292{
1293  bitmap old_set = *ptr;
1294  if (empty_p (old_set))
1295    return;
1296
1297  bitmap new_set = NULL;
1298  unsigned int old_index;
1299  bitmap_iterator bi;
1300  EXECUTE_IF_SET_IN_BITMAP (old_set, 0, old_index, bi)
1301    {
1302      unsigned int new_index = canon_candidate (old_index);
1303      if (old_index != new_index)
1304	{
1305	  if (!new_set)
1306	    {
1307	      new_set = alloc_bitmap ();
1308	      bitmap_copy (new_set, old_set);
1309	    }
1310	  bitmap_clear_bit (new_set, old_index);
1311	  bitmap_set_bit (new_set, new_index);
1312	}
1313    }
1314  if (new_set)
1315    {
1316      BITMAP_FREE (*ptr);
1317      *ptr = new_set;
1318    }
1319}
1320
1321/* If the candidates in REACHING all have the same value, return the
1322   earliest instance of that value (i.e. the first one to be added
1323   to m_value_table), otherwise return MULTIPLE_CANDIDATES.  */
1324
1325unsigned int
1326early_remat::resolve_reaching_def (bitmap reaching)
1327{
1328  unsigned int cand_index = bitmap_first_set_bit (reaching);
1329  if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1330    {
1331      if (!bitmap_intersect_compl_p (reaching, ec->members))
1332	return ec->earliest;
1333    }
1334  else if (bitmap_single_bit_set_p (reaching))
1335    return cand_index;
1336
1337  return MULTIPLE_CANDIDATES;
1338}
1339
1340/* Check whether all candidate registers used by candidate CAND_INDEX have
1341   unique definitions.  Return true if so, replacing the candidate's uses
1342   set with the appropriate form for value numbering.  */
1343
1344bool
1345early_remat::check_candidate_uses (unsigned int cand_index)
1346{
1347  remat_candidate *cand = &m_candidates[cand_index];
1348
1349  /* Process the uses for each register in turn.  */
1350  bitmap_head uses;
1351  bitmap_initialize (&uses, &m_obstack);
1352  bitmap_copy (&uses, cand->uses);
1353  bitmap uses_ec = alloc_bitmap ();
1354  while (!bitmap_empty_p (&uses))
1355    {
1356      /* Get the register for the lowest-indexed candidate remaining,
1357	 and the reaching definitions of that register.  */
1358      unsigned int first = bitmap_first_set_bit (&uses);
1359      unsigned int regno = m_candidates[first].regno;
1360      bitmap_and (&m_tmp_bitmap, &uses, m_regno_to_candidates[regno]);
1361
1362      /* See whether all reaching definitions have the same value and if
1363	 so get the index of the first candidate we saw with that value.  */
1364      unsigned int def = resolve_reaching_def (&m_tmp_bitmap);
1365      if (def == MULTIPLE_CANDIDATES)
1366	{
1367	  if (dump_file)
1368	    fprintf (dump_file, ";; Removing candidate %d because there is"
1369		     " more than one reaching definition of reg %d\n",
1370		     cand_index, regno);
1371	  cand->can_copy_p = false;
1372	  break;
1373	}
1374      bitmap_set_bit (uses_ec, def);
1375      bitmap_and_compl_into (&uses, &m_tmp_bitmap);
1376    }
1377  BITMAP_FREE (cand->uses);
1378  cand->uses = uses_ec;
1379  return cand->can_copy_p;
1380}
1381
1382/* Calculate the set of hard registers that would be clobbered by
1383   rematerializing candidate CAND_INDEX.  At this point the candidate's
1384   set of uses is final.  */
1385
1386void
1387early_remat::compute_clobbers (unsigned int cand_index)
1388{
1389  remat_candidate *cand = &m_candidates[cand_index];
1390  if (cand->uses)
1391    {
1392      unsigned int use_index;
1393      bitmap_iterator bi;
1394      EXECUTE_IF_SET_IN_BITMAP (cand->uses, 0, use_index, bi)
1395	if (bitmap clobbers = m_candidates[use_index].clobbers)
1396	  bitmap_ior_into (get_bitmap (&cand->clobbers), clobbers);
1397    }
1398
1399  df_ref ref;
1400  FOR_EACH_INSN_DEF (ref, cand->insn)
1401    {
1402      unsigned int def_regno = DF_REF_REGNO (ref);
1403      if (def_regno != cand->regno)
1404	bitmap_set_bit (get_bitmap (&cand->clobbers), def_regno);
1405    }
1406}
1407
1408/* Mark candidate CAND_INDEX as validated and add it to the value table.  */
1409
1410void
1411early_remat::assign_value_number (unsigned int cand_index)
1412{
1413  remat_candidate *cand = &m_candidates[cand_index];
1414  gcc_checking_assert (cand->can_copy_p && !cand->validated_p);
1415
1416  compute_clobbers (cand_index);
1417  cand->validated_p = true;
1418
1419  inchash::hash h;
1420  h.add_int (cand->regno);
1421  inchash::add_rtx (cand->remat_rtx, h);
1422  cand->hash = h.end ();
1423
1424  remat_candidate **slot
1425    = m_value_table.find_slot_with_hash (cand, cand->hash, INSERT);
1426  if (!*slot)
1427    {
1428      *slot = cand;
1429      if (dump_file)
1430	fprintf (dump_file, ";; Candidate %d is not equivalent to"
1431		 " others seen so far\n", cand_index);
1432    }
1433  else
1434    record_equiv_candidates (*slot - m_candidates.address (), cand_index);
1435}
1436
1437/* Make a final decision about which candidates are valid and assign
1438   value numbers to those that are.  */
1439
1440void
1441early_remat::decide_candidate_validity (void)
1442{
1443  auto_vec<unsigned int, 16> stack;
1444  unsigned int cand1_index;
1445  remat_candidate *cand1;
1446  FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1447    {
1448      if (!cand1->can_copy_p || cand1->validated_p)
1449	continue;
1450
1451      if (empty_p (cand1->uses))
1452	{
1453	  assign_value_number (cand1_index);
1454	  continue;
1455	}
1456
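      /* Perform an iterative depth-first walk over the candidates that
	 CAND1 uses, directly or indirectly.  Each candidate is validated
	 and value-numbered once all of its own uses have been validated;
	 VISITED_P marks candidates whose processing is still in progress,
	 which lets us detect cyclic definitions.  */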
1457      stack.safe_push (cand1_index);
1458      while (!stack.is_empty ())
1459	{
1460	  unsigned int cand2_index = stack.last ();
1461	  unsigned int watermark = stack.length ();
1462	  remat_candidate *cand2 = &m_candidates[cand2_index];
1463	  if (!cand2->can_copy_p || cand2->validated_p)
1464	    {
1465	      stack.pop ();
1466	      continue;
1467	    }
1468	  cand2->visited_p = true;
1469	  unsigned int cand3_index;
1470	  bitmap_iterator bi;
1471	  EXECUTE_IF_SET_IN_BITMAP (cand2->uses, 0, cand3_index, bi)
1472	    {
1473	      remat_candidate *cand3 = &m_candidates[cand3_index];
1474	      if (!cand3->can_copy_p)
1475		{
1476		  if (dump_file)
1477		    fprintf (dump_file, ";; Removing candidate %d because"
1478			     " it uses removed candidate %d\n", cand2_index,
1479			     cand3_index);
1480		  cand2->can_copy_p = false;
1481		  break;
1482		}
1483	      if (!cand3->validated_p)
1484		{
1485		  if (empty_p (cand3->uses))
1486		    assign_value_number (cand3_index);
1487		  else if (cand3->visited_p)
1488		    {
1489		      if (dump_file)
1490			fprintf (dump_file, ";; Removing candidate %d"
1491				 " because its definition is cyclic\n",
1492				 cand2_index);
1493		      cand2->can_copy_p = false;
1494		      break;
1495		    }
1496		  else
1497		    stack.safe_push (cand3_index);
1498		}
1499	    }
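	  /* If CAND2 proved invalid, discard it and anything that was pushed
	     on top of it while scanning its uses.  Otherwise, if nothing new
	     was pushed (the stack is still at WATERMARK), every use of CAND2
	     has now been validated, so CAND2 itself can be checked and
	     value-numbered; if new uses were pushed, leave CAND2 on the
	     stack to be revisited once they have been processed.  */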
1500	  if (!cand2->can_copy_p)
1501	    {
1502	      cand2->visited_p = false;
1503	      stack.truncate (watermark - 1);
1504	    }
1505	  else if (watermark == stack.length ())
1506	    {
1507	      cand2->visited_p = false;
1508	      if (check_candidate_uses (cand2_index))
1509		assign_value_number (cand2_index);
1510	      stack.pop ();
1511	    }
1512	}
1513    }
1514
1515  /* Ensure that the candidates always use the same candidate index
1516     to refer to an equivalence class.  */
1517  FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1518    if (cand1->can_copy_p && !empty_p (cand1->uses))
1519      {
1520	canon_bitmap (&cand1->uses);
1521	gcc_checking_assert (bitmap_first_set_bit (cand1->uses) > cand1_index);
1522      }
1523}
1524
1525/* Remove any candidates in CANDIDATES that would clobber a register in
1526   UNAVAIL_REGS.  */
1527
1528void
1529early_remat::restrict_remat_for_unavail_regs (bitmap candidates,
1530					      const_bitmap unavail_regs)
1531{
1532  bitmap_clear (&m_tmp_bitmap);
1533  unsigned int cand_index;
1534  bitmap_iterator bi;
1535  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
1536    {
1537      remat_candidate *cand = &m_candidates[cand_index];
1538      if (cand->clobbers
1539	  && bitmap_intersect_p (cand->clobbers, unavail_regs))
1540	bitmap_set_bit (&m_tmp_bitmap, cand_index);
1541    }
1542  bitmap_and_compl_into (candidates, &m_tmp_bitmap);
1543}
1544
1545/* Remove any candidates in CANDIDATES that would clobber a register
1546   that is potentially live across CALL.  */
1547
1548void
1549early_remat::restrict_remat_for_call (bitmap candidates, rtx_insn *call)
1550{
1551  /* We don't know whether partially-clobbered registers are live
1552     across the call or not, so assume that they are.  */
1553  bitmap_view<HARD_REG_SET> call_preserved_regs
1554    (~insn_callee_abi (call).full_reg_clobbers ());
1555  restrict_remat_for_unavail_regs (candidates, call_preserved_regs);
1556}
1557
1558/* Assuming that every path reaching a point P contains a copy of a
1559   use U of REGNO, return true if another copy of U at P would have
1560   access to the same value of REGNO.  */
1561
1562bool
1563early_remat::stable_use_p (unsigned int regno)
1564{
1565  /* Conservatively assume not for hard registers.  */
1566  if (HARD_REGISTER_NUM_P (regno))
1567    return false;
1568
1569  /* See if REGNO has a single definition and is never used uninitialized.
1570     In this case the definition of REGNO dominates the common dominator
1571     of the uses U, which in turn dominates P.  */
1572  if (DF_REG_DEF_COUNT (regno) == 1
1573      && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
1574    return true;
1575
1576  return false;
1577}
1578
/* Emit a copy from register SRC to register DEST before candidate
1580   CAND_INDEX's instruction.  */

void
early_remat::emit_copy_before (unsigned int cand_index, rtx dest, rtx src)
{
  remat_candidate *cand = &m_candidates[cand_index];
  if (dump_file)
    {
      fprintf (dump_file, ";; Stabilizing insn ");
      dump_insn_id (cand->insn);
      fprintf (dump_file, " by copying source reg %d:%s to temporary reg %d\n",
               REGNO (src), GET_MODE_NAME (GET_MODE (src)), REGNO (dest));
    }
  emit_insn_before (gen_move_insn (dest, src), cand->insn);
}

/* Check whether any inputs to candidate CAND_INDEX's instruction could
   change at rematerialization points and replace them with new pseudo
   registers if so.  */

void
early_remat::stabilize_pattern (unsigned int cand_index)
{
  remat_candidate *cand = &m_candidates[cand_index];
  if (cand->stabilized_p)
    return;

  remat_equiv_class *ec = cand->equiv_class;
  gcc_checking_assert (!ec || cand_index == ec->representative);

  /* Record the replacements we've made so far, so that we don't
     create two separate registers for match_dups.  Lookup is O(n),
     but the n is very small.  */
  typedef std::pair<rtx, rtx> reg_pair;
  auto_vec<reg_pair, 16> reg_map;

  rtx_insn *insn = cand->insn;
  df_ref ref;
  FOR_EACH_INSN_USE (ref, insn)
    {
      unsigned int old_regno = DF_REF_REGNO (ref);
      rtx *loc = DF_REF_REAL_LOC (ref);

      if (HARD_REGISTER_NUM_P (old_regno) && fixed_regs[old_regno])
        {
          /* We checked when adding the candidate that the value is stable.  */
          gcc_checking_assert (!rtx_unstable_p (*loc));
          continue;
        }

      if (bitmap_bit_p (&m_candidate_regnos, old_regno))
        /* We already know which candidate provides the definition
           and will handle it during copying.  */
        continue;

      if (stable_use_p (old_regno))
        /* We can continue to use the existing register.  */
        continue;

      /* We need to replace the register.  See whether we've already
         created a suitable copy.  */
      rtx old_reg = *loc;
      rtx new_reg = NULL_RTX;
      machine_mode mode = GET_MODE (old_reg);
      reg_pair *p;
      unsigned int pi;
      FOR_EACH_VEC_ELT (reg_map, pi, p)
        if (REGNO (p->first) == old_regno
            && GET_MODE (p->first) == mode)
          {
            new_reg = p->second;
            break;
          }

      if (!new_reg)
        {
          /* Create a new register and initialize it just before
             the instruction.  */
          new_reg = gen_reg_rtx (mode);
          reg_map.safe_push (reg_pair (old_reg, new_reg));
          if (ec)
            {
              unsigned int member_index;
              bitmap_iterator bi;
              EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
                emit_copy_before (member_index, new_reg, old_reg);
            }
          else
            emit_copy_before (cand_index, new_reg, old_reg);
        }
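      /* Queue the replacement as part of a change group; all queued
         replacements are applied together by apply_change_group below.  */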
      validate_change (insn, loc, new_reg, true);
    }
  if (num_changes_pending ())
    {
      if (!apply_change_group ())
        /* We checked when adding the candidates that the pattern allows
           hard registers to be replaced.  Nothing else should make the
           changes invalid.  */
        gcc_unreachable ();

      if (ec)
        {
          /* Copy the new pattern to other members of the equivalence
             class.  */
          unsigned int member_index;
          bitmap_iterator bi;
          EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
            if (cand_index != member_index)
              {
                rtx_insn *other_insn = m_candidates[member_index].insn;
                if (!validate_change (other_insn, &PATTERN (other_insn),
                                      copy_insn (PATTERN (insn)), 0))
                  /* If the original instruction was valid then the copy
                     should be too.  */
                  gcc_unreachable ();
              }
        }
    }

  cand->stabilized_p = true;
}

/* Change CAND's instruction so that it sets CAND->copy_regno instead
   of CAND->regno.  */

void
early_remat::replace_dest_with_copy (unsigned int cand_index)
{
  remat_candidate *cand = &m_candidates[cand_index];
  df_ref def;
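  /* Queue the change as part of a group; emit_remat_insns takes a copy
     of the temporarily-adjusted pattern and then cancels the change.  */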
  FOR_EACH_INSN_DEF (def, cand->insn)
    if (DF_REF_REGNO (def) == cand->regno)
      validate_change (cand->insn, DF_REF_REAL_LOC (def),
                       regno_reg_rtx[cand->copy_regno], 1);
}

/* Make sure that the candidates used by candidate CAND_INDEX are available.
   There are two ways of doing this for an input candidate I:

   (1) Using the existing register number and ensuring that I is available.

   (2) Using a new register number (recorded in copy_regno) and adding I
       to VIA_COPY.  This guarantees that making I available does not
       conflict with other uses of the original register.

   REQUIRED is the set of candidates that are required but not available
   before the copy of CAND_INDEX.  AVAILABLE is the set of candidates
   that are already available before the copy of CAND_INDEX.  REACHING
   is the set of candidates that reach the copy of CAND_INDEX.  VIA_COPY
   is the set of candidates that will use new register numbers recorded
   in copy_regno instead of the original ones.  */

void
early_remat::stabilize_candidate_uses (unsigned int cand_index,
                                       bitmap required, bitmap available,
                                       bitmap reaching, bitmap via_copy)
{
  remat_candidate *cand = &m_candidates[cand_index];
  df_ref use;
  FOR_EACH_INSN_USE (use, cand->insn)
    {
      unsigned int regno = DF_REF_REGNO (use);
      if (!bitmap_bit_p (&m_candidate_regnos, regno))
        continue;

      /* Work out which candidate provides the definition.  */
      bitmap defs = m_regno_to_candidates[regno];
      bitmap_and (&m_tmp_bitmap, cand->uses, defs);
      gcc_checking_assert (bitmap_single_bit_set_p (&m_tmp_bitmap));
      unsigned int def_index = bitmap_first_set_bit (&m_tmp_bitmap);

      /* First see if DEF_INDEX is the only reaching definition of REGNO
         at this point too and if it is or will become available.  We can
         continue to use REGNO if so.  */
      bitmap_and (&m_tmp_bitmap, reaching, defs);
      if (bitmap_single_bit_set_p (&m_tmp_bitmap)
          && bitmap_first_set_bit (&m_tmp_bitmap) == def_index
          && ((available && bitmap_bit_p (available, def_index))
              || bitmap_bit_p (required, def_index)))
        {
          if (dump_file)
            fprintf (dump_file, ";; Keeping reg %d for use of candidate %d"
                     " in candidate %d\n", regno, def_index, cand_index);
          continue;
        }

      /* Otherwise fall back to using a copy.  There are other cases
         in which we *could* continue to use REGNO, but there's not
         really much point.  Using a separate register ought to make
         things easier for the register allocator.  */
      remat_candidate *def_cand = &m_candidates[def_index];
      rtx *loc = DF_REF_REAL_LOC (use);
      rtx new_reg;
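      /* bitmap_set_bit returns true only if the bit was previously clear,
         so the copy register is created once per input candidate and
         reused for any later uses of the same candidate.  */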
      if (bitmap_set_bit (via_copy, def_index))
        {
          new_reg = gen_reg_rtx (GET_MODE (*loc));
          def_cand->copy_regno = REGNO (new_reg);
          if (dump_file)
            fprintf (dump_file, ";; Creating reg %d for use of candidate %d"
                     " in candidate %d\n", REGNO (new_reg), def_index,
                     cand_index);
        }
      else
        new_reg = regno_reg_rtx[def_cand->copy_regno];
      validate_change (cand->insn, loc, new_reg, 1);
    }
}

/* Rematerialize the candidates in REQUIRED after instruction INSN,
   given that the candidates in AVAILABLE are already available
   and that REACHING is the set of candidates live after INSN.
   REQUIRED and AVAILABLE are disjoint on entry.

   Clear REQUIRED on exit.  */

void
early_remat::emit_remat_insns (bitmap required, bitmap available,
                               bitmap reaching, rtx_insn *insn)
{
  /* Quick exit if there's nothing to do.  */
  if (empty_p (required))
    return;

  /* Only reaching definitions should be available or required.  */
  gcc_checking_assert (!bitmap_intersect_compl_p (required, reaching));
  if (available)
    gcc_checking_assert (!bitmap_intersect_compl_p (available, reaching));

  bitmap_head via_copy;
  bitmap_initialize (&via_copy, &m_obstack);
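  /* Each new instruction is emitted immediately after INSN, so instructions
     emitted by later iterations of this loop end up earlier in the
     instruction stream than those emitted by earlier iterations.  */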
  while (!bitmap_empty_p (required) || !bitmap_empty_p (&via_copy))
    {
      /* Pick the lowest-indexed candidate left.  */
      unsigned int required_index = (bitmap_empty_p (required)
                                     ? ~0U : bitmap_first_set_bit (required));
      unsigned int via_copy_index = (bitmap_empty_p (&via_copy)
                                     ? ~0U : bitmap_first_set_bit (&via_copy));
      unsigned int cand_index = MIN (required_index, via_copy_index);
      remat_candidate *cand = &m_candidates[cand_index];

      bool via_copy_p = (cand_index == via_copy_index);
      if (via_copy_p)
        bitmap_clear_bit (&via_copy, cand_index);
      else
        {
          /* Remove all candidates for the same register from REQUIRED.  */
          bitmap_and (&m_tmp_bitmap, reaching,
                      m_regno_to_candidates[cand->regno]);
          bitmap_and_compl_into (required, &m_tmp_bitmap);
          gcc_checking_assert (!bitmap_bit_p (required, cand_index));

          /* Only rematerialize if we have a single reaching definition
             of the register.  */
          if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
            {
              if (dump_file)
                {
                  fprintf (dump_file, ";; Can't rematerialize reg %d after ",
                           cand->regno);
                  dump_insn_id (insn);
                  fprintf (dump_file, ": more than one reaching definition\n");
                }
              continue;
            }

          /* Skip candidates that can't be rematerialized.  */
          if (!cand->can_copy_p)
            continue;

          /* Check the function precondition.  */
          gcc_checking_assert (!available
                               || !bitmap_bit_p (available, cand_index));
        }

      /* Invalid candidates should have been weeded out by now.  */
      gcc_assert (cand->can_copy_p);

      rtx new_pattern;
      if (cand->constant_p)
        {
          /* Emit a simple move.  */
          unsigned int regno = via_copy_p ? cand->copy_regno : cand->regno;
          new_pattern = gen_move_insn (regno_reg_rtx[regno], cand->remat_rtx);
        }
      else
        {
          /* If this is the first time we've copied the instruction, make
             sure that any inputs will have the same value after INSN.  */
          stabilize_pattern (cand_index);

          /* Temporarily adjust the original instruction so that it has
             the right form for the copy.  */
          if (via_copy_p)
            replace_dest_with_copy (cand_index);
          if (cand->uses)
            stabilize_candidate_uses (cand_index, required, available,
                                      reaching, &via_copy);

          /* Get the new instruction pattern.  */
          new_pattern = copy_insn (cand->remat_rtx);

          /* Undo the temporary changes.  */
          cancel_changes (0);
        }

      /* Emit the new instruction.  */
      rtx_insn *new_insn = emit_insn_after (new_pattern, insn);

      if (dump_file)
        {
          fprintf (dump_file, ";; Rematerializing candidate %d after ",
                   cand_index);
          dump_insn_id (insn);
          if (via_copy_p)
            fprintf (dump_file, " with new destination reg %d",
                     cand->copy_regno);
          fprintf (dump_file, ":\n\n");
          print_rtl_single (dump_file, new_insn);
          fprintf (dump_file, "\n");
        }
    }
}

/* Recompute INFO's available_out set, given that it's distinct from
   available_in and available_locally.  */

bool
early_remat::set_available_out (remat_block_info *info)
{
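  /* In all three cases below the result is:

       available_out = available_locally | (available_in & ~rd_kill)

     using the cheapest bitmap operation for the sets involved.  */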
  if (empty_p (info->available_locally))
    return bitmap_and_compl (get_bitmap (&info->available_out),
                             info->available_in, info->rd_kill);

  if (empty_p (info->rd_kill))
    return bitmap_ior (get_bitmap (&info->available_out),
                       info->available_locally, info->available_in);

  return bitmap_ior_and_compl (get_bitmap (&info->available_out),
                               info->available_locally, info->available_in,
                               info->rd_kill);
}

/* If BB has more than one call, decide which candidates should be
   rematerialized after the non-final calls and emit the associated
   instructions.  Record other information about the block in preparation
   for the global phase.  */

void
early_remat::process_block (basic_block bb)
{
  remat_block_info *info = &m_block_info[bb->index];
  rtx_insn *last_call = NULL;
  rtx_insn *insn;

  /* Ensure that we always use the same candidate index to refer to an
     equivalence class.  */
  if (info->rd_out == info->rd_in)
    {
      canon_bitmap (&info->rd_in);
      info->rd_out = info->rd_in;
    }
  else
    {
      canon_bitmap (&info->rd_in);
      canon_bitmap (&info->rd_out);
    }
  canon_bitmap (&info->rd_kill);
  canon_bitmap (&info->rd_gen);

  /* The set of candidates that should be rematerialized on entry to the
     block or after the previous call (whichever is more recent).  */
  init_temp_bitmap (&m_required);

  /* The set of candidates that reach the current instruction (i.e. are
     live just before the instruction).  */
  bitmap_head reaching;
  bitmap_initialize (&reaching, &m_obstack);
  if (info->rd_in)
    bitmap_copy (&reaching, info->rd_in);

  /* The set of candidates that are live and available without
     rematerialization just before the current instruction.  This only
     accounts for earlier candidates in the block, or those that become
     available by being added to M_REQUIRED.  */
  init_temp_bitmap (&m_available);

  /* Get the range of candidates in the block.  */
  unsigned int next_candidate = info->first_candidate;
  unsigned int num_candidates = info->num_candidates;
  remat_candidate *next_def = (num_candidates > 0
                               ? &m_candidates[next_candidate]
                               : NULL);

  FOR_BB_INSNS (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* First process uses, since this is a forward walk.  */
      df_ref ref;
      FOR_EACH_INSN_USE (ref, insn)
        {
          unsigned int regno = DF_REF_REGNO (ref);
          if (bitmap_bit_p (&m_candidate_regnos, regno))
            {
              bitmap defs = m_regno_to_candidates[regno];
              bitmap_and (&m_tmp_bitmap, defs, &reaching);
              gcc_checking_assert (!bitmap_empty_p (&m_tmp_bitmap));
              if (!bitmap_intersect_p (defs, m_available))
                {
                  /* There has been no definition of the register since
                     the last call or the start of the block (whichever
                     is most recent).  Mark the reaching definitions
                     as required at that point and thus available here.  */
                  bitmap_ior_into (m_required, &m_tmp_bitmap);
                  bitmap_ior_into (m_available, &m_tmp_bitmap);
                }
            }
        }

      if (CALL_P (insn))
        {
          if (!last_call)
            {
              /* The first call in the block.  Record which candidates are
                 required at the start of the block.  */
              copy_temp_bitmap (&info->required_in, &m_required);
              init_temp_bitmap (&m_required);
            }
          else
            {
              /* The fully-local case: candidates that need to be
                 rematerialized after a previous call in the block.  */
              restrict_remat_for_call (m_required, last_call);
              emit_remat_insns (m_required, NULL, info->rd_after_call,
                                last_call);
            }
          last_call = insn;
          bitmap_clear (m_available);
          gcc_checking_assert (empty_p (m_required));
        }

      /* Now process definitions.  */
      while (next_def && insn == next_def->insn)
        {
          unsigned int gen = canon_candidate (next_candidate);

          /* Other candidates with the same regno are not available
             any more.  */
          bitmap kill = m_regno_to_candidates[next_def->regno];
          bitmap_and_compl_into (m_available, kill);
          bitmap_and_compl_into (&reaching, kill);

          /* Record that this candidate is available without
             rematerialization.  */
          bitmap_set_bit (m_available, gen);
          bitmap_set_bit (&reaching, gen);

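          /* Candidates belonging to the block are stored at decreasing
             indices in execution order, so step backwards through
             m_candidates to find the next one.  */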
          /* Find the next candidate in the block.  */
          num_candidates -= 1;
          next_candidate -= 1;
          if (num_candidates > 0)
            next_def -= 1;
          else
            next_def = NULL;
        }

      if (insn == last_call)
        bitmap_copy (get_bitmap (&info->rd_after_call), &reaching);
    }
  bitmap_clear (&reaching);
  gcc_checking_assert (num_candidates == 0);

  /* Remove values from the available set if they aren't live (and so
     aren't interesting to successor blocks).  */
  if (info->rd_out)
    bitmap_and_into (m_available, info->rd_out);

  /* Record the accumulated information.  */
  info->last_call = last_call;
  info->abnormal_call_p = (last_call
                           && last_call == BB_END (bb)
                           && has_abnormal_or_eh_outgoing_edge_p (bb));
  copy_temp_bitmap (&info->available_locally, &m_available);
  if (last_call)
    copy_temp_bitmap (&info->required_after_call, &m_required);
  else
    copy_temp_bitmap (&info->required_in, &m_required);

  /* Assume at first that all live-in values are available without
     rematerialization (i.e. start with the most optimistic assumption).  */
  if (info->available_in)
    {
      if (info->rd_in)
        bitmap_copy (info->available_in, info->rd_in);
      else
        BITMAP_FREE (info->available_in);
    }

  if (last_call || empty_p (info->available_in))
    /* The values available on exit from the block are exactly those that
       are available locally.  This set doesn't change.  */
    info->available_out = info->available_locally;
  else if (empty_p (info->available_locally) && empty_p (info->rd_kill))
    /* The values available on exit are the same as those available on entry.
       Updating one updates the other.  */
    info->available_out = info->available_in;
  else
    set_available_out (info);
}

/* Process each block as for process_block, visiting dominators before
   the blocks they dominate.  */

void
early_remat::local_phase (void)
{
  if (dump_file)
    fprintf (dump_file, "\n;; Local phase:\n");

  int *postorder = df_get_postorder (DF_BACKWARD);
  unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
  for (unsigned int i = postorder_len; i-- > 0; )
    if (postorder[i] >= NUM_FIXED_BLOCKS)
      process_block (BASIC_BLOCK_FOR_FN (m_fn, postorder[i]));
}

/* Return true if available values survive across edge E.  */

static inline bool
available_across_edge_p (edge e)
{
  return (e->flags & EDGE_EH) == 0;
}

/* Propagate information from the available_out set of E->src to the
   available_in set of E->dest, when computing global availability.
   Return true if something changed.  */

bool
early_remat::avail_confluence_n (edge e)
{
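  /* This is used as a df_simple_dataflow callback, which only passes the
     edge, so the pass instance is reached through ER; compute_availability
     sets ER around the df_simple_dataflow call.  */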
  remat_block_info *src = &er->m_block_info[e->src->index];
  remat_block_info *dest = &er->m_block_info[e->dest->index];

  if (!available_across_edge_p (e))
    return false;

  if (empty_p (dest->available_in))
    return false;

  if (!src->available_out)
    {
      bitmap_clear (dest->available_in);
      return true;
    }

  return bitmap_and_into (dest->available_in, src->available_out);
}

/* Propagate information from the available_in set of block BB_INDEX
   to available_out.  Return true if something changed.  */

bool
early_remat::avail_transfer (int bb_index)
{
  remat_block_info *info = &er->m_block_info[bb_index];

  if (info->available_out == info->available_locally)
    return false;

  if (info->available_out == info->available_in)
    /* Assume that we are only called if the input changed.  */
    return true;

  return er->set_available_out (info);
}

/* Compute global availability for the function, starting with the local
   information computed by local_phase.  */

void
early_remat::compute_availability (void)
{
  /* We use df_simple_dataflow instead of the lcm routines for three reasons:

     (1) it avoids recomputing the traversal order;
     (2) many of the sets are likely to be sparse, so we don't necessarily
         want to use sbitmaps; and
     (3) it means we can avoid creating an explicit kill set for the call.  */
  er = this;
  bitmap_clear (&m_tmp_bitmap);
  bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
  df_simple_dataflow (DF_FORWARD, NULL, NULL,
                      avail_confluence_n, avail_transfer,
                      &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
                      df_get_n_blocks (DF_FORWARD));
  er = 0;

  /* Restrict the required_in sets to values that aren't available.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    {
      remat_block_info *info = &m_block_info[bb->index];
      if (info->required_in && info->available_in)
        bitmap_and_compl_into (info->required_in, info->available_in);
    }
}

/* Make sure that INFO's available_out and available_in sets are unique.  */

inline void
early_remat::unshare_available_sets (remat_block_info *info)
{
  if (info->available_in && info->available_in == info->available_out)
    {
      info->available_in = alloc_bitmap ();
      bitmap_copy (info->available_in, info->available_out);
    }
}

/* Return true if it is possible to move rematerializations from the
   destination of E to the source of E.  */

inline bool
early_remat::can_move_across_edge_p (edge e)
{
  return (available_across_edge_p (e)
          && !m_block_info[e->src->index].abnormal_call_p);
}

/* Return true if it is cheaper to rematerialize values at the head of
   block QUERY_BB_INDEX instead of rematerializing in its predecessors.  */

bool
early_remat::local_remat_cheaper_p (unsigned int query_bb_index)
{
  if (m_block_info[query_bb_index].remat_frequency_valid_p)
    return m_block_info[query_bb_index].local_remat_cheaper_p;

  /* Iteratively compute the cost of rematerializing values in the
     predecessor blocks, then compare that with the cost of
     rematerializing at the head of the block.

     A cycle indicates that there is no call on that execution path,
     so it isn't necessary to rematerialize on that path.  */
  auto_vec<basic_block, 16> stack;
  stack.quick_push (BASIC_BLOCK_FOR_FN (m_fn, query_bb_index));
  while (!stack.is_empty ())
    {
      basic_block bb = stack.last ();
      remat_block_info *info = &m_block_info[bb->index];
      if (info->remat_frequency_valid_p)
        {
          stack.pop ();
          continue;
        }

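      /* Mark the block as being on the stack.  A predecessor cycle back
         to it then contributes nothing to the frequency, matching the
         assumption that a call-free path needs no rematerialization.  */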
      info->visited_p = true;
      int frequency = 0;
      bool can_move_p = true;
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
        if (!can_move_across_edge_p (e))
          {
            can_move_p = false;
            break;
          }
        else if (m_block_info[e->src->index].last_call)
          /* We'll rematerialize after the call.  */
          frequency += e->src->count.to_frequency (m_fn);
        else if (m_block_info[e->src->index].remat_frequency_valid_p)
          /* Add the cost of rematerializing at the head of E->src
             or in its predecessors (whichever is cheaper).  */
          frequency += m_block_info[e->src->index].remat_frequency;
        else if (!m_block_info[e->src->index].visited_p)
          /* Queue E->src and then revisit this block again.  */
          stack.safe_push (e->src);

      /* Come back to this block later if we need to process some of
         its predecessors.  */
      if (stack.last () != bb)
        continue;

      /* If rematerializing in and before the block have equal cost, prefer
         rematerializing in the block.  This should shorten the live range.  */
      int bb_frequency = bb->count.to_frequency (m_fn);
      if (!can_move_p || frequency >= bb_frequency)
        {
          info->local_remat_cheaper_p = true;
          info->remat_frequency = bb_frequency;
        }
      else
        info->remat_frequency = frequency;
      info->remat_frequency_valid_p = true;
      info->visited_p = false;
      if (dump_file)
        {
          if (!can_move_p)
            fprintf (dump_file, ";; Need to rematerialize at the head of"
                     " block %d; cannot move to predecessors.\n", bb->index);
          else
            {
              fprintf (dump_file, ";; Block %d has frequency %d,"
                       " rematerializing in predecessors has frequency %d",
                       bb->index, bb_frequency, frequency);
              if (info->local_remat_cheaper_p)
                fprintf (dump_file, "; prefer to rematerialize"
                         " in the block\n");
              else
                fprintf (dump_file, "; prefer to rematerialize"
                         " in predecessors\n");
            }
        }
      stack.pop ();
    }
  return m_block_info[query_bb_index].local_remat_cheaper_p;
}

/* Return true if we cannot rematerialize candidate CAND_INDEX at the head of
   block BB_INDEX.  */

bool
early_remat::need_to_move_candidate_p (unsigned int bb_index,
                                       unsigned int cand_index)
{
  remat_block_info *info = &m_block_info[bb_index];
  remat_candidate *cand = &m_candidates[cand_index];
  basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);

  /* If there is more than one reaching definition of REGNO,
     we'll need to rematerialize in predecessors instead.  */
  bitmap_and (&m_tmp_bitmap, info->rd_in, m_regno_to_candidates[cand->regno]);
  if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
    {
      if (dump_file)
        fprintf (dump_file, ";; Cannot rematerialize %d at the"
                 " head of block %d because there is more than one"
                 " reaching definition of reg %d\n", cand_index,
                 bb_index, cand->regno);
      return true;
    }

  /* Likewise if rematerializing CAND here would clobber a live register.  */
  if (cand->clobbers
      && bitmap_intersect_p (cand->clobbers, DF_LR_IN (bb)))
    {
      if (dump_file)
        fprintf (dump_file, ";; Cannot rematerialize %d at the"
                 " head of block %d because it would clobber live"
                 " registers\n", cand_index, bb_index);
      return true;
    }

  return false;
}

/* Set REQUIRED to the minimum set of candidates that must be rematerialized
   in predecessors of block BB_INDEX instead of at the start of the block.  */

void
early_remat::compute_minimum_move_set (unsigned int bb_index,
                                       bitmap required)
{
  remat_block_info *info = &m_block_info[bb_index];
  bitmap_head remaining;

  bitmap_clear (required);
  bitmap_initialize (&remaining, &m_obstack);
  bitmap_copy (&remaining, info->required_in);
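  /* REMAINING acts as a worklist: candidates are popped one at a time,
     and the inputs of any candidate that has to move are pushed back
     so that they are considered too.  */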
  while (!bitmap_empty_p (&remaining))
    {
      unsigned int cand_index = bitmap_first_set_bit (&remaining);
      remat_candidate *cand = &m_candidates[cand_index];
      bitmap_clear_bit (&remaining, cand_index);

      /* Leave invalid candidates where they are.  */
      if (!cand->can_copy_p)
        continue;

      /* Decide whether to move this candidate.  */
      if (!bitmap_bit_p (required, cand_index))
        {
          if (!need_to_move_candidate_p (bb_index, cand_index))
            continue;
          bitmap_set_bit (required, cand_index);
        }

      /* Also move values used by the candidate, so that we don't
         rematerialize them twice.  */
      if (cand->uses)
        {
          bitmap_ior_and_into (required, cand->uses, info->required_in);
          bitmap_ior_and_into (&remaining, cand->uses, info->required_in);
        }
    }
}

/* Make the predecessors of BB_INDEX rematerialize the candidates in
   REQUIRED.  Add any blocks whose required_in set changes to
   PENDING_BLOCKS.  */

void
early_remat::move_to_predecessors (unsigned int bb_index, bitmap required,
                                   bitmap pending_blocks)
{
  if (empty_p (required))
    return;
  remat_block_info *dest_info = &m_block_info[bb_index];
  basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      remat_block_info *src_info = &m_block_info[e->src->index];

      /* Restrict the set we add to the reaching definitions.  */
      bitmap_and (&m_tmp_bitmap, required, src_info->rd_out);
      if (bitmap_empty_p (&m_tmp_bitmap))
        continue;

      if (!can_move_across_edge_p (e))
        {
          /* We can't move the rematerialization and we can't do it at
             the start of the block either.  In this case we just give up
             and rely on spilling to make the values available across E.  */
          if (dump_file)
            {
              fprintf (dump_file, ";; Cannot rematerialize the following"
                       " candidates in block %d:", e->src->index);
              dump_candidate_bitmap (required);
              fprintf (dump_file, "\n");
            }
          continue;
        }

      /* Remove candidates that are already available.  */
      if (src_info->available_out)
        {
          bitmap_and_compl_into (&m_tmp_bitmap, src_info->available_out);
          if (bitmap_empty_p (&m_tmp_bitmap))
            continue;
        }

      /* Add the remaining candidates to the appropriate required set.  */
      if (dump_file)
        {
          fprintf (dump_file, ";; Moving this set from block %d"
                   " to block %d:", bb_index, e->src->index);
          dump_candidate_bitmap (&m_tmp_bitmap);
          fprintf (dump_file, "\n");
        }
      /* If the source block contains a call, we want to rematerialize
         after the call, otherwise we want to rematerialize at the start
         of the block.  */
      bitmap src_required = get_bitmap (src_info->last_call
                                        ? &src_info->required_after_call
                                        : &src_info->required_in);
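      /* bitmap_ior_into returns true only if SRC_REQUIRED changed, and
         only a change to a predecessor's required_in set (not its
         required_after_call set) means that it needs to be reprocessed.  */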
      if (bitmap_ior_into (src_required, &m_tmp_bitmap))
        {
          if (!src_info->last_call)
            bitmap_set_bit (pending_blocks, e->src->index);
          unshare_available_sets (src_info);
          bitmap_ior_into (get_bitmap (&src_info->available_out),
                           &m_tmp_bitmap);
        }
    }

  /* The candidates are now available on entry to the block.  */
  bitmap_and_compl_into (dest_info->required_in, required);
  unshare_available_sets (dest_info);
  bitmap_ior_into (get_bitmap (&dest_info->available_in), required);
}

/* Go through the candidates that are currently marked as being
   rematerialized at the beginning of a block.  Decide in each case
   whether that's valid and profitable; if it isn't, move the
   rematerialization to predecessor blocks instead.  */

void
early_remat::choose_rematerialization_points (void)
{
  bitmap_head required;
  bitmap_head pending_blocks;

  int *postorder = df_get_postorder (DF_BACKWARD);
  unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
  bitmap_initialize (&required, &m_obstack);
  bitmap_initialize (&pending_blocks, &m_obstack);
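  /* Iterate until no block's required_in set changes; move_to_predecessors
     adds a block to PENDING_BLOCKS whenever it grows that block's
     required_in set.  */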
  do
    /* Process the blocks in postorder, to reduce the number of iterations
       of the outer loop.  */
    for (unsigned int i = 0; i < postorder_len; ++i)
      {
        unsigned int bb_index = postorder[i];
        remat_block_info *info = &m_block_info[bb_index];
        bitmap_clear_bit (&pending_blocks, bb_index);

        if (empty_p (info->required_in))
          continue;

        if (info->available_in)
          gcc_checking_assert (!bitmap_intersect_p (info->required_in,
                                                    info->available_in));

        if (local_remat_cheaper_p (bb_index))
          {
            /* We'd prefer to rematerialize at the head of the block.
               Only move candidates if we need to.  */
            compute_minimum_move_set (bb_index, &required);
            move_to_predecessors (bb_index, &required, &pending_blocks);
          }
        else
          move_to_predecessors (bb_index, info->required_in,
                                &pending_blocks);
      }
  while (!bitmap_empty_p (&pending_blocks));
  bitmap_clear (&required);
}

/* Emit all rematerialization instructions queued for BB.  */

void
early_remat::emit_remat_insns_for_block (basic_block bb)
{
  remat_block_info *info = &m_block_info[bb->index];

  if (info->last_call && !empty_p (info->required_after_call))
    {
      restrict_remat_for_call (info->required_after_call, info->last_call);
      emit_remat_insns (info->required_after_call, NULL,
                        info->rd_after_call, info->last_call);
    }

  if (!empty_p (info->required_in))
    {
      rtx_insn *insn = BB_HEAD (bb);
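      /* Skip past the block's label and initial notes, so that the
         rematerialization instructions are emitted just before the
         first real instruction in the block.  */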
      while (insn != BB_END (bb)
             && !INSN_P (NEXT_INSN (insn)))
        insn = NEXT_INSN (insn);
      restrict_remat_for_unavail_regs (info->required_in, DF_LR_IN (bb));
      emit_remat_insns (info->required_in, info->available_in,
                        info->rd_in, insn);
    }
}

/* Decide which candidates in each block's REQUIRED_IN set need to be
   rematerialized and decide where the rematerialization instructions
   should go.  Emit queued rematerialization instructions at the start
   of blocks and after the last calls in blocks.  */

void
early_remat::global_phase (void)
{
  compute_availability ();
  if (dump_file)
    {
      fprintf (dump_file, "\n;; Blocks after computing global"
               " availability:\n");
      dump_all_blocks ();
    }

  choose_rematerialization_points ();
  if (dump_file)
    {
      fprintf (dump_file, "\n;; Blocks after choosing rematerialization"
               " points:\n");
      dump_all_blocks ();
    }

  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    emit_remat_insns_for_block (bb);
}

/* Main function for the pass.  */

void
early_remat::run (void)
{
  df_analyze ();

  if (!collect_candidates ())
    return;

  init_block_info ();
  sort_candidates ();
  finalize_candidate_indices ();
  if (dump_file)
    dump_all_candidates ();

  compute_rd ();
  decide_candidate_validity ();
  local_phase ();
  global_phase ();
}

early_remat::early_remat (function *fn, sbitmap selected_modes)
  : m_fn (fn),
    m_selected_modes (selected_modes),
    m_available (0),
    m_required (0),
    m_value_table (63)
{
  bitmap_obstack_initialize (&m_obstack);
  bitmap_initialize (&m_candidate_regnos, &m_obstack);
  bitmap_initialize (&m_tmp_bitmap, &m_obstack);
}

early_remat::~early_remat ()
{
  bitmap_obstack_release (&m_obstack);
}

namespace {

const pass_data pass_data_early_remat =
{
  RTL_PASS, /* type */
  "early_remat", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_EARLY_REMAT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_early_remat : public rtl_opt_pass
{
public:
  pass_early_remat (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_early_remat, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
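    /* NUM_POLY_INT_COEFFS > 1 indicates a target with variable-length
       modes, which is where this pass is expected to pay off.  */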
    return optimize > 1 && NUM_POLY_INT_COEFFS > 1;
  }

  virtual unsigned int execute (function *f)
  {
    auto_sbitmap selected_modes (NUM_MACHINE_MODES);
    bitmap_clear (selected_modes);
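    /* Ask the target which modes should be considered for rematerialization;
       skip the pass entirely if it selects none.  */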
    targetm.select_early_remat_modes (selected_modes);
    if (!bitmap_empty_p (selected_modes))
      early_remat (f, selected_modes).run ();
    return 0;
  }
}; // class pass_early_remat

} // anon namespace

rtl_opt_pass *
make_pass_early_remat (gcc::context *ctxt)
{
  return new pass_early_remat (ctxt);
}