1/* Decompose multiword subregs.
2   Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
3   Contributed by Richard Henderson <rth@redhat.com>
4		  Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "machmode.h"
26#include "tm.h"
27#include "rtl.h"
28#include "tm_p.h"
29#include "timevar.h"
30#include "flags.h"
31#include "insn-config.h"
32#include "obstack.h"
33#include "basic-block.h"
34#include "recog.h"
35#include "bitmap.h"
36#include "expr.h"
37#include "except.h"
38#include "regs.h"
39#include "tree-pass.h"
40#include "df.h"
41
/* Normalize STACK_GROWS_DOWNWARD so that it is always defined: 1 when
   the target defines it, 0 otherwise.  This allows it to be used as a
   value in an ordinary C expression (resolve_simple_move compares it
   against WORDS_BIG_ENDIAN) instead of only in #ifdef tests.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif
48
/* Instantiate the heap-allocated vector-of-bitmap type used for
   reg_copy_graph below.  */
DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);

/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible.  This is possible when all the uses
   of a multi-word register are via SUBREG, or are copies of the
   register to another location.  Breaking apart the register permits
   more CSE and permits better register allocation.  */

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  can_decompose_p consults this
   bitmap for pseudo-registers.  */
static bitmap non_decomposable_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  Elements are allocated lazily by
   find_pseudo_copy; an element is NULL until the first copy out of
   the corresponding register has been seen.  */
static VEC(bitmap,heap) *reg_copy_graph;
69
70/* Return whether X is a simple object which we can take a word_mode
71   subreg of.  */
72
73static bool
74simple_move_operand (rtx x)
75{
76  if (GET_CODE (x) == SUBREG)
77    x = SUBREG_REG (x);
78
79  if (!OBJECT_P (x))
80    return false;
81
82  if (GET_CODE (x) == LABEL_REF
83      || GET_CODE (x) == SYMBOL_REF
84      || GET_CODE (x) == HIGH
85      || GET_CODE (x) == CONST)
86    return false;
87
88  if (MEM_P (x)
89      && (MEM_VOLATILE_P (x)
90	  || mode_dependent_address_p (XEXP (x, 0))))
91    return false;
92
93  return true;
94}
95
96/* If INSN is a single set between two objects, return the single set.
97   Such an insn can always be decomposed.  INSN should have been
98   passed to recog and extract_insn before this is called.  */
99
100static rtx
101simple_move (rtx insn)
102{
103  rtx x;
104  rtx set;
105  enum machine_mode mode;
106
107  if (recog_data.n_operands != 2)
108    return NULL_RTX;
109
110  set = single_set (insn);
111  if (!set)
112    return NULL_RTX;
113
114  x = SET_DEST (set);
115  if (x != recog_data.operand[0] && x != recog_data.operand[1])
116    return NULL_RTX;
117  if (!simple_move_operand (x))
118    return NULL_RTX;
119
120  x = SET_SRC (set);
121  if (x != recog_data.operand[0] && x != recog_data.operand[1])
122    return NULL_RTX;
123  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
124     things like x86 rdtsc which returns a DImode value.  */
125  if (GET_CODE (x) != ASM_OPERANDS
126      && !simple_move_operand (x))
127    return NULL_RTX;
128
129  /* We try to decompose in integer modes, to avoid generating
130     inefficient code copying between integer and floating point
131     registers.  That means that we can't decompose if this is a
132     non-integer mode for which there is no integer mode of the same
133     size.  */
134  mode = GET_MODE (SET_SRC (set));
135  if (!SCALAR_INT_MODE_P (mode)
136      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
137	  == BLKmode))
138    return NULL_RTX;
139
140  /* Reject PARTIAL_INT modes.  They are used for processor specific
141     purposes and it's probably best not to tamper with them.  */
142  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
143    return NULL_RTX;
144
145  return set;
146}
147
148/* If SET is a copy from one multi-word pseudo-register to another,
149   record that in reg_copy_graph.  Return whether it is such a
150   copy.  */
151
152static bool
153find_pseudo_copy (rtx set)
154{
155  rtx dest = SET_DEST (set);
156  rtx src = SET_SRC (set);
157  unsigned int rd, rs;
158  bitmap b;
159
160  if (!REG_P (dest) || !REG_P (src))
161    return false;
162
163  rd = REGNO (dest);
164  rs = REGNO (src);
165  if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
166    return false;
167
168  if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
169    return false;
170
171  b = VEC_index (bitmap, reg_copy_graph, rs);
172  if (b == NULL)
173    {
174      b = BITMAP_ALLOC (NULL);
175      VEC_replace (bitmap, reg_copy_graph, rs, b);
176    }
177
178  bitmap_set_bit (b, rd);
179
180  return true;
181}
182
183/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
184   where they are copied to another register, add the register to
185   which they are copied to DECOMPOSABLE_CONTEXT.  Use
186   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
187   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
188
189static void
190propagate_pseudo_copies (void)
191{
192  bitmap queue, propagate;
193
194  queue = BITMAP_ALLOC (NULL);
195  propagate = BITMAP_ALLOC (NULL);
196
197  bitmap_copy (queue, decomposable_context);
198  do
199    {
200      bitmap_iterator iter;
201      unsigned int i;
202
203      bitmap_clear (propagate);
204
205      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
206	{
207	  bitmap b = VEC_index (bitmap, reg_copy_graph, i);
208	  if (b)
209	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
210	}
211
212      bitmap_and_compl (queue, propagate, decomposable_context);
213      bitmap_ior_into (decomposable_context, propagate);
214    }
215  while (!bitmap_empty_p (queue));
216
217  BITMAP_FREE (queue);
218  BITMAP_FREE (propagate);
219}
220
/* Classification of the insn currently being scanned.  A pointer to
   one of these values is passed to find_decomposable_subregs via
   for_each_rtx, which uses it to decide how a direct reference to a
   multi-word pseudo-register should be recorded.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move from one pseudo-register to another.  */
  SIMPLE_PSEUDO_REG_MOVE,
  /* A simple move involving a non-pseudo-register.  */
  SIMPLE_MOVE
};
233
234/* This is called via for_each_rtx.  If we find a SUBREG which we
235   could use to decompose a pseudo-register, set a bit in
236   DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
237   not a simple pseudo-register copy, DATA will point at the type of
238   move, and we set a bit in DECOMPOSABLE_CONTEXT or
239   NON_DECOMPOSABLE_CONTEXT as appropriate.  */
240
241static int
242find_decomposable_subregs (rtx *px, void *data)
243{
244  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
245  rtx x = *px;
246
247  if (x == NULL_RTX)
248    return 0;
249
250  if (GET_CODE (x) == SUBREG)
251    {
252      rtx inner = SUBREG_REG (x);
253      unsigned int regno, outer_size, inner_size, outer_words, inner_words;
254
255      if (!REG_P (inner))
256	return 0;
257
258      regno = REGNO (inner);
259      if (HARD_REGISTER_NUM_P (regno))
260	return -1;
261
262      outer_size = GET_MODE_SIZE (GET_MODE (x));
263      inner_size = GET_MODE_SIZE (GET_MODE (inner));
264      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
265      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
266
267      /* We only try to decompose single word subregs of multi-word
268	 registers.  When we find one, we return -1 to avoid iterating
269	 over the inner register.
270
271	 ??? This doesn't allow, e.g., DImode subregs of TImode values
272	 on 32-bit targets.  We would need to record the way the
273	 pseudo-register was used, and only decompose if all the uses
274	 were the same number and size of pieces.  Hopefully this
275	 doesn't happen much.  */
276
277      if (outer_words == 1 && inner_words > 1)
278	{
279	  bitmap_set_bit (decomposable_context, regno);
280	  return -1;
281	}
282
283      /* If this is a cast from one mode to another, where the modes
284	 have the same size, and they are not tieable, then mark this
285	 register as non-decomposable.  If we decompose it we are
286	 likely to mess up whatever the backend is trying to do.  */
287      if (outer_words > 1
288	  && outer_size == inner_size
289	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
290	{
291	  bitmap_set_bit (non_decomposable_context, regno);
292	  return -1;
293	}
294    }
295  else if (REG_P (x))
296    {
297      unsigned int regno;
298
299      /* We will see an outer SUBREG before we see the inner REG, so
300	 when we see a plain REG here it means a direct reference to
301	 the register.
302
303	 If this is not a simple copy from one location to another,
304	 then we can not decompose this register.  If this is a simple
305	 copy from one pseudo-register to another, and the mode is right
306	 then we mark the register as decomposable.
307	 Otherwise we don't say anything about this register --
308	 it could be decomposed, but whether that would be
309	 profitable depends upon how it is used elsewhere.
310
311	 We only set bits in the bitmap for multi-word
312	 pseudo-registers, since those are the only ones we care about
313	 and it keeps the size of the bitmaps down.  */
314
315      regno = REGNO (x);
316      if (!HARD_REGISTER_NUM_P (regno)
317	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
318	{
319	  switch (*pcmi)
320	    {
321	    case NOT_SIMPLE_MOVE:
322	      bitmap_set_bit (non_decomposable_context, regno);
323	      break;
324	    case SIMPLE_PSEUDO_REG_MOVE:
325	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
326		bitmap_set_bit (decomposable_context, regno);
327	      break;
328	    case SIMPLE_MOVE:
329	      break;
330	    default:
331	      gcc_unreachable ();
332	    }
333	}
334    }
335  else if (MEM_P (x))
336    {
337      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
338
339      /* Any registers used in a MEM do not participate in a
340	 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE.  Do our own recursion
341	 here, and return -1 to block the parent's recursion.  */
342      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
343      return -1;
344    }
345
346  return 0;
347}
348
349/* Decompose REGNO into word-sized components.  We smash the REG node
350   in place.  This ensures that (1) something goes wrong quickly if we
351   fail to make some replacement, and (2) the debug information inside
352   the symbol table is automatically kept up to date.  */
353
354static void
355decompose_register (unsigned int regno)
356{
357  rtx reg;
358  unsigned int words, i;
359  rtvec v;
360
361  reg = regno_reg_rtx[regno];
362
363  regno_reg_rtx[regno] = NULL_RTX;
364
365  words = GET_MODE_SIZE (GET_MODE (reg));
366  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
367
368  v = rtvec_alloc (words);
369  for (i = 0; i < words; ++i)
370    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
371
372  PUT_CODE (reg, CONCATN);
373  XVEC (reg, 0) = v;
374
375  if (dump_file)
376    {
377      fprintf (dump_file, "; Splitting reg %u ->", regno);
378      for (i = 0; i < words; ++i)
379	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
380      fputc ('\n', dump_file);
381    }
382}
383
384/* Get a SUBREG of a CONCATN.  */
385
386static rtx
387simplify_subreg_concatn (enum machine_mode outermode, rtx op,
388			 unsigned int byte)
389{
390  unsigned int inner_size;
391  enum machine_mode innermode;
392  rtx part;
393  unsigned int final_offset;
394
395  gcc_assert (GET_CODE (op) == CONCATN);
396  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
397
398  innermode = GET_MODE (op);
399  gcc_assert (byte < GET_MODE_SIZE (innermode));
400  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
401
402  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
403  part = XVECEXP (op, 0, byte / inner_size);
404  final_offset = byte % inner_size;
405  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
406    return NULL_RTX;
407
408  return simplify_gen_subreg (outermode, part, GET_MODE (part), final_offset);
409}
410
411/* Wrapper around simplify_gen_subreg which handles CONCATN.  */
412
413static rtx
414simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
415			     enum machine_mode innermode, unsigned int byte)
416{
417  rtx ret;
418
419  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
420     If OP is a SUBREG of a CONCATN, then it must be a simple mode
421     change with the same size and offset 0, or it must extract a
422     part.  We shouldn't see anything else here.  */
423  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
424    {
425      rtx op2;
426
427      if ((GET_MODE_SIZE (GET_MODE (op))
428	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
429	  && SUBREG_BYTE (op) == 0)
430	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
431					    GET_MODE (SUBREG_REG (op)), byte);
432
433      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
434				     SUBREG_BYTE (op));
435      if (op2 == NULL_RTX)
436	{
437	  /* We don't handle paradoxical subregs here.  */
438	  gcc_assert (GET_MODE_SIZE (outermode)
439		      <= GET_MODE_SIZE (GET_MODE (op)));
440	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
441		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
442	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
443					 byte + SUBREG_BYTE (op));
444	  gcc_assert (op2 != NULL_RTX);
445	  return op2;
446	}
447
448      op = op2;
449      gcc_assert (op != NULL_RTX);
450      gcc_assert (innermode == GET_MODE (op));
451    }
452
453  if (GET_CODE (op) == CONCATN)
454    return simplify_subreg_concatn (outermode, op, byte);
455
456  ret = simplify_gen_subreg (outermode, op, innermode, byte);
457
458  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
459     resolve_simple_move will ask for the high part of the paradoxical
460     subreg, which does not have a value.  Just return a zero.  */
461  if (ret == NULL_RTX
462      && GET_CODE (op) == SUBREG
463      && SUBREG_BYTE (op) == 0
464      && (GET_MODE_SIZE (innermode)
465	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
466    return CONST0_RTX (outermode);
467
468  gcc_assert (ret != NULL_RTX);
469  return ret;
470}
471
472/* Return whether we should resolve X into the registers into which it
473   was decomposed.  */
474
475static bool
476resolve_reg_p (rtx x)
477{
478  return GET_CODE (x) == CONCATN;
479}
480
481/* Return whether X is a SUBREG of a register which we need to
482   resolve.  */
483
484static bool
485resolve_subreg_p (rtx x)
486{
487  if (GET_CODE (x) != SUBREG)
488    return false;
489  return resolve_reg_p (SUBREG_REG (x));
490}
491
492/* This is called via for_each_rtx.  Look for SUBREGs which need to be
493   decomposed.  */
494
495static int
496resolve_subreg_use (rtx *px, void *data)
497{
498  rtx insn = (rtx) data;
499  rtx x = *px;
500
501  if (x == NULL_RTX)
502    return 0;
503
504  if (resolve_subreg_p (x))
505    {
506      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
507				   SUBREG_BYTE (x));
508
509      /* It is possible for a note to contain a reference which we can
510	 decompose.  In this case, return 1 to the caller to indicate
511	 that the note must be removed.  */
512      if (!x)
513	{
514	  gcc_assert (!insn);
515	  return 1;
516	}
517
518      validate_change (insn, px, x, 1);
519      return -1;
520    }
521
522  if (resolve_reg_p (x))
523    {
524      /* Return 1 to the caller to indicate that we found a direct
525	 reference to a register which is being decomposed.  This can
526	 happen inside notes, multiword shift or zero-extend
527	 instructions.  */
528      return 1;
529    }
530
531  return 0;
532}
533
534/* This is called via for_each_rtx.  Look for SUBREGs which can be
535   decomposed and decomposed REGs that need copying.  */
536
537static int
538adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
539{
540  rtx x = *px;
541
542  if (x == NULL_RTX)
543    return 0;
544
545  if (resolve_subreg_p (x))
546    {
547      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
548				   SUBREG_BYTE (x));
549
550      if (x)
551	*px = x;
552      else
553	x = copy_rtx (*px);
554    }
555
556  if (resolve_reg_p (x))
557    *px = copy_rtx (x);
558
559  return 0;
560}
561
562/* Resolve any decomposed registers which appear in register notes on
563   INSN.  */
564
565static void
566resolve_reg_notes (rtx insn)
567{
568  rtx *pnote, note;
569
570  note = find_reg_equal_equiv_note (insn);
571  if (note)
572    {
573      int old_count = num_validated_changes ();
574      if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
575	remove_note (insn, note);
576      else
577	if (old_count != num_validated_changes ())
578	  df_notes_rescan (insn);
579    }
580
581  pnote = &REG_NOTES (insn);
582  while (*pnote != NULL_RTX)
583    {
584      bool del = false;
585
586      note = *pnote;
587      switch (REG_NOTE_KIND (note))
588	{
589	case REG_DEAD:
590	case REG_UNUSED:
591	  if (resolve_reg_p (XEXP (note, 0)))
592	    del = true;
593	  break;
594
595	default:
596	  break;
597	}
598
599      if (del)
600	*pnote = XEXP (note, 1);
601      else
602	pnote = &XEXP (note, 1);
603    }
604}
605
606/* Return whether X can be decomposed into subwords.  */
607
608static bool
609can_decompose_p (rtx x)
610{
611  if (REG_P (x))
612    {
613      unsigned int regno = REGNO (x);
614
615      if (HARD_REGISTER_NUM_P (regno))
616	return (validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD)
617		&& HARD_REGNO_MODE_OK (regno, word_mode));
618      else
619	return !bitmap_bit_p (non_decomposable_context, regno);
620    }
621
622  return true;
623}
624
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.

   SET is the single set of INSN.  When a change is made, the
   replacement word_mode moves are built in a separate sequence,
   emitted before INSN, and INSN is deleted.  The function calls
   itself on helper moves that it emits through temporaries.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* A move of at most one word has nothing to decompose.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  if (words <= 1)
    return insn;

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  /* REAL_DEST, when non-null, is the destination the caller asked
     for; DEST is then a temporary which is copied to REAL_DEST at the
     end.  */
  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      /* Move SRC into a fresh register first and resolve that helper
	 move recursively; the fresh register then becomes SRC.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      /* Commit the changes queued by resolve_subreg_use.  */
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Use the integer mode of the same size for the temporary.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Pick the order in which the words are pushed: ascending word
	 index when WORDS_BIG_ENDIAN equals STACK_GROWS_DOWNWARD,
	 descending otherwise.  JINC is unsigned, so the -1 below
	 steps J downward through unsigned wraparound.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* A pseudo-register destination is clobbered as a whole before
	 its words are stored one at a time.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If a temporary destination was used, copy it to the real
     destination now, viewing it in the original mode if the
     temporary was created in a different mode.  The copy is itself
     resolved recursively.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  /* Carry INSN's EH region note over to the new insns, insert them
     before INSN and delete INSN.  */
  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}
834
835/* Change a CLOBBER of a decomposed register into a CLOBBER of the
836   component registers.  Return whether we changed something.  */
837
838static bool
839resolve_clobber (rtx pat, rtx insn)
840{
841  rtx reg;
842  enum machine_mode orig_mode;
843  unsigned int words, i;
844  int ret;
845
846  reg = XEXP (pat, 0);
847  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
848    return false;
849
850  orig_mode = GET_MODE (reg);
851  words = GET_MODE_SIZE (orig_mode);
852  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
853
854  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
855			 simplify_gen_subreg_concatn (word_mode, reg,
856						      orig_mode, 0),
857			 0);
858  df_insn_rescan (insn);
859  gcc_assert (ret != 0);
860
861  for (i = words - 1; i > 0; --i)
862    {
863      rtx x;
864
865      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
866				       i * UNITS_PER_WORD);
867      x = gen_rtx_CLOBBER (VOIDmode, x);
868      emit_insn_after (x, insn);
869    }
870
871  resolve_reg_notes (insn);
872
873  return true;
874}
875
876/* A USE of a decomposed register is no longer meaningful.  Return
877   whether we changed something.  */
878
879static bool
880resolve_use (rtx pat, rtx insn)
881{
882  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
883    {
884      delete_insn (insn);
885      return true;
886    }
887
888  resolve_reg_notes (insn);
889
890  return false;
891}
892
893/* A VAR_LOCATION can be simplified.  */
894
895static void
896resolve_debug (rtx insn)
897{
898  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
899
900  df_insn_rescan (insn);
901
902  resolve_reg_notes (insn);
903}
904
905/* Checks if INSN is a decomposable multiword-shift or zero-extend and
906   sets the decomposable_context bitmap accordingly.  A non-zero value
907   is returned if a decomposable insn has been found.  */
908
909static int
910find_decomposable_shift_zext (rtx insn)
911{
912  rtx set;
913  rtx op;
914  rtx op_operand;
915
916  set = single_set (insn);
917  if (!set)
918    return 0;
919
920  op = SET_SRC (set);
921  if (GET_CODE (op) != ASHIFT
922      && GET_CODE (op) != LSHIFTRT
923      && GET_CODE (op) != ZERO_EXTEND)
924    return 0;
925
926  op_operand = XEXP (op, 0);
927  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
928      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
929      || HARD_REGISTER_NUM_P (REGNO (op_operand))
930      || !SCALAR_INT_MODE_P (GET_MODE (op)))
931    return 0;
932
933  if (GET_CODE (op) == ZERO_EXTEND)
934    {
935      if (GET_MODE (op_operand) != word_mode
936	  || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
937	return 0;
938    }
939  else /* left or right shift */
940    {
941      if (!CONST_INT_P (XEXP (op, 1))
942	  || INTVAL (XEXP (op, 1)) < BITS_PER_WORD
943	  || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
944	return 0;
945    }
946
947  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
948
949  if (GET_CODE (op) != ZERO_EXTEND)
950    bitmap_set_bit (decomposable_context, REGNO (op_operand));
951
952  return 1;
953}
954
955/* Decompose a more than word wide shift (in INSN) of a multiword
956   pseudo or a multiword zero-extend of a wordmode pseudo into a move
957   and 'set to zero' insn.  Return a pointer to the new insn when a
958   replacement was done.  */
959
960static rtx
961resolve_shift_zext (rtx insn)
962{
963  rtx set;
964  rtx op;
965  rtx op_operand;
966  rtx insns;
967  rtx src_reg, dest_reg, dest_zero;
968  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
969
970  set = single_set (insn);
971  if (!set)
972    return NULL_RTX;
973
974  op = SET_SRC (set);
975  if (GET_CODE (op) != ASHIFT
976      && GET_CODE (op) != LSHIFTRT
977      && GET_CODE (op) != ZERO_EXTEND)
978    return NULL_RTX;
979
980  op_operand = XEXP (op, 0);
981
982  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
983    return NULL_RTX;
984
985  /* src_reg_num is the number of the word mode register which we
986     are operating on.  For a left shift and a zero_extend on little
987     endian machines this is register 0.  */
988  src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
989
990  if (WORDS_BIG_ENDIAN
991      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
992    src_reg_num = 1 - src_reg_num;
993
994  if (GET_CODE (op) == ZERO_EXTEND)
995    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
996  else
997    dest_reg_num = 1 - src_reg_num;
998
999  offset1 = UNITS_PER_WORD * dest_reg_num;
1000  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1001  src_offset = UNITS_PER_WORD * src_reg_num;
1002
1003  if (WORDS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1004    {
1005      offset1 += UNITS_PER_WORD - 1;
1006      offset2 += UNITS_PER_WORD - 1;
1007      src_offset += UNITS_PER_WORD - 1;
1008    }
1009
1010  start_sequence ();
1011
1012  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1013                                          GET_MODE (SET_DEST (set)),
1014                                          offset1);
1015  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1016                                           GET_MODE (SET_DEST (set)),
1017                                           offset2);
1018  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1019                                         GET_MODE (op_operand),
1020                                         src_offset);
1021  if (GET_CODE (op) != ZERO_EXTEND)
1022    {
1023      int shift_count = INTVAL (XEXP (op, 1));
1024      if (shift_count > BITS_PER_WORD)
1025	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1026				LSHIFT_EXPR : RSHIFT_EXPR,
1027				word_mode, src_reg,
1028				build_int_cst (NULL_TREE,
1029					       shift_count - BITS_PER_WORD),
1030				dest_reg, 1);
1031    }
1032
1033  if (dest_reg != src_reg)
1034    emit_move_insn (dest_reg, src_reg);
1035  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
1036  insns = get_insns ();
1037
1038  end_sequence ();
1039
1040  emit_insn_before (insns, insn);
1041
1042  if (dump_file)
1043    {
1044      rtx in;
1045      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1046      for (in = insns; in != insn; in = NEXT_INSN (in))
1047	fprintf (dump_file, "%d ", INSN_UID (in));
1048      fprintf (dump_file, "\n");
1049    }
1050
1051  delete_insn (insn);
1052  return insns;
1053}
1054
/* Look for registers which are always accessed via word-sized SUBREGs
   or via copies.  Decompose these registers into several word-sized
   pseudo-registers.

   The function works in phases:

   1. Return early if no pseudo-register is wider than a word.
   2. Walk every insn of every basic block, classifying it and handing
      its operands to the find_* helpers.
   3. Remove NON_DECOMPOSABLE_CONTEXT from DECOMPOSABLE_CONTEXT; if the
      result is non-empty, call decompose_register on every remaining
      register and walk all insns again, rewriting them with the
      resolve_* helpers.
   4. Split every basic block recorded in SUB_BLOCKS at each control
      flow insn that now sits before the end of the block.
   5. Free the copy graph and both context bitmaps.

   Takes no arguments and returns nothing; all results are side
   effects on the insn stream and the CFG.  */

static void
decompose_multiword_subregs (void)
{
  unsigned int max;
  basic_block bb;

  /* When DF is live, set DF_DEFER_INSN_RESCAN before any insn is
     touched.  Note that this happens even if we return early below.  */
  if (df)
    df_set_flags (DF_DEFER_INSN_RESCAN);

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      {
	if (regno_reg_rtx[i] != NULL
	    && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD)
	  break;
      }
    /* The loop ran to completion, so no pseudo is wider than a word.  */
    if (i == max)
      return;
  }

  /* FIXME: When the dataflow branch is merged, we can change this
     code to look for each multi-word pseudo-register and to find each
     insn which sets or uses that register.  That should be faster
     than scanning all the insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);

  /* Give the copy graph one slot per register number below MAX and
     zero every slot; slots that are still zero at the end are not
     freed.  */
  reg_copy_graph = VEC_alloc (bitmap, heap, max);
  VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
  memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);

  /* Analysis walk: no insn is modified by this function in this
     loop.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  /* Non-insns and bare CLOBBER/USE patterns are not analyzed.  */
	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  /* When find_decomposable_shift_zext returns nonzero the insn
	     needs no further analysis here.  */
	  if (find_decomposable_shift_zext (insn))
	    continue;

	  /* Refresh recog_data for INSN; its operands are read below.  */
	  recog_memoized (insn);
	  extract_insn (insn);

	  set = simple_move (insn);

	  /* Classify the insn.  The classification is passed to
	     find_decomposable_subregs for every operand.  */
	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      if (find_pseudo_copy (set))
		cmi = SIMPLE_PSEUDO_REG_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      /* CMI can only still be SIMPLE_MOVE here on the first
		 iteration, because it is downgraded immediately below;
		 the remaining operands are then scanned as
		 NOT_SIMPLE_MOVE.  */
	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Keep only registers that were never seen in a non-decomposable
     context.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* SUB_BLOCKS records, by block index, the basic blocks that
	 must be split after the rewriting walk.  */
      sub_blocks = sbitmap_alloc (last_basic_block);
      sbitmap_zero (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Rewriting walk: INSN may be replaced inside the loop body, in
	 which case the iteration continues from the replacement.  */
      FOR_EACH_BB (bb)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  /* This signed I shadows the unsigned I declared at the
		     top of the enclosing block.  */
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn);
		  if (set)
		    {
		      /* Remember the insn and whether it was a control
			 flow insn before resolve_simple_move possibly
			 replaces it.  */
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (flag_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  /* A different insn came back, so recog_data
			     must describe it before the operand walk
			     below.  */
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    SET_BIT (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      /* Not a simple move: try the shift/zero-extend
			 rewrite.  A non-null result becomes the current
			 insn and is re-extracted.  */
		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Visit the operand locations of the (possibly new)
		     insn, from the last operand to the first.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  /* If changes were queued, also queue a change for
		     every dup location so that it holds the current
		     contents of its operand's location, then apply the
		     whole group.  A failed apply trips the assert.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  /* END is the last insn of the block as it is now; it is not
	     recomputed while the block is split, and the loop stops
	     on reaching it without examining it.  */
	  bb = BASIC_BLOCK (i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  /* Continue scanning in the newly created block.  */
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Free every bitmap still present in the copy graph; zero slots
     are skipped.  */
  {
    unsigned int i;
    bitmap b;

    for (i = 0; VEC_iterate (bitmap, reg_copy_graph, i, b); ++i)
      if (b)
	BITMAP_FREE (b);
  }

  VEC_free (bitmap, heap, reg_copy_graph);

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
}
1313
1314/* Gate function for lower subreg pass.  */
1315
1316static bool
1317gate_handle_lower_subreg (void)
1318{
1319  return flag_split_wide_types != 0;
1320}
1321
/* Execute hook of the first lower subreg pass.  Runs the
   decomposition and always returns zero.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  const unsigned int result = 0;

  decompose_multiword_subregs ();

  return result;
}
1330
/* Execute hook of the second lower subreg pass.  Runs the same
   decomposition as the first pass and always returns zero.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  const unsigned int result = 0;

  decompose_multiword_subregs ();

  return result;
}
1339
1340struct rtl_opt_pass pass_lower_subreg =
1341{
1342 {
1343  RTL_PASS,
1344  "subreg1",	                        /* name */
1345  gate_handle_lower_subreg,             /* gate */
1346  rest_of_handle_lower_subreg,          /* execute */
1347  NULL,                                 /* sub */
1348  NULL,                                 /* next */
1349  0,                                    /* static_pass_number */
1350  TV_LOWER_SUBREG,                      /* tv_id */
1351  0,                                    /* properties_required */
1352  0,                                    /* properties_provided */
1353  0,                                    /* properties_destroyed */
1354  0,                                    /* todo_flags_start */
1355  TODO_dump_func |
1356  TODO_ggc_collect |
1357  TODO_verify_flow                      /* todo_flags_finish */
1358 }
1359};
1360
1361struct rtl_opt_pass pass_lower_subreg2 =
1362{
1363 {
1364  RTL_PASS,
1365  "subreg2",	                        /* name */
1366  gate_handle_lower_subreg,             /* gate */
1367  rest_of_handle_lower_subreg2,          /* execute */
1368  NULL,                                 /* sub */
1369  NULL,                                 /* next */
1370  0,                                    /* static_pass_number */
1371  TV_LOWER_SUBREG,                      /* tv_id */
1372  0,                                    /* properties_required */
1373  0,                                    /* properties_provided */
1374  0,                                    /* properties_destroyed */
1375  0,                                    /* todo_flags_start */
1376  TODO_df_finish | TODO_verify_rtl_sharing |
1377  TODO_dump_func |
1378  TODO_ggc_collect |
1379  TODO_verify_flow                      /* todo_flags_finish */
1380 }
1381};
1382