/* sparc.c revision 90075 */
/* Subroutines for insn-output.c for Sun SPARC.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
24
25#include "config.h"
26#include "system.h"
27#include "tree.h"
28#include "rtl.h"
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
34#include "output.h"
35#include "insn-attr.h"
36#include "flags.h"
37#include "function.h"
38#include "expr.h"
39#include "optabs.h"
40#include "libfuncs.h"
41#include "recog.h"
42#include "toplev.h"
43#include "ggc.h"
44#include "tm_p.h"
45#include "debug.h"
46#include "target.h"
47#include "target-def.h"
48
/* 1 if the caller has placed an "unimp" insn immediately after the call.
   This is used in v8 code when calling a function that returns a structure.
   v9 doesn't have this.  Be careful to have this test be the same as that
   used on the call.
   The size test guards against variable-sized (non-INTEGER_CST) and
   zero-sized returned structures, for which no "unimp" is emitted.  */

#define SKIP_CALLERS_UNIMP_P  \
(!TARGET_ARCH64 && current_function_returns_struct			\
 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
     == INTEGER_CST))
59
/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used in insn
   scheduling (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static int apparent_fsize;
static int actual_fsize;

/* Number of live general or floating point registers needed to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE.  */
static int num_gfregs;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx sparc_compare_op0, sparc_compare_op1;

/* We may need an epilogue if we spill too many registers.
   If this is non-zero, then we branch here for the epilogue.  */
static rtx leaf_label;
83
#ifdef LEAF_REGISTERS

/* Vector to say how input registers are mapped to output
   registers.  FRAME_POINTER_REGNUM cannot be remapped by
   this function to eliminate it.  You must use -fomit-frame-pointer
   to get that.
   -1 entries mark registers that may not be used in a leaf function.
   NOTE(review): the -1 sentinels are stored in plain `char', whose
   signedness is implementation-defined; consumers that test `< 0'
   rely on char being signed here -- TODO confirm on all hosts.  */
const char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1};

#endif
125
/* Name of where we pretend to think the frame pointer points.
   Normally, this is "%fp", but if we are in a leaf procedure,
   this is "%sp+something".  We record "something" separately as it may be
   too big for reg+constant addressing.  */

static const char *frame_base_name;
static int frame_base_offset;
133
134static void sparc_init_modes	PARAMS ((void));
135static int save_regs		PARAMS ((FILE *, int, int, const char *,
136				       int, int, int));
137static int restore_regs		PARAMS ((FILE *, int, int, const char *, int, int));
138static void build_big_number	PARAMS ((FILE *, int, const char *));
139static int function_arg_slotno	PARAMS ((const CUMULATIVE_ARGS *,
140				       enum machine_mode, tree, int, int,
141				       int *, int *));
142
143static int supersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
144static int hypersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
145static int ultrasparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
146
147static void sparc_output_addr_vec PARAMS ((rtx));
148static void sparc_output_addr_diff_vec PARAMS ((rtx));
149static void sparc_output_deferred_case_vectors PARAMS ((void));
150static void sparc_add_gc_roots    PARAMS ((void));
151static void mark_ultrasparc_pipeline_state PARAMS ((void *));
152static int check_return_regs PARAMS ((rtx));
153static int epilogue_renumber PARAMS ((rtx *, int));
154static bool sparc_assemble_integer PARAMS ((rtx, unsigned int, int));
155static int ultra_cmove_results_ready_p PARAMS ((rtx));
156static int ultra_fpmode_conflict_exists PARAMS ((enum machine_mode));
157static rtx *ultra_find_type PARAMS ((int, rtx *, int));
158static void ultra_build_types_avail PARAMS ((rtx *, int));
159static void ultra_flush_pipeline PARAMS ((void));
160static void ultra_rescan_pipeline_state PARAMS ((rtx *, int));
161static int set_extends PARAMS ((rtx));
162static void output_restore_regs PARAMS ((FILE *, int));
163static void sparc_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
164static void sparc_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
165static void sparc_flat_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
166static void sparc_flat_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
167static void sparc_nonflat_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT,
168						     int));
169static void sparc_nonflat_function_prologue PARAMS ((FILE *, HOST_WIDE_INT,
170						     int));
171#ifdef OBJECT_FORMAT_ELF
172static void sparc_elf_asm_named_section PARAMS ((const char *, unsigned int));
173#endif
174static void ultrasparc_sched_reorder PARAMS ((FILE *, int, rtx *, int));
175static int ultrasparc_variable_issue PARAMS ((rtx));
176static void ultrasparc_sched_init PARAMS ((void));
177
178static int sparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
179static int sparc_issue_rate PARAMS ((void));
180static int sparc_variable_issue PARAMS ((FILE *, int, rtx, int));
181static void sparc_sched_init PARAMS ((FILE *, int, int));
182static int sparc_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
183
/* Option handling.  */

/* Code model option as passed by user.  */
const char *sparc_cmodel_string;
/* Parsed value.  */
enum cmodel sparc_cmodel;

/* Tracks which of the global %g registers have already had their
   names printed, so each is emitted only once.  */
char sparc_hard_reg_printed[8];

/* Selection table walked by sparc_override_options: a row for the
   built-in default plus one per -mcpu=/-mtune= switch.  The tune/arch
   flags say which aspects each row is allowed to set.  */
struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;
204
/* Initialize the GCC target structure.  */

/* The sparc default is to use .half rather than .short for aligned
   HI objects.  Use .word instead of .long on non-ELF systems.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#ifndef OBJECT_FORMAT_ELF
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#endif

/* Unaligned data uses the .ua* pseudo-ops.  The DI op is cleared at
   runtime for 32-bit targets in sparc_override_options.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_output_function_epilogue

/* Scheduler hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sparc_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sparc_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;
244
/* Validate and override various options, and do some machine dependent
   initialization.  Called once after all command-line options are parsed.
   Order matters: later checks depend on flags set by earlier ones.  */

void
sparc_override_options ()
{
  /* Translation table for the -mcmodel= argument.  */
  static struct code_model {
    const char *const name;
    const int value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { 0, 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const char *const name;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { 0, 0 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  */
  static struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
      MASK_SPARCLITE },
    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc I, II, IIi */
    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
    /* Although insns using %y are deprecated, it is a clear win on current
       ultrasparcs.  */
    						    |MASK_DEPRECATED_V8_INSNS},
    { 0, 0, 0, 0 }
  };
  const struct cpu_table *cpu;
  const struct sparc_cpu_select *sel;
  int fpu;

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  /* Look the user's string up in the cmodels table.  */
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  fpu = TARGET_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  if (! def->name)
    abort ();
  sparc_select[0].string = def->name;

  /* Process the default row and any -mcpu=/-mtune= switches: look the
     name up in cpu_table and apply its tune and/or arch effects.  */
  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
	{
	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
	    if (! strcmp (sel->string, cpu->name))
	      {
		if (sel->set_tune_p)
		  sparc_cpu = cpu->processor;

		if (sel->set_arch_p)
		  {
		    target_flags &= ~cpu->disable;
		    target_flags |= cpu->enable;
		  }
		break;
	      }

	  if (! cpu->name)
	    error ("bad value (%s) for %s switch", sel->string, sel->name);
	}
    }

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  Clear MASK_FPU_SET to avoid confusing
     the reverse mapping from switch values to names.  */
  if (TARGET_FPU_SET)
    {
      target_flags = (target_flags & ~MASK_FPU) | fpu;
      target_flags &= ~MASK_FPU_SET;
    }

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  if ((profile_flag)
      && sparc_cmodel != CM_32 && sparc_cmodel != CM_MEDLOW)
    {
      error ("profiling does not support code models other than medlow");
    }

  /* Register global variables with the garbage collector.  */
  sparc_add_gc_roots ();
}
445
446/* Miscellaneous utilities.  */
447
448/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
449   or branch on register contents instructions.  */
450
451int
452v9_regcmp_p (code)
453     enum rtx_code code;
454{
455  return (code == EQ || code == NE || code == GE || code == LT
456	  || code == LE || code == GT);
457}
458
459
460/* Operand constraints.  */
461
462/* Return non-zero only if OP is a register of mode MODE,
463   or const0_rtx.  */
464
465int
466reg_or_0_operand (op, mode)
467     rtx op;
468     enum machine_mode mode;
469{
470  if (register_operand (op, mode))
471    return 1;
472  if (op == const0_rtx)
473    return 1;
474  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
475      && CONST_DOUBLE_HIGH (op) == 0
476      && CONST_DOUBLE_LOW (op) == 0)
477    return 1;
478  if (fp_zero_operand (op, mode))
479    return 1;
480  return 0;
481}
482
483/* Nonzero if OP is a floating point value with value 0.0.  */
484
485int
486fp_zero_operand (op, mode)
487     rtx op;
488     enum machine_mode mode;
489{
490  if (GET_MODE_CLASS (GET_MODE (op)) != MODE_FLOAT)
491    return 0;
492  return op == CONST0_RTX (mode);
493}
494
495/* Nonzero if OP is a floating point constant which can
496   be loaded into an integer register using a single
497   sethi instruction.  */
498
499int
500fp_sethi_p (op)
501     rtx op;
502{
503  if (GET_CODE (op) == CONST_DOUBLE)
504    {
505      REAL_VALUE_TYPE r;
506      long i;
507
508      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
509      if (REAL_VALUES_EQUAL (r, dconst0) &&
510	  ! REAL_VALUE_MINUS_ZERO (r))
511	return 0;
512      REAL_VALUE_TO_TARGET_SINGLE (r, i);
513      if (SPARC_SETHI_P (i))
514	return 1;
515    }
516
517  return 0;
518}
519
520/* Nonzero if OP is a floating point constant which can
521   be loaded into an integer register using a single
522   mov instruction.  */
523
524int
525fp_mov_p (op)
526     rtx op;
527{
528  if (GET_CODE (op) == CONST_DOUBLE)
529    {
530      REAL_VALUE_TYPE r;
531      long i;
532
533      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
534      if (REAL_VALUES_EQUAL (r, dconst0) &&
535	  ! REAL_VALUE_MINUS_ZERO (r))
536	return 0;
537      REAL_VALUE_TO_TARGET_SINGLE (r, i);
538      if (SPARC_SIMM13_P (i))
539	return 1;
540    }
541
542  return 0;
543}
544
545/* Nonzero if OP is a floating point constant which can
546   be loaded into an integer register using a high/losum
547   instruction sequence.  */
548
549int
550fp_high_losum_p (op)
551     rtx op;
552{
553  /* The constraints calling this should only be in
554     SFmode move insns, so any constant which cannot
555     be moved using a single insn will do.  */
556  if (GET_CODE (op) == CONST_DOUBLE)
557    {
558      REAL_VALUE_TYPE r;
559      long i;
560
561      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
562      if (REAL_VALUES_EQUAL (r, dconst0) &&
563	  ! REAL_VALUE_MINUS_ZERO (r))
564	return 0;
565      REAL_VALUE_TO_TARGET_SINGLE (r, i);
566      if (! SPARC_SETHI_P (i)
567          && ! SPARC_SIMM13_P (i))
568	return 1;
569    }
570
571  return 0;
572}
573
574/* Nonzero if OP is an integer register.  */
575
576int
577intreg_operand (op, mode)
578     rtx op;
579     enum machine_mode mode ATTRIBUTE_UNUSED;
580{
581  return (register_operand (op, SImode)
582	  || (TARGET_ARCH64 && register_operand (op, DImode)));
583}
584
/* Nonzero if OP is a floating point condition code register
   (%fcc0 .. %fcc3).  */

int
fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* This can happen when recog is called from combine.  Op may be a MEM.
     Fail instead of calling abort in this case.  */
  if (GET_CODE (op) != REG)
    return 0;

  /* An explicit mode request must match; a VOIDmode request accepts
     only the FP condition-code modes.  */
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (mode == VOIDmode
      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
    return 0;

#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
  if (reg_renumber == 0)
    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
  return REGNO_OK_FOR_CCFP_P (REGNO (op));
#else
  /* Unsigned subtraction folds the two-sided range check for
     %fcc0..%fcc3 into a single comparison.  */
  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
#endif
}
611
612/* Nonzero if OP is an integer or floating point condition code register.  */
613
614int
615icc_or_fcc_reg_operand (op, mode)
616     rtx op;
617     enum machine_mode mode;
618{
619  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
620    {
621      if (mode != VOIDmode && mode != GET_MODE (op))
622	return 0;
623      if (mode == VOIDmode
624	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
625	return 0;
626      return 1;
627    }
628
629  return fcc_reg_operand (op, mode);
630}
631
632/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
633int
634restore_operand (op, mode)
635     rtx op;
636     enum machine_mode mode;
637{
638  return (GET_CODE (op) == REG && GET_MODE (op) == mode
639	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
640}
641
642/* Call insn on SPARC can take a PC-relative constant address, or any regular
643   memory address.  */
644
645int
646call_operand (op, mode)
647     rtx op;
648     enum machine_mode mode;
649{
650  if (GET_CODE (op) != MEM)
651    abort ();
652  op = XEXP (op, 0);
653  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
654}
655
656int
657call_operand_address (op, mode)
658     rtx op;
659     enum machine_mode mode;
660{
661  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
662}
663
664/* Returns 1 if OP is either a symbol reference or a sum of a symbol
665   reference and a constant.  */
666
667int
668symbolic_operand (op, mode)
669     register rtx op;
670     enum machine_mode mode;
671{
672  enum machine_mode omode = GET_MODE (op);
673
674  if (omode != mode && omode != VOIDmode && mode != VOIDmode)
675    return 0;
676
677  switch (GET_CODE (op))
678    {
679    case SYMBOL_REF:
680    case LABEL_REF:
681      return 1;
682
683    case CONST:
684      op = XEXP (op, 0);
685      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
686	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
687	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
688
689    default:
690      return 0;
691    }
692}
693
694/* Return truth value of statement that OP is a symbolic memory
695   operand of mode MODE.  */
696
697int
698symbolic_memory_operand (op, mode)
699     rtx op;
700     enum machine_mode mode ATTRIBUTE_UNUSED;
701{
702  if (GET_CODE (op) == SUBREG)
703    op = SUBREG_REG (op);
704  if (GET_CODE (op) != MEM)
705    return 0;
706  op = XEXP (op, 0);
707  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
708	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
709}
710
711/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */
712
713int
714label_ref_operand (op, mode)
715     rtx op;
716     enum machine_mode mode;
717{
718  if (GET_CODE (op) != LABEL_REF)
719    return 0;
720  if (GET_MODE (op) != mode)
721    return 0;
722  return 1;
723}
724
725/* Return 1 if the operand is an argument used in generating pic references
726   in either the medium/low or medium/anywhere code models of sparc64.  */
727
728int
729sp64_medium_pic_operand (op, mode)
730     rtx op;
731     enum machine_mode mode ATTRIBUTE_UNUSED;
732{
733  /* Check for (const (minus (symbol_ref:GOT)
734                             (const (minus (label) (pc))))).  */
735  if (GET_CODE (op) != CONST)
736    return 0;
737  op = XEXP (op, 0);
738  if (GET_CODE (op) != MINUS)
739    return 0;
740  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
741    return 0;
742  /* ??? Ensure symbol is GOT.  */
743  if (GET_CODE (XEXP (op, 1)) != CONST)
744    return 0;
745  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
746    return 0;
747  return 1;
748}
749
750/* Return 1 if the operand is a data segment reference.  This includes
751   the readonly data segment, or in other words anything but the text segment.
752   This is needed in the medium/anywhere code model on v9.  These values
753   are accessed with EMBMEDANY_BASE_REG.  */
754
755int
756data_segment_operand (op, mode)
757     rtx op;
758     enum machine_mode mode ATTRIBUTE_UNUSED;
759{
760  switch (GET_CODE (op))
761    {
762    case SYMBOL_REF :
763      return ! SYMBOL_REF_FLAG (op);
764    case PLUS :
765      /* Assume canonical format of symbol + constant.
766	 Fall through.  */
767    case CONST :
768      return data_segment_operand (XEXP (op, 0), VOIDmode);
769    default :
770      return 0;
771    }
772}
773
774/* Return 1 if the operand is a text segment reference.
775   This is needed in the medium/anywhere code model on v9.  */
776
777int
778text_segment_operand (op, mode)
779     rtx op;
780     enum machine_mode mode ATTRIBUTE_UNUSED;
781{
782  switch (GET_CODE (op))
783    {
784    case LABEL_REF :
785      return 1;
786    case SYMBOL_REF :
787      return SYMBOL_REF_FLAG (op);
788    case PLUS :
789      /* Assume canonical format of symbol + constant.
790	 Fall through.  */
791    case CONST :
792      return text_segment_operand (XEXP (op, 0), VOIDmode);
793    default :
794      return 0;
795    }
796}
797
798/* Return 1 if the operand is either a register or a memory operand that is
799   not symbolic.  */
800
801int
802reg_or_nonsymb_mem_operand (op, mode)
803    register rtx op;
804    enum machine_mode mode;
805{
806  if (register_operand (op, mode))
807    return 1;
808
809  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
810    return 1;
811
812  return 0;
813}
814
815int
816splittable_symbolic_memory_operand (op, mode)
817     rtx op;
818     enum machine_mode mode ATTRIBUTE_UNUSED;
819{
820  if (GET_CODE (op) != MEM)
821    return 0;
822  if (! symbolic_operand (XEXP (op, 0), Pmode))
823    return 0;
824  return 1;
825}
826
827int
828splittable_immediate_memory_operand (op, mode)
829     rtx op;
830     enum machine_mode mode ATTRIBUTE_UNUSED;
831{
832  if (GET_CODE (op) != MEM)
833    return 0;
834  if (! immediate_operand (XEXP (op, 0), Pmode))
835    return 0;
836  return 1;
837}
838
839/* Return truth value of whether OP is EQ or NE.  */
840
841int
842eq_or_neq (op, mode)
843     rtx op;
844     enum machine_mode mode ATTRIBUTE_UNUSED;
845{
846  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
847}
848
849/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
850   or LTU for non-floating-point.  We handle those specially.  */
851
852int
853normal_comp_operator (op, mode)
854     rtx op;
855     enum machine_mode mode ATTRIBUTE_UNUSED;
856{
857  enum rtx_code code = GET_CODE (op);
858
859  if (GET_RTX_CLASS (code) != '<')
860    return 0;
861
862  if (GET_MODE (XEXP (op, 0)) == CCFPmode
863      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
864    return 1;
865
866  return (code != NE && code != EQ && code != GEU && code != LTU);
867}
868
869/* Return 1 if this is a comparison operator.  This allows the use of
870   MATCH_OPERATOR to recognize all the branch insns.  */
871
872int
873noov_compare_op (op, mode)
874    register rtx op;
875    enum machine_mode mode ATTRIBUTE_UNUSED;
876{
877  enum rtx_code code = GET_CODE (op);
878
879  if (GET_RTX_CLASS (code) != '<')
880    return 0;
881
882  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
883    /* These are the only branches which work with CC_NOOVmode.  */
884    return (code == EQ || code == NE || code == GE || code == LT);
885  return 1;
886}
887
888/* Nonzero if OP is a comparison operator suitable for use in v9
889   conditional move or branch on register contents instructions.  */
890
891int
892v9_regcmp_op (op, mode)
893     register rtx op;
894     enum machine_mode mode ATTRIBUTE_UNUSED;
895{
896  enum rtx_code code = GET_CODE (op);
897
898  if (GET_RTX_CLASS (code) != '<')
899    return 0;
900
901  return v9_regcmp_p (code);
902}
903
904/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
905
906int
907extend_op (op, mode)
908     rtx op;
909     enum machine_mode mode ATTRIBUTE_UNUSED;
910{
911  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
912}
913
914/* Return nonzero if OP is an operator of mode MODE which can set
915   the condition codes explicitly.  We do not include PLUS and MINUS
916   because these require CC_NOOVmode, which we handle explicitly.  */
917
918int
919cc_arithop (op, mode)
920     rtx op;
921     enum machine_mode mode ATTRIBUTE_UNUSED;
922{
923  if (GET_CODE (op) == AND
924      || GET_CODE (op) == IOR
925      || GET_CODE (op) == XOR)
926    return 1;
927
928  return 0;
929}
930
931/* Return nonzero if OP is an operator of mode MODE which can bitwise
932   complement its second operand and set the condition codes explicitly.  */
933
934int
935cc_arithopn (op, mode)
936     rtx op;
937     enum machine_mode mode ATTRIBUTE_UNUSED;
938{
939  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
940     and (xor ... (not ...)) to (not (xor ...)).  */
941  return (GET_CODE (op) == AND
942	  || GET_CODE (op) == IOR);
943}
944
945/* Return true if OP is a register, or is a CONST_INT that can fit in a
946   signed 13 bit immediate field.  This is an acceptable SImode operand for
947   most 3 address instructions.  */
948
949int
950arith_operand (op, mode)
951     rtx op;
952     enum machine_mode mode;
953{
954  int val;
955  if (register_operand (op, mode))
956    return 1;
957  if (GET_CODE (op) != CONST_INT)
958    return 0;
959  val = INTVAL (op) & 0xffffffff;
960  return SPARC_SIMM13_P (val);
961}
962
963/* Return true if OP is a constant 4096  */
964
965int
966arith_4096_operand (op, mode)
967     rtx op;
968     enum machine_mode mode ATTRIBUTE_UNUSED;
969{
970  int val;
971  if (GET_CODE (op) != CONST_INT)
972    return 0;
973  val = INTVAL (op) & 0xffffffff;
974  return val == 4096;
975}
976
977/* Return true if OP is suitable as second operand for add/sub */
978
979int
980arith_add_operand (op, mode)
981     rtx op;
982     enum machine_mode mode;
983{
984  return arith_operand (op, mode) || arith_4096_operand (op, mode);
985}
986
/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
   immediate field of OR and XOR instructions.  Used for 64-bit
   constant formation patterns.  */
int
const64_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && SPARC_SIMM13_P (INTVAL (op)))
#if HOST_BITS_PER_WIDE_INT != 64
	  /* On a narrow host a 64-bit constant appears as a CONST_DOUBLE;
	     accept it only when the high word is the sign extension of
	     the low word, i.e. the value really fits in simm13.  */
	  || (GET_CODE (op) == CONST_DOUBLE
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
	      && (CONST_DOUBLE_HIGH (op) ==
		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
		   (HOST_WIDE_INT)0xffffffff : 0)))
#endif
	  );
}
1006
/* The same, but only for sethi instructions: true if OP is a constant
   whose significant bits all lie within the sethi 22-bit field.  */
int
const64_high_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   /* Reject values whose sethi field is zero; those are better
	      formed by other means.  */
	   && (INTVAL (op) & 0xfffffc00) != 0
	   && SPARC_SETHI_P (INTVAL (op))
#if HOST_BITS_PER_WIDE_INT != 64
	   /* Must be positive on non-64bit host else the
	      optimizer is fooled into thinking that sethi
	      sign extends, even though it does not.  */
	   && INTVAL (op) >= 0
#endif
	   )
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
}
1028
1029/* Return true if OP is a register, or is a CONST_INT that can fit in a
1030   signed 11 bit immediate field.  This is an acceptable SImode operand for
1031   the movcc instructions.  */
1032
1033int
1034arith11_operand (op, mode)
1035     rtx op;
1036     enum machine_mode mode;
1037{
1038  return (register_operand (op, mode)
1039	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
1040}
1041
1042/* Return true if OP is a register, or is a CONST_INT that can fit in a
1043   signed 10 bit immediate field.  This is an acceptable SImode operand for
1044   the movrcc instructions.  */
1045
1046int
1047arith10_operand (op, mode)
1048     rtx op;
1049     enum machine_mode mode;
1050{
1051  return (register_operand (op, mode)
1052	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
1053}
1054
/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
   immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
   immediate field.
   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
   for most 3 address instructions.  */

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  /* !arch64: the two halves are used independently, so each must
	     be simm13 in its own right (val + 0x1000 < 0x2000 is the
	     unsigned-wraparound form of -0x1000 <= val < 0x1000).  */
	  || (! TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
	  /* arch64: the low word must be simm13 and the high word must be
	     the sign extension of the low word (all ones when bit 12 of
	     the low word is set, all zeros otherwise).  */
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
1081
1082/* Return true if OP is a constant 4096 for DImode on ARCH64 */
1083
1084int
1085arith_double_4096_operand (op, mode)
1086     rtx op;
1087     enum machine_mode mode ATTRIBUTE_UNUSED;
1088{
1089  return (TARGET_ARCH64 &&
1090  	  ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
1091  	   (GET_CODE (op) == CONST_DOUBLE &&
1092  	    CONST_DOUBLE_LOW (op) == 4096 &&
1093  	    CONST_DOUBLE_HIGH (op) == 0)));
1094}
1095
1096/* Return true if OP is suitable as second operand for add/sub in DImode */
1097
1098int
1099arith_double_add_operand (op, mode)
1100     rtx op;
1101     enum machine_mode mode;
1102{
1103  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
1104}
1105
/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in an 11 bit immediate field.  This is an acceptable DImode
   operand for the movcc instructions.  */
/* ??? Replace with arith11_operand?  */

int
arith11_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  /* CONST_DOUBLE: low word must be simm11 (val + 0x400 < 0x800
	     is the wraparound form of -0x400 <= val < 0x400) and the
	     high word must be the sign extension of the low word.  */
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
	  /* CONST_INT: plain simm11 range test.  */
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
}
1128
1129/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
1130   can fit in an 10 bit immediate field.  This is an acceptable DImode
1131   operand for the movrcc instructions.  */
1132/* ??? Replace with arith10_operand?  */
1133
1134int
1135arith10_double_operand (op, mode)
1136     rtx op;
1137     enum machine_mode mode;
1138{
1139  return (register_operand (op, mode)
1140	  || (GET_CODE (op) == CONST_DOUBLE
1141	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1142	      && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
1143	      && ((CONST_DOUBLE_HIGH (op) == -1
1144		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
1145		  || (CONST_DOUBLE_HIGH (op) == 0
1146		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
1147	  || (GET_CODE (op) == CONST_INT
1148	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1149	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
1150}
1151
1152/* Return truth value of whether OP is an integer which fits the
1153   range constraining immediate operands in most three-address insns,
1154   which have a 13 bit immediate field.  */
1155
1156int
1157small_int (op, mode)
1158     rtx op;
1159     enum machine_mode mode ATTRIBUTE_UNUSED;
1160{
1161  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
1162}
1163
1164int
1165small_int_or_double (op, mode)
1166     rtx op;
1167     enum machine_mode mode ATTRIBUTE_UNUSED;
1168{
1169  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
1170	  || (GET_CODE (op) == CONST_DOUBLE
1171	      && CONST_DOUBLE_HIGH (op) == 0
1172	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
1173}
1174
/* Recognize operand values for the umul instruction.  That instruction sign
   extends immediate values just like all other sparc instructions, but
   interprets the extended result as an unsigned number.  */

int
uns_small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
	      /* 0xFFFFF000..0xFFFFFFFF are the values whose 32-bit
		 sign extension is a small negative simm13.  */
	      || (INTVAL (op) >= 0xFFFFF000
                  && INTVAL (op) <= 0xFFFFFFFF)));
#else
  /* On a 32-bit host the top range arrives as a CONST_DOUBLE with a
     zero high word; LOW - 0xFFFFF000 < 0x1000 uses unsigned
     wraparound to test 0xFFFFF000 <= LOW <= 0xFFFFFFFF.  */
  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}
1197
1198int
1199uns_arith_operand (op, mode)
1200     rtx op;
1201     enum machine_mode mode;
1202{
1203  return register_operand (op, mode) || uns_small_int (op, mode);
1204}
1205
1206/* Return truth value of statement that OP is a call-clobbered register.  */
1207int
1208clobbered_register (op, mode)
1209     rtx op;
1210     enum machine_mode mode ATTRIBUTE_UNUSED;
1211{
1212  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1213}
1214
/* Return 1 if OP is a valid operand for the source of a move insn.  */

int
input_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* If both modes are non-void they must be the same.  */
  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
    return 0;

  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
    return 1;

  /* Allow any one instruction integer constant, and all CONST_INT
     variants when we are working in DImode and !arch64.  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && ((GET_CODE (op) == CONST_INT
	   /* A sethi-able value; on arch64 a negative DImode sethi
	      would be sign-extended, so restrict those to the narrow
	      modes or non-negative values.  */
	   && ((SPARC_SETHI_P (INTVAL (op))
		&& (! TARGET_ARCH64
		    || (INTVAL (op) >= 0)
		    || mode == SImode
		    || mode == HImode
		    || mode == QImode))
	       || SPARC_SIMM13_P (INTVAL (op))
	       || (mode == DImode
		   && ! TARGET_ARCH64)))
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && ((CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
		  ||
#if HOST_BITS_PER_WIDE_INT == 64
		  (CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
#else
		  /* On a 32-bit host, accept a simm13 low word only when
		     the high word is its proper sign extension.  */
		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
			&& CONST_DOUBLE_HIGH (op) == 0)
		       || (CONST_DOUBLE_HIGH (op) == -1
			   && CONST_DOUBLE_LOW (op) & 0x80000000) != 0))
#endif
		  ))))
    return 1;

  /* If !arch64 and this is a DImode const, allow it so that
     the splits can be generated.  */
  if (! TARGET_ARCH64
      && mode == DImode
      && GET_CODE (op) == CONST_DOUBLE)
    return 1;

  if (register_operand (op, mode))
    return 1;

  /* Any floating-point CONST_DOUBLE is an acceptable source.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT
      && GET_CODE (op) == CONST_DOUBLE)
    return 1;

  /* If this is a SUBREG, look inside so that we handle
     paradoxical ones.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  /* Check for valid MEM forms.  */
  if (GET_CODE (op) == MEM)
    {
      rtx inside = XEXP (op, 0);

      if (GET_CODE (inside) == LO_SUM)
	{
	  /* We can't allow these because all of the splits
	     (eventually as they trickle down into DFmode
	     splits) require offsettable memory references.  */
	  if (! TARGET_V9
	      && GET_MODE (op) == TFmode)
	    return 0;

	  return (register_operand (XEXP (inside, 0), Pmode)
		  && CONSTANT_P (XEXP (inside, 1)));
	}
      return memory_address_p (mode, inside);
    }

  return 0;
}
1302
1303
/* We know it can't be done in one insn when we get here,
   the movsi expander guarentees this.  Emit a two-insn
   sethi/or (or HIGH/LO_SUM for symbolic OP1) sequence that
   loads the 32-bit constant OP1 into OP0.  */
void
sparc_emit_set_const32 (op0, op1)
     rtx op0;
     rtx op1;
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT value = INTVAL (op1);

      /* A value that sethi or mov alone could load should never
	 reach here -- the expander handles those.  */
      if (SPARC_SETHI_P (value)
	  || SPARC_SIMM13_P (value))
	abort ();
    }

  /* Full 2-insn decomposition is needed.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      /* NOTE(review): on a narrow host a negative value is emitted as a
	 CONST_DOUBLE with a zero high word, presumably so the masked
	 high part is not treated as sign-extended -- confirm.  */
      if (TARGET_ARCH64
	  && HOST_BITS_PER_WIDE_INT != 64
	  && (INTVAL (op1) & 0x80000000) != 0)
	emit_insn (gen_rtx_SET
		   (VOIDmode, temp,
		    gen_rtx_CONST_DOUBLE (VOIDmode, INTVAL (op1) & 0xfffffc00,
					  0)));
      else
	emit_insn (gen_rtx_SET (VOIDmode, temp,
				GEN_INT (INTVAL (op1) & 0xfffffc00)));

      /* OR in the low 10 bits.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));

    }
}
1360
1361
/* Sparc-v9 code-model support.  Load the symbolic 64-bit value OP1
   into OP0, using TEMP1 as a scratch register where the sequence for
   the selected code model (sparc_cmodel) requires one.  */
void
sparc_emit_set_symbolic_const64 (op0, op1, temp1)
     rtx op0;
     rtx op1;
     rtx temp1;
{
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp
	 or	%temp, %lo(symbol), %reg  */
      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      emit_insn (gen_seth44 (op0, op1));
      emit_insn (gen_setm44 (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp1,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp1, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 or	%temp2, %lo(symbol), %temp4
	 sllx	%temp3, 32, %temp5
	 or	%temp4, %temp5, %reg  */

      /* Getting this right wrt. reloading is really tricky.
	 We _MUST_ have a separate temporary at this point,
	 if we don't barf immediately instead of generating
	 incorrect code.  */
      if (rtx_equal_p (temp1, op0))
	abort ();

      emit_insn (gen_sethh (op0, op1));
      emit_insn (gen_setlm (temp1, op1));
      emit_insn (gen_sethm (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_PLUS (DImode, op0, temp1)));
      emit_insn (gen_setlo (op0, op0, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			or	%temp1, %lo(symbol), %temp2
			add	%temp2, EMBMEDANY_BASE_REG, %reg

	 Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			or	%temp2, %lo(symbol), %temp4
			sllx	%temp3, 32, %temp5
			or	%temp4, %temp5, %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (op0, temp1));
	  emit_insn (gen_embmedany_losum (op0, op0, op1));
	}
      else
	{
	  /* Getting this right wrt. reloading is really tricky.
	     We _MUST_ have a separate temporary at this point,
	     so we barf immediately instead of generating
	     incorrect code.  */
	  if (temp1 == op0)
	    abort ();

	  emit_insn (gen_embmedany_textuhi (op0, op1));
	  emit_insn (gen_embmedany_texthi  (temp1, op1));
	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, op0, temp1)));
	  emit_insn (gen_embmedany_textlo  (op0, op0, op1));
	}
      break;

    default:
      abort();
    }
}
1487
1488/* These avoid problems when cross compiling.  If we do not
1489   go through all this hair then the optimizer will see
1490   invalid REG_EQUAL notes or in some cases none at all.  */
1491static void sparc_emit_set_safe_HIGH64 PARAMS ((rtx, HOST_WIDE_INT));
1492static rtx gen_safe_SET64 PARAMS ((rtx, HOST_WIDE_INT));
1493static rtx gen_safe_OR64 PARAMS ((rtx, HOST_WIDE_INT));
1494static rtx gen_safe_XOR64 PARAMS ((rtx, HOST_WIDE_INT));
1495
#if HOST_BITS_PER_WIDE_INT == 64
/* On a 64-bit host a HOST_WIDE_INT holds the whole value, so a plain
   (masked) CONST_INT suffices.  */
#define GEN_HIGHINT64(__x)		GEN_INT ((__x) & 0xfffffc00)
#define GEN_INT64(__x)			GEN_INT (__x)
#else
/* On a 32-bit host the value must be split into the two words of a
   CONST_DOUBLE; GEN_INT64 fills the high word with the sign extension
   of bit 31.  */
#define GEN_HIGHINT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & 0xfffffc00, 0)
#define GEN_INT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & 0xffffffff, \
			      ((__x) & 0x80000000 \
			       ? 0xffffffff : 0))
#endif
1507
1508/* The optimizer is not to assume anything about exactly
1509   which bits are set for a HIGH, they are unspecified.
1510   Unfortunately this leads to many missed optimizations
1511   during CSE.  We mask out the non-HIGH bits, and matches
1512   a plain movdi, to alleviate this problem.  */
1513static void
1514sparc_emit_set_safe_HIGH64 (dest, val)
1515     rtx dest;
1516     HOST_WIDE_INT val;
1517{
1518  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
1519}
1520
1521static rtx
1522gen_safe_SET64 (dest, val)
1523     rtx dest;
1524     HOST_WIDE_INT val;
1525{
1526  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
1527}
1528
1529static rtx
1530gen_safe_OR64 (src, val)
1531     rtx src;
1532     HOST_WIDE_INT val;
1533{
1534  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
1535}
1536
1537static rtx
1538gen_safe_XOR64 (src, val)
1539     rtx src;
1540     HOST_WIDE_INT val;
1541{
1542  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
1543}
1544
1545/* Worker routines for 64-bit constant formation on arch64.
1546   One of the key things to be doing in these emissions is
1547   to create as many temp REGs as possible.  This makes it
1548   possible for half-built constants to be used later when
1549   such values are similar to something required later on.
1550   Without doing this, the optimizer cannot see such
1551   opportunities.  */
1552
1553static void sparc_emit_set_const64_quick1
1554	PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT, int));
1555
/* Emit a 2-insn sequence loading into OP0 a 64-bit constant whose high
   32 bits are all zeros (IS_NEG == 0) or all ones (IS_NEG != 0);
   LOW_BITS is the low word.  TEMP is a scratch register.  */
static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT low_bits;
  int is_neg;
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negated case, sethi the complement and XOR back; the XOR
     also flips the upper 32 bits to all ones.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  sparc_emit_set_safe_HIGH64 (temp, high_bits);
  if (!is_neg)
    {
      /* sethi/or pair: OR in the low 10 bits.  */
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | (low_bits & 0x3ff)))));
	}
    }
}
1594
1595static void sparc_emit_set_const64_quick2
1596	PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT,
1597	       unsigned HOST_WIDE_INT, int));
1598
/* Emit a sequence that loads HIGH_BITS, shifts it left by SHIFT_COUNT,
   and finally ORs in LOW_IMMEDIATE when that is nonzero.  TEMP is a
   scratch register.  */
static void
sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT high_bits;
  unsigned HOST_WIDE_INT low_immediate;
  int shift_count;
{
  /* TEMP2 tracks which register holds the unshifted value.  */
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      /* sethi, plus an or when bits below the sethi field are set.  */
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      /* Small enough to load with a single move.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
1635
1636static void sparc_emit_set_const64_longway
1637	PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1638
/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  OP0 receives
   the constant {HIGH_BITS, LOW_BITS}; TEMP is a scratch register.  */
static void
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_bits;
{
  rtx sub_temp;

  /* During reload we cannot allocate fresh registers, so reuse OP0.  */
  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  /* Build the high word in SUB_TEMP (sethi, plus an or if the low 10
     bits of the high word are set).  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      /* Shift the high word into place, build the low word with
	 sethi/or, and add the pieces.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No fresh registers available: feed the low word in 12-, 12-
	 and 8-bit chunks, shifting between chunks.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  /* Chunk is zero: skip the OR and fold its shift into the
	     next one.  */
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
1742
1743/* Analyze a 64-bit constant for certain properties.  */
1744static void analyze_64bit_constant
1745	PARAMS ((unsigned HOST_WIDE_INT,
1746	       unsigned HOST_WIDE_INT,
1747	       int *, int *, int *));
1748
/* Analyze the 64-bit constant {HIGH_BITS, LOW_BITS}: store in *HBSP
   and *LBSP the bit positions (0..63) of the highest and lowest set
   bits, and in *ABBASP whether every bit between them is set.
   Aborts if the constant is zero.  */
static void
analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int *hbsp, *lbsp, *abbasp;
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  /* First pass: scan the low word upward for the lowest set bit and
     the high word downward for the highest set bit, in parallel.  */
  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  /* Second pass (only if a bound was not found in 32 steps): continue
     the lowest-bit scan into the high word and the highest-bit scan
     into the low word.  */
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  if (lowest_bit_set == -1
      || highest_bit_set == -1)
    abort ();
  /* Walk the span and check for holes.  NOTE(review): (1 << i) with
     i == 31 shifts into the sign bit of int -- relies on host
     behavior; confirm this is acceptable for supported hosts.  */
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
1812
1813static int const64_is_2insns
1814	PARAMS ((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1815
1816static int
1817const64_is_2insns (high_bits, low_bits)
1818     unsigned HOST_WIDE_INT high_bits, low_bits;
1819{
1820  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1821
1822  if (high_bits == 0
1823      || high_bits == 0xffffffff)
1824    return 1;
1825
1826  analyze_64bit_constant (high_bits, low_bits,
1827			  &highest_bit_set, &lowest_bit_set,
1828			  &all_bits_between_are_set);
1829
1830  if ((highest_bit_set == 63
1831       || lowest_bit_set == 0)
1832      && all_bits_between_are_set != 0)
1833    return 1;
1834
1835  if ((highest_bit_set - lowest_bit_set) < 21)
1836    return 1;
1837
1838  return 0;
1839}
1840
1841static unsigned HOST_WIDE_INT create_simple_focus_bits
1842	PARAMS ((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1843	       int, int));
1844
1845static unsigned HOST_WIDE_INT
1846create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
1847     unsigned HOST_WIDE_INT high_bits, low_bits;
1848     int lowest_bit_set, shift;
1849{
1850  HOST_WIDE_INT hi, lo;
1851
1852  if (lowest_bit_set < 32)
1853    {
1854      lo = (low_bits >> lowest_bit_set) << shift;
1855      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1856    }
1857  else
1858    {
1859      lo = 0;
1860      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1861    }
1862  if (hi & lo)
1863    abort ();
1864  return (hi | lo);
1865}
1866
1867/* Here we are sure to be arch64 and this is an integer constant
1868   being loaded into a register.  Emit the most efficient
1869   insn sequence possible.  Detection of all the 1-insn cases
1870   has been done already.  */
1871void
1872sparc_emit_set_const64 (op0, op1)
1873     rtx op0;
1874     rtx op1;
1875{
1876  unsigned HOST_WIDE_INT high_bits, low_bits;
1877  int lowest_bit_set, highest_bit_set;
1878  int all_bits_between_are_set;
1879  rtx temp;
1880
1881  /* Sanity check that we know what we are working with.  */
1882  if (! TARGET_ARCH64)
1883    abort ();
1884
1885  if (GET_CODE (op0) != SUBREG)
1886    {
1887      if (GET_CODE (op0) != REG
1888	  || (REGNO (op0) >= SPARC_FIRST_FP_REG
1889	      && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
1890	abort ();
1891    }
1892
1893  if (reload_in_progress || reload_completed)
1894    temp = op0;
1895  else
1896    temp = gen_reg_rtx (DImode);
1897
1898  if (GET_CODE (op1) != CONST_DOUBLE
1899      && GET_CODE (op1) != CONST_INT)
1900    {
1901      sparc_emit_set_symbolic_const64 (op0, op1, temp);
1902      return;
1903    }
1904
1905  if (GET_CODE (op1) == CONST_DOUBLE)
1906    {
1907#if HOST_BITS_PER_WIDE_INT == 64
1908      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
1909      low_bits  = CONST_DOUBLE_LOW (op1) & 0xffffffff;
1910#else
1911      high_bits = CONST_DOUBLE_HIGH (op1);
1912      low_bits = CONST_DOUBLE_LOW (op1);
1913#endif
1914    }
1915  else
1916    {
1917#if HOST_BITS_PER_WIDE_INT == 64
1918      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1919      low_bits = (INTVAL (op1) & 0xffffffff);
1920#else
1921      high_bits = ((INTVAL (op1) < 0) ?
1922		   0xffffffff :
1923		   0x00000000);
1924      low_bits = INTVAL (op1);
1925#endif
1926    }
1927
1928  /* low_bits	bits 0  --> 31
1929     high_bits	bits 32 --> 63  */
1930
1931  analyze_64bit_constant (high_bits, low_bits,
1932			  &highest_bit_set, &lowest_bit_set,
1933			  &all_bits_between_are_set);
1934
1935  /* First try for a 2-insn sequence.  */
1936
1937  /* These situations are preferred because the optimizer can
1938   * do more things with them:
1939   * 1) mov	-1, %reg
1940   *    sllx	%reg, shift, %reg
1941   * 2) mov	-1, %reg
1942   *    srlx	%reg, shift, %reg
1943   * 3) mov	some_small_const, %reg
1944   *    sllx	%reg, shift, %reg
1945   */
1946  if (((highest_bit_set == 63
1947	|| lowest_bit_set == 0)
1948       && all_bits_between_are_set != 0)
1949      || ((highest_bit_set - lowest_bit_set) < 12))
1950    {
1951      HOST_WIDE_INT the_const = -1;
1952      int shift = lowest_bit_set;
1953
1954      if ((highest_bit_set != 63
1955	   && lowest_bit_set != 0)
1956	  || all_bits_between_are_set == 0)
1957	{
1958	  the_const =
1959	    create_simple_focus_bits (high_bits, low_bits,
1960				      lowest_bit_set, 0);
1961	}
1962      else if (lowest_bit_set == 0)
1963	shift = -(63 - highest_bit_set);
1964
1965      if (! SPARC_SIMM13_P (the_const))
1966	abort ();
1967
1968      emit_insn (gen_safe_SET64 (temp, the_const));
1969      if (shift > 0)
1970	emit_insn (gen_rtx_SET (VOIDmode,
1971				op0,
1972				gen_rtx_ASHIFT (DImode,
1973						temp,
1974						GEN_INT (shift))));
1975      else if (shift < 0)
1976	emit_insn (gen_rtx_SET (VOIDmode,
1977				op0,
1978				gen_rtx_LSHIFTRT (DImode,
1979						  temp,
1980						  GEN_INT (-shift))));
1981      else
1982	abort ();
1983      return;
1984    }
1985
1986  /* Now a range of 22 or less bits set somewhere.
1987   * 1) sethi	%hi(focus_bits), %reg
1988   *    sllx	%reg, shift, %reg
1989   * 2) sethi	%hi(focus_bits), %reg
1990   *    srlx	%reg, shift, %reg
1991   */
1992  if ((highest_bit_set - lowest_bit_set) < 21)
1993    {
1994      unsigned HOST_WIDE_INT focus_bits =
1995	create_simple_focus_bits (high_bits, low_bits,
1996				  lowest_bit_set, 10);
1997
1998      if (! SPARC_SETHI_P (focus_bits))
1999	 abort ();
2000
2001      sparc_emit_set_safe_HIGH64 (temp, focus_bits);
2002
2003      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
2004      if (lowest_bit_set < 10)
2005	emit_insn (gen_rtx_SET (VOIDmode,
2006				op0,
2007				gen_rtx_LSHIFTRT (DImode, temp,
2008						  GEN_INT (10 - lowest_bit_set))));
2009      else if (lowest_bit_set > 10)
2010	emit_insn (gen_rtx_SET (VOIDmode,
2011				op0,
2012				gen_rtx_ASHIFT (DImode, temp,
2013						GEN_INT (lowest_bit_set - 10))));
2014      else
2015	abort ();
2016      return;
2017    }
2018
2019  /* 1) sethi	%hi(low_bits), %reg
2020   *    or	%reg, %lo(low_bits), %reg
2021   * 2) sethi	%hi(~low_bits), %reg
2022   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2023   */
2024  if (high_bits == 0
2025      || high_bits == 0xffffffff)
2026    {
2027      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2028				     (high_bits == 0xffffffff));
2029      return;
2030    }
2031
2032  /* Now, try 3-insn sequences.  */
2033
2034  /* 1) sethi	%hi(high_bits), %reg
2035   *    or	%reg, %lo(high_bits), %reg
2036   *    sllx	%reg, 32, %reg
2037   */
2038  if (low_bits == 0)
2039    {
2040      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2041      return;
2042    }
2043
2044  /* We may be able to do something quick
2045     when the constant is negated, so try that.  */
2046  if (const64_is_2insns ((~high_bits) & 0xffffffff,
2047			 (~low_bits) & 0xfffffc00))
2048    {
2049      /* NOTE: The trailing bits get XOR'd so we need the
2050	 non-negated bits, not the negated ones.  */
2051      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2052
2053      if ((((~high_bits) & 0xffffffff) == 0
2054	   && ((~low_bits) & 0x80000000) == 0)
2055	  || (((~high_bits) & 0xffffffff) == 0xffffffff
2056	      && ((~low_bits) & 0x80000000) != 0))
2057	{
2058	  int fast_int = (~low_bits & 0xffffffff);
2059
2060	  if ((SPARC_SETHI_P (fast_int)
2061	       && (~high_bits & 0xffffffff) == 0)
2062	      || SPARC_SIMM13_P (fast_int))
2063	    emit_insn (gen_safe_SET64 (temp, fast_int));
2064	  else
2065	    sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
2066	}
2067      else
2068	{
2069	  rtx negated_const;
2070#if HOST_BITS_PER_WIDE_INT == 64
2071	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2072				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2073#else
2074	  negated_const = gen_rtx_CONST_DOUBLE (DImode,
2075						(~low_bits) & 0xfffffc00,
2076						(~high_bits) & 0xffffffff);
2077#endif
2078	  sparc_emit_set_const64 (temp, negated_const);
2079	}
2080
2081      /* If we are XOR'ing with -1, then we should emit a one's complement
2082	 instead.  This way the combiner will notice logical operations
2083	 such as ANDN later on and substitute.  */
2084      if (trailing_bits == 0x3ff)
2085	{
2086	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2087				  gen_rtx_NOT (DImode, temp)));
2088	}
2089      else
2090	{
2091	  emit_insn (gen_rtx_SET (VOIDmode,
2092				  op0,
2093				  gen_safe_XOR64 (temp,
2094						  (-0x400 | trailing_bits))));
2095	}
2096      return;
2097    }
2098
2099  /* 1) sethi	%hi(xxx), %reg
2100   *    or	%reg, %lo(xxx), %reg
2101   *	sllx	%reg, yyy, %reg
2102   *
2103   * ??? This is just a generalized version of the low_bits==0
2104   * thing above, FIXME...
2105   */
2106  if ((highest_bit_set - lowest_bit_set) < 32)
2107    {
2108      unsigned HOST_WIDE_INT focus_bits =
2109	create_simple_focus_bits (high_bits, low_bits,
2110				  lowest_bit_set, 0);
2111
2112      /* We can't get here in this state.  */
2113      if (highest_bit_set < 32
2114	  || lowest_bit_set >= 32)
2115	abort ();
2116
2117      /* So what we know is that the set bits straddle the
2118	 middle of the 64-bit word.  */
2119      sparc_emit_set_const64_quick2 (op0, temp,
2120				     focus_bits, 0,
2121				     lowest_bit_set);
2122      return;
2123    }
2124
2125  /* 1) sethi	%hi(high_bits), %reg
2126   *    or	%reg, %lo(high_bits), %reg
2127   *    sllx	%reg, 32, %reg
2128   *	or	%reg, low_bits, %reg
2129   */
2130  if (SPARC_SIMM13_P(low_bits)
2131      && ((int)low_bits > 0))
2132    {
2133      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2134      return;
2135    }
2136
2137  /* The easiest way when all else fails, is full decomposition.  */
2138#if 0
2139  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
2140	  high_bits, low_bits, ~high_bits, ~low_bits);
2141#endif
2142  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2143}
2144
2145/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2146   return the mode to be used for the comparison.  For floating-point,
2147   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
2148   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
2149   processing is needed.  */
2150
2151enum machine_mode
2152select_cc_mode (op, x, y)
2153     enum rtx_code op;
2154     rtx x;
2155     rtx y ATTRIBUTE_UNUSED;
2156{
2157  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2158    {
2159      switch (op)
2160	{
2161	case EQ:
2162	case NE:
2163	case UNORDERED:
2164	case ORDERED:
2165	case UNLT:
2166	case UNLE:
2167	case UNGT:
2168	case UNGE:
2169	case UNEQ:
2170	case LTGT:
2171	  return CCFPmode;
2172
2173	case LT:
2174	case LE:
2175	case GT:
2176	case GE:
2177	  return CCFPEmode;
2178
2179	default:
2180	  abort ();
2181	}
2182    }
2183  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2184	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2185    {
2186      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2187	return CCX_NOOVmode;
2188      else
2189	return CC_NOOVmode;
2190    }
2191  else
2192    {
2193      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2194	return CCXmode;
2195      else
2196	return CCmode;
2197    }
2198}
2199
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the cc reg in the proper mode.  */

rtx
gen_compare_reg (code, x, y)
     enum rtx_code code;
     rtx x, y;
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg;

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos). If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  If this exact pair was compared before,
	 reuse the same fcc reg so duplicate compares can be deleted.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  /* Not seen before: claim the next fcc reg round-robin and
	     remember the operand pair for future reuse.  */
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    /* Pre-v9 FP compares have only the single %fcc0.  */
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* Emit the compare itself into the chosen CC register.  */
  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
			  gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2262
/* This function is used for v9 only.
   CODE is the code for an Scc's comparison.
   OPERANDS[0] is the target of the Scc insn.
   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
   been generated yet).

   Returns nonzero on success, 0 if the Scc cannot be generated here
   (the caller must then fall back to another expansion).

   This function is needed to turn

	   (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
	   (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction. We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   We refer to the global sparc compare operands sparc_compare_op0 and
   sparc_compare_op1.  */

int
gen_v9_scc (compare_code, operands)
     enum rtx_code compare_code;
     register rtx *operands;
{
  rtx temp, op0, op1;

  /* DImode operands or results need 64-bit registers; punt on v8.  */
  if (! TARGET_ARCH64
      && (GET_MODE (sparc_compare_op0) == DImode
	  || GET_MODE (operands[0]) == DImode))
    return 0;

  /* Handle the case where operands[0] == sparc_compare_op0.
     We "early clobber" the result.  */
  if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
    {
      op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
      emit_move_insn (op0, sparc_compare_op0);
    }
  else
    op0 = sparc_compare_op0;
  /* For consistency in the following.  */
  op1 = sparc_compare_op1;

  /* Try to use the movrCC insns (conditional move on register value,
     which avoids materializing condition codes at all).  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
      && op1 == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      /* Special case for op0 != 0.  This can be done with one instruction if
	 operands[0] == sparc_compare_op0.  We don't assume they are equal
	 now though.  */

      if (compare_code == NE
	  && GET_MODE (operands[0]) == DImode
	  && GET_MODE (op0) == DImode)
	{
	  /* Copy op0 into the result, then conditionally overwrite it
	     with 1 when it is nonzero.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
	  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       operands[0])));
	  return 1;
	}

      /* General movrCC form: zero the result, widen op0 to DImode if
	 needed, then conditionally move 1 into the result.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   operands[0])));
      return 1;
    }
  else
    {
      /* Generic path: materialize the condition codes, then zero the
	 result and conditionally move 1 over it.  */
      operands[1] = gen_compare_reg (compare_code, op0, op1);

      switch (GET_MODE (operands[1]))
	{
	  case CCmode :
	  case CCXmode :
	  case CCFPEmode :
	  case CCFPmode :
	    break;
	  default :
	    abort ();
	}
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (operands[1]),
						   operands[1], const0_rtx),
				    const1_rtx, operands[0])));
      return 1;
    }
}
2374
2375/* Emit a conditional jump insn for the v9 architecture using comparison code
2376   CODE and jump target LABEL.
2377   This function exists to take advantage of the v9 brxx insns.  */
2378
2379void
2380emit_v9_brxx_insn (code, op0, label)
2381     enum rtx_code code;
2382     rtx op0, label;
2383{
2384  emit_jump_insn (gen_rtx_SET (VOIDmode,
2385			   pc_rtx,
2386			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2387				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2388						    op0, const0_rtx),
2389				    gen_rtx_LABEL_REF (VOIDmode, label),
2390				    pc_rtx)));
2391}
2392
2393/* Generate a DFmode part of a hard TFmode register.
2394   REG is the TFmode hard register, LOW is 1 for the
2395   low 64bit of the register and 0 otherwise.
2396 */
2397rtx
2398gen_df_reg (reg, low)
2399     rtx reg;
2400     int low;
2401{
2402  int regno = REGNO (reg);
2403
2404  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2405    regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2406  return gen_rtx_REG (DFmode, regno);
2407}
2408
2409/* Return nonzero if a return peephole merging return with
2410   setting of output register is ok.  */
2411int
2412leaf_return_peephole_ok ()
2413{
2414  return (actual_fsize == 0);
2415}
2416
/* Return nonzero if TRIAL can go into the function epilogue's
   delay slot.  SLOT is the slot we are trying to fill.  */

int
eligible_for_epilogue_delay (trial, slot)
     rtx trial;
     int slot;
{
  rtx pat, src;

  /* There is only one delay slot to fill.  */
  if (slot >= 1)
    return 0;

  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  /* Only single-instruction patterns fit in a delay slot.  */
  if (get_attr_length (trial) != 1)
    return 0;

  /* If there are any call-saved registers, we should scan TRIAL if it
     does not reference them.  For now just make it easy.  */
  if (num_gfregs)
    return 0;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (current_function_calls_eh_return)
    return 0;

  /* In the case of a true leaf function, anything can go into the delay slot.
     A delay slot only exists however if the frame size is zero, otherwise
     we will put an insn to adjust the stack after the return.  */
  if (current_function_uses_only_leaf_regs)
    {
      if (leaf_return_peephole_ok ())
	return ((get_attr_in_uncond_branch_delay (trial)
		 == IN_BRANCH_DELAY_TRUE));
      return 0;
    }

  pat = PATTERN (trial);

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.
     Hard regs 24..31 are the %i (incoming) registers; anything below
     would be destroyed by the window restore before it could be used.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (REGNO (SET_DEST (pat)) >= 32)
    {
      if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
	  && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
	return 1;
      return 0;
    }

  /* The set of insns matched here must agree precisely with the set of
     patterns paired with a RETURN in sparc.md.  */

  src = SET_SRC (pat);

  /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* This matches "*return_di".  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* This matches "*return_sf_no_fpu".  */
  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
	   && register_operand (src, SFmode))
    return 1;

  /* If we have return instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
	   && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
    return 1;

  /* This matches "*return_addsi".  */
  else if (GET_CODE (src) == PLUS
	   && arith_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode)
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 1), SImode)))
    return 1;

  /* This matches "*return_adddi".  */
  else if (GET_CODE (src) == PLUS
	   && arith_double_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode)
	   && (register_operand (XEXP (src, 0), DImode)
	       || register_operand (XEXP (src, 1), DImode)))
    return 1;

  /* This can match "*return_losum_[sd]i".
     Catch only some cases, so that return_losum* don't have
     to be too big.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
	        && immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
2544
/* Return nonzero if TRIAL can go into the sibling call
   delay slot.  The set of insns accepted must agree with the
   patterns paired with sibling calls in sparc.md.  */

int
eligible_for_sibcall_delay (trial)
     rtx trial;
{
  rtx pat, src;

  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  /* Only single-instruction patterns fit in a delay slot.  */
  if (get_attr_length (trial) != 1)
    return 0;

  pat = PATTERN (trial);

  if (current_function_uses_only_leaf_regs)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if ((TARGET_ARCH64 && ! TARGET_CM_MEDLOW) || flag_pic)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination
     must be one of the %i registers (hard regs 24..31); FP regs
     cannot be restored.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) < 24
      || REGNO (SET_DEST (pat)) >= 32)
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  src = SET_SRC (pat);

  /* Simple register or small-immediate move.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
	   && register_operand (src, SFmode))
    return 1;

  /* SImode addition doable with the restore itself.  */
  else if (GET_CODE (src) == PLUS
	   && arith_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode)
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 1), SImode)))
    return 1;

  /* DImode addition doable with the restore itself.  */
  else if (GET_CODE (src) == PLUS
	   && arith_double_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode)
	   && (register_operand (XEXP (src, 0), DImode)
	       || register_operand (XEXP (src, 1), DImode)))
    return 1;

  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
	        && immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
2638
/* Subroutine of eligible_for_return_delay: recursively check that X
   references only in and global registers (IN_OR_GLOBAL_P), constants,
   or expressions composed of those.  Return 1 if so, 0 otherwise.  */
static int
check_return_regs (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
      return IN_OR_GLOBAL_P (x);

    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
    return 1;

    case SET:
    case IOR:
    case AND:
    case XOR:
    case PLUS:
    case MINUS:
      /* Binary codes: check operand 1 here, then fall through to share
	 the operand-0 check with the unary codes below.  */
      if (check_return_regs (XEXP (x, 1)) == 0)
  return 0;
      /* Fall through.  */
    case NOT:
    case NEG:
    case MEM:
      return check_return_regs (XEXP (x, 0));

    default:
      return 0;
    }

}
2673
2674/* Return 1 if TRIAL references only in and global registers.  */
2675int
2676eligible_for_return_delay (trial)
2677     rtx trial;
2678{
2679  if (GET_CODE (PATTERN (trial)) != SET)
2680    return 0;
2681
2682  return check_return_regs (PATTERN (trial));
2683}
2684
/* Return 1 if the distance between the insns with uids UID1 and UID2
   is small enough for a short branch.  */
int
short_branch (uid1, uid2)
     int uid1, uid2;
{
  int distance = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);

  /* Leave a few words of "slop".  */
  if (distance < -1023 || distance > 1022)
    return 0;

  return 1;
}
2697
2698/* Return non-zero if REG is not used after INSN.
2699   We assume REG is a reload reg, and therefore does
2700   not live past labels or calls or jumps.  */
2701int
2702reg_unused_after (reg, insn)
2703     rtx reg;
2704     rtx insn;
2705{
2706  enum rtx_code code, prev_code = UNKNOWN;
2707
2708  while ((insn = NEXT_INSN (insn)))
2709    {
2710      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2711	return 1;
2712
2713      code = GET_CODE (insn);
2714      if (GET_CODE (insn) == CODE_LABEL)
2715	return 1;
2716
2717      if (GET_RTX_CLASS (code) == 'i')
2718	{
2719	  rtx set = single_set (insn);
2720	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2721	  if (set && in_src)
2722	    return 0;
2723	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2724	    return 1;
2725	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2726	    return 0;
2727	}
2728      prev_code = code;
2729    }
2730  return 1;
2731}
2732
/* The table we use to reference PIC data.  */
static rtx global_offset_table;

/* The function we use to get at it.  */
static rtx get_pc_symbol;
/* Assembler name of GET_PC_SYMBOL; first byte stays 0 until the helper
   has been emitted (see load_pic_register).  */
static char get_pc_symbol_name[256];

/* Ensure that we are not using patterns that are not OK with PIC.  */

int
check_pic (i)
     int i;
{
  switch (flag_pic)
    {
    case 1:
      /* With flag_pic == 1, abort on any symbolic operand that is not
	 of the GOT-relative (const (minus global_offset_table ...))
	 form.  */
      if (GET_CODE (recog_data.operand[i]) == SYMBOL_REF
	  || (GET_CODE (recog_data.operand[i]) == CONST
	      && ! (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
		    && (XEXP (XEXP (recog_data.operand[i], 0), 0)
			== global_offset_table)
		    && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
			== CONST))))
	abort ();
      /* Fall through.  */
    case 2:
    default:
      return 1;
    }
}
2762
2763/* Return true if X is an address which needs a temporary register when
2764   reloaded while generating PIC code.  */
2765
2766int
2767pic_address_needs_scratch (x)
2768     rtx x;
2769{
2770  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2771  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2772      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2773      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2774      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2775    return 1;
2776
2777  return 0;
2778}
2779
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if non zero, otherwise we allocate register(s) as
   necessary.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx reg;
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
	{
	  /* Cannot create new pseudos once reload has started.  */
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = ((reload_in_progress || reload_completed)
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (Pmode == SImode)
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	}
      else
	address = orig;

      /* Load the symbol's address from the GOT: *(GOT base + offset).  */
      pic_ref = gen_rtx_MEM (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      insn = emit_move_insn (reg, pic_ref);
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
				  REG_NOTES (insn));
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already a sum based off the PIC register: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  /* Cannot create new pseudos once reload has started.  */
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both halves of the PLUS separately.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else
	abort ();

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    abort ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? Why do we do this?  */
    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
       the register is live instead, in case it is eliminated.  */
    current_function_uses_pic_offset_table = 1;

  return orig;
}
2888
/* Emit special PIC prologues.  Sets up the PIC register
   (pic_offset_table_rtx) by calling the get_pc helper routine.  */

void
load_pic_register ()
{
  /* Labels to get the PC in the prologue of this function.  */
  int orig_flag_pic = flag_pic;

  if (! flag_pic)
    abort ();

  /* If we haven't emitted the special get_pc helper function, do so now.
     It is a tiny text-section routine: "retl; add %o7, %l7, %l7".  */
  if (get_pc_symbol_name[0] == 0)
    {
      int align;

      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
      text_section ();

      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
      if (align > 0)
	ASM_OUTPUT_ALIGN (asm_out_file, align);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
      fputs ("\tretl\n\tadd\t%o7, %l7, %l7\n", asm_out_file);
    }

  /* Initialize every time through, since we can't easily
     know this to be permanent.  */
  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);

  /* Clear flag_pic around the emission — note the save/restore via
     orig_flag_pic — presumably so the expansion of gen_get_pc is not
     itself run through PIC legitimization.  */
  flag_pic = 0;

  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
			 get_pc_symbol));

  flag_pic = orig_flag_pic;

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
}
2932
/* Return 1 if MEM is a MEM which is known to be aligned to at
   least a DESIRED byte boundary (DESIRED must be a power of two).  */

int
mem_min_alignment (mem, desired)
     rtx mem;
     int desired;
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Decompose the address into BASE register + constant OFFSET,
     when it has that shape.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != FRAME_POINTER_REGNUM
	  && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer: account for the stack bias before
	     testing the offset's alignment.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
3008
3009
3010/* Vectors to keep interesting information about registers where it can easily
3011   be got.  We use to use the actual mode value as the bit number, but there
3012   are more than 32 modes now.  Instead we use two tables: one indexed by
3013   hard register number, and one indexed by mode.  */
3014
3015/* The purpose of sparc_mode_class is to shrink the range of modes so that
3016   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
3017   mapped into one sparc_mode_class mode.  */
3018
/* Each real machine mode is mapped into one of these classes; the
   class index is then used as a bit number in the *_MODES masks
   below.  */
enum sparc_mode_class {
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.
   (Fixed: cast DF_MODE to int for consistency with the other masks.)  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

/* Modes for quad-float and double-float quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)

/* Modes for quad-float pair only quantities.  */
#define OF_ONLY_MODES (1 << (int) OF_MODE)

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | OF_ONLY_MODES)

/* Modes for quad-float pairs and double-floats (no single-float).  */
#define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
3068
3069/* Value is 1 if register/mode pair is acceptable on sparc.
3070   The funny mixture of D and T modes is because integer operations
3071   do not specially operate on tetra quantities, so non-quad-aligned
3072   registers can hold quadword quantities (except %o4 and %i4 because
3073   they cross fixed registers).  */
3074
/* This points to either the 32 bit or the 64 bit version.  */
const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  /* Int regs 0..31.  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
3102
static const int hard_64bit_mode_classes[] = {
  /* Int regs 0..31 (8 bytes wide on v9, hence D_MODES minimum).  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
3127
/* Per-mode bit mask of sparc_mode_class bits; filled in by sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Register class of each hard register; filled in by sparc_init_modes and
   used by REGNO_REG_CLASS.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3131
3132static void
3133sparc_init_modes ()
3134{
3135  int i;
3136
3137  for (i = 0; i < NUM_MACHINE_MODES; i++)
3138    {
3139      switch (GET_MODE_CLASS (i))
3140	{
3141	case MODE_INT:
3142	case MODE_PARTIAL_INT:
3143	case MODE_COMPLEX_INT:
3144	  if (GET_MODE_SIZE (i) <= 4)
3145	    sparc_mode_class[i] = 1 << (int) S_MODE;
3146	  else if (GET_MODE_SIZE (i) == 8)
3147	    sparc_mode_class[i] = 1 << (int) D_MODE;
3148	  else if (GET_MODE_SIZE (i) == 16)
3149	    sparc_mode_class[i] = 1 << (int) T_MODE;
3150	  else if (GET_MODE_SIZE (i) == 32)
3151	    sparc_mode_class[i] = 1 << (int) O_MODE;
3152	  else
3153	    sparc_mode_class[i] = 0;
3154	  break;
3155	case MODE_FLOAT:
3156	case MODE_COMPLEX_FLOAT:
3157	  if (GET_MODE_SIZE (i) <= 4)
3158	    sparc_mode_class[i] = 1 << (int) SF_MODE;
3159	  else if (GET_MODE_SIZE (i) == 8)
3160	    sparc_mode_class[i] = 1 << (int) DF_MODE;
3161	  else if (GET_MODE_SIZE (i) == 16)
3162	    sparc_mode_class[i] = 1 << (int) TF_MODE;
3163	  else if (GET_MODE_SIZE (i) == 32)
3164	    sparc_mode_class[i] = 1 << (int) OF_MODE;
3165	  else
3166	    sparc_mode_class[i] = 0;
3167	  break;
3168	case MODE_CC:
3169	default:
3170	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
3171	     we must explicitly check for them here.  */
3172	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
3173	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3174	  else if (i == (int) CCmode || i == (int) CC_NOOVmode
3175		   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
3176	    sparc_mode_class[i] = 1 << (int) CC_MODE;
3177	  else
3178	    sparc_mode_class[i] = 0;
3179	  break;
3180	}
3181    }
3182
3183  if (TARGET_ARCH64)
3184    hard_regno_mode_classes = hard_64bit_mode_classes;
3185  else
3186    hard_regno_mode_classes = hard_32bit_mode_classes;
3187
3188  /* Initialize the array used by REGNO_REG_CLASS.  */
3189  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3190    {
3191      if (i < 16 && TARGET_V8PLUS)
3192	sparc_regno_reg_class[i] = I64_REGS;
3193      else if (i < 32)
3194	sparc_regno_reg_class[i] = GENERAL_REGS;
3195      else if (i < 64)
3196	sparc_regno_reg_class[i] = FP_REGS;
3197      else if (i < 96)
3198	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3199      else if (i < 100)
3200	sparc_regno_reg_class[i] = FPCC_REGS;
3201      else
3202	sparc_regno_reg_class[i] = NO_REGS;
3203    }
3204}
3205
3206/* Save non call used registers from LOW to HIGH at BASE+OFFSET.
3207   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
3208   v9 int regs as it simplifies the code.  */
3209
static int
save_regs (file, low, high, base, offset, n_regs, real_offset)
     FILE *file;
     int low, high;
     const char *base;
     int offset;
     int n_regs;
     int real_offset;
{
  int i;

  /* v9 int regs are 8 bytes wide: store each live call-saved reg with a
     single stx; it still counts as two 4-byte slots in N_REGS.  */
  if (TARGET_ARCH64 && high <= 32)
    {
      for (i = low; i < high; i++)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      fprintf (file, "\tstx\t%s, [%s+%d]\n",
		       reg_names[i], base, offset + 4 * n_regs);
	      if (dwarf2out_do_frame ())
		dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
	      n_regs += 2;
	    }
	}
    }
  else
    {
      /* Walk the registers in aligned pairs so std can be used when both
	 members of a pair need saving.  Each pair consumes two 4-byte
	 slots even when only one register is stored, keeping the slot
	 offsets fixed.  */
      for (i = low; i < high; i += 2)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  /* Both regs of the pair live: one doubleword store;
		     record both saves under a single CFI label.  */
		  fprintf (file, "\tstd\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    {
		      char *l = dwarf2out_cfi_label ();
		      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
		      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
		    }
		  n_regs += 2;
		}
	      else
		{
		  /* Only the even reg of the pair is live.  */
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
		  n_regs += 2;
		}
	    }
	  else
	    {
	      /* Only the odd reg of the pair is live; store it into the
		 second slot of the pair.  */
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i+1], base, offset + 4 * n_regs + 4);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
		  n_regs += 2;
		}
	    }
	}
    }
  return n_regs;
}
3277
3278/* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
3279
3280   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
3281   v9 int regs as it simplifies the code.  */
3282
3283static int
3284restore_regs (file, low, high, base, offset, n_regs)
3285     FILE *file;
3286     int low, high;
3287     const char *base;
3288     int offset;
3289     int n_regs;
3290{
3291  int i;
3292
3293  if (TARGET_ARCH64 && high <= 32)
3294    {
3295      for (i = low; i < high; i++)
3296	{
3297	  if (regs_ever_live[i] && ! call_used_regs[i])
3298	    fprintf (file, "\tldx\t[%s+%d], %s\n",
3299	      base, offset + 4 * n_regs, reg_names[i]),
3300	    n_regs += 2;
3301	}
3302    }
3303  else
3304    {
3305      for (i = low; i < high; i += 2)
3306	{
3307	  if (regs_ever_live[i] && ! call_used_regs[i])
3308	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3309	      fprintf (file, "\tldd\t[%s+%d], %s\n",
3310		       base, offset + 4 * n_regs, reg_names[i]),
3311	      n_regs += 2;
3312	    else
3313	      fprintf (file, "\tld\t[%s+%d], %s\n",
3314		       base, offset + 4 * n_regs, reg_names[i]),
3315	      n_regs += 2;
3316	  else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3317	    fprintf (file, "\tld\t[%s+%d], %s\n",
3318		     base, offset + 4 * n_regs + 4, reg_names[i+1]),
3319	    n_regs += 2;
3320	}
3321    }
3322  return n_regs;
3323}
3324
3325/* Compute the frame size required by the function.  This function is called
3326   during the reload pass and also by output_function_prologue().  */
3327
int
compute_frame_size (size, leaf_function)
     int size;
     int leaf_function;
{
  int n_regs = 0, i;
  int outgoing_args_size = (current_function_outgoing_args_size
			    + REG_PARM_STACK_SPACE (current_function_decl));

  if (TARGET_EPILOGUE)
    {
      /* N_REGS is the number of 4-byte regs saved thus far.  This applies
	 even to v9 int regs to be consistent with save_regs/restore_regs.  */

      if (TARGET_ARCH64)
	{
	  for (i = 0; i < 8; i++)
	    if (regs_ever_live[i] && ! call_used_regs[i])
	      n_regs += 2;
	}
      else
	{
	  /* Count in aligned pairs, matching the layout save_regs uses.  */
	  for (i = 0; i < 8; i += 2)
	    if ((regs_ever_live[i] && ! call_used_regs[i])
		|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	      n_regs += 2;
	}

      /* Count live call-saved FP regs (32..63, plus 64..95 on v9).  */
      for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
	if ((regs_ever_live[i] && ! call_used_regs[i])
	    || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	  n_regs += 2;
    }

  /* Set up values for use in `function_epilogue'.  */
  num_gfregs = n_regs;

  if (leaf_function && n_regs == 0
      && size == 0 && current_function_outgoing_args_size == 0)
    {
      actual_fsize = apparent_fsize = 0;
    }
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
         The stack bias (if any) is taken out to undo its effects.  */
      /* Round locals plus register-save area up to a multiple of 8.  */
      apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
      apparent_fsize += n_regs * 4;
      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
    }

  /* Make sure nothing can clobber our register windows.
     If a SAVE must be done, or there is a stack-local variable,
     the register window area must be allocated.
     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
  if (leaf_function == 0 || size > 0)
    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);

  return SPARC_STACK_ALIGN (actual_fsize);
}
3388
3389/* Build a (32 bit) big number in a register.  */
3390/* ??? We may be able to use the set macro here too.  */
3391
3392static void
3393build_big_number (file, num, reg)
3394     FILE *file;
3395     int num;
3396     const char *reg;
3397{
3398  if (num >= 0 || ! TARGET_ARCH64)
3399    {
3400      fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3401      if ((num & 0x3ff) != 0)
3402	fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3403    }
3404  else /* num < 0 && TARGET_ARCH64 */
3405    {
3406      /* Sethi does not sign extend, so we must use a little trickery
3407	 to use it for negative numbers.  Invert the constant before
3408	 loading it in, then use xor immediate to invert the loaded bits
3409	 (along with the upper 32 bits) to the desired constant.  This
3410	 works because the sethi and immediate fields overlap.  */
3411      int asize = num;
3412      int inv = ~asize;
3413      int low = -0x400 + (asize & 0x3FF);
3414
3415      fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3416	       inv, reg, reg, low, reg);
3417    }
3418}
3419
3420/* Output any necessary .register pseudo-ops.  */
void
sparc_output_scratch_registers (file)
     FILE *file ATTRIBUTE_UNUSED;
{
#ifdef HAVE_AS_REGISTER_PSEUDO_OP
  int i;

  /* The .register pseudo-op only matters for the 64-bit ABI.  */
  if (TARGET_ARCH32)
    return;

  /* Check if %g[2367] were used without
     .register being printed for them already.  */
  for (i = 2; i < 8; i++)
    {
      if (regs_ever_live [i]
	  && ! sparc_hard_reg_printed [i])
	{
	  sparc_hard_reg_printed [i] = 1;
	  fprintf (file, "\t.register\t%%g%d, #scratch\n", i);
	}
      /* Skip %g4 and %g5; only %g2, %g3, %g6 and %g7 are visited.  */
      if (i == 3) i = 5;
    }
#endif
}
3445
3446/* This function generates the assembly code for function entry.
3447   FILE is a stdio stream to output the code to.
3448   SIZE is an int: how many units of temporary storage to allocate.
3449   Refer to the array `regs_ever_live' to determine which registers
3450   to save; `regs_ever_live[I]' is nonzero if register number I
3451   is ever used in the function.  This macro is responsible for
3452   knowing which registers should not be saved even if used.  */
3453
3454/* On SPARC, move-double insns between fpu and cpu need an 8-byte block
3455   of memory.  If any fpu reg is used in the function, we allocate
3456   such a block here, at the bottom of the frame, just in case it's needed.
3457
3458   If this function is a leaf procedure, then we may choose not
3459   to do a "save" insn.  The decision about whether or not
3460   to do this is made in regclass.c.  */
3461
3462static void
3463sparc_output_function_prologue (file, size)
3464     FILE *file;
3465     HOST_WIDE_INT size;
3466{
3467  if (TARGET_FLAT)
3468    sparc_flat_function_prologue (file, size);
3469  else
3470    sparc_nonflat_function_prologue (file, size,
3471				     current_function_uses_only_leaf_regs);
3472}
3473
3474/* Output code for the function prologue.  */
3475
static void
sparc_nonflat_function_prologue (file, size, leaf_function)
     FILE *file;
     HOST_WIDE_INT size;
     int leaf_function;
{
  sparc_output_scratch_registers (file);

  /* Need to use actual_fsize, since we are also allocating
     space for our callee (and our own register save area).  */
  actual_fsize = compute_frame_size (size, leaf_function);

  /* A leaf function does no save, so its frame is addressed off %sp and
     the base offset must cover the whole frame; otherwise address off %fp
     with only the stack bias.  */
  if (leaf_function)
    {
      frame_base_name = "%sp";
      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
    }
  else
    {
      frame_base_name = "%fp";
      frame_base_offset = SPARC_STACK_BIAS;
    }

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);

  /* Frames up to 4096 bytes fit the 13-bit signed immediate; up to 8192
     need one extra add; anything larger goes through %g1.  */
  if (actual_fsize == 0)
    /* do nothing.  */ ;
  else if (! leaf_function)
    {
      if (actual_fsize <= 4096)
	fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
	}
    }
  else /* leaf function */
    {
      if (actual_fsize <= 4096)
	fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
	}
    }

  if (dwarf2out_do_frame () && actual_fsize)
    {
      char *label = dwarf2out_cfi_label ();

      /* The canonical frame address refers to the top of the frame.  */
      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
				 : FRAME_POINTER_REGNUM),
			 frame_base_offset);

      if (! leaf_function)
	{
	  /* Note the register window save.  This tells the unwinder that
	     it needs to restore the window registers from the previous
	     frame's window save area at 0(cfa).  */
	  dwarf2out_window_save (label);

	  /* The return address (-8) is now in %i7.  */
	  dwarf2out_return_reg (label, 31);
	}
    }

  /* If doing anything with PIC, do it now.  */
  if (! flag_pic)
    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);

  /* Call saved registers are saved just above the outgoing argument area.  */
  if (num_gfregs)
    {
      int offset, real_offset, n_regs;
      const char *base;

      real_offset = -apparent_fsize;
      offset = -apparent_fsize + frame_base_offset;
      /* If the save area is not reachable with a 13-bit displacement,
	 materialize its address in %g1 and save relative to that.  */
      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
	{
	  /* ??? This might be optimized a little as %g1 might already have a
	     value close enough that a single add insn will do.  */
	  /* ??? Although, all of this is probably only a temporary fix
	     because if %g1 can hold a function result, then
	     output_function_epilogue will lose (the result will get
	     clobbered).  */
	  build_big_number (file, offset, "%g1");
	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
	  base = "%g1";
	  offset = 0;
	}
      else
	{
	  base = frame_base_name;
	}

      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
	/* ??? Originally saved regs 0-15 here.  */
	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      else if (leaf_function)
	/* ??? Originally saved regs 0-31 here.  */
	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      if (TARGET_EPILOGUE)
	save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
		   real_offset);
    }

  leaf_label = 0;
  if (leaf_function && actual_fsize != 0)
    {
      /* warning ("leaf procedure with frame size %d", actual_fsize); */
      if (! TARGET_EPILOGUE)
	leaf_label = gen_label_rtx ();
    }
}
3606
3607/* Output code to restore any call saved registers.  */
3608
3609static void
3610output_restore_regs (file, leaf_function)
3611     FILE *file;
3612     int leaf_function;
3613{
3614  int offset, n_regs;
3615  const char *base;
3616
3617  offset = -apparent_fsize + frame_base_offset;
3618  if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
3619    {
3620      build_big_number (file, offset, "%g1");
3621      fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3622      base = "%g1";
3623      offset = 0;
3624    }
3625  else
3626    {
3627      base = frame_base_name;
3628    }
3629
3630  n_regs = 0;
3631  if (TARGET_EPILOGUE && ! leaf_function)
3632    /* ??? Originally saved regs 0-15 here.  */
3633    n_regs = restore_regs (file, 0, 8, base, offset, 0);
3634  else if (leaf_function)
3635    /* ??? Originally saved regs 0-31 here.  */
3636    n_regs = restore_regs (file, 0, 8, base, offset, 0);
3637  if (TARGET_EPILOGUE)
3638    restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
3639}
3640
3641/* This function generates the assembly code for function exit,
3642   on machines that need it.
3643
3644   The function epilogue should not depend on the current stack pointer!
3645   It should use the frame pointer only.  This is mandatory because
3646   of alloca; we also take advantage of it to omit stack adjustments
3647   before returning.  */
3648
3649static void
3650sparc_output_function_epilogue (file, size)
3651     FILE *file;
3652     HOST_WIDE_INT size;
3653{
3654  if (TARGET_FLAT)
3655    sparc_flat_function_epilogue (file, size);
3656  else
3657    sparc_nonflat_function_epilogue (file, size,
3658				     current_function_uses_only_leaf_regs);
3659}
3660
3661/* Output code for the function epilogue.  */
3662
static void
sparc_nonflat_function_epilogue (file, size, leaf_function)
     FILE *file;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
     int leaf_function;
{
  const char *ret;

  if (leaf_label)
    {
      /* Emit the label a leaf prologue arranged to branch to.  */
      emit_label_after (leaf_label, get_last_insn ());
      final_scan_insn (get_last_insn (), file, 0, 0, 1);
    }

  if (current_function_epilogue_delay_list == 0)
    {
      /* If code does not drop into the epilogue, we need
	 do nothing except output pending case vectors.  */
      rtx insn = get_last_insn ();
      if (GET_CODE (insn) == NOTE)
      insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
      goto output_vectors;
    }

  if (num_gfregs)
    output_restore_regs (file, leaf_function);

  /* Work out how to skip the caller's unimp instruction if required.  */
  if (leaf_function)
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
  else
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");

  if (TARGET_EPILOGUE || leaf_label)
    {
      /* Temporarily clear the epilogue bit in target_flags while scanning
	 insns below; it is restored just before output_vectors.  */
      int old_target_epilogue = TARGET_EPILOGUE;
      target_flags &= ~old_target_epilogue;

      if (! leaf_function)
	{
	  if (current_function_calls_eh_return)
	    {
	      if (current_function_epilogue_delay_list)
		abort ();
	      if (SKIP_CALLERS_UNIMP_P)
		abort ();

	      /* %g1 carries an extra stack adjustment applied after the
		 restore/retl pair.  */
	      fputs ("\trestore\n\tretl\n\tadd\t%sp, %g1, %sp\n", file);
	    }
	  /* If we wound up with things in our delay slot, flush them here.  */
	  else if (current_function_epilogue_delay_list)
	    {
	      rtx delay = PATTERN (XEXP (current_function_epilogue_delay_list, 0));

	      if (TARGET_V9 && ! epilogue_renumber (&delay, 1))
		{
		  /* The delay insn can be renumbered to be valid after the
		     register window restore: use the v9 "return" insn and
		     put it in the delay slot.  */
		  epilogue_renumber (&delay, 0);
		  fputs (SKIP_CALLERS_UNIMP_P
			 ? "\treturn\t%i7+12\n"
			 : "\treturn\t%i7+8\n", file);
		  final_scan_insn (XEXP (current_function_epilogue_delay_list, 0), file, 1, 0, 0);
		}
	      else
		{
		  rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
						   get_last_insn ());
		  rtx src;

		  if (GET_CODE (delay) != SET)
		    abort();

		  src = SET_SRC (delay);
		  if (GET_CODE (src) == ASHIFT)
		    {
		      if (XEXP (src, 1) != const1_rtx)
			abort();
		      /* Rewrite x << 1 as x + x.  */
		      SET_SRC (delay) = gen_rtx_PLUS (GET_MODE (src), XEXP (src, 0),
						      XEXP (src, 0));
		    }

		  /* Fuse the delay insn with the return into one parallel
		     and emit them together.  */
		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
					gen_rtvec (2, delay, PATTERN (insn)));
		  final_scan_insn (insn, file, 1, 0, 1);
		}
	    }
	  else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
	    fputs ("\treturn\t%i7+8\n\tnop\n", file);
	  else
	    fprintf (file, "\t%s\n\trestore\n", ret);
	}
      else if (current_function_calls_eh_return)
	abort ();
      /* All of the following cases are for leaf functions.  */
      else if (current_function_epilogue_delay_list)
	{
	  /* eligible_for_epilogue_delay_slot ensures that if this is a
	     leaf function, then we will only have insn in the delay slot
	     if the frame size is zero, thus no adjust for the stack is
	     needed here.  */
	  if (actual_fsize != 0)
	    abort ();
	  fprintf (file, "\t%s\n", ret);
	  final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
			   file, 1, 0, 1);
	}
      /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
	 avoid generating confusing assembly language output.  */
      else if (actual_fsize == 0)
	fprintf (file, "\t%s\n\tnop\n", ret);
      else if (actual_fsize <= 4096)
	fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
      else if (actual_fsize <= 8192)
	fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
		 ret, actual_fsize - 4096);
      else if ((actual_fsize & 0x3ff) == 0)
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, ret);
      else
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, actual_fsize, ret);
      target_flags |= old_target_epilogue;
    }

 output_vectors:
  sparc_output_deferred_case_vectors ();
}
3790
3791/* Output a sibling call.  */
3792
3793const char *
3794output_sibcall (insn, call_operand)
3795     rtx insn, call_operand;
3796{
3797  int leaf_regs = current_function_uses_only_leaf_regs;
3798  rtx operands[3];
3799  int delay_slot = dbr_sequence_length () > 0;
3800
3801  if (num_gfregs)
3802    {
3803      /* Call to restore global regs might clobber
3804	 the delay slot. Instead of checking for this
3805	 output the delay slot now.  */
3806      if (delay_slot)
3807	{
3808	  rtx delay = NEXT_INSN (insn);
3809
3810	  if (! delay)
3811	    abort ();
3812
3813	  final_scan_insn (delay, asm_out_file, 1, 0, 1);
3814	  PATTERN (delay) = gen_blockage ();
3815	  INSN_CODE (delay) = -1;
3816	  delay_slot = 0;
3817	}
3818      output_restore_regs (asm_out_file, leaf_regs);
3819    }
3820
3821  operands[0] = call_operand;
3822
3823  if (leaf_regs)
3824    {
3825#ifdef HAVE_AS_RELAX_OPTION
3826      /* If as and ld are relaxing tail call insns into branch always,
3827	 use or %o7,%g0,X; call Y; or X,%g0,%o7 always, so that it can
3828	 be optimized.  With sethi/jmpl as nor ld has no easy way how to
3829	 find out if somebody does not branch between the sethi and jmpl.  */
3830      int spare_slot = 0;
3831#else
3832      int spare_slot = ((TARGET_ARCH32 || TARGET_CM_MEDLOW) && ! flag_pic);
3833#endif
3834      int size = 0;
3835
3836      if ((actual_fsize || ! spare_slot) && delay_slot)
3837	{
3838	  rtx delay = NEXT_INSN (insn);
3839
3840	  if (! delay)
3841	    abort ();
3842
3843	  final_scan_insn (delay, asm_out_file, 1, 0, 1);
3844	  PATTERN (delay) = gen_blockage ();
3845	  INSN_CODE (delay) = -1;
3846	  delay_slot = 0;
3847	}
3848      if (actual_fsize)
3849	{
3850	  if (actual_fsize <= 4096)
3851	    size = actual_fsize;
3852	  else if (actual_fsize <= 8192)
3853	    {
3854	      fputs ("\tsub\t%sp, -4096, %sp\n", asm_out_file);
3855	      size = actual_fsize - 4096;
3856	    }
3857	  else if ((actual_fsize & 0x3ff) == 0)
3858	    fprintf (asm_out_file,
3859		     "\tsethi\t%%hi(%d), %%g1\n\tadd\t%%sp, %%g1, %%sp\n",
3860		     actual_fsize);
3861	  else
3862	    {
3863	      fprintf (asm_out_file,
3864		       "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n",
3865		       actual_fsize, actual_fsize);
3866	      fputs ("\tadd\t%%sp, %%g1, %%sp\n", asm_out_file);
3867	    }
3868	}
3869      if (spare_slot)
3870	{
3871	  output_asm_insn ("sethi\t%%hi(%a0), %%g1", operands);
3872	  output_asm_insn ("jmpl\t%%g1 + %%lo(%a0), %%g0", operands);
3873	  if (size)
3874	    fprintf (asm_out_file, "\t sub\t%%sp, -%d, %%sp\n", size);
3875	  else if (! delay_slot)
3876	    fputs ("\t nop\n", asm_out_file);
3877	}
3878      else
3879	{
3880	  if (size)
3881	    fprintf (asm_out_file, "\tsub\t%%sp, -%d, %%sp\n", size);
3882	  /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
3883	     it into branch if possible.  */
3884	  output_asm_insn ("or\t%%o7, %%g0, %%g1", operands);
3885	  output_asm_insn ("call\t%a0, 0", operands);
3886	  output_asm_insn (" or\t%%g1, %%g0, %%o7", operands);
3887	}
3888      return "";
3889    }
3890
3891  output_asm_insn ("call\t%a0, 0", operands);
3892  if (delay_slot)
3893    {
3894      rtx delay = NEXT_INSN (insn), pat;
3895
3896      if (! delay)
3897	abort ();
3898
3899      pat = PATTERN (delay);
3900      if (GET_CODE (pat) != SET)
3901	abort ();
3902
3903      operands[0] = SET_DEST (pat);
3904      pat = SET_SRC (pat);
3905      switch (GET_CODE (pat))
3906	{
3907	case PLUS:
3908	  operands[1] = XEXP (pat, 0);
3909	  operands[2] = XEXP (pat, 1);
3910	  output_asm_insn (" restore %r1, %2, %Y0", operands);
3911	  break;
3912	case LO_SUM:
3913	  operands[1] = XEXP (pat, 0);
3914	  operands[2] = XEXP (pat, 1);
3915	  output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
3916	  break;
3917	case ASHIFT:
3918	  operands[1] = XEXP (pat, 0);
3919	  output_asm_insn (" restore %r1, %r1, %Y0", operands);
3920	  break;
3921	default:
3922	  operands[1] = pat;
3923	  output_asm_insn (" restore %%g0, %1, %Y0", operands);
3924	  break;
3925	}
3926      PATTERN (delay) = gen_blockage ();
3927      INSN_CODE (delay) = -1;
3928    }
3929  else
3930    fputs ("\t restore\n", asm_out_file);
3931  return "";
3932}
3933
3934/* Functions for handling argument passing.
3935
3936   For v8 the first six args are normally in registers and the rest are
3937   pushed.  Any arg that starts within the first 6 words is at least
3938   partially passed in a register unless its data type forbids.
3939
3940   For v9, the argument registers are laid out as an array of 16 elements
3941   and arguments are added sequentially.  The first 6 int args and up to the
3942   first 16 fp args (depending on size) are passed in regs.
3943
3944   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
3945   ----    -----   --------   -----   ------------------   ------   -----------
3946    15   [SP+248]              %f31       %f30,%f31         %d30
3947    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
3948    13   [SP+232]              %f27       %f26,%f27         %d26
3949    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
3950    11   [SP+216]              %f23       %f22,%f23         %d22
3951    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
3952     9   [SP+200]              %f19       %f18,%f19         %d18
3953     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
3954     7   [SP+184]              %f15       %f14,%f15         %d14
3955     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
3956     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
3957     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
3958     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
3959     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
3960     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
3961     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
3962
3963   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
3964
3965   Integral arguments are always passed as 64 bit quantities appropriately
3966   extended.
3967
3968   Passing of floating point values is handled as follows.
3969   If a prototype is in scope:
3970     If the value is in a named argument (i.e. not a stdarg function or a
3971     value not part of the `...') then the value is passed in the appropriate
3972     fp reg.
3973     If the value is part of the `...' and is passed in one of the first 6
3974     slots then the value is passed in the appropriate int reg.
3975     If the value is part of the `...' and is not passed in one of the first 6
3976     slots then the value is passed in memory.
3977   If a prototype is not in scope:
3978     If the value is one of the first 6 arguments the value is passed in the
3979     appropriate integer reg and the appropriate fp reg.
3980     If the value is not one of the first 6 arguments the value is passed in
3981     the appropriate fp reg and in memory.
3982   */
3983
3984/* Maximum number of int regs for args.  */
3985#define SPARC_INT_ARG_MAX 6
3986/* Maximum number of fp regs for args.  */
3987#define SPARC_FP_ARG_MAX 16
3988
3989#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3990
3991/* Handle the INIT_CUMULATIVE_ARGS macro.
3992   Initialize a variable CUM of type CUMULATIVE_ARGS
3993   for a call to a function whose data type is FNTYPE.
3994   For a library call, FNTYPE is 0.  */
3995
3996void
3997init_cumulative_args (cum, fntype, libname, indirect)
3998     CUMULATIVE_ARGS *cum;
3999     tree fntype;
4000     rtx libname ATTRIBUTE_UNUSED;
4001     int indirect ATTRIBUTE_UNUSED;
4002{
4003  cum->words = 0;
4004  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4005  cum->libcall_p = fntype == 0;
4006}
4007
/* Compute the slot number to pass an argument in.
   Returns the slot number or -1 if passing on the stack.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
   *PREGNO records the register number to use if scalar type.
   *PPADDING records the amount of padding needed in words.  */

static int
function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming_p;
     int *pregno;
     int *ppadding;
{
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  int regno;

  *ppadding = 0;

  /* Addressable types must live in memory.  */
  if (type != 0 && TREE_ADDRESSABLE (type))
    return -1;
  /* v8: an under-aligned BLKmode argument goes on the stack.  */
  if (TARGET_ARCH32
      && type != 0 && mode == BLKmode
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  switch (mode)
    {
    case VOIDmode :
      /* MODE is VOIDmode when generating the actual call.
	 See emit_call_1.  */
      return -1;

    /* Scalar and complex integers go in the int arg regs, if any remain.  */
    case QImode : case CQImode :
    case HImode : case CHImode :
    case SImode : case CSImode :
    case DImode : case CDImode :
    case TImode : case CTImode :
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;
      regno = regbase + slotno;
      break;

    case SFmode : case SCmode :
    case DFmode : case DCmode :
    case TFmode : case TCmode :
      if (TARGET_ARCH32)
	{
	  /* v8: floats are passed like integers, in the int arg regs.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else
	{
	  /* v9: quad-width values must start on an even slot.  */
	  if ((mode == TFmode || mode == TCmode)
	      && (slotno & 1) != 0)
	    slotno++, *ppadding = 1;
	  if (TARGET_FPU && named)
	    {
	      if (slotno >= SPARC_FP_ARG_MAX)
		return -1;
	      /* "* 2" because fp reg numbers count 4-byte quantities.  */
	      regno = SPARC_FP_ARG_FIRST + slotno * 2;
	      /* A single float lives in the odd half of its slot
		 (see the slot table above: slot 0 -> %f1).  */
	      if (mode == SFmode)
		regno++;
	    }
	  else
	    {
	      /* Unnamed (`...') args, or no FPU: use the int regs.  */
	      if (slotno >= SPARC_INT_ARG_MAX)
		return -1;
	      regno = regbase + slotno;
	    }
	}
      break;

    case BLKmode :
      /* For sparc64, objects requiring 16 byte alignment get it.  */
      if (TARGET_ARCH64)
	{
	  if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
	    slotno++, *ppadding = 1;
	}

      if (TARGET_ARCH32
	  || (type && TREE_CODE (type) == UNION_TYPE))
	{
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else
	{
	  tree field;
	  int intregs_p = 0, fpregs_p = 0;
	  /* The ABI obviously doesn't specify how packed
	     structures are passed.  These are defined to be passed
	     in int regs if possible, otherwise memory.  */
	  int packed_p = 0;

	  /* First see what kinds of registers we need.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
		      && TARGET_FPU)
		    fpregs_p = 1;
		  else
		    intregs_p = 1;
		  if (DECL_PACKED (field))
		    packed_p = 1;
		}
	    }
	  if (packed_p || !named)
	    fpregs_p = 0, intregs_p = 1;

	  /* If all arg slots are filled, then must pass on stack.  */
	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
	    return -1;
	  /* If there are only int args and all int arg slots are filled,
	     then must pass on stack.  */
	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  /* Note that even if all int arg slots are filled, fp members may
	     still be passed in regs if such regs are available.
	     *PREGNO isn't set because there may be more than one, it's up
	     to the caller to compute them.  */
	  return slotno;
	}
      break;

    default :
      abort ();
    }

  *pregno = regno;
  return slotno;
}
4159
/* Handle recursive register counting for structure field layout.  */

struct function_arg_record_value_parms
{
  rtx ret;			/* PARALLEL under construction, or NULL_RTX
				   during the counting pass.  */
  int slotno, named, regbase;	/* As for function_arg_slotno.  */
  unsigned int nregs;		/* Registers counted/filled so far.  */
  int intoffset;		/* Bit offset of pending integer data,
				   or -1 if none is pending.  */
};
4169
4170static void function_arg_record_value_3
4171	PARAMS ((HOST_WIDE_INT, struct function_arg_record_value_parms *));
4172static void function_arg_record_value_2
4173	PARAMS ((tree, HOST_WIDE_INT,
4174		 struct function_arg_record_value_parms *));
4175static void function_arg_record_value_1
4176        PARAMS ((tree, HOST_WIDE_INT,
4177		 struct function_arg_record_value_parms *));
4178static rtx function_arg_record_value
4179	PARAMS ((tree, enum machine_mode, int, int, int));
4180
/* A subroutine of function_arg_record_value.  Traverse the structure
   recursively and determine how many registers will be required.  */

static void
function_arg_record_value_1 (type, startbitpos, parms)
     tree type;
     HOST_WIDE_INT startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;

  /* The ABI obviously doesn't specify how packed structures are
     passed.  These are defined to be passed in int regs if possible,
     otherwise memory.  */
  int packed_p = 0;

  /* We need to compute how many registers are needed so we can
     allocate the PARALLEL but before we can do that we need to know
     whether there are any packed fields.  If there are, int regs are
     used regardless of whether there are fp values present.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  /* Compute how many registers we need.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0
	      && host_integerp (bit_position (field), 1))
	    bitpos += int_bit_position (field);

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
	           && TARGET_FPU
	           && ! packed_p
	           && parms->named)
	    {
	      /* A float field headed for an fp reg.  First count the int
		 regs needed for any integer data pending before it.  */
	      if (parms->intoffset != -1)
		{
		  int intslots, this_slotno;

		  intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
		    / BITS_PER_WORD;
		  this_slotno = parms->slotno + parms->intoffset
		    / BITS_PER_WORD;

		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
		  intslots = MAX (intslots, 0);
		  parms->nregs += intslots;
		  parms->intoffset = -1;
		}

	      /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
		 If it wasn't true we wouldn't be here.  */
	      parms->nregs += 1;
	    }
	  else
	    {
	      /* Integer data: just remember where it starts; the regs are
		 counted when the next fp field (or the end) is reached.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
4257
/* A subroutine of function_arg_record_value.  Assign the bits of the
   structure between parms->intoffset and bitpos to integer registers.  */

static void
function_arg_record_value_3 (bitpos, parms)
     HOST_WIDE_INT bitpos;
     struct function_arg_record_value_parms *parms;
{
  enum machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_slotno, intslots, intoffset;
  rtx reg;

  /* Nothing pending?  */
  if (parms->intoffset == -1)
    return;

  intoffset = parms->intoffset;
  parms->intoffset = -1;

  startbit = intoffset & -BITS_PER_WORD;
  endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
  intslots = (endbit - startbit) / BITS_PER_WORD;
  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;

  /* Clamp to the number of int arg slots actually remaining.  */
  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
  if (intslots <= 0)
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
			  MODE_INT, 0);
  else
    mode = word_mode;

  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = parms->regbase + this_slotno;
      reg = gen_rtx_REG (mode, regno);
      XVECEXP (parms->ret, 0, parms->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_slotno += 1;
      /* Round the byte offset up to the next word boundary.  */
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      parms->nregs += 1;
      intslots -= 1;
    }
  while (intslots > 0);
}
4313
/* A subroutine of function_arg_record_value.  Traverse the structure
   recursively and assign bits to floating point registers.  Track which
   bits in between need integer registers; invoke function_arg_record_value_3
   to make that happen.  */

static void
function_arg_record_value_2 (type, startbitpos, parms)
     tree type;
     HOST_WIDE_INT startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;
  int packed_p = 0;

  /* Packed fields force everything into int regs; must match the test
     in function_arg_record_value_1 so both passes agree.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0
	      && host_integerp (bit_position (field), 1))
	    bitpos += int_bit_position (field);

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
	           && TARGET_FPU
	           && ! packed_p
	           && parms->named)
	    {
	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
	      rtx reg;

	      /* Flush any pending integer data before this fp field.  */
	      function_arg_record_value_3 (bitpos, parms);

	      /* An SFmode value in the upper half of a slot uses the
		 odd-numbered fp register.  */
	      reg = gen_rtx_REG (DECL_MODE (field),
			         (SPARC_FP_ARG_FIRST + this_slotno * 2
			          + (DECL_MODE (field) == SFmode
				     && (bitpos & 32) != 0)));
	      XVECEXP (parms->ret, 0, parms->nregs)
		= gen_rtx_EXPR_LIST (VOIDmode, reg,
			   GEN_INT (bitpos / BITS_PER_UNIT));
	      parms->nregs += 1;
	    }
	  else
	    {
	      /* Remember where pending integer data starts.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
4378
/* Used by function_arg and function_value to implement the complex
   Sparc64 structure calling conventions.

   Works in two passes over TYPE: first count the registers needed
   (function_arg_record_value_1), then allocate the PARALLEL and fill
   it in (function_arg_record_value_2/_3).  */

static rtx
function_arg_record_value (type, mode, slotno, named, regbase)
     tree type;
     enum machine_mode mode;
     int slotno, named, regbase;
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  struct function_arg_record_value_parms parms;
  unsigned int nregs;

  parms.ret = NULL_RTX;
  parms.slotno = slotno;
  parms.named = named;
  parms.regbase = regbase;

  /* Compute how many registers we need.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_1 (type, 0, &parms);

  /* Count any integer data still pending at the end of the record.  */
  if (parms.intoffset != -1)
    {
      unsigned int startbit, endbit;
      int intslots, this_slotno;

      startbit = parms.intoffset & -BITS_PER_WORD;
      endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
      intslots = (endbit - startbit) / BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;

      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
      intslots = MAX (intslots, 0);

      parms.nregs += intslots;
    }
  nregs = parms.nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}
      else
	{
	  /* ??? C++ has structures with no fields, and yet a size.  Give up
	     for now and pass everything back in integer registers.  */
	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	}
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }
  if (nregs == 0)
    abort ();

  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));

  /* Fill in the entries.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_2 (type, 0, &parms);
  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);

  /* The fill pass must agree with the counting pass.  */
  if (parms.nregs != nregs)
    abort ();

  return parms.ret;
}
4456
/* Handle the FUNCTION_ARG macro.
   Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */

rtx
function_arg (cum, mode, type, named, incoming_p)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming_p;
{
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  rtx reg;

  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
				&regno, &padding);

  /* -1 means the argument is passed on the stack.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      reg = gen_rtx_REG (mode, regno);
      return reg;
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  if ((GET_MODE_CLASS (mode) == MODE_FLOAT
       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
      && SPARC_FP_REG_P (regno))
    {
      reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	{
	  /* "* 2" because fp reg numbers are recorded in 4 byte
	     quantities.  */
#if 0
	  /* ??? This will cause the value to be passed in the fp reg and
	     in the stack.  When a prototype exists we want to pass the
	     value in the reg but reserve space on the stack.  That's an
	     optimization, and is deferred [for a bit].  */
	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
	    return gen_rtx_PARALLEL (mode,
			    gen_rtvec (2,
				       gen_rtx_EXPR_LIST (VOIDmode,
						NULL_RTX, const0_rtx),
				       gen_rtx_EXPR_LIST (VOIDmode,
						reg, const0_rtx)));
	  else
#else
	  /* ??? It seems that passing back a register even when past
	     the area declared by REG_PARM_STACK_SPACE will allocate
	     space appropriately, and will not copy the data onto the
	     stack, exactly as we desire.

	     This is due to locate_and_pad_parm being called in
	     expand_call whenever reg_parm_stack_space > 0, which
	     while benefical to our example here, would seem to be
	     in error from what had been intended.  Ho hum...  -- r~ */
#endif
	    return reg;
	}
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming_p)
		return reg;

	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      /* Unprototyped call: pass in both the fp reg and the
		 corresponding int reg.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* Past the int arg regs: fp reg plus memory (NULL_RTX).  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }
  else if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      /* Structures up to 16 bytes in size are passed in arg slots on the
	 stack and are promoted to registers where possible.  */

      if (int_size_in_bytes (type) > 16)
	abort (); /* shouldn't get here */

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      /* NOTE(review): this local deliberately shadows the MODE parameter
	 for the rest of this block; the gen_rtx_REG below uses the
	 recomputed integer mode, not the caller's.  */
      enum machine_mode mode;
      int bytes = int_size_in_bytes (type);

      if (bytes > 16)
	abort ();

      mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
      reg = gen_rtx_REG (mode, regno);
    }
  else
    {
      /* Scalar or complex int.  */
      reg = gen_rtx_REG (mode, regno);
    }

  return reg;
}
4596
/* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
   For an arg passed partly in registers and partly in memory,
   this is the number of registers used.
   For args passed entirely in registers or entirely in memory, zero.

   Any arg that starts in the first 6 regs but won't entirely fit in them
   needs partial registers on v8.  On v9, structures with integer
   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
   values that begin in the last fp reg [where "last fp reg" varies with the
   mode] will be split between that reg and memory.  */

int
function_arg_partial_nregs (cum, mode, type, named)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int slotno, regno, padding;

  /* We pass 0 for incoming_p here, it doesn't matter.  */
  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);

  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* v8: partial if the arg starts in regs but runs past the last one.  */
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > NPARM_REGS (SImode))
	return NPARM_REGS (SImode) - slotno;
      return 0;
    }
  else
    {
      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);
	  int align = TYPE_ALIGN (type);

	  /* 16-byte aligned aggregates start on an even slot.  */
	  if (align == 16)
	    slotno += slotno & 1;
	  /* A 9..16 byte aggregate in the last int slot gets split.  */
	  if (size > 8 && size <= 16
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return 1;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! TARGET_FPU))
	{
	  /* Complex values in int regs: split when the (possibly
	     realigned) value lands on the last slot(s) it can use.  */
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    {
	      slotno += slotno & 1;
	      if (slotno == SPARC_INT_ARG_MAX - 2)
		return 1;
	    }
	  else
	    {
	      if (slotno == SPARC_INT_ARG_MAX - 1)
		return 1;
	    }
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    slotno += slotno & 1;
	  /* Split if the value would run past the last fp arg reg.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return 1;
	}
      return 0;
    }
}
4672
4673/* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4674   !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4675   quad-precision floats by invisible reference.
4676   v9: Aggregates greater than 16 bytes are passed by reference.
4677   For Pascal, also pass arrays by reference.  */
4678
4679int
4680function_arg_pass_by_reference (cum, mode, type, named)
4681     const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4682     enum machine_mode mode;
4683     tree type;
4684     int named ATTRIBUTE_UNUSED;
4685{
4686  if (TARGET_ARCH32)
4687    {
4688      return ((type && AGGREGATE_TYPE_P (type))
4689	      || mode == TFmode || mode == TCmode);
4690    }
4691  else
4692    {
4693      return ((type && TREE_CODE (type) == ARRAY_TYPE)
4694	      /* Consider complex values as aggregates, so care for TCmode.  */
4695	      || GET_MODE_SIZE (mode) > 16
4696	      || (type && AGGREGATE_TYPE_P (type)
4697		  && int_size_in_bytes (type) > 16));
4698    }
4699}
4700
4701/* Handle the FUNCTION_ARG_ADVANCE macro.
4702   Update the data in CUM to advance over an argument
4703   of mode MODE and data type TYPE.
4704   TYPE is null for libcalls where that information may not be available.  */
4705
4706void
4707function_arg_advance (cum, mode, type, named)
4708     CUMULATIVE_ARGS *cum;
4709     enum machine_mode mode;
4710     tree type;
4711     int named;
4712{
4713  int slotno, regno, padding;
4714
4715  /* We pass 0 for incoming_p here, it doesn't matter.  */
4716  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4717
4718  /* If register required leading padding, add it.  */
4719  if (slotno != -1)
4720    cum->words += padding;
4721
4722  if (TARGET_ARCH32)
4723    {
4724      cum->words += (mode != BLKmode
4725		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4726		     : ROUND_ADVANCE (int_size_in_bytes (type)));
4727    }
4728  else
4729    {
4730      if (type && AGGREGATE_TYPE_P (type))
4731	{
4732	  int size = int_size_in_bytes (type);
4733
4734	  if (size <= 8)
4735	    ++cum->words;
4736	  else if (size <= 16)
4737	    cum->words += 2;
4738	  else /* passed by reference */
4739	    ++cum->words;
4740	}
4741      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4742	{
4743	  cum->words += 2;
4744	}
4745      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4746	{
4747	  cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4748	}
4749      else
4750	{
4751	  cum->words += (mode != BLKmode
4752			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4753			 : ROUND_ADVANCE (int_size_in_bytes (type)));
4754	}
4755    }
4756}
4757
4758/* Handle the FUNCTION_ARG_PADDING macro.
4759   For the 64 bit ABI structs are always stored left shifted in their
4760   argument slot.  */
4761
4762enum direction
4763function_arg_padding (mode, type)
4764     enum machine_mode mode;
4765     tree type;
4766{
4767  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4768    return upward;
4769
4770  /* This is the default definition.  */
4771  return (! BYTES_BIG_ENDIAN
4772	  ? upward
4773	  : ((mode == BLKmode
4774	      ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4775		 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4776	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4777	     ? downward : upward));
4778}
4779
/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
   For v9, function return values are subject to the same rules as arguments,
   except that up to 32-bytes may be returned in registers.  */

rtx
function_value (type, mode, incoming_p)
     tree type;
     enum machine_mode mode;
     int incoming_p;
{
  int regno;
  /* Note the inversion relative to function_arg: a return value that is
     INCOMING to the caller occupies the callee's outgoing registers,
     and vice versa.  */
  int regbase = (incoming_p
		 ? SPARC_OUTGOING_INT_ARG_FIRST
		 : SPARC_INCOMING_INT_ARG_FIRST);

  if (TARGET_ARCH64 && type)
    {
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* Structures up to 32 bytes in size are passed in registers,
	     promoted to fp registers where possible.  */

	  if (int_size_in_bytes (type) > 32)
	    abort (); /* shouldn't get here */

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT bytes = int_size_in_bytes (type);

	  if (bytes > 32)
	    abort ();

	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  /* v9: widen sub-word scalar integer return values to a full DImode.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
      && type && ! AGGREGATE_TYPE_P (type))
    mode = DImode;

  if (incoming_p)
    regno = BASE_RETURN_VALUE_REG (mode);
  else
    regno = BASE_OUTGOING_VALUE_REG (mode);

  return gen_rtx_REG (mode, regno);
}
4833
4834/* Do what is necessary for `va_start'.  We look at the current function
4835   to determine if stdarg or varargs is used and return the address of
4836   the first unnamed parameter.  */
4837
4838rtx
4839sparc_builtin_saveregs ()
4840{
4841  int first_reg = current_function_args_info.words;
4842  rtx address;
4843  int regno;
4844
4845  for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
4846    emit_move_insn (gen_rtx_MEM (word_mode,
4847				 gen_rtx_PLUS (Pmode,
4848					       frame_pointer_rtx,
4849					       GEN_INT (STACK_POINTER_OFFSET
4850							+ (UNITS_PER_WORD
4851							   * regno)))),
4852		    gen_rtx_REG (word_mode,
4853				 BASE_INCOMING_ARG_REG (word_mode) + regno));
4854
4855  address = gen_rtx_PLUS (Pmode,
4856			  frame_pointer_rtx,
4857			  GEN_INT (STACK_POINTER_OFFSET
4858				   + UNITS_PER_WORD * first_reg));
4859
4860  return address;
4861}
4862
/* Implement `va_start' for varargs and stdarg.  */

void
sparc_va_start (stdarg_p, valist, nextarg)
     int stdarg_p ATTRIBUTE_UNUSED;
     tree valist;
     rtx nextarg;
{
  /* Ignore the caller-supplied NEXTARG; the real next-argument address
     is the one computed by sparc_builtin_saveregs (reached via
     expand_builtin_saveregs).  */
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (1, valist, nextarg);
}
4874
/* Implement `va_arg'.  Build trees that fetch the next argument from
   the va_list VALIST and advance it, then expand them to an rtx that
   addresses the argument value.  */

rtx
sparc_va_arg (valist, type)
     tree valist, type;
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  rtx addr_rtx;
  int indirect = 0;		/* Nonzero if the slot holds a pointer.  */

  /* Round up sizeof(type) to a word.  */
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
  align = 0;

  if (TARGET_ARCH64)
    {
      /* Doubleword-aligned types get doubleword alignment in the
	 argument area.  */
      if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	align = 2 * UNITS_PER_WORD;

      /* v9: aggregates over 16 bytes are passed by reference.  */
      if (AGGREGATE_TYPE_P (type))
	{
	  if (size > 16)
	    {
	      indirect = 1;
	      size = rsize = UNITS_PER_WORD;
	    }
	  else
	    size = rsize;
	}
    }
  else
    {
      /* v8: all aggregates and quad-precision values go by reference.  */
      if (AGGREGATE_TYPE_P (type)
	  || TYPE_MODE (type) == TFmode
	  || TYPE_MODE (type) == TCmode)
	{
	  indirect = 1;
	  size = rsize = UNITS_PER_WORD;
	}
    }

  /* Round the va_list pointer up to the required alignment.  */
  incr = valist;
  if (align)
    {
      incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
			 build_int_2 (align - 1, 0)));
      incr = fold (build (BIT_AND_EXPR, ptr_type_node, incr,
			  build_int_2 (-align, -1)));
    }

  addr = incr = save_expr (incr);
  /* Big-endian: a value smaller than its slot sits at the slot's end.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    {
      addr = fold (build (PLUS_EXPR, ptr_type_node, incr,
			  build_int_2 (rsize - size, 0)));
    }
  incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
		      build_int_2 (rsize, 0)));

  /* Store the advanced pointer back into the va_list.  */
  incr = build (MODIFY_EXPR, ptr_type_node, valist, incr);
  TREE_SIDE_EFFECTS (incr) = 1;
  expand_expr (incr, const0_rtx, VOIDmode, EXPAND_NORMAL);

  addr_rtx = expand_expr (addr, NULL, Pmode, EXPAND_NORMAL);

  /* If the address isn't aligned properly for the type,
     we may need to copy to a temporary.
     FIXME: This is inefficient.  Usually we can do this
     in registers.  */
  if (align == 0
      && TYPE_ALIGN (type) > BITS_PER_WORD
      && !indirect)
    {
      /* FIXME: We really need to specify that the temporary is live
	 for the whole function because expand_builtin_va_arg wants
	 the alias set to be get_varargs_alias_set (), but in this
	 case the alias set is that for TYPE and if the memory gets
	 reused it will be reused with alias set TYPE.  */
      rtx tmp = assign_temp (type, 0, 1, 0);
      rtx dest_addr;

      addr_rtx = force_reg (Pmode, addr_rtx);
      addr_rtx = gen_rtx_MEM (BLKmode, addr_rtx);
      set_mem_alias_set (addr_rtx, get_varargs_alias_set ());
      set_mem_align (addr_rtx, BITS_PER_WORD);
      tmp = shallow_copy_rtx (tmp);
      PUT_MODE (tmp, BLKmode);
      set_mem_alias_set (tmp, 0);

      dest_addr = emit_block_move (tmp, addr_rtx, GEN_INT (rsize));
      if (dest_addr != NULL_RTX)
	addr_rtx = dest_addr;
      else
	addr_rtx = XCEXP (tmp, 0, MEM);
    }

  if (indirect)
    {
      /* The slot holds a pointer to the value; load through it.  */
      addr_rtx = force_reg (Pmode, addr_rtx);
      addr_rtx = gen_rtx_MEM (Pmode, addr_rtx);
      set_mem_alias_set (addr_rtx, get_varargs_alias_set ());
    }

  return addr_rtx;
}
4982
4983/* Return the string to output a conditional branch to LABEL, which is
4984   the operand number of the label.  OP is the conditional expression.
4985   XEXP (OP, 0) is assumed to be a condition code register (integer or
4986   floating point) and its mode specifies what kind of comparison we made.
4987
4988   REVERSED is non-zero if we should reverse the sense of the comparison.
4989
4990   ANNUL is non-zero if we should generate an annulling branch.
4991
4992   NOOP is non-zero if we have to follow this branch by a noop.
4993
4994   INSN, if set, is the insn.  */
4995
char *
output_cbranch (op, label, reversed, annul, noop, insn)
     rtx op;
     int label;
     int reversed, annul, noop;
     rtx insn;
{
  /* Assembled instruction text; returned to the caller and overwritten
     by the next call.  */
  static char string[32];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  /* Operand templates.  The trailing 'X' (and 'Y' in the fcc case) are
     placeholders that get patched in place below with the operand
     number of the label and, for v9 FP branches, the fcc register
     number.  Note they are static and mutated on every call.  */
  static char v8_labelno[] = "%lX";
  static char v9_icc_labelno[] = "%%icc, %lX";
  static char v9_xcc_labelno[] = "%%xcc, %lX";
  static char v9_fcc_labelno[] = "%%fccX, %lY";
  char *labelno;
  const char *branch;
  /* SPACES counts down from the 8-column operand field; if the
     mnemonic plus flags fill it, a single space is used instead of a
     tab before the operands.  */
  int labeloff, spaces = 8;

  if (reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;

	default:
	  abort ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  branch = "bne";
	  break;
	case EQ:
	  branch = "be";
	  break;
	case GE:
	  /* With no-overflow comparisons only the sign bit is
	     meaningful, so test it directly.  */
	  if (mode == CC_NOOVmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CC_NOOVmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;

	default:
	  abort ();
	}
      strcpy (string, branch);
    }
  spaces -= strlen (branch);

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul)
    {
      strcat (string, ",a");
      spaces -= 2;
    }

  if (! TARGET_V9)
    {
      labeloff = 2;
      labelno = v8_labelno;
    }
  else
    {
      rtx note;

      /* v9 branches carry a static prediction bit; take it from the
	 REG_BR_PRED note when the instruction has one.  */
      if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
	{
	  strcat (string,
		  INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
	  spaces -= 3;
	}

      labeloff = 9;
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  labeloff = 10;
	  labelno = v9_fcc_labelno;
	  /* Set the char indicating the number of the fcc reg to use.  */
	  labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	labelno = v9_xcc_labelno;
      else
	labelno = v9_icc_labelno;
    }
  /* Set the char indicating the number of the operand containing the
     label_ref.  */
  labelno[labeloff] = label + '0';
  if (spaces > 0)
    strcat (string, "\t");
  else
    strcat (string, " ");
  strcat (string, labelno);

  if (noop)
    strcat (string, "\n\tnop");

  return string;
}
5185
5186/* Emit a library call comparison between floating point X and Y.
5187   COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).
5188   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
5189   values as arguments instead of the TFmode registers themselves,
5190   that's why we cannot call emit_float_lib_cmp.  */
void
sparc_emit_float_lib_cmp (x, y, comparison)
     rtx x, y;
     enum rtx_code comparison;
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2;
  enum machine_mode mode;

  /* Pick the soft-float library routine.  The six ordered comparisons
     have dedicated boolean-result entry points; the unordered-aware
     codes all go through the three-way _Q[p]_cmp and are decoded after
     the call.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64) ? "_Qp_feq" : "_Q_feq";
      break;

    case NE:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fne" : "_Q_fne";
      break;

    case GT:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fgt" : "_Q_fgt";
      break;

    case GE:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fge" : "_Q_fge";
      break;

    case LT:
      qpfunc = (TARGET_ARCH64) ? "_Qp_flt" : "_Q_flt";
      break;

    case LE:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fle" : "_Q_fle";
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64) ? "_Qp_cmp" : "_Q_cmp";
      break;

    default:
      abort();
      break;
    }

  if (TARGET_ARCH64)
    {
      /* The 64-bit _Qp_* routines take pointers to TFmode values, so
	 spill any non-MEM operand to a stack temporary and pass its
	 address.  */
      if (GET_CODE (x) != MEM)
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
	  emit_insn (gen_rtx_SET (VOIDmode, slot0, x));
	}
      else
	slot0 = x;

      if (GET_CODE (y) != MEM)
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
	  emit_insn (gen_rtx_SET (VOIDmode, slot1, y));
	}
      else
	slot1 = y;

      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), 1,
			 DImode, 2,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);

      mode = DImode;
    }
  else
    {
      /* The 32-bit routines take the TFmode values directly.  */
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), 1,
			 SImode, 2,
			 x, TFmode, y, TFmode);

      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode));

  /* Decode the libcall result into the condition the caller asked for.
     For the _Q[p]_cmp cases the decode below assumes the three-way
     encoding 0 = equal, 1 = less, 2 = greater, 3 = unordered
     (NOTE: inferred from the bit tricks here -- confirm against the
     libgcc/libc implementation).  */
  switch (comparison)
    {
    default:
      /* Boolean-result routines: nonzero means the condition holds.  */
      emit_cmp_insn (result, const0_rtx, NE, NULL_RTX, mode, 0);
      break;
    case ORDERED:
    case UNORDERED:
      /* 3 <=> unordered.  */
      emit_cmp_insn (result, GEN_INT(3), comparison == UNORDERED ? EQ : NE,
		     NULL_RTX, mode, 0);
      break;
    case UNGT:
    case UNGE:
      /* result > 1 <=> {greater, unordered}; result != 1 <=> not less.  */
      emit_cmp_insn (result, const1_rtx,
		     comparison == UNGT ? GT : NE, NULL_RTX, mode, 0);
      break;
    case UNLE:
      /* result != 2 <=> not greater.  */
      emit_cmp_insn (result, const2_rtx, NE, NULL_RTX, mode, 0);
      break;
    case UNLT:
      /* Low bit set <=> result in {1, 3} <=> {less, unordered}.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      emit_cmp_insn (tem, const0_rtx, NE, NULL_RTX, mode, 0);
      break;
    case UNEQ:
    case LTGT:
      /* ((result + 1) & 2) == 0 <=> result in {0, 3}
	 <=> {equal, unordered}; its complement is {less, greater}.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      emit_cmp_insn (tem2, const0_rtx, comparison == UNEQ ? EQ : NE,
		     NULL_RTX, mode, 0);
      break;
    }
}
5326
5327/* Return the string to output a conditional branch to LABEL, testing
5328   register REG.  LABEL is the operand number of the label; REG is the
5329   operand number of the reg.  OP is the conditional expression.  The mode
5330   of REG says what kind of comparison we made.
5331
5332   REVERSED is non-zero if we should reverse the sense of the comparison.
5333
5334   ANNUL is non-zero if we should generate an annulling branch.
5335
5336   NOOP is non-zero if we have to follow this branch by a noop.  */
5337
5338char *
5339output_v9branch (op, reg, label, reversed, annul, noop, insn)
5340     rtx op;
5341     int reg, label;
5342     int reversed, annul, noop;
5343     rtx insn;
5344{
5345  static char string[20];
5346  enum rtx_code code = GET_CODE (op);
5347  enum machine_mode mode = GET_MODE (XEXP (op, 0));
5348  static char labelno[] = "%X, %lX";
5349  rtx note;
5350  int spaces = 8;
5351
5352  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
5353  if (reversed)
5354    code = reverse_condition (code), reversed = 0;
5355
5356  /* Only 64 bit versions of these instructions exist.  */
5357  if (mode != DImode)
5358    abort ();
5359
5360  /* Start by writing the branch condition.  */
5361
5362  switch (code)
5363    {
5364    case NE:
5365      strcpy (string, "brnz");
5366      spaces -= 4;
5367      break;
5368
5369    case EQ:
5370      strcpy (string, "brz");
5371      spaces -= 3;
5372      break;
5373
5374    case GE:
5375      strcpy (string, "brgez");
5376      spaces -= 5;
5377      break;
5378
5379    case LT:
5380      strcpy (string, "brlz");
5381      spaces -= 4;
5382      break;
5383
5384    case LE:
5385      strcpy (string, "brlez");
5386      spaces -= 5;
5387      break;
5388
5389    case GT:
5390      strcpy (string, "brgz");
5391      spaces -= 4;
5392      break;
5393
5394    default:
5395      abort ();
5396    }
5397
5398  /* Now add the annulling, reg, label, and nop.  */
5399  if (annul)
5400    {
5401      strcat (string, ",a");
5402      spaces -= 2;
5403    }
5404
5405  if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
5406    {
5407      strcat (string,
5408	      INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
5409      spaces -= 3;
5410    }
5411
5412  labelno[1] = reg + '0';
5413  labelno[6] = label + '0';
5414  if (spaces > 0)
5415    strcat (string, "\t");
5416  else
5417    strcat (string, " ");
5418  strcat (string, labelno);
5419
5420  if (noop)
5421    strcat (string, "\n\tnop");
5422
5423  return string;
5424}
5425
5426/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
5427   Such instructions cannot be used in the delay slot of return insn on v9.
5428   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
5429 */
5430
static int
epilogue_renumber (where, test)
     register rtx *where;
     int test;
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx (REG, GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* Fall through: a (possibly renumbered) leaf REG needs no
	 further scanning, like the other leaf rtx codes below.  */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_DOUBLE:
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
                       (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse over every sub-rtx; a hit anywhere disqualifies the whole
     expression.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
5504
5505/* Output assembler code to return from a function.  */
5506
const char *
output_return (operands)
     rtx *operands;
{
  /* The insn occupying our delay slot, if the return was combined into
     a SEQUENCE by delayed-branch scheduling.  */
  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;

  if (leaf_label)
    {
      /* The epilogue was emitted out of line at leaf_label; just
	 branch to it.  */
      operands[0] = leaf_label;
      return "b%* %l0%(";
    }
  else if (current_function_uses_only_leaf_regs)
    {
      /* No delay slot in a leaf function.  */
      if (delay)
	abort ();

      /* If we didn't allocate a frame pointer for the current function,
	 the stack pointer might have been adjusted.  Output code to
	 restore it now.  */

      operands[0] = GEN_INT (actual_fsize);

      /* Use sub of negated value in first two cases instead of add to
	 allow actual_fsize == 4096.  */

      /* The "jmp %o7+12" forms skip over the caller's unimp insn; see
	 SKIP_CALLERS_UNIMP_P.  */
      if (actual_fsize <= 4096)
	{
	  if (SKIP_CALLERS_UNIMP_P)
	    return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
	  else
	    return "retl\n\tsub\t%%sp, -%0, %%sp";
	}
      else if (actual_fsize <= 8192)
	{
	  operands[0] = GEN_INT (actual_fsize - 4096);
	  if (SKIP_CALLERS_UNIMP_P)
	    return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
	  else
	    return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
	}
      else if (SKIP_CALLERS_UNIMP_P)
	{
	  /* Frame too big for an immediate: build it in %g1 first.  */
	  if ((actual_fsize & 0x3ff) != 0)
	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
	  else
	    return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
	}
      else
	{
	  if ((actual_fsize & 0x3ff) != 0)
	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
	  else
	    return "sethi\t%%hi(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
	}
    }
  else if (TARGET_V9)
    {
      if (delay)
	{
	  /* The v9 "return" pops the register window, so local/in
	     registers in the delay insn must be renamed to the
	     caller's view (%iN -> %oN); see epilogue_renumber.  */
	  epilogue_renumber (&SET_DEST (PATTERN (delay)), 0);
	  epilogue_renumber (&SET_SRC (PATTERN (delay)), 0);
	}
      if (SKIP_CALLERS_UNIMP_P)
	return "return\t%%i7+12%#";
      else
	return "return\t%%i7+8%#";
    }
  else
    {
      if (delay)
	abort ();
      if (SKIP_CALLERS_UNIMP_P)
	return "jmp\t%%i7+12\n\trestore";
      else
	return "ret\n\trestore";
    }
}
5585
5586/* Leaf functions and non-leaf functions have different needs.  */
5587
/* Allocation order used when the current function needs only the
   leaf registers (no register window save).  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Allocation order for ordinary (non-leaf) functions.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by the "non-leaf" flag maintained in
   order_regs_for_local_alloc: 0 = leaf order, 1 = non-leaf order.  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
5597
5598void
5599order_regs_for_local_alloc ()
5600{
5601  static int last_order_nonleaf = 1;
5602
5603  if (regs_ever_live[15] != last_order_nonleaf)
5604    {
5605      last_order_nonleaf = !last_order_nonleaf;
5606      memcpy ((char *) reg_alloc_order,
5607	      (const char *) reg_alloc_orders[last_order_nonleaf],
5608	      FIRST_PSEUDO_REGISTER * sizeof (int));
5609    }
5610}
5611
5612/* Return 1 if REG and MEM are legitimate enough to allow the various
5613   mem<-->reg splits to be run.  */
5614
5615int
5616sparc_splitdi_legitimate (reg, mem)
5617     rtx reg;
5618     rtx mem;
5619{
5620  /* Punt if we are here by mistake.  */
5621  if (! reload_completed)
5622    abort ();
5623
5624  /* We must have an offsettable memory reference.  */
5625  if (! offsettable_memref_p (mem))
5626    return 0;
5627
5628  /* If we have legitimate args for ldd/std, we do not want
5629     the split to happen.  */
5630  if ((REGNO (reg) % 2) == 0
5631      && mem_min_alignment (mem, 8))
5632    return 0;
5633
5634  /* Success.  */
5635  return 1;
5636}
5637
5638/* Return 1 if x and y are some kind of REG and they refer to
5639   different hard registers.  This test is guarenteed to be
5640   run after reload.  */
5641
5642int
5643sparc_absnegfloat_split_legitimate (x, y)
5644     rtx x, y;
5645{
5646  if (GET_CODE (x) != REG)
5647    return 0;
5648  if (GET_CODE (y) != REG)
5649    return 0;
5650  if (REGNO (x) == REGNO (y))
5651    return 0;
5652  return 1;
5653}
5654
5655/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
5656   This makes them candidates for using ldd and std insns.
5657
5658   Note reg1 and reg2 *must* be hard registers.  */
5659
5660int
5661registers_ok_for_ldd_peep (reg1, reg2)
5662     rtx reg1, reg2;
5663{
5664  /* We might have been passed a SUBREG.  */
5665  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
5666    return 0;
5667
5668  if (REGNO (reg1) % 2 != 0)
5669    return 0;
5670
5671  /* Integer ldd is deprecated in SPARC V9 */
5672  if (TARGET_V9 && REGNO (reg1) < 32)
5673    return 0;
5674
5675  return (REGNO (reg1) == REGNO (reg2) - 1);
5676}
5677
5678/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
5679   an ldd or std insn.
5680
5681   This can only happen when addr1 and addr2, the addresses in mem1
5682   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
5683   addr1 must also be aligned on a 64-bit boundary.
5684
5685   Also iff dependent_reg_rtx is not null it should not be used to
5686   compute the address for mem1, i.e. we cannot optimize a sequence
5687   like:
5688   	ld [%o0], %o0
5689	ld [%o0 + 4], %o1
5690   to
5691   	ldd [%o0], %o0
5692   For stores we don't have a similar problem, so dependent_reg_rtx is
5693   NULL_RTX.  */
5694
5695int
5696mems_ok_for_ldd_peep (mem1, mem2, dependent_reg_rtx)
5697      rtx mem1, mem2, dependent_reg_rtx;
5698{
5699  rtx addr1, addr2;
5700  unsigned int reg1;
5701  int offset1;
5702
5703  /* The mems cannot be volatile.  */
5704  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
5705    return 0;
5706
5707  /* MEM1 should be aligned on a 64-bit boundary.  */
5708  if (MEM_ALIGN (mem1) < 64)
5709    return 0;
5710
5711  addr1 = XEXP (mem1, 0);
5712  addr2 = XEXP (mem2, 0);
5713
5714  /* Extract a register number and offset (if used) from the first addr.  */
5715  if (GET_CODE (addr1) == PLUS)
5716    {
5717      /* If not a REG, return zero.  */
5718      if (GET_CODE (XEXP (addr1, 0)) != REG)
5719	return 0;
5720      else
5721	{
5722          reg1 = REGNO (XEXP (addr1, 0));
5723	  /* The offset must be constant!  */
5724	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
5725            return 0;
5726          offset1 = INTVAL (XEXP (addr1, 1));
5727	}
5728    }
5729  else if (GET_CODE (addr1) != REG)
5730    return 0;
5731  else
5732    {
5733      reg1 = REGNO (addr1);
5734      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
5735      offset1 = 0;
5736    }
5737
5738  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
5739  if (GET_CODE (addr2) != PLUS)
5740    return 0;
5741
5742  if (GET_CODE (XEXP (addr2, 0)) != REG
5743      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
5744    return 0;
5745
5746  if (reg1 != REGNO (XEXP (addr2, 0)))
5747    return 0;
5748
5749  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
5750    return 0;
5751
5752  /* The first offset must be evenly divisible by 8 to ensure the
5753     address is 64 bit aligned.  */
5754  if (offset1 % 8 != 0)
5755    return 0;
5756
5757  /* The offset for the second addr must be 4 more than the first addr.  */
5758  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
5759    return 0;
5760
5761  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
5762     instructions.  */
5763  return 1;
5764}
5765
5766/* Return 1 if reg is a pseudo, or is the first register in
5767   a hard register pair.  This makes it a candidate for use in
5768   ldd and std insns.  */
5769
5770int
5771register_ok_for_ldd (reg)
5772     rtx reg;
5773{
5774  /* We might have been passed a SUBREG.  */
5775  if (GET_CODE (reg) != REG)
5776    return 0;
5777
5778  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
5779    return (REGNO (reg) % 2 == 0);
5780  else
5781    return 1;
5782}
5783
5784/* Print operand X (an rtx) in assembler syntax to file FILE.
5785   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5786   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5787
void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  /* Punctuation codes (no operand) and single-letter codes first;
     anything unhandled falls through to the generic printing at the
     bottom.  */
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.  */
      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.  */
      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (dbr_sequence_length () == 0
	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (dbr_sequence_length () == 0
	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '@':
      /* Print out what we are using as the frame pointer.  This might
	 be %fp, or might be %sp+offset.  */
      /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
      fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
      return;
    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* %iN becomes %oN after the restore pops the window.  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* We don't handle CC[X]_NOOVmode because they're not supposed
	     to occur here.  */
	  if (GET_MODE (x) == CCmode)
	    fputs ("%icc", file);
	  else if (GET_MODE (x) == CCXmode)
	    fputs ("%xcc", file);
	  else
	    abort ();
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	/* Fall through to the generic operand printing below.  */
	break;

    case 'A':
      switch (GET_CODE (x))
	{
	case IOR: fputs ("or", file); break;
	case AND: fputs ("and", file); break;
	case XOR: fputs ("xor", file); break;
	default: output_operand_lossage ("invalid %%A operand");
	}
      return;

    case 'B':
      /* Like 'A' but the "not" forms of the logical instructions.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("orn", file); break;
	case AND: fputs ("andn", file); break;
	case XOR: fputs ("xnor", file); break;
	default: output_operand_lossage ("invalid %%B operand");
	}
      return;

      /* These are used by the conditional move instructions.  */
    case 'c' :
    case 'C':
      {
	enum rtx_code rc = GET_CODE (x);
	
	/* 'c' prints the reversed condition; 'C' prints it as-is.  */
	if (code == 'c')
	  {
	    enum machine_mode mode = GET_MODE (XEXP (x, 0));
	    if (mode == CCFPmode || mode == CCFPEmode)
	      rc = reverse_condition_maybe_unordered (GET_CODE (x));
	    else
	      rc = reverse_condition (GET_CODE (x));
	  }
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("ge", file); break;
	  case GT: fputs ("g", file); break;
	  case LE: fputs ("le", file); break;
	  case LT: fputs ("l", file); break;
	  case GEU: fputs ("geu", file); break;
	  case GTU: fputs ("gu", file); break;
	  case LEU: fputs ("leu", file); break;
	  case LTU: fputs ("lu", file); break;
	  case LTGT: fputs ("lg", file); break;
	  case UNORDERED: fputs ("u", file); break;
	  case ORDERED: fputs ("o", file); break;
	  case UNLT: fputs ("ul", file); break;
	  case UNLE: fputs ("ule", file); break;
	  case UNGT: fputs ("ug", file); break;
	  case UNGE: fputs ("uge", file); break;
	  case UNEQ: fputs ("ue", file); break;
	  default: output_operand_lossage (code == 'c'
					   ? "invalid %%c operand"
					   : "invalid %%C operand");
	  }
	return;
      }

      /* These are used by the movr instruction pattern.  */
    case 'd':
    case 'D':
      {
	/* 'd' prints the reversed condition; 'D' prints it as-is.  */
	enum rtx_code rc = (code == 'd'
			    ? reverse_condition (GET_CODE (x))
			    : GET_CODE (x));
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("gez", file); break;
	  case LT: fputs ("lz", file); break;
	  case LE: fputs ("lez", file); break;
	  case GT: fputs ("gz", file); break;
	  default: output_operand_lossage (code == 'd'
					   ? "invalid %%d operand"
					   : "invalid %%D operand");
	  }
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = INTVAL (x) & 0xff;
	if (i & 0x80)
	  i |= 0xffffff00;
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      output_address (XEXP (x, 0));
      return;

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* Generic operand printing: registers, memory, %hi/%lo pieces,
     and constants.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
	/* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == VOIDmode
	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
    {
      /* A 64-bit integer constant is only a valid immediate when it
	 fits in 32 bits (zero- or sign-extended).  */
      if (CONST_DOUBLE_HIGH (x) == 0)
	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
      else if (CONST_DOUBLE_HIGH (x) == -1
	       && CONST_DOUBLE_LOW (x) < 0)
	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
      else
	output_operand_lossage ("long long constant not a valid immediate operand");
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating point constant not a valid immediate operand");
  else { output_addr_const (file, x); }
}
6056
6057/* Target hook for assembling integer objects.  The sparc version has
6058   special handling for aligned DI-mode objects.  */
6059
6060static bool
6061sparc_assemble_integer (x, size, aligned_p)
6062     rtx x;
6063     unsigned int size;
6064     int aligned_p;
6065{
6066  /* ??? We only output .xword's for symbols and only then in environments
6067     where the assembler can handle them.  */
6068  if (aligned_p && size == 8
6069      && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
6070    {
6071      if (TARGET_V9)
6072	{
6073	  assemble_integer_with_op ("\t.xword\t", x);
6074	  return true;
6075	}
6076      else
6077	{
6078	  assemble_aligned_integer (4, const0_rtx);
6079	  assemble_aligned_integer (4, x);
6080	  return true;
6081	}
6082    }
6083  return default_assemble_integer (x, size, aligned_p);
6084}
6085
6086/* Return the value of a code used in the .proc pseudo-op that says
6087   what kind of result this function returns.  For non-C types, we pick
6088   the closest C type.  */
6089
/* Fallback sizes for the standard C types, used by sparc_type_code below
   to classify INTEGER_TYPE and REAL_TYPE precisions when the target
   headers do not define them.  */

#ifndef CHAR_TYPE_SIZE
#define CHAR_TYPE_SIZE BITS_PER_UNIT
#endif

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
6121
/* Encode TYPE for the .proc pseudo-op: walk down the chain of component
   types, packing a 2-bit qualifier code for each level into bits 6..29,
   and finish with a base-type code in the low 6 bits.  */

unsigned long
sparc_type_code (type)
     register tree type;
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  /* Qualifier code 3; keep descending to the element type.  */
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  /* Qualifier code 2; keep descending to the return type.  */
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  /* Qualifier code 1; keep descending to the pointed-to type.  */
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows? */

	case CHAR_TYPE:		/* GNU Pascal CHAR type.  Not used in C.  */
	case BOOLEAN_TYPE:	/* GNU Fortran BOOLEAN type.  */
	case FILE_TYPE:		/* GNU Pascal FILE type.  */
	case SET_TYPE:		/* GNU Pascal SET type.  */
	case LANG_TYPE:		/* ? */
	  return qualifiers;

	default:
	  abort ();		/* Not a type! */
        }
    }

  /* Ran out of qualifier bits; return what we have so far.  */
  return qualifiers;
}
6233
6234/* Nested function support.  */
6235
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
   (to store insns).  This is a bit excessive.  Perhaps a different
   mechanism would be better here.

   Emit enough FLUSH insns to synchronize the data and instruction caches.  */

void
sparc_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  /* SPARC 32 bit trampoline:

 	sethi	%hi(fn), %g1
 	sethi	%hi(static), %g2
 	jmp	%g1+%lo(fn)
 	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */
#ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some systems need a library call to mark the trampoline's stack page
     executable before it can be jumped to.  */
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
                     0, VOIDmode, 1, tramp, Pmode);
#endif

  /* Word 0: "sethi %hi(fn), %g1" -- opcode 0x03000000 or'd with the high
     22 bits of FNADDR (fnaddr >> 10).  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
		  expand_binop (SImode, ior_optab,
				expand_shift (RSHIFT_EXPR, SImode, fnaddr,
					      size_int (10), 0, 1),
				GEN_INT (0x03000000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: "sethi %hi(static), %g2" -- opcode 0x05000000 or'd with the
     high 22 bits of CXT.  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  expand_binop (SImode, ior_optab,
				expand_shift (RSHIFT_EXPR, SImode, cxt,
					      size_int (10), 0, 1),
				GEN_INT (0x05000000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: "jmp %g1+%lo(fn)" -- opcode 0x81c06000 or'd with the low
     10 bits of FNADDR.  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  expand_binop (SImode, ior_optab,
				expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
				GEN_INT (0x81c06000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3 (delay slot): "or %g2, %lo(static), %g2" -- opcode 0x8410a000
     or'd with the low 10 bits of CXT.  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  expand_binop (SImode, ior_optab,
				expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
				GEN_INT (0x8410a000),
				NULL_RTX, 1, OPTAB_DIRECT));

  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC)
    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
						     plus_constant (tramp, 8)))));
}
6299
/* The 64 bit version is simpler because it makes more sense to load the
   values as "immediate" data out of the trampoline.  It's also easier since
   we can read the PC without clobbering a register.  */

void
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
#ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some systems need a library call to mark the trampoline's stack page
     executable before it can be jumped to.  */
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
                     0, VOIDmode, 1, tramp, Pmode);
#endif

  /*
	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four fixed opcode words of the sequence above, followed by the
     two 8-byte data slots (CXT at +16, FNADDR at +24) that the loads
     pick up at run time.  */
  emit_move_insn (gen_rtx_MEM (SImode, tramp),
		  GEN_INT (0x83414000));	/* rd %pc, %g1 */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  GEN_INT (0xca586018));	/* ldx [%g1+24], %g5 */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  GEN_INT (0x81c14000));	/* jmp %g5 */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  GEN_INT (0xca586010));	/* ldx [%g1+16], %g5 */
  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
  emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));

  /* On UltraSPARC one flush covers the whole (16-byte aligned) trampoline;
     elsewhere, flush the second half explicitly.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC)
    emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
6336
6337/* Subroutines to support a flat (single) register window calling
6338   convention.  */
6339
6340/* Single-register window sparc stack frames look like:
6341
6342             Before call		        After call
6343        +-----------------------+	+-----------------------+
6344   high |		        |	|			|
6345   mem  |  caller's temps.    	|       |  caller's temps.    	|
6346	|       		|       |       	        |
6347        +-----------------------+	+-----------------------+
6348 	|       		|	|		        |
6349        |  arguments on stack.  |	|  arguments on stack.  |
6350	|       		|      	|			|
6351        +-----------------------+FP+92->+-----------------------+
6352 	|  6 words to save     	|	|  6 words to save	|
6353	|  arguments passed	|	|  arguments passed	|
6354	|  in registers, even	|	|  in registers, even	|
6355       	|  if not passed.       |      	|  if not passed.	|
6356 SP+68->+-----------------------+FP+68->+-----------------------+
6357        | 1 word struct addr	|      	| 1 word struct addr	|
6358        +-----------------------+FP+64->+-----------------------+
6359        |			|	|			|
6360        | 16 word reg save area	|	| 16 word reg save area |
6361       	|                       |      	|			|
6362    SP->+-----------------------+   FP->+-----------------------+
6363				        | 4 word area for	|
6364				       	| fp/alu reg moves	|
6365				 FP-16->+-----------------------+
6366				        |			|
6367				        |  local variables	|
6368				        |			|
6369				        +-----------------------+
6370				        |		        |
6371                                        |  fp register save     |
6372				        |			|
6373				        +-----------------------+
6374				        |		        |
6375                                        |  gp register save     |
6376                                        |       		|
6377				        +-----------------------+
6378				        |			|
6379                                        |  alloca allocations   |
6380        			        |			|
6381				        +-----------------------+
6382				        |			|
6383                                        |  arguments on stack   |
6384        			       	|		        |
6385				 SP+92->+-----------------------+
6386                                        |  6 words to save      |
6387				        |  arguments passed     |
6388                                        |  in registers, even   |
6389   low                                 	|  if not passed.       |
6390   memory        		 SP+68->+-----------------------+
6391				       	| 1 word struct addr	|
6392				 SP+64->+-----------------------+
6393				        |			|
				        | 16 word reg save area |
6395				       	|			|
6396				    SP->+-----------------------+  */
6397
/* Structure to be filled in by sparc_flat_compute_frame_size with register
   save masks, and offsets for the current function.  Consumed by the -mflat
   prologue/epilogue emitters below.  */

struct sparc_frame_info
{
  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # bytes of extra gunk.  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers.  */
  unsigned long fmask;		/* Mask of saved fp registers.  */
  unsigned long reg_offset;	/* Offset from new sp to store regs.  */
  int		initialized;	/* Nonzero if frame size already calculated.  */
};
6414
/* Current frame information calculated by sparc_flat_compute_frame_size.  */
struct sparc_frame_info current_frame_info;

/* Zero structure to initialize current_frame_info.  */
struct sparc_frame_info zero_frame_info;

/* Tell prologue and epilogue if register REGNO should be saved / restored.
   A register must be saved if it is call-saved and live, if it is the frame
   pointer and a frame is needed, or if it is the return address register
   and is ever live.  */

#define RETURN_ADDR_REGNUM 15
#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))

#define MUST_SAVE_REGISTER(regno) \
 ((regs_ever_live[regno] && !call_used_regs[regno])		\
  || (regno == FRAME_POINTER_REGNUM && frame_pointer_needed)	\
  || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
6431
/* Return the bytes needed to compute the frame pointer from the current
   stack pointer, i.e. the total frame size.  Also fills in
   current_frame_info as a side effect.  */

unsigned long
sparc_flat_compute_frame_size (size)
     int size;			/* # of var. bytes allocated.  */
{
  int regno;
  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # extra bytes.  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers.  */
  unsigned long fmask;		/* Mask of saved fp registers.  */
  unsigned long reg_offset;	/* Offset to register save area.  */
  int           need_aligned_p;	/* 1 if need the save area 8 byte aligned.  */

  /* This is the size of the 16 word reg save area, 1 word struct addr
     area, and 4 word fp/alu register copy area.  */
  extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
  var_size = size;
  gp_reg_size = 0;
  fp_reg_size = 0;
  gmask = 0;
  fmask = 0;
  reg_offset = 0;
  need_aligned_p = 0;

  args_size = 0;
  if (!leaf_function_p ())
    {
      /* Also include the size needed for the 6 parameter registers.  */
      args_size = current_function_outgoing_args_size + 24;
    }
  total_size = var_size + args_size;

  /* Calculate space needed for gp registers.  */
  for (regno = 1; regno <= 31; regno++)
    {
      if (MUST_SAVE_REGISTER (regno))
	{
	  /* If we need to save two regs in a row, ensure there's room to bump
	     up the address to align it to a doubleword boundary.  */
	  if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
	    {
	      if (gp_reg_size % 8 != 0)
		gp_reg_size += 4;
	      gp_reg_size += 2 * UNITS_PER_WORD;
	      /* Record both registers of the pair as saved.  */
	      gmask |= 3 << regno;
	      regno++;
	      need_aligned_p = 1;
	    }
	  else
	    {
	      gp_reg_size += UNITS_PER_WORD;
	      gmask |= 1 << regno;
	    }
	}
    }

  /* Calculate space needed for fp registers.  */
  for (regno = 32; regno <= 63; regno++)
    {
      if (regs_ever_live[regno] && !call_used_regs[regno])
	{
	  fp_reg_size += UNITS_PER_WORD;
	  fmask |= 1 << (regno - 32);
	}
    }

  if (gmask || fmask)
    {
      int n;
      reg_offset = FIRST_PARM_OFFSET(0) + args_size;
      /* Ensure save area is 8 byte aligned if we need it.  */
      n = reg_offset % 8;
      if (need_aligned_p && n != 0)
	{
	  total_size += 8 - n;
	  reg_offset += 8 - n;
	}
      total_size += gp_reg_size + fp_reg_size;
    }

  /* If we must allocate a stack frame at all, we must also allocate
     room for register window spillage, so as to be binary compatible
     with libraries and operating systems that do not use -mflat.  */
  if (total_size > 0)
    total_size += extra_size;
  else
    extra_size = 0;

  total_size = SPARC_STACK_ALIGN (total_size);

  /* Save other computed information.  */
  current_frame_info.total_size  = total_size;
  current_frame_info.var_size    = var_size;
  current_frame_info.args_size   = args_size;
  current_frame_info.extra_size  = extra_size;
  current_frame_info.gp_reg_size = gp_reg_size;
  current_frame_info.fp_reg_size = fp_reg_size;
  current_frame_info.gmask	 = gmask;
  current_frame_info.fmask	 = fmask;
  current_frame_info.reg_offset	 = reg_offset;
  current_frame_info.initialized = reload_completed;

  /* Ok, we're done.  */
  return total_size;
}
6543
/* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
   OFFSET.

   BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
   appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
   [BASE_REG+OFFSET] will always be a valid address.

   WORD_OP is either "st" for save, "ld" for restore.
   DOUBLEWORD_OP is either "std" for save, "ldd" for restore.
   BASE_OFFSET is added to OFFSET when recording dwarf2 frame info.  */

void
sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
			 doubleword_op, base_offset)
     FILE *file;
     const char *base_reg;
     unsigned int offset;
     unsigned long gmask;
     unsigned long fmask;
     const char *word_op;
     const char *doubleword_op;
     unsigned long base_offset;
{
  int regno;

  if (gmask == 0 && fmask == 0)
    return;

  /* Save registers starting from high to low.  We've already saved the
     previous frame pointer and previous return address for the debugger's
     sake.  The debugger allows us to not need a nop in the epilog if at least
     one register is reloaded in addition to return address.  */

  if (gmask)
    {
      for (regno = 1; regno <= 31; regno++)
	{
	  if ((gmask & (1L << regno)) != 0)
	    {
	      /* An even-numbered register with its successor also in the
		 mask can be handled with a single doubleword op.  */
	      if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
		{
		  /* We can save two registers in a row.  If we're not at a
		     double word boundary, move to one.
		     sparc_flat_compute_frame_size ensures there's room to do
		     this.  */
		  if (offset % 8 != 0)
		    offset += UNITS_PER_WORD;

		  /* word_op[0] == 's' means we are saving ("st"); only the
		     save direction emits dwarf2 frame info.  */
		  if (word_op[0] == 's')
		    {
		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
			       doubleword_op, reg_names[regno],
			       base_reg, offset);
		      if (dwarf2out_do_frame ())
			{
			  char *l = dwarf2out_cfi_label ();
			  dwarf2out_reg_save (l, regno, offset + base_offset);
			  dwarf2out_reg_save
			    (l, regno+1, offset+base_offset + UNITS_PER_WORD);
			}
		    }
		  else
		    fprintf (file, "\t%s\t[%s+%d], %s\n",
			     doubleword_op, base_reg, offset,
			     reg_names[regno]);

		  offset += 2 * UNITS_PER_WORD;
		  regno++;
		}
	      else
		{
		  if (word_op[0] == 's')
		    {
		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
			       word_op, reg_names[regno],
			       base_reg, offset);
		      if (dwarf2out_do_frame ())
			dwarf2out_reg_save ("", regno, offset + base_offset);
		    }
		  else
		    fprintf (file, "\t%s\t[%s+%d], %s\n",
			     word_op, base_reg, offset, reg_names[regno]);

		  offset += UNITS_PER_WORD;
		}
	    }
	}
    }

  if (fmask)
    {
      /* Float registers are always saved/restored one word at a time.  */
      for (regno = 32; regno <= 63; regno++)
	{
	  if ((fmask & (1L << (regno - 32))) != 0)
	    {
	      if (word_op[0] == 's')
		{
		  fprintf (file, "\t%s\t%s, [%s+%d]\n",
			   word_op, reg_names[regno],
			   base_reg, offset);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", regno, offset + base_offset);
		}
	      else
		fprintf (file, "\t%s\t[%s+%d], %s\n",
			 word_op, base_reg, offset, reg_names[regno]);

	      offset += UNITS_PER_WORD;
	    }
	}
    }
}
6655
/* Set up the stack and frame (if desired) for the function.
   SIZE is the # of bytes of local variables; the final frame size is
   computed (or fetched from current_frame_info) below.  Emits the
   -mflat prologue directly to FILE.  */

static void
sparc_flat_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *sp_str = reg_names[STACK_POINTER_REGNUM];
  unsigned long gmask = current_frame_info.gmask;

  sparc_output_scratch_registers (file);

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
  fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
	   ASM_COMMENT_START,
	   current_frame_info.var_size,
	   current_frame_info.gp_reg_size / 4,
	   current_frame_info.fp_reg_size / 4,
	   current_function_outgoing_args_size,
	   current_frame_info.extra_size);

  size = SPARC_STACK_ALIGN (size);
  size = (! current_frame_info.initialized
	  ? sparc_flat_compute_frame_size (size)
	  : current_frame_info.total_size);

  /* These cases shouldn't happen.  Catch them now.  */
  if (size == 0 && (gmask || current_frame_info.fmask))
    abort ();

  /* Allocate our stack frame by decrementing %sp.
     At present, the only algorithm gdb can use to determine if this is a
     flat frame is if we always set %i7 if we set %sp.  This can be optimized
     in the future by putting in some sort of debugging information that says
     this is a `flat' function.  However, there is still the case of debugging
     code without such debugging information (including cases where most fns
     have such info, but there is one that doesn't).  So, always do this now
     so we don't get a lot of code out there that gdb can't handle.
     If the frame pointer isn't needed then that's ok - gdb won't be able to
     distinguish us from a non-flat function but there won't (and shouldn't)
     be any differences anyway.  The return pc is saved (if necessary) right
     after %i7 so gdb won't have to look too far to find it.  */
  if (size > 0)
    {
      unsigned int reg_offset = current_frame_info.reg_offset;
      const char *const fp_str = reg_names[FRAME_POINTER_REGNUM];
      static const char *const t1_str = "%g1";

      /* Things get a little tricky if local variables take up more than ~4096
	 bytes and outgoing arguments take up more than ~4096 bytes.  When that
	 happens, the register save area can't be accessed from either end of
	 the frame.  Handle this by decrementing %sp to the start of the gp
	 register save area, save the regs, update %i7, and then set %sp to its
	 final value.  Given that we only have one scratch register to play
	 with it is the cheapest solution, and it helps gdb out as it won't
	 slow down recognition of flat functions.
	 Don't change the order of insns emitted here without checking with
	 the gdb folk first.  */

      /* Is the entire register save area offsettable from %sp?  */
      if (reg_offset < 4096 - 64 * (unsigned) UNITS_PER_WORD)
	{
	  /* One-step frame allocation.  Sizes up to 4096 fit in the
	     immediate field of add/sub; larger ones go through %g1.  */
	  if (size <= 4096)
	    {
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, (int) -size, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   sp_str, (int) -size, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  else
	    {
	      fprintf (file, "\tset\t");
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, size);
	      fprintf (file, ", %s\n\tsub\t%s, %s, %s\n",
		       t1_str, sp_str, t1_str, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   sp_str, t1_str, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
				      reg_offset - 4 - size);
		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
	      if (dwarf2out_do_frame ())
		dwarf2out_return_save ("", reg_offset - size);
	      reg_offset += 4;
	    }
	  /* Save the remaining registers (fp and return address are
	     already handled above).  */
	  sparc_flat_save_restore (file, sp_str, reg_offset,
				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size);
	}
      else
	{
	  /* Subtract %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to decrement %sp by, the first time.  */
	  unsigned HOST_WIDE_INT size1 = ((size - reg_offset + 64) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  unsigned HOST_WIDE_INT offset = size1 - (size - reg_offset);

	  if (size1 <= 4096)
	    {
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, (int) -size1, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, (int) offset, sp_str, (int) -size1,
			   fp_str, ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  else
	    {
	      fprintf (file, "\tset\t");
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, size1);
	      fprintf (file, ", %s\n\tsub\t%s, %s, %s\n",
		       t1_str, sp_str, t1_str, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, (int) offset, sp_str, t1_str,
			   fp_str, ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
				      offset - 4 - size1);
		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, (int) offset);
	      if (dwarf2out_do_frame ())
		/* offset - size1 == reg_offset - size
		   if reg_offset were updated above like offset.  */
		dwarf2out_return_save ("", offset - size1);
	      offset += 4;
	    }
	  sparc_flat_save_restore (file, sp_str, offset,
				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size1);
	  /* Second step: decrement %sp the rest of the way.  */
	  fprintf (file, "\tset\t");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, size - size1);
	  fprintf (file, ", %s\n\tsub\t%s, %s, %s\n",
		   t1_str, sp_str, t1_str, sp_str);
	  if (dwarf2out_do_frame ())
	    if (! (gmask & FRAME_POINTER_MASK))
	      dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
	}
    }

  fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
}
6845
6846/* Do any necessary cleanup after a function to restore stack, frame,
6847   and regs.  */
6848
6849static void
6850sparc_flat_function_epilogue (file, size)
6851     FILE *file;
6852     HOST_WIDE_INT size;
6853{
6854  rtx epilogue_delay = current_function_epilogue_delay_list;
6855  int noepilogue = FALSE;
6856
6857  /* This is only for the human reader.  */
6858  fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);
6859
6860  /* The epilogue does not depend on any registers, but the stack
6861     registers, so we assume that if we have 1 pending nop, it can be
6862     ignored, and 2 it must be filled (2 nops occur for integer
6863     multiply and divide).  */
6864
6865  size = SPARC_STACK_ALIGN (size);
6866  size = (!current_frame_info.initialized
6867	   ? sparc_flat_compute_frame_size (size)
6868	   : current_frame_info.total_size);
6869
6870  if (size == 0 && epilogue_delay == 0)
6871    {
6872      rtx insn = get_last_insn ();
6873
6874      /* If the last insn was a BARRIER, we don't have to write any code
6875	 because a jump (aka return) was put there.  */
6876      if (GET_CODE (insn) == NOTE)
6877	insn = prev_nonnote_insn (insn);
6878      if (insn && GET_CODE (insn) == BARRIER)
6879	noepilogue = TRUE;
6880    }
6881
6882  if (!noepilogue)
6883    {
6884      unsigned HOST_WIDE_INT reg_offset = current_frame_info.reg_offset;
6885      unsigned HOST_WIDE_INT size1;
6886      const char *const sp_str = reg_names[STACK_POINTER_REGNUM];
6887      const char *const fp_str = reg_names[FRAME_POINTER_REGNUM];
6888      static const char *const t1_str = "%g1";
6889
6890      /* In the reload sequence, we don't need to fill the load delay
6891	 slots for most of the loads, also see if we can fill the final
6892	 delay slot if not otherwise filled by the reload sequence.  */
6893
6894      if (size > 4095)
6895        {
6896	  fprintf (file, "\tset\t");
6897	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, size);
6898	  fprintf (file, ", %s\n", t1_str);
6899	}
6900
6901      if (frame_pointer_needed)
6902	{
6903	  if (size > 4095)
6904	    fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
6905		     fp_str, t1_str, sp_str, ASM_COMMENT_START);
6906	  else
6907	    fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
6908		     fp_str, (int) size, sp_str, ASM_COMMENT_START);
6909	}
6910
6911      /* Is the entire register save area offsettable from %sp?  */
6912      if (reg_offset < 4096 - 64 * (unsigned) UNITS_PER_WORD)
6913	{
6914	  size1 = 0;
6915	}
6916      else
6917	{
6918	  /* Restore %sp in two steps, but make sure there is always a
6919	     64 byte register save area, and %sp is properly aligned.  */
6920	  /* Amount to increment %sp by, the first time.  */
6921	  size1 = ((reg_offset - 64 - 16) + 15) & -16;
6922	  /* Offset to register save area from %sp.  */
6923	  reg_offset = size1 - reg_offset;
6924
6925	  fprintf (file, "\tset\t");
6926	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, size1);
6927	  fprintf (file, ", %s\n\tadd\t%s, %s, %s\n",
6928		   t1_str, sp_str, t1_str, sp_str);
6929	}
6930
6931      /* We must restore the frame pointer and return address reg first
6932	 because they are treated specially by the prologue output code.  */
6933      if (current_frame_info.gmask & FRAME_POINTER_MASK)
6934	{
6935	  fprintf (file, "\tld\t[%s+%d], %s\n",
6936		   sp_str, (int) reg_offset, fp_str);
6937	  reg_offset += 4;
6938	}
6939      if (current_frame_info.gmask & RETURN_ADDR_MASK)
6940	{
6941	  fprintf (file, "\tld\t[%s+%d], %s\n",
6942		   sp_str, (int) reg_offset, reg_names[RETURN_ADDR_REGNUM]);
6943	  reg_offset += 4;
6944	}
6945
6946      /* Restore any remaining saved registers.  */
6947      sparc_flat_save_restore (file, sp_str, reg_offset,
6948			       current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6949			       current_frame_info.fmask,
6950			       "ld", "ldd", 0);
6951
6952      /* If we had to increment %sp in two steps, record it so the second
6953	 restoration in the epilogue finishes up.  */
6954      if (size1 > 0)
6955	{
6956	  size -= size1;
6957	  if (size > 4095)
6958	    {
6959	      fprintf (file, "\tset\t");
6960	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, size);
6961	      fprintf (file, ", %s\n", t1_str);
6962	    }
6963	}
6964
6965      if (current_function_returns_struct)
6966	fprintf (file, "\tjmp\t%%o7+12\n");
6967      else
6968	fprintf (file, "\tretl\n");
6969
6970      /* If the only register saved is the return address, we need a
6971	 nop, unless we have an instruction to put into it.  Otherwise
6972	 we don't since reloading multiple registers doesn't reference
6973	 the register being loaded.  */
6974
6975      if (epilogue_delay)
6976	{
6977	  if (size)
6978	    abort ();
6979	  final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
6980	}
6981
6982      else if (size > 4095)
6983	fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
6984
6985      else if (size > 0)
6986	fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, (int) size, sp_str);
6987
6988      else
6989	fprintf (file, "\tnop\n");
6990    }
6991
6992  /* Reset state info for each function.  */
6993  current_frame_info = zero_frame_info;
6994
6995  sparc_output_deferred_case_vectors ();
6996}
6997
6998/* Define the number of delay slots needed for the function epilogue.
6999
7000   On the sparc, we need a slot if either no stack has been allocated,
7001   or the only register saved is the return register.  */
7002
7003int
7004sparc_flat_epilogue_delay_slots ()
7005{
7006  if (!current_frame_info.initialized)
7007    (void) sparc_flat_compute_frame_size (get_frame_size ());
7008
7009  if (current_frame_info.total_size == 0)
7010    return 1;
7011
7012  return 0;
7013}
7014
7015/* Return true if TRIAL is a valid insn for the epilogue delay slot.
7016   Any single length instruction which doesn't reference the stack or frame
7017   pointer is OK.  */
7018
7019int
7020sparc_flat_eligible_for_epilogue_delay (trial, slot)
7021     rtx trial;
7022     int slot ATTRIBUTE_UNUSED;
7023{
7024  rtx pat = PATTERN (trial);
7025
7026  if (get_attr_length (trial) != 1)
7027    return 0;
7028
7029  if (! reg_mentioned_p (stack_pointer_rtx, pat)
7030      && ! reg_mentioned_p (frame_pointer_rtx, pat))
7031    return 1;
7032
7033  return 0;
7034}
7035
7036/* Adjust the cost of a scheduling dependency.  Return the new cost of
7037   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7038
7039static int
7040supersparc_adjust_cost (insn, link, dep_insn, cost)
7041     rtx insn;
7042     rtx link;
7043     rtx dep_insn;
7044     int cost;
7045{
7046  enum attr_type insn_type;
7047
7048  if (! recog_memoized (insn))
7049    return 0;
7050
7051  insn_type = get_attr_type (insn);
7052
7053  if (REG_NOTE_KIND (link) == 0)
7054    {
7055      /* Data dependency; DEP_INSN writes a register that INSN reads some
7056	 cycles later.  */
7057
7058      /* if a load, then the dependence must be on the memory address;
7059	 add an extra "cycle".  Note that the cost could be two cycles
7060	 if the reg was written late in an instruction group; we ca not tell
7061	 here.  */
7062      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7063	return cost + 3;
7064
7065      /* Get the delay only if the address of the store is the dependence.  */
7066      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7067	{
7068	  rtx pat = PATTERN(insn);
7069	  rtx dep_pat = PATTERN (dep_insn);
7070
7071	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7072	    return cost;  /* This should not happen!  */
7073
7074	  /* The dependency between the two instructions was on the data that
7075	     is being stored.  Assume that this implies that the address of the
7076	     store is not dependent.  */
7077	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7078	    return cost;
7079
7080	  return cost + 3;  /* An approximation.  */
7081	}
7082
7083      /* A shift instruction cannot receive its data from an instruction
7084	 in the same cycle; add a one cycle penalty.  */
7085      if (insn_type == TYPE_SHIFT)
7086	return cost + 3;   /* Split before cascade into shift.  */
7087    }
7088  else
7089    {
7090      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7091	 INSN writes some cycles later.  */
7092
7093      /* These are only significant for the fpu unit; writing a fp reg before
7094         the fpu has finished with it stalls the processor.  */
7095
7096      /* Reusing an integer register causes no problems.  */
7097      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7098	return 0;
7099    }
7100
7101  return cost;
7102}
7103
/* Adjust scheduling cost COST of the dependency of INSN on DEP_INSN
   (described by note LINK) for the HyperSPARC pipeline.  */
static int
hypersparc_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link;
     rtx dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave the cost alone if either insn is unrecognizable; no type
     attributes are available for those.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  /* A dependency on the data being stored implies the address
	     of the store is independent, so no extra delay.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
	break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
        return 0;
      break;

    default:
      break;
    }

  return cost;
}
7184
/* Adjust scheduling cost COST of the dependency of INSN on DEP_INSN
   (described by note LINK) for the UltraSPARC pipeline.  */
static int
ultrasparc_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link;
     rtx dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave the cost alone if either insn is unrecognizable; no type
     attributes are available for those.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  /* Nothing issues in parallel with integer multiplies, so
     mark as zero cost since the scheduler can not do anything
     about it.  */
  if (insn_type == TYPE_IMUL || insn_type == TYPE_IDIV)
    return 0;

/* Nonzero if DEP_TYPE is a long-latency FP divide or square root.  */
#define SLOW_FP(dep_type) \
(dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
 dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      if (dep_type == TYPE_CMOVE)
	{
	  /* Instructions that read the result of conditional moves cannot
	     be in the same group or the following group.  */
	  return cost + 1;
	}

      switch (insn_type)
	{
	  /* UltraSPARC can dual issue a store and an instruction setting
	     the value stored, except for divide and square root.  */
	case TYPE_FPSTORE:
	  if (! SLOW_FP (dep_type))
	    return 0;
	  return cost;

	case TYPE_STORE:
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    /* The dependency between the two instructions is on the data
	       that is being stored.  Assume that the address of the store
	       is not also dependent.  */
	    return 0;
	  return cost;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* A load does not return data until at least 11 cycles after
	     a store to the same location.  3 cycles are accounted for
	     in the load latency; add the other 8 here.  */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      /* If the addresses are not equal this may be a false
		 dependency because pointer aliasing could not be
		 determined.  Add only 2 cycles in that case.  2 is
		 an arbitrary compromise between 8, which would cause
		 the scheduler to generate worse code elsewhere to
		 compensate for a dependency which might not really
		 exist, and 0.  */
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
				    XEXP (SET_DEST (dep_pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  return cost;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  return cost;

	case TYPE_FPCMOVE:
	  /* FMOVR class instructions can not issue in the same cycle
	     or the cycle after an instruction which writes any
	     integer register.  Model this as cost 2 for dependent
	     instructions.  */
	  if (dep_type == TYPE_IALU
	      && cost < 2)
	    return 2;
	  /* Otherwise check as for integer conditional moves.  */
	  /* FALLTHRU */

	case TYPE_CMOVE:
	  /* Conditional moves involving integer registers wait until
	     3 cycles after loads return data.  The interlock applies
	     to all loads, not just dependent loads, but that is hard
	     to model.  */
	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
	    return cost + 3;
	  return cost;

	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Divide and square root lock destination registers for full latency.  */
      if (! SLOW_FP (dep_type))
	return 0;
      break;

    case REG_DEP_OUTPUT:
      /* IEU and FPU instruction that have the same destination
	 register cannot be grouped together.  */
      return cost + 1;

    default:
      break;
    }

  /* Other costs not accounted for:
     - Single precision floating point loads lock the other half of
       the even/odd register pair.
     - Several hazards associated with ldd/std are ignored because these
       instructions are rarely generated for V9.
     - The floating point pipeline can not have both a single and double
       precision operation active at the same time.  Format conversions
       and graphics instructions are given honorary double precision status.
     - call and jmpl are always the first instruction in a group.  */

  return cost;

#undef SLOW_FP
}
7335
7336static int
7337sparc_adjust_cost(insn, link, dep, cost)
7338     rtx insn;
7339     rtx link;
7340     rtx dep;
7341     int cost;
7342{
7343  switch (sparc_cpu)
7344    {
7345    case PROCESSOR_SUPERSPARC:
7346      cost = supersparc_adjust_cost (insn, link, dep, cost);
7347      break;
7348    case PROCESSOR_HYPERSPARC:
7349    case PROCESSOR_SPARCLITE86X:
7350      cost = hypersparc_adjust_cost (insn, link, dep, cost);
7351      break;
7352    case PROCESSOR_ULTRASPARC:
7353      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
7354      break;
7355    default:
7356      break;
7357    }
7358  return cost;
7359}
7360
7361/* This describes the state of the UltraSPARC pipeline during
7362   instruction scheduling.  */
7363
/* Bitmask helpers: convert an insn type attribute (TMASK) or an
   ultra_code (UMASK) into a single-bit mask.  */
#define TMASK(__x)	((unsigned)1 << ((int)(__x)))
#define UMASK(__x)	((unsigned)1 << ((int)(__x)))

/* The UltraSPARC issue unit an insn is dispatched to.  */
enum ultra_code { NONE=0, /* no insn at all				*/
		  IEU0,   /* shifts and conditional moves		*/
		  IEU1,   /* condition code setting insns, calls+jumps	*/
		  IEUN,   /* all other single cycle ieu insns		*/
		  LSU,    /* loads and stores				*/
		  CTI,    /* branches					*/
		  FPM,    /* FPU pipeline 1, multiplies and divides	*/
		  FPA,    /* FPU pipeline 2, all other operations	*/
		  SINGLE, /* single issue instructions			*/
		  NUM_ULTRA_CODES };

static enum ultra_code ultra_code_from_mask PARAMS ((int));
static void ultra_schedule_insn PARAMS ((rtx *, rtx *, int, enum ultra_code));

/* Printable names for the ultra_code values, used in scheduling dumps.  */
static const char *const ultra_code_names[NUM_ULTRA_CODES] = {
  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
  "FPM", "FPA", "SINGLE" };

/* One issue group (up to four insns) of the modeled pipeline.  */
struct ultrasparc_pipeline_state {
  /* The insns in this group.  */
  rtx group[4];

  /* The code for each insn.  */
  enum ultra_code codes[4];

  /* Which insns in this group have been committed by the
     scheduler.  This is how we determine how many more
     can issue this cycle.  */
  char commit[4];

  /* How many insns in this group.  */
  char group_size;

  /* Mask of free slots still in this group.  */
  char free_slot_mask;

  /* The slotter uses the following to determine what other
     insn types can still make their way into this group.  */
  char contents [NUM_ULTRA_CODES];
  char num_ieu_insns;
};

/* Ring buffer of recent issue groups.  ULTRA_NUM_HIST must be a power
   of two because indices are wrapped with `& (ULTRA_NUM_HIST - 1)'.  */
#define ULTRA_NUM_HIST	8
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
static int ultra_cur_hist;
static int ultra_cycles_elapsed;

/* The issue group currently being filled.  */
#define ultra_pipe	(ultra_pipe_hist[ultra_cur_hist])
7415
7416/* Given TYPE_MASK compute the ultra_code it has.  */
7417static enum ultra_code
7418ultra_code_from_mask (type_mask)
7419     int type_mask;
7420{
7421  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
7422    return IEU0;
7423  else if (type_mask & (TMASK (TYPE_COMPARE) |
7424			TMASK (TYPE_CALL) |
7425			TMASK (TYPE_SIBCALL) |
7426			TMASK (TYPE_UNCOND_BRANCH)))
7427    return IEU1;
7428  else if (type_mask & TMASK (TYPE_IALU))
7429    return IEUN;
7430  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
7431			TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
7432			TMASK (TYPE_FPSTORE)))
7433    return LSU;
7434  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
7435			TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
7436			TMASK (TYPE_FPSQRTD)))
7437    return FPM;
7438  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
7439			TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
7440    return FPA;
7441  else if (type_mask & TMASK (TYPE_BRANCH))
7442    return CTI;
7443
7444  return SINGLE;
7445}
7446
/* Check INSN (a conditional move) and make sure that its
   results are available at this cycle.  Return 1 if the
   results are in fact ready.  */
7450static int
7451ultra_cmove_results_ready_p (insn)
7452     rtx insn;
7453{
7454  struct ultrasparc_pipeline_state *up;
7455  int entry, slot;
7456
7457  /* If this got dispatched in the previous
7458     group, the results are not ready.  */
7459  entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
7460  up = &ultra_pipe_hist[entry];
7461  slot = 4;
7462  while (--slot >= 0)
7463    if (up->group[slot] == insn)
7464      return 0;
7465
7466  return 1;
7467}
7468
7469/* Walk backwards in pipeline history looking for FPU
7470   operations which use a mode different than FPMODE and
7471   will create a stall if an insn using FPMODE were to be
7472   dispatched this cycle.  */
7473static int
7474ultra_fpmode_conflict_exists (fpmode)
7475     enum machine_mode fpmode;
7476{
7477  int hist_ent;
7478  int hist_lim;
7479
7480  hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
7481  if (ultra_cycles_elapsed < 4)
7482    hist_lim = ultra_cycles_elapsed;
7483  else
7484    hist_lim = 4;
7485  while (hist_lim > 0)
7486    {
7487      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
7488      int slot = 4;
7489
7490      while (--slot >= 0)
7491	{
7492	  rtx insn = up->group[slot];
7493	  enum machine_mode this_mode;
7494	  rtx pat;
7495
7496	  if (! insn
7497	      || GET_CODE (insn) != INSN
7498	      || (pat = PATTERN (insn)) == 0
7499	      || GET_CODE (pat) != SET)
7500	    continue;
7501
7502	  this_mode = GET_MODE (SET_DEST (pat));
7503	  if ((this_mode != SFmode
7504	       && this_mode != DFmode)
7505	      || this_mode == fpmode)
7506	    continue;
7507
7508	  /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
7509	     we will get a stall.  Loads and stores are independent
7510	     of these rules.  */
7511	  if (GET_CODE (SET_SRC (pat)) != ABS
7512	      && GET_CODE (SET_SRC (pat)) != NEG
7513	      && ((TMASK (get_attr_type (insn)) &
7514		   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
7515		    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
7516		    TMASK (TYPE_FPSQRTD) |
7517                    TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
7518	    return 1;
7519	}
7520      hist_lim--;
7521      hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
7522    }
7523
7524  /* No conflicts, safe to dispatch.  */
7525  return 0;
7526}
7527
7528/* Find an instruction in LIST which has one of the
7529   type attributes enumerated in TYPE_MASK.  START
7530   says where to begin the search.
7531
7532   NOTE: This scheme depends upon the fact that we
7533         have less than 32 distinct type attributes.  */
7534
/* TMASK-style bitmask of the insn type attributes present somewhere in
   the current ready list; rebuilt by ultra_build_types_avail and used
   by ultra_find_type to short-circuit fruitless searches.  */
static int ultra_types_avail;
7536
static rtx *
ultra_find_type (type_mask, list, start)
     int type_mask;
     rtx *list;
     int start;
{
  int i;

  /* Short circuit if no such insn exists in the ready
     at the moment.  */
  if ((type_mask & ultra_types_avail) == 0)
    return 0;

  /* Scan backwards from START toward the front of LIST.  */
  for (i = start; i >= 0; i--)
    {
      rtx insn = list[i];

      if (recog_memoized (insn) >= 0
	  && (TMASK(get_attr_type (insn)) & type_mask))
	{
	  enum machine_mode fpmode = SFmode;
	  rtx pat = 0;
	  int slot;
	  int check_depend = 0;
	  int check_fpmode_conflict = 0;

	  /* For a non-store SET insn, remember to check register
	     dependencies against the current group and, when the
	     destination is an FP mode, precision conflicts against
	     recent pipeline history.  */
	  if (GET_CODE (insn) == INSN
	      && (pat = PATTERN(insn)) != 0
	      && GET_CODE (pat) == SET
	      && !(type_mask & (TMASK (TYPE_STORE) |
				TMASK (TYPE_FPSTORE))))
	    {
	      check_depend = 1;
	      if (GET_MODE (SET_DEST (pat)) == SFmode
		  || GET_MODE (SET_DEST (pat)) == DFmode)
		{
		  fpmode = GET_MODE (SET_DEST (pat));
		  check_fpmode_conflict = 1;
		}
	    }

	  slot = 4;
	  while(--slot >= 0)
	    {
	      rtx slot_insn = ultra_pipe.group[slot];
	      rtx slot_pat;

	      /* Already issued, bad dependency, or FPU
		 mode conflict.  */
	      if (slot_insn != 0
		  && (slot_pat = PATTERN (slot_insn)) != 0
		  && ((insn == slot_insn)
		      || (check_depend == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && ((GET_CODE (SET_DEST (slot_pat)) == REG
			       && GET_CODE (SET_SRC (pat)) == REG
			       && REGNO (SET_DEST (slot_pat)) ==
			            REGNO (SET_SRC (pat)))
			      || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
				  && GET_CODE (SET_SRC (pat)) == SUBREG
				  && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
				       REGNO (SUBREG_REG (SET_SRC (pat)))
				  && SUBREG_BYTE (SET_DEST (slot_pat)) ==
				       SUBREG_BYTE (SET_SRC (pat)))))
		      || (check_fpmode_conflict == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && (GET_MODE (SET_DEST (slot_pat)) == SFmode
			      || GET_MODE (SET_DEST (slot_pat)) == DFmode)
			  && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
		goto next;
	    }

	  /* Check for peculiar result availability and dispatch
	     interference situations.  */
	  if (pat != 0
	      && ultra_cycles_elapsed > 0)
	    {
	      rtx link;

	      /* INSN must not consume the result of a conditional move
		 that was dispatched only last cycle.  */
	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
		{
		  rtx link_insn = XEXP (link, 0);
		  if (GET_CODE (link_insn) == INSN
		      && recog_memoized (link_insn) >= 0
		      && (TMASK (get_attr_type (link_insn)) &
			  (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
		      && ! ultra_cmove_results_ready_p (link_insn))
		    goto next;
		}

	      if (check_fpmode_conflict
		  && ultra_fpmode_conflict_exists (fpmode))
		goto next;
	    }

	  return &list[i];
	}
    next:
      ;
    }
  return 0;
}
7641
7642static void
7643ultra_build_types_avail (ready, n_ready)
7644  rtx *ready;
7645  int n_ready;
7646{
7647  int i = n_ready - 1;
7648
7649  ultra_types_avail = 0;
7650  while(i >= 0)
7651    {
7652      rtx insn = ready[i];
7653
7654      if (recog_memoized (insn) >= 0)
7655	ultra_types_avail |= TMASK (get_attr_type (insn));
7656
7657      i -= 1;
7658    }
7659}
7660
/* Place the insn pointed to by IP into the pipeline.
   Make element THIS of READY be that insn if it
   is not already.  TYPE indicates the pipeline class
   this insn falls into.  */
7665static void
7666ultra_schedule_insn (ip, ready, this, type)
7667     rtx *ip;
7668     rtx *ready;
7669     int this;
7670     enum ultra_code type;
7671{
7672  int pipe_slot;
7673  char mask = ultra_pipe.free_slot_mask;
7674  rtx temp;
7675
7676  /* Obtain free slot.  */
7677  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
7678    if ((mask & (1 << pipe_slot)) != 0)
7679      break;
7680  if (pipe_slot == 4)
7681    abort ();
7682
7683  /* In it goes, and it hasn't been committed yet.  */
7684  ultra_pipe.group[pipe_slot] = *ip;
7685  ultra_pipe.codes[pipe_slot] = type;
7686  ultra_pipe.contents[type] = 1;
7687  if (UMASK (type) &
7688      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
7689    ultra_pipe.num_ieu_insns += 1;
7690
7691  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
7692  ultra_pipe.group_size += 1;
7693  ultra_pipe.commit[pipe_slot] = 0;
7694
7695  /* Update ready list.  */
7696  temp = *ip;
7697  while (ip != &ready[this])
7698    {
7699      ip[0] = ip[1];
7700      ++ip;
7701    }
7702  *ip = temp;
7703}
7704
7705/* Advance to the next pipeline group.  */
7706static void
7707ultra_flush_pipeline ()
7708{
7709  ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
7710  ultra_cycles_elapsed += 1;
7711  memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
7712  ultra_pipe.free_slot_mask = 0xf;
7713}
7714
7715/* Init our data structures for this current block.  */
7716static void
7717ultrasparc_sched_init ()
7718{
7719  memset ((char *) ultra_pipe_hist, 0, sizeof ultra_pipe_hist);
7720  ultra_cur_hist = 0;
7721  ultra_cycles_elapsed = 0;
7722  ultra_pipe.free_slot_mask = 0xf;
7723}
7724
7725static void
7726sparc_sched_init (dump, sched_verbose, max_ready)
7727     FILE *dump ATTRIBUTE_UNUSED;
7728     int sched_verbose ATTRIBUTE_UNUSED;
7729     int max_ready ATTRIBUTE_UNUSED;
7730{
7731  if (sparc_cpu == PROCESSOR_ULTRASPARC)
7732    ultrasparc_sched_init ();
7733}
7734
7735/* INSN has been scheduled, update pipeline commit state
7736   and return how many instructions are still to be
7737   scheduled in this group.  */
7738static int
7739ultrasparc_variable_issue (insn)
7740     rtx insn;
7741{
7742  struct ultrasparc_pipeline_state *up = &ultra_pipe;
7743  int i, left_to_fire;
7744
7745  left_to_fire = 0;
7746  for (i = 0; i < 4; i++)
7747    {
7748      if (up->group[i] == 0)
7749	continue;
7750
7751      if (up->group[i] == insn)
7752	{
7753	  up->commit[i] = 1;
7754	}
7755      else if (! up->commit[i])
7756	left_to_fire++;
7757    }
7758
7759  return left_to_fire;
7760}
7761
7762static int
7763sparc_variable_issue (dump, sched_verbose, insn, cim)
7764     FILE *dump ATTRIBUTE_UNUSED;
7765     int sched_verbose ATTRIBUTE_UNUSED;
7766     rtx insn;
7767     int cim;
7768{
7769  if (sparc_cpu == PROCESSOR_ULTRASPARC)
7770    return ultrasparc_variable_issue (insn);
7771  else
7772    return cim - 1;
7773}
7774
7775/* In actual_hazard_this_instance, we may have yanked some
7776   instructions from the ready list due to conflict cost
7777   adjustments.  If so, and such an insn was in our pipeline
7778   group, remove it and update state.  */
7779static void
7780ultra_rescan_pipeline_state (ready, n_ready)
7781     rtx *ready;
7782     int n_ready;
7783{
7784  struct ultrasparc_pipeline_state *up = &ultra_pipe;
7785  int i;
7786
7787  for (i = 0; i < 4; i++)
7788    {
7789      rtx insn = up->group[i];
7790      int j;
7791
7792      if (! insn)
7793	continue;
7794
7795      /* If it has been committed, then it was removed from
7796	 the ready list because it was actually scheduled,
7797	 and that is not the case we are searching for here.  */
7798      if (up->commit[i] != 0)
7799	continue;
7800
7801      for (j = n_ready - 1; j >= 0; j--)
7802	if (ready[j] == insn)
7803	  break;
7804
7805      /* If we didn't find it, toss it.  */
7806      if (j < 0)
7807	{
7808	  enum ultra_code ucode = up->codes[i];
7809
7810	  up->group[i] = 0;
7811	  up->codes[i] = NONE;
7812	  up->contents[ucode] = 0;
7813	  if (UMASK (ucode) &
7814	      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
7815	    up->num_ieu_insns -= 1;
7816
7817	  up->free_slot_mask |= (1 << i);
7818	  up->group_size -= 1;
7819	  up->commit[i] = 0;
7820	}
7821    }
7822}
7823
7824static void
7825ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
7826     FILE *dump;
7827     int sched_verbose;
7828     rtx *ready;
7829     int n_ready;
7830{
7831  struct ultrasparc_pipeline_state *up = &ultra_pipe;
7832  int i, this_insn;
7833
7834  if (sched_verbose)
7835    {
7836      int n;
7837
7838      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
7839      for (n = n_ready - 1; n >= 0; n--)
7840	{
7841	  rtx insn = ready[n];
7842	  enum ultra_code ucode;
7843
7844	  if (recog_memoized (insn) < 0)
7845	    continue;
7846	  ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
7847	  if (n != 0)
7848	    fprintf (dump, "%s(%d) ",
7849		     ultra_code_names[ucode],
7850		     INSN_UID (insn));
7851	  else
7852	    fprintf (dump, "%s(%d)",
7853		     ultra_code_names[ucode],
7854		     INSN_UID (insn));
7855	}
7856      fprintf (dump, "]\n");
7857    }
7858
7859  this_insn = n_ready - 1;
7860
7861  /* Skip over junk we don't understand.  */
7862  while ((this_insn >= 0)
7863	 && recog_memoized (ready[this_insn]) < 0)
7864    this_insn--;
7865
7866  ultra_build_types_avail (ready, this_insn + 1);
7867
7868  while (this_insn >= 0) {
7869    int old_group_size = up->group_size;
7870
7871    if (up->group_size != 0)
7872      {
7873	int num_committed;
7874
7875	num_committed = (up->commit[0] + up->commit[1] +
7876			 up->commit[2] + up->commit[3]);
	/* If nothing has been committed from our group, or all of
	   them have, clear out the (current cycle's) pipeline
	   state and start afresh.  */
7880	if (num_committed == 0
7881	    || num_committed == up->group_size)
7882	  {
7883	    ultra_flush_pipeline ();
7884	    up = &ultra_pipe;
7885	    old_group_size = 0;
7886	  }
7887	else
7888	  {
7889	    /* OK, some ready list insns got requeued and thus removed
7890	       from the ready list.  Account for this fact.  */
7891	    ultra_rescan_pipeline_state (ready, n_ready);
7892
7893	    /* Something "changed", make this look like a newly
7894	       formed group so the code at the end of the loop
7895	       knows that progress was in fact made.  */
7896	    if (up->group_size != old_group_size)
7897	      old_group_size = 0;
7898	  }
7899      }
7900
7901    if (up->group_size == 0)
7902      {
7903	/* If the pipeline is (still) empty and we have any single
7904	   group insns, get them out now as this is a good time.  */
7905	rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_IDIV) |
7906				    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
7907				    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
7908				   ready, this_insn);
7909	if (ip)
7910	  {
7911	    ultra_schedule_insn (ip, ready, this_insn, SINGLE);
7912	    break;
7913	  }
7914
	/* If we are not in the process of emptying out the pipe, try to
	   obtain an instruction which must be the first in its group.  */
7917	ip = ultra_find_type ((TMASK (TYPE_CALL) |
7918			       TMASK (TYPE_SIBCALL) |
7919			       TMASK (TYPE_CALL_NO_DELAY_SLOT) |
7920			       TMASK (TYPE_UNCOND_BRANCH)),
7921			      ready, this_insn);
7922	if (ip)
7923	  {
7924	    ultra_schedule_insn (ip, ready, this_insn, IEU1);
7925	    this_insn--;
7926	  }
7927	else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
7928					 TMASK (TYPE_FPDIVD) |
7929					 TMASK (TYPE_FPSQRTS) |
7930					 TMASK (TYPE_FPSQRTD)),
7931					ready, this_insn)) != 0)
7932	  {
7933	    ultra_schedule_insn (ip, ready, this_insn, FPM);
7934	    this_insn--;
7935	  }
7936      }
7937
7938    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
7939       operation.  We can't do more IEU operations if the first 3 slots are
7940       all full or we have dispatched two IEU insns already.  */
7941    if ((up->free_slot_mask & 0x7) != 0
7942	&& up->num_ieu_insns < 2
7943	&& up->contents[IEU0] == 0
7944	&& up->contents[IEUN] == 0)
7945      {
7946	rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
7947	if (ip)
7948	  {
7949	    ultra_schedule_insn (ip, ready, this_insn, IEU0);
7950	    this_insn--;
7951	  }
7952      }
7953
7954    /* If we can, try to find an IEU1 specific or an unnamed
7955       IEU instruction.  */
7956    if ((up->free_slot_mask & 0x7) != 0
7957	&& up->num_ieu_insns < 2)
7958      {
7959	rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) |
7960				    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
7961				   ready, this_insn);
7962	if (ip)
7963	  {
7964	    rtx insn = *ip;
7965
7966	    ultra_schedule_insn (ip, ready, this_insn,
7967				 (!up->contents[IEU1]
7968				  && get_attr_type (insn) == TYPE_COMPARE)
7969				 ? IEU1 : IEUN);
7970	    this_insn--;
7971	  }
7972      }
7973
7974    /* If only one IEU insn has been found, try to find another unnamed
7975       IEU operation or an IEU1 specific one.  */
7976    if ((up->free_slot_mask & 0x7) != 0
7977	&& up->num_ieu_insns < 2)
7978      {
7979	rtx *ip;
7980	int tmask = TMASK (TYPE_IALU);
7981
7982	if (!up->contents[IEU1])
7983	  tmask |= TMASK (TYPE_COMPARE);
7984	ip = ultra_find_type (tmask, ready, this_insn);
7985	if (ip)
7986	  {
7987	    rtx insn = *ip;
7988
7989	    ultra_schedule_insn (ip, ready, this_insn,
7990				 (!up->contents[IEU1]
7991				  && get_attr_type (insn) == TYPE_COMPARE)
7992				 ? IEU1 : IEUN);
7993	    this_insn--;
7994	  }
7995      }
7996
    /* Try for a load or store, but such an insn can only be issued
       if it is within one of the first 3 slots.  */
7999    if ((up->free_slot_mask & 0x7) != 0
8000        && up->contents[LSU] == 0)
8001      {
8002	rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
8003				   TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
8004				   TMASK (TYPE_FPSTORE)), ready, this_insn);
8005	if (ip)
8006	  {
8007	    ultra_schedule_insn (ip, ready, this_insn, LSU);
8008	    this_insn--;
8009	  }
8010      }
8011
8012    /* Now find FPU operations, first FPM class.  But not divisions or
8013       square-roots because those will break the group up.  Unlike all
8014       the previous types, these can go in any slot.  */
8015    if (up->free_slot_mask != 0
8016	&& up->contents[FPM] == 0)
8017      {
8018	rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
8019	if (ip)
8020	  {
8021	    ultra_schedule_insn (ip, ready, this_insn, FPM);
8022	    this_insn--;
8023	  }
8024      }
8025
8026    /* Continue on with FPA class if we have not filled the group already.  */
8027    if (up->free_slot_mask != 0
8028	&& up->contents[FPA] == 0)
8029      {
8030	rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
8031				    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
8032				   ready, this_insn);
8033	if (ip)
8034	  {
8035	    ultra_schedule_insn (ip, ready, this_insn, FPA);
8036	    this_insn--;
8037	  }
8038      }
8039
8040    /* Finally, maybe stick a branch in here.  */
8041    if (up->free_slot_mask != 0
8042	&& up->contents[CTI] == 0)
8043      {
8044	rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
8045
8046	/* Try to slip in a branch only if it is one of the
8047	   next 2 in the ready list.  */
8048	if (ip && ((&ready[this_insn] - ip) < 2))
8049	  {
8050	    ultra_schedule_insn (ip, ready, this_insn, CTI);
8051	    this_insn--;
8052	  }
8053      }
8054
8055    up->group_size = 0;
8056    for (i = 0; i < 4; i++)
8057      if ((up->free_slot_mask & (1 << i)) == 0)
8058	up->group_size++;
8059
8060    /* See if we made any progress...  */
8061    if (old_group_size != up->group_size)
8062      break;
8063
8064    /* Clean out the (current cycle's) pipeline state
8065       and try once more.  If we placed no instructions
8066       into the pipeline at all, it means a real hard
8067       conflict exists with some earlier issued instruction
8068       so we must advance to the next cycle to clear it up.  */
8069    if (up->group_size == 0)
8070      {
8071	ultra_flush_pipeline ();
8072	up = &ultra_pipe;
8073      }
8074    else
8075      {
8076	memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
8077	ultra_pipe.free_slot_mask = 0xf;
8078      }
8079  }
8080
8081  if (sched_verbose)
8082    {
8083      int n, gsize;
8084
8085      fprintf (dump, ";;\tUltraSPARC Launched   [");
8086      gsize = up->group_size;
8087      for (n = 0; n < 4; n++)
8088	{
8089	  rtx insn = up->group[n];
8090
8091	  if (! insn)
8092	    continue;
8093
8094	  gsize -= 1;
8095	  if (gsize != 0)
8096	    fprintf (dump, "%s(%d) ",
8097		     ultra_code_names[up->codes[n]],
8098		     INSN_UID (insn));
8099	  else
8100	    fprintf (dump, "%s(%d)",
8101		     ultra_code_names[up->codes[n]],
8102		     INSN_UID (insn));
8103	}
8104      fprintf (dump, "]\n");
8105    }
8106}
8107
8108static int
8109sparc_sched_reorder (dump, sched_verbose, ready, n_readyp, clock)
8110     FILE *dump;
8111     int sched_verbose;
8112     rtx *ready;
8113     int *n_readyp;
8114     int clock ATTRIBUTE_UNUSED;
8115{
8116  if (sparc_cpu == PROCESSOR_ULTRASPARC)
8117    ultrasparc_sched_reorder (dump, sched_verbose, ready, *n_readyp);
8118  return sparc_issue_rate ();
8119}
8120
8121static int
8122sparc_issue_rate ()
8123{
8124  switch (sparc_cpu)
8125    {
8126    default:
8127      return 1;
8128    case PROCESSOR_V9:
8129      /* Assume V9 processors are capable of at least dual-issue.  */
8130      return 2;
8131    case PROCESSOR_SUPERSPARC:
8132      return 3;
8133    case PROCESSOR_HYPERSPARC:
8134    case PROCESSOR_SPARCLITE86X:
8135      return 2;
8136    case PROCESSOR_ULTRASPARC:
8137      return 4;
8138    }
8139}
8140
/* Examine the single SET that is PATTERN (INSN) and classify what it
   leaves in the high 32 bits of its destination: return 1 if they are
   known zero, -1 if the value is sign extended from SImode, and 0 if
   nothing is known.  The caller (sparc_check_64) has already verified
   that PATTERN (INSN) is a SET before calling here.  */
static int
set_extends (insn)
     rtx insn;
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* ANDing with a non-negative constant masks the high bits off.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise the result has zero high bits if either register
	   operand is known (recursively) to have zero high bits.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* For IOR/XOR both operands must have zero high bits; a set bit
	   in either operand can propagate into the result.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case ASHIFT:
    case LSHIFTRT:
      /* An SImode shift counts as zero extending (see comment at the
	 top of the switch); other modes give no information.  */
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_DOUBLE:
      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
    case CONST_INT:
      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* These sign extend: yield -1 for SImode, 0 (unknown) otherwise.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* A plain register copy inherits the source's classification.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
8205
/* We _ought_ to have only one kind per function, but...  */
/* Case vectors deferred by sparc_defer_case_vector, kept as EXPR_LIST
   chains and emitted by sparc_output_deferred_case_vectors:
   address-difference vectors and plain address vectors respectively.  */
static rtx sparc_addr_diff_list;
static rtx sparc_addr_list;
8209
8210void
8211sparc_defer_case_vector (lab, vec, diff)
8212     rtx lab, vec;
8213     int diff;
8214{
8215  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
8216  if (diff)
8217    sparc_addr_diff_list
8218      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
8219  else
8220    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
8221}
8222
8223static void
8224sparc_output_addr_vec (vec)
8225     rtx vec;
8226{
8227  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8228  int idx, vlen = XVECLEN (body, 0);
8229
8230#ifdef ASM_OUTPUT_ADDR_VEC_START
8231  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8232#endif
8233
8234#ifdef ASM_OUTPUT_CASE_LABEL
8235  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8236			 NEXT_INSN (lab));
8237#else
8238  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8239#endif
8240
8241  for (idx = 0; idx < vlen; idx++)
8242    {
8243      ASM_OUTPUT_ADDR_VEC_ELT
8244	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
8245    }
8246
8247#ifdef ASM_OUTPUT_ADDR_VEC_END
8248  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
8249#endif
8250}
8251
8252static void
8253sparc_output_addr_diff_vec (vec)
8254     rtx vec;
8255{
8256  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8257  rtx base = XEXP (XEXP (body, 0), 0);
8258  int idx, vlen = XVECLEN (body, 1);
8259
8260#ifdef ASM_OUTPUT_ADDR_VEC_START
8261  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8262#endif
8263
8264#ifdef ASM_OUTPUT_CASE_LABEL
8265  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8266			 NEXT_INSN (lab));
8267#else
8268  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8269#endif
8270
8271  for (idx = 0; idx < vlen; idx++)
8272    {
8273      ASM_OUTPUT_ADDR_DIFF_ELT
8274        (asm_out_file,
8275         body,
8276         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
8277         CODE_LABEL_NUMBER (base));
8278    }
8279
8280#ifdef ASM_OUTPUT_ADDR_VEC_END
8281  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
8282#endif
8283}
8284
8285static void
8286sparc_output_deferred_case_vectors ()
8287{
8288  rtx t;
8289  int align;
8290
8291  if (sparc_addr_list == NULL_RTX
8292      && sparc_addr_diff_list == NULL_RTX)
8293    return;
8294
8295  /* Align to cache line in the function's code section.  */
8296  function_section (current_function_decl);
8297
8298  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
8299  if (align > 0)
8300    ASM_OUTPUT_ALIGN (asm_out_file, align);
8301
8302  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
8303    sparc_output_addr_vec (XEXP (t, 0));
8304  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
8305    sparc_output_addr_diff_vec (XEXP (t, 0));
8306
8307  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
8308}
8309
/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  X must be a REG (we abort otherwise).  INSN is the
   insn before which the backward scan for X's defining SET begins; if
   INSN is 0, the scan is only attempted when X is known to be set
   exactly once, starting from the very last insn anywhere.  */
int
sparc_check_64 (x, insn)
     rtx x, insn;
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  if (GET_CODE (x) != REG)
    abort ();

  /* For a DImode register, also track its SImode low word so a set of
     just the low word is recognized.  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  /* REG_N_SETS is only trustworthy here under expensive optimizations;
     otherwise be conservative and leave set_once clear.  */
  if (flag_expensive_optimizations
      && REG_N_SETS (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Walk backwards looking for the SET that defines X (or its low
     word Y).  Note the unusual label grouping below: JUMP_INSN/NOTE
     are always skipped, while CODE_LABEL, CALL_INSN and every other
     code end the scan unless the register is set exactly once.  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    /* Only plain single SETs are understood.  */
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap clobbers part of X: give up.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
8371
8372char *
8373sparc_v8plus_shift (operands, insn, opcode)
8374     rtx *operands;
8375     rtx insn;
8376     const char *opcode;
8377{
8378  static char asm_code[60];
8379
8380  if (GET_CODE (operands[3]) == SCRATCH)
8381    operands[3] = operands[0];
8382  if (GET_CODE (operands[1]) == CONST_INT)
8383    {
8384      output_asm_insn ("mov\t%1, %3", operands);
8385    }
8386  else
8387    {
8388      output_asm_insn ("sllx\t%H1, 32, %3", operands);
8389      if (sparc_check_64 (operands[1], insn) <= 0)
8390	output_asm_insn ("srl\t%L1, 0, %L1", operands);
8391      output_asm_insn ("or\t%L1, %3, %3", operands);
8392    }
8393
8394  strcpy(asm_code, opcode);
8395  if (which_alternative != 2)
8396    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
8397  else
8398    return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
8399}
8400
8401
8402/* Return 1 if DEST and SRC reference only global and in registers.  */
8403
8404int
8405sparc_return_peephole_ok (dest, src)
8406     rtx dest, src;
8407{
8408  if (! TARGET_V9)
8409    return 0;
8410  if (current_function_uses_only_leaf_regs)
8411    return 0;
8412  if (GET_CODE (src) != CONST_INT
8413      && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
8414    return 0;
8415  return IN_OR_GLOBAL_P (dest);
8416}
8417
8418/* Output assembler code to FILE to increment profiler label # LABELNO
8419   for profiling a function entry.
8420
8421   32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
8422   during profiling so we need to save/restore it around the call to mcount.
8423   We're guaranteed that a save has just been done, and we use the space
8424   allocated for intreg/fpreg value passing.  */
8425
void
sparc_function_profiler (file, labelno)
     FILE *file;
     int labelno;
{
  char buf[32];
  /* Build the "LPn" counter label name mcount will be handed.  */
  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

  /* On 32-bit sparc, save %g2 (STATIC_CHAIN_REGNUM) around the mcount
     call, in the save area described in the comment above.  */
  if (! TARGET_ARCH64)
    fputs ("\tst\t%g2, [%fp-4]\n", file);

  /* Load the high part of the label's address into %o0 ...  */
  fputs ("\tsethi\t%hi(", file);
  assemble_name (file, buf);
  fputs ("), %o0\n", file);

  fputs ("\tcall\t", file);
  assemble_name (file, MCOUNT_FUNCTION);
  putc ('\n', file);

  /* ... and complete it in the call's delay slot.  NOTE(review): the
     space after the tab in "\t or" appears to be deliberate delay-slot
     formatting — confirm against this port's other asm output before
     "fixing" it.  */
  fputs ("\t or\t%o0, %lo(", file);
  assemble_name (file, buf);
  fputs ("), %o0\n", file);

  /* Restore %g2 after mcount returns.  */
  if (! TARGET_ARCH64)
    fputs ("\tld\t[%fp-4], %g2\n", file);
}
8452
8453
/* Mark ARG, which is really a struct ultrasparc_pipeline_state *, for
   GC.  */
8456
8457static void
8458mark_ultrasparc_pipeline_state (arg)
8459     void *arg;
8460{
8461  struct ultrasparc_pipeline_state *ups;
8462  size_t i;
8463
8464  ups = (struct ultrasparc_pipeline_state *) arg;
8465  for (i = 0; i < sizeof (ups->group) / sizeof (rtx); ++i)
8466    ggc_mark_rtx (ups->group[i]);
8467}
8468
8469/* Called to register all of our global variables with the garbage
8470   collector.  */
8471
8472static void
8473sparc_add_gc_roots ()
8474{
8475  ggc_add_rtx_root (&sparc_compare_op0, 1);
8476  ggc_add_rtx_root (&sparc_compare_op1, 1);
8477  ggc_add_rtx_root (&leaf_label, 1);
8478  ggc_add_rtx_root (&global_offset_table, 1);
8479  ggc_add_rtx_root (&get_pc_symbol, 1);
8480  ggc_add_rtx_root (&sparc_addr_diff_list, 1);
8481  ggc_add_rtx_root (&sparc_addr_list, 1);
8482  ggc_add_root (ultra_pipe_hist, ARRAY_SIZE (ultra_pipe_hist),
8483		sizeof (ultra_pipe_hist[0]), &mark_ultrasparc_pipeline_state);
8484}
8485
8486#ifdef OBJECT_FORMAT_ELF
8487static void
8488sparc_elf_asm_named_section (name, flags)
8489     const char *name;
8490     unsigned int flags;
8491{
8492  if (flags & SECTION_MERGE)
8493    {
8494      /* entsize cannot be expressed in this section attributes
8495	 encoding style.  */
8496      default_elf_asm_named_section (name, flags);
8497      return;
8498    }
8499
8500  fprintf (asm_out_file, "\t.section\t\"%s\"", name);
8501
8502  if (!(flags & SECTION_DEBUG))
8503    fputs (",#alloc", asm_out_file);
8504  if (flags & SECTION_WRITE)
8505    fputs (",#write", asm_out_file);
8506  if (flags & SECTION_CODE)
8507    fputs (",#execinstr", asm_out_file);
8508
8509  /* ??? Handle SECTION_BSS.  */
8510
8511  fputc ('\n', asm_out_file);
8512}
8513#endif /* OBJECT_FORMAT_ELF */
8514