sparc.c revision 96263
1/* Subroutines for insn-output.c for Sun SPARC.
2   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3   1999, 2000, 2001 Free Software Foundation, Inc.
4   Contributed by Michael Tiemann (tiemann@cygnus.com)
5   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
6   at Cygnus Support.
7
8This file is part of GNU CC.
9
10GNU CC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 2, or (at your option)
13any later version.
14
15GNU CC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GNU CC; see the file COPYING.  If not, write to
22the Free Software Foundation, 59 Temple Place - Suite 330,
23Boston, MA 02111-1307, USA.  */
24
25#include "config.h"
26#include "system.h"
27#include "tree.h"
28#include "rtl.h"
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
34#include "output.h"
35#include "insn-attr.h"
36#include "flags.h"
37#include "function.h"
38#include "expr.h"
39#include "optabs.h"
40#include "libfuncs.h"
41#include "recog.h"
42#include "toplev.h"
43#include "ggc.h"
44#include "tm_p.h"
45#include "debug.h"
46#include "target.h"
47#include "target-def.h"
48
49/* 1 if the caller has placed an "unimp" insn immediately after the call.
50   This is used in v8 code when calling a function that returns a structure.
51   v9 doesn't have this.  Be careful to have this test be the same as that
52   used on the call.  */
53
54#define SKIP_CALLERS_UNIMP_P  \
55(!TARGET_ARCH64 && current_function_returns_struct			\
56 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
57 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
58     == INTEGER_CST))
59
60/* Global variables for machine-dependent things.  */
61
62/* Size of frame.  Need to know this to emit return insns from leaf procedures.
63   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
64   reload pass.  This is important as the value is later used in insn
65   scheduling (to see what can go in a delay slot).
66   APPARENT_FSIZE is the size of the stack less the register save area and less
67   the outgoing argument area.  It is used when saving call preserved regs.  */
68static int apparent_fsize;
69static int actual_fsize;
70
71/* Number of live general or floating point registers needed to be
72   saved (as 4-byte quantities).  */
73static int num_gfregs;
74
75/* Save the operands last given to a compare for use when we
76   generate a scc or bcc insn.  */
77rtx sparc_compare_op0, sparc_compare_op1;
78
79/* Coordinate with the md file wrt special insns created by
80   sparc_nonflat_function_epilogue.  */
81bool sparc_emitting_epilogue;
82
83#ifdef LEAF_REGISTERS
84
85/* Vector to say how input registers are mapped to output registers.
86   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
87   eliminate it.  You must use -fomit-frame-pointer to get that.  */
88char leaf_reg_remap[] =
89{ 0, 1, 2, 3, 4, 5, 6, 7,
90  -1, -1, -1, -1, -1, -1, 14, -1,
91  -1, -1, -1, -1, -1, -1, -1, -1,
92  8, 9, 10, 11, 12, 13, -1, 15,
93
94  32, 33, 34, 35, 36, 37, 38, 39,
95  40, 41, 42, 43, 44, 45, 46, 47,
96  48, 49, 50, 51, 52, 53, 54, 55,
97  56, 57, 58, 59, 60, 61, 62, 63,
98  64, 65, 66, 67, 68, 69, 70, 71,
99  72, 73, 74, 75, 76, 77, 78, 79,
100  80, 81, 82, 83, 84, 85, 86, 87,
101  88, 89, 90, 91, 92, 93, 94, 95,
102  96, 97, 98, 99, 100};
103
104/* Vector, indexed by hard register number, which contains 1
105   for a register that is allowable in a candidate for leaf
106   function treatment.  */
107char sparc_leaf_regs[] =
108{ 1, 1, 1, 1, 1, 1, 1, 1,
109  0, 0, 0, 0, 0, 0, 1, 0,
110  0, 0, 0, 0, 0, 0, 0, 0,
111  1, 1, 1, 1, 1, 1, 0, 1,
112  1, 1, 1, 1, 1, 1, 1, 1,
113  1, 1, 1, 1, 1, 1, 1, 1,
114  1, 1, 1, 1, 1, 1, 1, 1,
115  1, 1, 1, 1, 1, 1, 1, 1,
116  1, 1, 1, 1, 1, 1, 1, 1,
117  1, 1, 1, 1, 1, 1, 1, 1,
118  1, 1, 1, 1, 1, 1, 1, 1,
119  1, 1, 1, 1, 1, 1, 1, 1,
120  1, 1, 1, 1, 1};
121
122#endif
123
124/* Name of where we pretend to think the frame pointer points.
125   Normally, this is "%fp", but if we are in a leaf procedure,
126   this is "%sp+something".  We record "something" separately as it may be
127   too big for reg+constant addressing.  */
128
129static const char *frame_base_name;
130static int frame_base_offset;
131
132static void sparc_init_modes	PARAMS ((void));
133static int save_regs		PARAMS ((FILE *, int, int, const char *,
134				       int, int, int));
135static int restore_regs		PARAMS ((FILE *, int, int, const char *, int, int));
136static void build_big_number	PARAMS ((FILE *, int, const char *));
137static int function_arg_slotno	PARAMS ((const CUMULATIVE_ARGS *,
138				       enum machine_mode, tree, int, int,
139				       int *, int *));
140
141static int supersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
142static int hypersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
143static int ultrasparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
144
145static void sparc_output_addr_vec PARAMS ((rtx));
146static void sparc_output_addr_diff_vec PARAMS ((rtx));
147static void sparc_output_deferred_case_vectors PARAMS ((void));
148static void sparc_add_gc_roots    PARAMS ((void));
149static void mark_ultrasparc_pipeline_state PARAMS ((void *));
150static int check_return_regs PARAMS ((rtx));
151static int epilogue_renumber PARAMS ((rtx *, int));
152static bool sparc_assemble_integer PARAMS ((rtx, unsigned int, int));
153static int ultra_cmove_results_ready_p PARAMS ((rtx));
154static int ultra_fpmode_conflict_exists PARAMS ((enum machine_mode));
155static rtx *ultra_find_type PARAMS ((int, rtx *, int));
156static void ultra_build_types_avail PARAMS ((rtx *, int));
157static void ultra_flush_pipeline PARAMS ((void));
158static void ultra_rescan_pipeline_state PARAMS ((rtx *, int));
159static int set_extends PARAMS ((rtx));
160static void output_restore_regs PARAMS ((FILE *, int));
161static void sparc_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
162static void sparc_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
163static void sparc_flat_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
164static void sparc_flat_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
165static void sparc_nonflat_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT,
166						     int));
167static void sparc_nonflat_function_prologue PARAMS ((FILE *, HOST_WIDE_INT,
168						     int));
169#ifdef OBJECT_FORMAT_ELF
170static void sparc_elf_asm_named_section PARAMS ((const char *, unsigned int));
171#endif
172static void ultrasparc_sched_reorder PARAMS ((FILE *, int, rtx *, int));
173static int ultrasparc_variable_issue PARAMS ((rtx));
174static void ultrasparc_sched_init PARAMS ((void));
175
176static int sparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
177static int sparc_issue_rate PARAMS ((void));
178static int sparc_variable_issue PARAMS ((FILE *, int, rtx, int));
179static void sparc_sched_init PARAMS ((FILE *, int, int));
180static int sparc_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
181
182static void emit_soft_tfmode_libcall PARAMS ((const char *, int, rtx *));
183static void emit_soft_tfmode_binop PARAMS ((enum rtx_code, rtx *));
184static void emit_soft_tfmode_unop PARAMS ((enum rtx_code, rtx *));
185static void emit_soft_tfmode_cvt PARAMS ((enum rtx_code, rtx *));
186static void emit_hard_tfmode_operation PARAMS ((enum rtx_code, rtx *));
187
188/* Option handling.  */
189
190/* Code model option as passed by user.  */
191const char *sparc_cmodel_string;
192/* Parsed value.  */
193enum cmodel sparc_cmodel;
194
195char sparc_hard_reg_printed[8];
196
197struct sparc_cpu_select sparc_select[] =
198{
199  /* switch	name,		tune	arch */
200  { (char *)0,	"default",	1,	1 },
201  { (char *)0,	"-mcpu=",	1,	1 },
202  { (char *)0,	"-mtune=",	1,	0 },
203  { 0, 0, 0, 0 }
204};
205
206/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
207enum processor_type sparc_cpu;
208
209/* Initialize the GCC target structure.  */
210
211/* The sparc default is to use .half rather than .short for aligned
212   HI objects.  Use .word instead of .long on non-ELF systems.  */
213#undef TARGET_ASM_ALIGNED_HI_OP
214#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
215#ifndef OBJECT_FORMAT_ELF
216#undef TARGET_ASM_ALIGNED_SI_OP
217#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
218#endif
219
220#undef TARGET_ASM_UNALIGNED_HI_OP
221#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
222#undef TARGET_ASM_UNALIGNED_SI_OP
223#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
224#undef TARGET_ASM_UNALIGNED_DI_OP
225#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
226
227/* The target hook has to handle DI-mode values.  */
228#undef TARGET_ASM_INTEGER
229#define TARGET_ASM_INTEGER sparc_assemble_integer
230
231#undef TARGET_ASM_FUNCTION_PROLOGUE
232#define TARGET_ASM_FUNCTION_PROLOGUE sparc_output_function_prologue
233#undef TARGET_ASM_FUNCTION_EPILOGUE
234#define TARGET_ASM_FUNCTION_EPILOGUE sparc_output_function_epilogue
235
236#undef TARGET_SCHED_ADJUST_COST
237#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
238#undef TARGET_SCHED_ISSUE_RATE
239#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
240#undef TARGET_SCHED_VARIABLE_ISSUE
241#define TARGET_SCHED_VARIABLE_ISSUE sparc_variable_issue
242#undef TARGET_SCHED_INIT
243#define TARGET_SCHED_INIT sparc_sched_init
244#undef TARGET_SCHED_REORDER
245#define TARGET_SCHED_REORDER sparc_sched_reorder
246
247struct gcc_target targetm = TARGET_INITIALIZER;
248
/* Validate and override various options, and do some machine dependent
   initialization.  */

void
sparc_override_options ()
{
  /* Table mapping -mcmodel= switch strings to code model enum values.  */
  static struct code_model {
    const char *const name;
    const int value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { 0, 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const char *const name;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { 0, 0 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  DISABLE and ENABLE are target_flags
     masks cleared/set when the entry is selected as the architecture.  */
  static struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
      MASK_SPARCLITE },
    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc I, II, IIi */
    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
    /* Although insns using %y are deprecated, it is a clear win on current
       ultrasparcs.  */
    						    |MASK_DEPRECATED_V8_INSNS},
    { 0, 0, 0, 0 }
  };
  const struct cpu_table *cpu;
  const struct sparc_cpu_select *sel;
  int fpu;

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  fpu = TARGET_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  if (! def->name)
    abort ();
  sparc_select[0].string = def->name;

  /* Apply the default and any explicit -mcpu=/-mtune= selections, in the
     order recorded in sparc_select.  */
  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
	{
	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
	    if (! strcmp (sel->string, cpu->name))
	      {
		if (sel->set_tune_p)
		  sparc_cpu = cpu->processor;

		if (sel->set_arch_p)
		  {
		    target_flags &= ~cpu->disable;
		    target_flags |= cpu->enable;
		  }
		break;
	      }

	  if (! cpu->name)
	    error ("bad value (%s) for %s switch", sel->string, sel->name);
	}
    }

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  Clear MASK_FPU_SET to avoid confusing
     the reverse mapping from switch values to names.  */
  if (TARGET_FPU_SET)
    {
      target_flags = (target_flags & ~MASK_FPU) | fpu;
      target_flags &= ~MASK_FPU_SET;
    }

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Register global variables with the garbage collector.  */
  sparc_add_gc_roots ();
}
443
444/* Miscellaneous utilities.  */
445
446/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
447   or branch on register contents instructions.  */
448
449int
450v9_regcmp_p (code)
451     enum rtx_code code;
452{
453  return (code == EQ || code == NE || code == GE || code == LT
454	  || code == LE || code == GT);
455}
456
457
458/* Operand constraints.  */
459
460/* Return non-zero only if OP is a register of mode MODE,
461   or const0_rtx.  */
462
463int
464reg_or_0_operand (op, mode)
465     rtx op;
466     enum machine_mode mode;
467{
468  if (register_operand (op, mode))
469    return 1;
470  if (op == const0_rtx)
471    return 1;
472  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
473      && CONST_DOUBLE_HIGH (op) == 0
474      && CONST_DOUBLE_LOW (op) == 0)
475    return 1;
476  if (fp_zero_operand (op, mode))
477    return 1;
478  return 0;
479}
480
481/* Nonzero if OP is a floating point value with value 0.0.  */
482
483int
484fp_zero_operand (op, mode)
485     rtx op;
486     enum machine_mode mode;
487{
488  if (GET_MODE_CLASS (GET_MODE (op)) != MODE_FLOAT)
489    return 0;
490  return op == CONST0_RTX (mode);
491}
492
493/* Nonzero if OP is a register operand in floating point register.  */
494
495int
496fp_register_operand (op, mode)
497     rtx op;
498     enum machine_mode mode;
499{
500  if (! register_operand (op, mode))
501    return 0;
502  if (GET_CODE (op) == SUBREG)
503    op = SUBREG_REG (op);
504  return GET_CODE (op) == REG && SPARC_FP_REG_P (REGNO (op));
505}
506
507/* Nonzero if OP is a floating point constant which can
508   be loaded into an integer register using a single
509   sethi instruction.  */
510
511int
512fp_sethi_p (op)
513     rtx op;
514{
515  if (GET_CODE (op) == CONST_DOUBLE)
516    {
517      REAL_VALUE_TYPE r;
518      long i;
519
520      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
521      if (REAL_VALUES_EQUAL (r, dconst0) &&
522	  ! REAL_VALUE_MINUS_ZERO (r))
523	return 0;
524      REAL_VALUE_TO_TARGET_SINGLE (r, i);
525      if (SPARC_SETHI_P (i))
526	return 1;
527    }
528
529  return 0;
530}
531
532/* Nonzero if OP is a floating point constant which can
533   be loaded into an integer register using a single
534   mov instruction.  */
535
536int
537fp_mov_p (op)
538     rtx op;
539{
540  if (GET_CODE (op) == CONST_DOUBLE)
541    {
542      REAL_VALUE_TYPE r;
543      long i;
544
545      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
546      if (REAL_VALUES_EQUAL (r, dconst0) &&
547	  ! REAL_VALUE_MINUS_ZERO (r))
548	return 0;
549      REAL_VALUE_TO_TARGET_SINGLE (r, i);
550      if (SPARC_SIMM13_P (i))
551	return 1;
552    }
553
554  return 0;
555}
556
557/* Nonzero if OP is a floating point constant which can
558   be loaded into an integer register using a high/losum
559   instruction sequence.  */
560
561int
562fp_high_losum_p (op)
563     rtx op;
564{
565  /* The constraints calling this should only be in
566     SFmode move insns, so any constant which cannot
567     be moved using a single insn will do.  */
568  if (GET_CODE (op) == CONST_DOUBLE)
569    {
570      REAL_VALUE_TYPE r;
571      long i;
572
573      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
574      if (REAL_VALUES_EQUAL (r, dconst0) &&
575	  ! REAL_VALUE_MINUS_ZERO (r))
576	return 0;
577      REAL_VALUE_TO_TARGET_SINGLE (r, i);
578      if (! SPARC_SETHI_P (i)
579          && ! SPARC_SIMM13_P (i))
580	return 1;
581    }
582
583  return 0;
584}
585
586/* Nonzero if OP is an integer register.  */
587
588int
589intreg_operand (op, mode)
590     rtx op;
591     enum machine_mode mode ATTRIBUTE_UNUSED;
592{
593  return (register_operand (op, SImode)
594	  || (TARGET_ARCH64 && register_operand (op, DImode)));
595}
596
/* Nonzero if OP is a floating point condition code register.  */

int
fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* This can happen when recog is called from combine.  Op may be a MEM.
     Fail instead of calling abort in this case.  */
  if (GET_CODE (op) != REG)
    return 0;

  /* With an explicit MODE, OP must have exactly that mode; with
     VOIDmode, OP's own mode must be one of the FP CC modes.  */
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (mode == VOIDmode
      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
    return 0;

#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
  if (reg_renumber == 0)
    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
  return REGNO_OK_FOR_CCFP_P (REGNO (op));
#else
  /* Accept exactly the four v9 %fcc hard registers (the unsigned
     subtraction folds the range test into one comparison).  */
  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
#endif
}
623
624/* Nonzero if OP is a floating point condition code fcc0 register.  */
625
626int
627fcc0_reg_operand (op, mode)
628     rtx op;
629     enum machine_mode mode;
630{
631  /* This can happen when recog is called from combine.  Op may be a MEM.
632     Fail instead of calling abort in this case.  */
633  if (GET_CODE (op) != REG)
634    return 0;
635
636  if (mode != VOIDmode && mode != GET_MODE (op))
637    return 0;
638  if (mode == VOIDmode
639      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
640    return 0;
641
642  return REGNO (op) == SPARC_FCC_REG;
643}
644
645/* Nonzero if OP is an integer or floating point condition code register.  */
646
647int
648icc_or_fcc_reg_operand (op, mode)
649     rtx op;
650     enum machine_mode mode;
651{
652  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
653    {
654      if (mode != VOIDmode && mode != GET_MODE (op))
655	return 0;
656      if (mode == VOIDmode
657	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
658	return 0;
659      return 1;
660    }
661
662  return fcc_reg_operand (op, mode);
663}
664
665/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
666int
667restore_operand (op, mode)
668     rtx op;
669     enum machine_mode mode;
670{
671  return (GET_CODE (op) == REG && GET_MODE (op) == mode
672	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
673}
674
675/* Call insn on SPARC can take a PC-relative constant address, or any regular
676   memory address.  */
677
678int
679call_operand (op, mode)
680     rtx op;
681     enum machine_mode mode;
682{
683  if (GET_CODE (op) != MEM)
684    abort ();
685  op = XEXP (op, 0);
686  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
687}
688
689int
690call_operand_address (op, mode)
691     rtx op;
692     enum machine_mode mode;
693{
694  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
695}
696
697/* Returns 1 if OP is either a symbol reference or a sum of a symbol
698   reference and a constant.  */
699
700int
701symbolic_operand (op, mode)
702     register rtx op;
703     enum machine_mode mode;
704{
705  enum machine_mode omode = GET_MODE (op);
706
707  if (omode != mode && omode != VOIDmode && mode != VOIDmode)
708    return 0;
709
710  switch (GET_CODE (op))
711    {
712    case SYMBOL_REF:
713    case LABEL_REF:
714      return 1;
715
716    case CONST:
717      op = XEXP (op, 0);
718      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
719	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
720	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
721
722    default:
723      return 0;
724    }
725}
726
727/* Return truth value of statement that OP is a symbolic memory
728   operand of mode MODE.  */
729
730int
731symbolic_memory_operand (op, mode)
732     rtx op;
733     enum machine_mode mode ATTRIBUTE_UNUSED;
734{
735  if (GET_CODE (op) == SUBREG)
736    op = SUBREG_REG (op);
737  if (GET_CODE (op) != MEM)
738    return 0;
739  op = XEXP (op, 0);
740  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
741	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
742}
743
744/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */
745
746int
747label_ref_operand (op, mode)
748     rtx op;
749     enum machine_mode mode;
750{
751  if (GET_CODE (op) != LABEL_REF)
752    return 0;
753  if (GET_MODE (op) != mode)
754    return 0;
755  return 1;
756}
757
758/* Return 1 if the operand is an argument used in generating pic references
759   in either the medium/low or medium/anywhere code models of sparc64.  */
760
761int
762sp64_medium_pic_operand (op, mode)
763     rtx op;
764     enum machine_mode mode ATTRIBUTE_UNUSED;
765{
766  /* Check for (const (minus (symbol_ref:GOT)
767                             (const (minus (label) (pc))))).  */
768  if (GET_CODE (op) != CONST)
769    return 0;
770  op = XEXP (op, 0);
771  if (GET_CODE (op) != MINUS)
772    return 0;
773  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
774    return 0;
775  /* ??? Ensure symbol is GOT.  */
776  if (GET_CODE (XEXP (op, 1)) != CONST)
777    return 0;
778  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
779    return 0;
780  return 1;
781}
782
783/* Return 1 if the operand is a data segment reference.  This includes
784   the readonly data segment, or in other words anything but the text segment.
785   This is needed in the medium/anywhere code model on v9.  These values
786   are accessed with EMBMEDANY_BASE_REG.  */
787
788int
789data_segment_operand (op, mode)
790     rtx op;
791     enum machine_mode mode ATTRIBUTE_UNUSED;
792{
793  switch (GET_CODE (op))
794    {
795    case SYMBOL_REF :
796      return ! SYMBOL_REF_FLAG (op);
797    case PLUS :
798      /* Assume canonical format of symbol + constant.
799	 Fall through.  */
800    case CONST :
801      return data_segment_operand (XEXP (op, 0), VOIDmode);
802    default :
803      return 0;
804    }
805}
806
807/* Return 1 if the operand is a text segment reference.
808   This is needed in the medium/anywhere code model on v9.  */
809
810int
811text_segment_operand (op, mode)
812     rtx op;
813     enum machine_mode mode ATTRIBUTE_UNUSED;
814{
815  switch (GET_CODE (op))
816    {
817    case LABEL_REF :
818      return 1;
819    case SYMBOL_REF :
820      return SYMBOL_REF_FLAG (op);
821    case PLUS :
822      /* Assume canonical format of symbol + constant.
823	 Fall through.  */
824    case CONST :
825      return text_segment_operand (XEXP (op, 0), VOIDmode);
826    default :
827      return 0;
828    }
829}
830
831/* Return 1 if the operand is either a register or a memory operand that is
832   not symbolic.  */
833
834int
835reg_or_nonsymb_mem_operand (op, mode)
836    register rtx op;
837    enum machine_mode mode;
838{
839  if (register_operand (op, mode))
840    return 1;
841
842  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
843    return 1;
844
845  return 0;
846}
847
848int
849splittable_symbolic_memory_operand (op, mode)
850     rtx op;
851     enum machine_mode mode ATTRIBUTE_UNUSED;
852{
853  if (GET_CODE (op) != MEM)
854    return 0;
855  if (! symbolic_operand (XEXP (op, 0), Pmode))
856    return 0;
857  return 1;
858}
859
860int
861splittable_immediate_memory_operand (op, mode)
862     rtx op;
863     enum machine_mode mode ATTRIBUTE_UNUSED;
864{
865  if (GET_CODE (op) != MEM)
866    return 0;
867  if (! immediate_operand (XEXP (op, 0), Pmode))
868    return 0;
869  return 1;
870}
871
872/* Return truth value of whether OP is EQ or NE.  */
873
874int
875eq_or_neq (op, mode)
876     rtx op;
877     enum machine_mode mode ATTRIBUTE_UNUSED;
878{
879  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
880}
881
882/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
883   or LTU for non-floating-point.  We handle those specially.  */
884
885int
886normal_comp_operator (op, mode)
887     rtx op;
888     enum machine_mode mode ATTRIBUTE_UNUSED;
889{
890  enum rtx_code code = GET_CODE (op);
891
892  if (GET_RTX_CLASS (code) != '<')
893    return 0;
894
895  if (GET_MODE (XEXP (op, 0)) == CCFPmode
896      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
897    return 1;
898
899  return (code != NE && code != EQ && code != GEU && code != LTU);
900}
901
902/* Return 1 if this is a comparison operator.  This allows the use of
903   MATCH_OPERATOR to recognize all the branch insns.  */
904
905int
906noov_compare_op (op, mode)
907    register rtx op;
908    enum machine_mode mode ATTRIBUTE_UNUSED;
909{
910  enum rtx_code code = GET_CODE (op);
911
912  if (GET_RTX_CLASS (code) != '<')
913    return 0;
914
915  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode
916      || GET_MODE (XEXP (op, 0)) == CCX_NOOVmode)
917    /* These are the only branches which work with CC_NOOVmode.  */
918    return (code == EQ || code == NE || code == GE || code == LT);
919  return 1;
920}
921
922/* Return 1 if this is a 64-bit comparison operator.  This allows the use of
923   MATCH_OPERATOR to recognize all the branch insns.  */
924
925int
926noov_compare64_op (op, mode)
927    register rtx op;
928    enum machine_mode mode ATTRIBUTE_UNUSED;
929{
930  enum rtx_code code = GET_CODE (op);
931
932  if (! TARGET_V9)
933    return 0;
934
935  if (GET_RTX_CLASS (code) != '<')
936    return 0;
937
938  if (GET_MODE (XEXP (op, 0)) == CCX_NOOVmode)
939    /* These are the only branches which work with CCX_NOOVmode.  */
940    return (code == EQ || code == NE || code == GE || code == LT);
941  return (GET_MODE (XEXP (op, 0)) == CCXmode);
942}
943
944/* Nonzero if OP is a comparison operator suitable for use in v9
945   conditional move or branch on register contents instructions.  */
946
947int
948v9_regcmp_op (op, mode)
949     register rtx op;
950     enum machine_mode mode ATTRIBUTE_UNUSED;
951{
952  enum rtx_code code = GET_CODE (op);
953
954  if (GET_RTX_CLASS (code) != '<')
955    return 0;
956
957  return v9_regcmp_p (code);
958}
959
960/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
961
962int
963extend_op (op, mode)
964     rtx op;
965     enum machine_mode mode ATTRIBUTE_UNUSED;
966{
967  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
968}
969
970/* Return nonzero if OP is an operator of mode MODE which can set
971   the condition codes explicitly.  We do not include PLUS and MINUS
972   because these require CC_NOOVmode, which we handle explicitly.  */
973
974int
975cc_arithop (op, mode)
976     rtx op;
977     enum machine_mode mode ATTRIBUTE_UNUSED;
978{
979  if (GET_CODE (op) == AND
980      || GET_CODE (op) == IOR
981      || GET_CODE (op) == XOR)
982    return 1;
983
984  return 0;
985}
986
987/* Return nonzero if OP is an operator of mode MODE which can bitwise
988   complement its second operand and set the condition codes explicitly.  */
989
990int
991cc_arithopn (op, mode)
992     rtx op;
993     enum machine_mode mode ATTRIBUTE_UNUSED;
994{
995  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
996     and (xor ... (not ...)) to (not (xor ...)).  */
997  return (GET_CODE (op) == AND
998	  || GET_CODE (op) == IOR);
999}
1000
1001/* Return true if OP is a register, or is a CONST_INT that can fit in a
1002   signed 13 bit immediate field.  This is an acceptable SImode operand for
1003   most 3 address instructions.  */
1004
1005int
1006arith_operand (op, mode)
1007     rtx op;
1008     enum machine_mode mode;
1009{
1010  if (register_operand (op, mode))
1011    return 1;
1012  if (GET_CODE (op) != CONST_INT)
1013    return 0;
1014  return SMALL_INT32 (op);
1015}
1016
1017/* Return true if OP is a constant 4096  */
1018
1019int
1020arith_4096_operand (op, mode)
1021     rtx op;
1022     enum machine_mode mode ATTRIBUTE_UNUSED;
1023{
1024  if (GET_CODE (op) != CONST_INT)
1025    return 0;
1026  else
1027    return INTVAL (op) == 4096;
1028}
1029
1030/* Return true if OP is suitable as second operand for add/sub */
1031
1032int
1033arith_add_operand (op, mode)
1034     rtx op;
1035     enum machine_mode mode;
1036{
1037  return arith_operand (op, mode) || arith_4096_operand (op, mode);
1038}
1039
1040/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
1041   immediate field of OR and XOR instructions.  Used for 64-bit
1042   constant formation patterns.  */
int
const64_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* A CONST_INT qualifies when it is in the signed 13-bit range.  */
  return ((GET_CODE (op) == CONST_INT
	   && SPARC_SIMM13_P (INTVAL (op)))
#if HOST_BITS_PER_WIDE_INT != 64
	  /* On a 32-bit host, a small 64-bit constant may arrive as a
	     CONST_DOUBLE; it qualifies only when its high word is the
	     proper sign extension of bit 31 of the low word.  */
	  || (GET_CODE (op) == CONST_DOUBLE
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
	      && (CONST_DOUBLE_HIGH (op) ==
		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
		   (HOST_WIDE_INT)-1 : 0)))
#endif
	  );
}
1059
1060/* The same, but only for sethi instructions.  */
1061int
1062const64_high_operand (op, mode)
1063     rtx op;
1064     enum machine_mode mode;
1065{
1066  return ((GET_CODE (op) == CONST_INT
1067	   && (INTVAL (op) & ~(HOST_WIDE_INT)0x3ff) != 0
1068	   && SPARC_SETHI_P (INTVAL (op) & GET_MODE_MASK (mode))
1069	   )
1070	  || (GET_CODE (op) == CONST_DOUBLE
1071	      && CONST_DOUBLE_HIGH (op) == 0
1072	      && (CONST_DOUBLE_LOW (op) & ~(HOST_WIDE_INT)0x3ff) != 0
1073	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
1074}
1075
1076/* Return true if OP is a register, or is a CONST_INT that can fit in a
1077   signed 11 bit immediate field.  This is an acceptable SImode operand for
1078   the movcc instructions.  */
1079
1080int
1081arith11_operand (op, mode)
1082     rtx op;
1083     enum machine_mode mode;
1084{
1085  return (register_operand (op, mode)
1086	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
1087}
1088
1089/* Return true if OP is a register, or is a CONST_INT that can fit in a
1090   signed 10 bit immediate field.  This is an acceptable SImode operand for
1091   the movrcc instructions.  */
1092
1093int
1094arith10_operand (op, mode)
1095     rtx op;
1096     enum machine_mode mode;
1097{
1098  return (register_operand (op, mode)
1099	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
1100}
1101
1102/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
1103   immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
1104   immediate field.
1105   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
1106   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
1107   for most 3 address instructions.  */
1108
int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* Accept a register, a CONST_INT in the signed 13-bit range, or a
     CONST_DOUBLE.  Pre-v9 (!ARCH64) each word of the CONST_DOUBLE must
     independently fit in 13 signed bits (the +0x1000 < 0x2000 trick is
     the simm13 range test); on ARCH64 the low word must fit in 13 bits
     and the high word must be the proper sign extension of bit 12.  */
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  || (! TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
1128
1129/* Return true if OP is a constant 4096 for DImode on ARCH64 */
1130
1131int
1132arith_double_4096_operand (op, mode)
1133     rtx op;
1134     enum machine_mode mode ATTRIBUTE_UNUSED;
1135{
1136  return (TARGET_ARCH64 &&
1137  	  ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
1138  	   (GET_CODE (op) == CONST_DOUBLE &&
1139  	    CONST_DOUBLE_LOW (op) == 4096 &&
1140  	    CONST_DOUBLE_HIGH (op) == 0)));
1141}
1142
1143/* Return true if OP is suitable as second operand for add/sub in DImode */
1144
1145int
1146arith_double_add_operand (op, mode)
1147     rtx op;
1148     enum machine_mode mode;
1149{
1150  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
1151}
1152
1153/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
1154   can fit in an 11 bit immediate field.  This is an acceptable DImode
1155   operand for the movcc instructions.  */
1156/* ??? Replace with arith11_operand?  */
1157
int
arith11_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* Accept a register, or a constant in the signed 11-bit range (the
     +0x400 < 0x800 comparison is the simm11 range test).  A matching
     CONST_DOUBLE must have its high word be the sign extension of
     bit 10 of the low word.  */
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
}
1175
1176/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
1177   can fit in an 10 bit immediate field.  This is an acceptable DImode
1178   operand for the movrcc instructions.  */
1179/* ??? Replace with arith10_operand?  */
1180
1181int
1182arith10_double_operand (op, mode)
1183     rtx op;
1184     enum machine_mode mode;
1185{
1186  return (register_operand (op, mode)
1187	  || (GET_CODE (op) == CONST_DOUBLE
1188	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1189	      && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
1190	      && ((CONST_DOUBLE_HIGH (op) == -1
1191		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
1192		  || (CONST_DOUBLE_HIGH (op) == 0
1193		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
1194	  || (GET_CODE (op) == CONST_INT
1195	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1196	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
1197}
1198
1199/* Return truth value of whether OP is an integer which fits the
1200   range constraining immediate operands in most three-address insns,
1201   which have a 13 bit immediate field.  */
1202
1203int
1204small_int (op, mode)
1205     rtx op;
1206     enum machine_mode mode ATTRIBUTE_UNUSED;
1207{
1208  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
1209}
1210
1211int
1212small_int_or_double (op, mode)
1213     rtx op;
1214     enum machine_mode mode ATTRIBUTE_UNUSED;
1215{
1216  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
1217	  || (GET_CODE (op) == CONST_DOUBLE
1218	      && CONST_DOUBLE_HIGH (op) == 0
1219	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
1220}
1221
1222/* Recognize operand values for the umul instruction.  That instruction sign
1223   extends immediate values just like all other sparc instructions, but
1224   interprets the extended result as an unsigned number.  */
1225
int
uns_small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  /* Accept the unsigned images of simm13 values: 0..0xFFF and
     0xFFFFF000..0xFFFFFFFF (the top of the 32-bit range, i.e. the
     sign-extended negative immediates reinterpreted as unsigned).  */
  return (GET_CODE (op) == CONST_INT
	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
	      || (INTVAL (op) >= 0xFFFFF000
                  && INTVAL (op) <= 0xFFFFFFFF)));
#else
  /* On a 32-bit host the high part of the range arrives as a
     CONST_DOUBLE with a zero high word; the subtraction trick checks
     0xFFFFF000 <= low < 0x100000000 in one unsigned compare.  */
  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}
1244
1245int
1246uns_arith_operand (op, mode)
1247     rtx op;
1248     enum machine_mode mode;
1249{
1250  return register_operand (op, mode) || uns_small_int (op, mode);
1251}
1252
1253/* Return truth value of statement that OP is a call-clobbered register.  */
1254int
1255clobbered_register (op, mode)
1256     rtx op;
1257     enum machine_mode mode ATTRIBUTE_UNUSED;
1258{
1259  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1260}
1261
1262/* Return 1 if OP is a valid operand for the source of a move insn.  */
1263
1264int
1265input_operand (op, mode)
1266     rtx op;
1267     enum machine_mode mode;
1268{
1269  /* If both modes are non-void they must be the same.  */
1270  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
1271    return 0;
1272
1273  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
1274  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
1275    return 1;
1276
1277  /* Allow any one instruction integer constant, and all CONST_INT
1278     variants when we are working in DImode and !arch64.  */
1279  if (GET_MODE_CLASS (mode) == MODE_INT
1280      && ((GET_CODE (op) == CONST_INT
1281	   && (SPARC_SETHI_P (INTVAL (op) & GET_MODE_MASK (mode))
1282	       || SPARC_SIMM13_P (INTVAL (op))
1283	       || (mode == DImode
1284		   && ! TARGET_ARCH64)))
1285	  || (TARGET_ARCH64
1286	      && GET_CODE (op) == CONST_DOUBLE
1287	      && ((CONST_DOUBLE_HIGH (op) == 0
1288		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
1289		  ||
1290#if HOST_BITS_PER_WIDE_INT == 64
1291		  (CONST_DOUBLE_HIGH (op) == 0
1292		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
1293#else
1294		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
1295		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
1296			&& CONST_DOUBLE_HIGH (op) == 0)
1297		       || (CONST_DOUBLE_HIGH (op) == -1
1298			   && CONST_DOUBLE_LOW (op) & 0x80000000) != 0))
1299#endif
1300		  ))))
1301    return 1;
1302
1303  /* If !arch64 and this is a DImode const, allow it so that
1304     the splits can be generated.  */
1305  if (! TARGET_ARCH64
1306      && mode == DImode
1307      && GET_CODE (op) == CONST_DOUBLE)
1308    return 1;
1309
1310  if (register_operand (op, mode))
1311    return 1;
1312
1313  if (GET_MODE_CLASS (mode) == MODE_FLOAT
1314      && GET_CODE (op) == CONST_DOUBLE)
1315    return 1;
1316
1317  /* If this is a SUBREG, look inside so that we handle
1318     paradoxical ones.  */
1319  if (GET_CODE (op) == SUBREG)
1320    op = SUBREG_REG (op);
1321
1322  /* Check for valid MEM forms.  */
1323  if (GET_CODE (op) == MEM)
1324    {
1325      rtx inside = XEXP (op, 0);
1326
1327      if (GET_CODE (inside) == LO_SUM)
1328	{
1329	  /* We can't allow these because all of the splits
1330	     (eventually as they trickle down into DFmode
1331	     splits) require offsettable memory references.  */
1332	  if (! TARGET_V9
1333	      && GET_MODE (op) == TFmode)
1334	    return 0;
1335
1336	  return (register_operand (XEXP (inside, 0), Pmode)
1337		  && CONSTANT_P (XEXP (inside, 1)));
1338	}
1339      return memory_address_p (mode, inside);
1340    }
1341
1342  return 0;
1343}
1344
1345
1346/* We know it can't be done in one insn when we get here,
1347   the movsi expander guarentees this.  */
/* Load the 32-bit constant or symbol OP1 into register OP0 using a
   two-insn sethi/or (or HIGH/LO_SUM) sequence.  Single-insn constants
   must have been handled by the expander before reaching here.  */
void
sparc_emit_set_const32 (op0, op1)
     rtx op0;
     rtx op1;
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  /* Sanity check: a constant reachable with one sethi or one mov
     should never arrive here.  */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT value = INTVAL (op1);

      if (SPARC_SETHI_P (value & GET_MODE_MASK (mode))
	  || SPARC_SIMM13_P (value))
	abort ();
    }

  /* Full 2-insn decomposition is needed.  */
  /* During reload we may not create new pseudos, so build the value
     in place in OP0 itself.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      if (TARGET_ARCH64
	  && HOST_BITS_PER_WIDE_INT != 64
	  && (INTVAL (op1) & 0x80000000) != 0)
	emit_insn (gen_rtx_SET
		   (VOIDmode, temp,
		    gen_rtx_CONST_DOUBLE (VOIDmode,
					  INTVAL (op1) & ~(HOST_WIDE_INT)0x3ff,
					  0)));
      else
	emit_insn (gen_rtx_SET (VOIDmode, temp,
				GEN_INT (INTVAL (op1)
					 & ~(HOST_WIDE_INT)0x3ff)));

      /* OR in the low 10 bits to finish the value.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));

    }
}
1404
1405
1406/* Sparc-v9 code-model support.  */
/* Load the symbolic (non-CONST_INT/CONST_DOUBLE) value OP1 into
   register OP0 under the current v9 code model.  TEMP1 is a scratch
   register; when the caller passes it in TImode it actually denotes a
   pair of DImode regs, so a second scratch is available if TEMP1
   coincides with OP0.  */
void
sparc_emit_set_symbolic_const64 (op0, op1, temp1)
     rtx op0;
     rtx op1;
     rtx temp1;
{
  rtx ti_temp1 = 0;

  /* Remember the TImode register (if any) and work with its first
     DImode half.  */
  if (temp1 && GET_MODE (temp1) == TImode)
    {
      ti_temp1 = temp1;
      temp1 = gen_rtx_REG (DImode, REGNO (temp1));
    }

  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp
	 or	%temp, %lo(symbol), %reg  */
      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      emit_insn (gen_seth44 (op0, op1));
      emit_insn (gen_setm44 (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp1,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp1, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 or	%temp2, %lo(symbol), %temp4
	 sllx	%temp3, 32, %temp5
	 or	%temp4, %temp5, %reg  */

      /* It is possible that one of the registers we got for operands[2]
	 might coincide with that of operands[0] (which is why we made
	 it TImode).  Pick the other one to use as our scratch.  */
      if (rtx_equal_p (temp1, op0))
	{
	  if (ti_temp1)
	    temp1 = gen_rtx_REG (DImode, REGNO (temp1) + 1);
	  else
	    abort();
	}

      emit_insn (gen_sethh (op0, op1));
      emit_insn (gen_setlm (temp1, op1));
      emit_insn (gen_sethm (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_PLUS (DImode, op0, temp1)));
      emit_insn (gen_setlo (op0, op0, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			or	%temp1, %lo(symbol), %temp2
			add	%temp2, EMBMEDANY_BASE_REG, %reg

	 Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			or	%temp2, %lo(symbol), %temp4
			sllx	%temp3, 32, %temp5
			or	%temp4, %temp5, %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (op0, temp1));
	  emit_insn (gen_embmedany_losum (op0, op0, op1));
	}
      else
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp1, op0))
	    {
	      if (ti_temp1)
		temp1 = gen_rtx_REG (DImode, REGNO (temp1) + 1);
	      else
		abort();
	    }

	  emit_insn (gen_embmedany_textuhi (op0, op1));
	  emit_insn (gen_embmedany_texthi  (temp1, op1));
	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, op0, temp1)));
	  emit_insn (gen_embmedany_textlo  (op0, op0, op1));
	}
      break;

    default:
      abort();
    }
}
1547
1548/* These avoid problems when cross compiling.  If we do not
1549   go through all this hair then the optimizer will see
1550   invalid REG_EQUAL notes or in some cases none at all.  */
1551static void sparc_emit_set_safe_HIGH64 PARAMS ((rtx, HOST_WIDE_INT));
1552static rtx gen_safe_SET64 PARAMS ((rtx, HOST_WIDE_INT));
1553static rtx gen_safe_OR64 PARAMS ((rtx, HOST_WIDE_INT));
1554static rtx gen_safe_XOR64 PARAMS ((rtx, HOST_WIDE_INT));
1555
#if HOST_BITS_PER_WIDE_INT == 64
/* On a 64-bit host, emit a CONST_INT directly: the high-part form
   clears the low 10 bits (the sethi immediate image).  */
#define GEN_HIGHINT64(__x)		GEN_INT ((__x) & ~(HOST_WIDE_INT)0x3ff)
#define GEN_INT64(__x)			GEN_INT (__x)
#else
/* On a 32-bit host, a 64-bit constant must be built as a CONST_DOUBLE;
   the high word is the sign extension of bit 31 of the low word.  */
#define GEN_HIGHINT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & ~(HOST_WIDE_INT)0x3ff, 0)
#define GEN_INT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & 0xffffffff, \
			      ((__x) & 0x80000000 \
			       ? -1 : 0))
#endif
1567
1568/* The optimizer is not to assume anything about exactly
1569   which bits are set for a HIGH, they are unspecified.
1570   Unfortunately this leads to many missed optimizations
1571   during CSE.  We mask out the non-HIGH bits, and matches
1572   a plain movdi, to alleviate this problem.  */
1573static void
1574sparc_emit_set_safe_HIGH64 (dest, val)
1575     rtx dest;
1576     HOST_WIDE_INT val;
1577{
1578  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
1579}
1580
1581static rtx
1582gen_safe_SET64 (dest, val)
1583     rtx dest;
1584     HOST_WIDE_INT val;
1585{
1586  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
1587}
1588
1589static rtx
1590gen_safe_OR64 (src, val)
1591     rtx src;
1592     HOST_WIDE_INT val;
1593{
1594  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
1595}
1596
1597static rtx
1598gen_safe_XOR64 (src, val)
1599     rtx src;
1600     HOST_WIDE_INT val;
1601{
1602  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
1603}
1604
1605/* Worker routines for 64-bit constant formation on arch64.
1606   One of the key things to be doing in these emissions is
1607   to create as many temp REGs as possible.  This makes it
1608   possible for half-built constants to be used later when
1609   such values are similar to something required later on.
1610   Without doing this, the optimizer cannot see such
1611   opportunities.  */
1612
1613static void sparc_emit_set_const64_quick1
1614	PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT, int));
1615
/* Emit a 2-insn load of a 64-bit constant whose low 32 bits are
   LOW_BITS into OP0, using TEMP as scratch.  IS_NEG nonzero means the
   high 32 bits are all ones; the value is then built from the
   complement via sethi + xor so the result sign-extends correctly.  */
static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT low_bits;
  int is_neg;
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negative case, sethi the complement so the xor below can
     restore both the low bits and the all-ones high word.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  sparc_emit_set_safe_HIGH64 (temp, high_bits);
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
1655
1656static void sparc_emit_set_const64_quick2
1657	PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT,
1658	       unsigned HOST_WIDE_INT, int));
1659
/* Emit a load of a 64-bit constant into OP0 as HIGH_BITS shifted left
   by SHIFT_COUNT, optionally OR'ed with LOW_IMMEDIATE (a simm13).
   TEMP is a scratch register.  */
static void
sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT high_bits;
  unsigned HOST_WIDE_INT low_immediate;
  int shift_count;
{
  rtx temp2 = op0;

  /* Build HIGH_BITS in TEMP2: sethi (+ or when bits remain in the low
     10) if it needs one, else a single small-constant move.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
1696
1697static void sparc_emit_set_const64_longway
1698	PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1699
1700/* Full 64-bit constant decomposition.  Even though this is the
1701   'worst' case, we still optimize a few things away.  */
/* Load the 64-bit constant HIGH_BITS:LOW_BITS into OP0 the long way,
   using TEMP as scratch.  Outside reload, both halves are built with
   sethi/or and combined with a shift and add; inside reload (no new
   pseudos allowed) the low word is merged 12+12+8 bits at a time.  */
static void
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_bits;
{
  rtx sub_temp;

  /* No fresh pseudos during reload: reuse OP0 as the intermediate.  */
  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  /* Build HIGH_BITS in SUB_TEMP: sethi plus an optional or, or a
     single small-constant move when sethi isn't needed.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      /* Shift the high word into place, build the low word separately
	 with sethi/or, then add the two halves.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      /* Merge the low word 12, 12, then 8 bits at a time, skipping
	 all-zero groups by folding their shift into the next one.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
1803
1804/* Analyze a 64-bit constant for certain properties.  */
1805static void analyze_64bit_constant
1806	PARAMS ((unsigned HOST_WIDE_INT,
1807	       unsigned HOST_WIDE_INT,
1808	       int *, int *, int *));
1809
1810static void
1811analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
1812     unsigned HOST_WIDE_INT high_bits, low_bits;
1813     int *hbsp, *lbsp, *abbasp;
1814{
1815  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1816  int i;
1817
1818  lowest_bit_set = highest_bit_set = -1;
1819  i = 0;
1820  do
1821    {
1822      if ((lowest_bit_set == -1)
1823	  && ((low_bits >> i) & 1))
1824	lowest_bit_set = i;
1825      if ((highest_bit_set == -1)
1826	  && ((high_bits >> (32 - i - 1)) & 1))
1827	highest_bit_set = (64 - i - 1);
1828    }
1829  while (++i < 32
1830	 && ((highest_bit_set == -1)
1831	     || (lowest_bit_set == -1)));
1832  if (i == 32)
1833    {
1834      i = 0;
1835      do
1836	{
1837	  if ((lowest_bit_set == -1)
1838	      && ((high_bits >> i) & 1))
1839	    lowest_bit_set = i + 32;
1840	  if ((highest_bit_set == -1)
1841	      && ((low_bits >> (32 - i - 1)) & 1))
1842	    highest_bit_set = 32 - i - 1;
1843	}
1844      while (++i < 32
1845	     && ((highest_bit_set == -1)
1846		 || (lowest_bit_set == -1)));
1847    }
1848  /* If there are no bits set this should have gone out
1849     as one instruction!  */
1850  if (lowest_bit_set == -1
1851      || highest_bit_set == -1)
1852    abort ();
1853  all_bits_between_are_set = 1;
1854  for (i = lowest_bit_set; i <= highest_bit_set; i++)
1855    {
1856      if (i < 32)
1857	{
1858	  if ((low_bits & (1 << i)) != 0)
1859	    continue;
1860	}
1861      else
1862	{
1863	  if ((high_bits & (1 << (i - 32))) != 0)
1864	    continue;
1865	}
1866      all_bits_between_are_set = 0;
1867      break;
1868    }
1869  *hbsp = highest_bit_set;
1870  *lbsp = lowest_bit_set;
1871  *abbasp = all_bits_between_are_set;
1872}
1873
1874static int const64_is_2insns
1875	PARAMS ((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1876
1877static int
1878const64_is_2insns (high_bits, low_bits)
1879     unsigned HOST_WIDE_INT high_bits, low_bits;
1880{
1881  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1882
1883  if (high_bits == 0
1884      || high_bits == 0xffffffff)
1885    return 1;
1886
1887  analyze_64bit_constant (high_bits, low_bits,
1888			  &highest_bit_set, &lowest_bit_set,
1889			  &all_bits_between_are_set);
1890
1891  if ((highest_bit_set == 63
1892       || lowest_bit_set == 0)
1893      && all_bits_between_are_set != 0)
1894    return 1;
1895
1896  if ((highest_bit_set - lowest_bit_set) < 21)
1897    return 1;
1898
1899  return 0;
1900}
1901
1902static unsigned HOST_WIDE_INT create_simple_focus_bits
1903	PARAMS ((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1904	       int, int));
1905
/* Return the bits of HIGH_BITS:LOW_BITS shifted down by LOWEST_BIT_SET
   and then up by SHIFT -- the isolated "focus" group of bits.  Aborts
   if the two shifted halves overlap.  */
static unsigned HOST_WIDE_INT
create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int lowest_bit_set, shift;
{
  HOST_WIDE_INT hi, lo;

  /* NOTE(review): when lowest_bit_set == 0 the hi expression shifts by
     32, which is only well-defined when HOST_WIDE_INT is wider than
     32 bits -- confirm against callers/host configurations.  */
  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  /* The halves must not overlap, or the caller picked a bad focus.  */
  if (hi & lo)
    abort ();
  return (hi | lo);
}
1927
1928/* Here we are sure to be arch64 and this is an integer constant
1929   being loaded into a register.  Emit the most efficient
1930   insn sequence possible.  Detection of all the 1-insn cases
1931   has been done already.  */
1932void
1933sparc_emit_set_const64 (op0, op1)
1934     rtx op0;
1935     rtx op1;
1936{
1937  unsigned HOST_WIDE_INT high_bits, low_bits;
1938  int lowest_bit_set, highest_bit_set;
1939  int all_bits_between_are_set;
1940  rtx temp;
1941
1942  /* Sanity check that we know what we are working with.  */
1943  if (! TARGET_ARCH64)
1944    abort ();
1945
1946  if (GET_CODE (op0) != SUBREG)
1947    {
1948      if (GET_CODE (op0) != REG
1949	  || (REGNO (op0) >= SPARC_FIRST_FP_REG
1950	      && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
1951	abort ();
1952    }
1953
1954  if (reload_in_progress || reload_completed)
1955    temp = op0;
1956  else
1957    temp = gen_reg_rtx (DImode);
1958
1959  if (GET_CODE (op1) != CONST_DOUBLE
1960      && GET_CODE (op1) != CONST_INT)
1961    {
1962      sparc_emit_set_symbolic_const64 (op0, op1, temp);
1963      return;
1964    }
1965
1966  if (GET_CODE (op1) == CONST_DOUBLE)
1967    {
1968#if HOST_BITS_PER_WIDE_INT == 64
1969      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
1970      low_bits  = CONST_DOUBLE_LOW (op1) & 0xffffffff;
1971#else
1972      high_bits = CONST_DOUBLE_HIGH (op1);
1973      low_bits = CONST_DOUBLE_LOW (op1);
1974#endif
1975    }
1976  else
1977    {
1978#if HOST_BITS_PER_WIDE_INT == 64
1979      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1980      low_bits = (INTVAL (op1) & 0xffffffff);
1981#else
1982      high_bits = ((INTVAL (op1) < 0) ?
1983		   0xffffffff :
1984		   0x00000000);
1985      low_bits = INTVAL (op1);
1986#endif
1987    }
1988
1989  /* low_bits	bits 0  --> 31
1990     high_bits	bits 32 --> 63  */
1991
1992  analyze_64bit_constant (high_bits, low_bits,
1993			  &highest_bit_set, &lowest_bit_set,
1994			  &all_bits_between_are_set);
1995
1996  /* First try for a 2-insn sequence.  */
1997
1998  /* These situations are preferred because the optimizer can
1999   * do more things with them:
2000   * 1) mov	-1, %reg
2001   *    sllx	%reg, shift, %reg
2002   * 2) mov	-1, %reg
2003   *    srlx	%reg, shift, %reg
2004   * 3) mov	some_small_const, %reg
2005   *    sllx	%reg, shift, %reg
2006   */
2007  if (((highest_bit_set == 63
2008	|| lowest_bit_set == 0)
2009       && all_bits_between_are_set != 0)
2010      || ((highest_bit_set - lowest_bit_set) < 12))
2011    {
2012      HOST_WIDE_INT the_const = -1;
2013      int shift = lowest_bit_set;
2014
2015      if ((highest_bit_set != 63
2016	   && lowest_bit_set != 0)
2017	  || all_bits_between_are_set == 0)
2018	{
2019	  the_const =
2020	    create_simple_focus_bits (high_bits, low_bits,
2021				      lowest_bit_set, 0);
2022	}
2023      else if (lowest_bit_set == 0)
2024	shift = -(63 - highest_bit_set);
2025
2026      if (! SPARC_SIMM13_P (the_const))
2027	abort ();
2028
2029      emit_insn (gen_safe_SET64 (temp, the_const));
2030      if (shift > 0)
2031	emit_insn (gen_rtx_SET (VOIDmode,
2032				op0,
2033				gen_rtx_ASHIFT (DImode,
2034						temp,
2035						GEN_INT (shift))));
2036      else if (shift < 0)
2037	emit_insn (gen_rtx_SET (VOIDmode,
2038				op0,
2039				gen_rtx_LSHIFTRT (DImode,
2040						  temp,
2041						  GEN_INT (-shift))));
2042      else
2043	abort ();
2044      return;
2045    }
2046
2047  /* Now a range of 22 or less bits set somewhere.
2048   * 1) sethi	%hi(focus_bits), %reg
2049   *    sllx	%reg, shift, %reg
2050   * 2) sethi	%hi(focus_bits), %reg
2051   *    srlx	%reg, shift, %reg
2052   */
2053  if ((highest_bit_set - lowest_bit_set) < 21)
2054    {
2055      unsigned HOST_WIDE_INT focus_bits =
2056	create_simple_focus_bits (high_bits, low_bits,
2057				  lowest_bit_set, 10);
2058
2059      if (! SPARC_SETHI_P (focus_bits))
2060	 abort ();
2061
2062      sparc_emit_set_safe_HIGH64 (temp, focus_bits);
2063
2064      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
2065      if (lowest_bit_set < 10)
2066	emit_insn (gen_rtx_SET (VOIDmode,
2067				op0,
2068				gen_rtx_LSHIFTRT (DImode, temp,
2069						  GEN_INT (10 - lowest_bit_set))));
2070      else if (lowest_bit_set > 10)
2071	emit_insn (gen_rtx_SET (VOIDmode,
2072				op0,
2073				gen_rtx_ASHIFT (DImode, temp,
2074						GEN_INT (lowest_bit_set - 10))));
2075      else
2076	abort ();
2077      return;
2078    }
2079
2080  /* 1) sethi	%hi(low_bits), %reg
2081   *    or	%reg, %lo(low_bits), %reg
2082   * 2) sethi	%hi(~low_bits), %reg
2083   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2084   */
2085  if (high_bits == 0
2086      || high_bits == 0xffffffff)
2087    {
2088      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2089				     (high_bits == 0xffffffff));
2090      return;
2091    }
2092
2093  /* Now, try 3-insn sequences.  */
2094
2095  /* 1) sethi	%hi(high_bits), %reg
2096   *    or	%reg, %lo(high_bits), %reg
2097   *    sllx	%reg, 32, %reg
2098   */
2099  if (low_bits == 0)
2100    {
2101      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2102      return;
2103    }
2104
2105  /* We may be able to do something quick
2106     when the constant is negated, so try that.  */
2107  if (const64_is_2insns ((~high_bits) & 0xffffffff,
2108			 (~low_bits) & 0xfffffc00))
2109    {
2110      /* NOTE: The trailing bits get XOR'd so we need the
2111	 non-negated bits, not the negated ones.  */
2112      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2113
2114      if ((((~high_bits) & 0xffffffff) == 0
2115	   && ((~low_bits) & 0x80000000) == 0)
2116	  || (((~high_bits) & 0xffffffff) == 0xffffffff
2117	      && ((~low_bits) & 0x80000000) != 0))
2118	{
2119	  int fast_int = (~low_bits & 0xffffffff);
2120
2121	  if ((SPARC_SETHI_P (fast_int)
2122	       && (~high_bits & 0xffffffff) == 0)
2123	      || SPARC_SIMM13_P (fast_int))
2124	    emit_insn (gen_safe_SET64 (temp, fast_int));
2125	  else
2126	    sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
2127	}
2128      else
2129	{
2130	  rtx negated_const;
2131#if HOST_BITS_PER_WIDE_INT == 64
2132	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2133				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2134#else
2135	  negated_const = gen_rtx_CONST_DOUBLE (DImode,
2136						(~low_bits) & 0xfffffc00,
2137						(~high_bits) & 0xffffffff);
2138#endif
2139	  sparc_emit_set_const64 (temp, negated_const);
2140	}
2141
2142      /* If we are XOR'ing with -1, then we should emit a one's complement
2143	 instead.  This way the combiner will notice logical operations
2144	 such as ANDN later on and substitute.  */
2145      if (trailing_bits == 0x3ff)
2146	{
2147	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2148				  gen_rtx_NOT (DImode, temp)));
2149	}
2150      else
2151	{
2152	  emit_insn (gen_rtx_SET (VOIDmode,
2153				  op0,
2154				  gen_safe_XOR64 (temp,
2155						  (-0x400 | trailing_bits))));
2156	}
2157      return;
2158    }
2159
2160  /* 1) sethi	%hi(xxx), %reg
2161   *    or	%reg, %lo(xxx), %reg
2162   *	sllx	%reg, yyy, %reg
2163   *
2164   * ??? This is just a generalized version of the low_bits==0
2165   * thing above, FIXME...
2166   */
2167  if ((highest_bit_set - lowest_bit_set) < 32)
2168    {
2169      unsigned HOST_WIDE_INT focus_bits =
2170	create_simple_focus_bits (high_bits, low_bits,
2171				  lowest_bit_set, 0);
2172
2173      /* We can't get here in this state.  */
2174      if (highest_bit_set < 32
2175	  || lowest_bit_set >= 32)
2176	abort ();
2177
2178      /* So what we know is that the set bits straddle the
2179	 middle of the 64-bit word.  */
2180      sparc_emit_set_const64_quick2 (op0, temp,
2181				     focus_bits, 0,
2182				     lowest_bit_set);
2183      return;
2184    }
2185
2186  /* 1) sethi	%hi(high_bits), %reg
2187   *    or	%reg, %lo(high_bits), %reg
2188   *    sllx	%reg, 32, %reg
2189   *	or	%reg, low_bits, %reg
2190   */
2191  if (SPARC_SIMM13_P(low_bits)
2192      && ((int)low_bits > 0))
2193    {
2194      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2195      return;
2196    }
2197
2198  /* The easiest way when all else fails, is full decomposition.  */
2199#if 0
2200  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
2201	  high_bits, low_bits, ~high_bits, ~low_bits);
2202#endif
2203  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2204}
2205
2206/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2207   return the mode to be used for the comparison.  For floating-point,
2208   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
2209   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
2210   processing is needed.  */
2211
2212enum machine_mode
2213select_cc_mode (op, x, y)
2214     enum rtx_code op;
2215     rtx x;
2216     rtx y ATTRIBUTE_UNUSED;
2217{
2218  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2219    {
2220      switch (op)
2221	{
2222	case EQ:
2223	case NE:
2224	case UNORDERED:
2225	case ORDERED:
2226	case UNLT:
2227	case UNLE:
2228	case UNGT:
2229	case UNGE:
2230	case UNEQ:
2231	case LTGT:
2232	  return CCFPmode;
2233
2234	case LT:
2235	case LE:
2236	case GT:
2237	case GE:
2238	  return CCFPEmode;
2239
2240	default:
2241	  abort ();
2242	}
2243    }
2244  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2245	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2246    {
2247      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2248	return CCX_NOOVmode;
2249      else
2250	return CC_NOOVmode;
2251    }
2252  else
2253    {
2254      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2255	return CCXmode;
2256      else
2257	return CCmode;
2258    }
2259}
2260
2261/* X and Y are two things to compare using CODE.  Emit the compare insn and
2262   return the rtx for the cc reg in the proper mode.  */
2263
rtx
gen_compare_reg (code, x, y)
     enum rtx_code code;
     rtx x, y;
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg;

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos). If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      /* No previous comparison of x,y found: claim the next fcc reg
	 round-robin and remember the operands for later reuse.  */
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    /* Pre-v9 float comparison: only the single %fcc register exists.  */
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    /* Integer comparison: use the integer condition-code register.  */
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* Emit the actual compare as a SET of the chosen cc register.  */
  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
			  gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2323
2324/* This function is used for v9 only.
2325   CODE is the code for an Scc's comparison.
2326   OPERANDS[0] is the target of the Scc insn.
2327   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
2328   been generated yet).
2329
2330   This function is needed to turn
2331
2332	   (set (reg:SI 110)
2333	       (gt (reg:CCX 100 %icc)
2334	           (const_int 0)))
2335   into
2336	   (set (reg:SI 110)
2337	       (gt:DI (reg:CCX 100 %icc)
2338	           (const_int 0)))
2339
2340   IE: The instruction recognizer needs to see the mode of the comparison to
2341   find the right instruction. We could use "gt:DI" right in the
2342   define_expand, but leaving it out allows us to handle DI, SI, etc.
2343
2344   We refer to the global sparc compare operands sparc_compare_op0 and
2345   sparc_compare_op1.  */
2346
int
gen_v9_scc (compare_code, operands)
     enum rtx_code compare_code;
     register rtx *operands;
{
  rtx temp, op0, op1;

  /* On 32-bit targets we cannot handle DImode compare operands or a
     DImode result here; let the caller fall back to another strategy.  */
  if (! TARGET_ARCH64
      && (GET_MODE (sparc_compare_op0) == DImode
	  || GET_MODE (operands[0]) == DImode))
    return 0;

  op0 = sparc_compare_op0;
  op1 = sparc_compare_op1;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
      && op1 == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      /* Special case for op0 != 0.  This can be done with one instruction if
	 operands[0] == sparc_compare_op0.  */

      if (compare_code == NE
	  && GET_MODE (operands[0]) == DImode
	  && rtx_equal_p (op0, operands[0]))
	{
	  /* movrnz op0, 1, op0: leave op0 alone when nonzero is wanted
	     anyway, otherwise force it to 1.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       operands[0])));
	  return 1;
	}

      if (reg_overlap_mentioned_p (operands[0], op0))
	{
	  /* Handle the case where operands[0] == sparc_compare_op0.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
	  emit_move_insn (op0, sparc_compare_op0);
	}

      /* Start with 0 in the result; the conditional move below
	 overwrites it with 1 when the comparison holds.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      /* movrCC tests the full 64-bit register, so sign-extend
	 narrower compare operands into a DImode temporary first.  */
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   operands[0])));
      return 1;
    }
  else
    {
      /* Fall back to an explicit compare into a cc register followed
	 by a conditional move keyed off that register.  */
      operands[1] = gen_compare_reg (compare_code, op0, op1);

      switch (GET_MODE (operands[1]))
	{
	  case CCmode :
	  case CCXmode :
	  case CCFPEmode :
	  case CCFPmode :
	    break;
	  default :
	    abort ();
	}
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (operands[1]),
						   operands[1], const0_rtx),
				    const1_rtx, operands[0])));
      return 1;
    }
}
2432
2433/* Emit a conditional jump insn for the v9 architecture using comparison code
2434   CODE and jump target LABEL.
2435   This function exists to take advantage of the v9 brxx insns.  */
2436
2437void
2438emit_v9_brxx_insn (code, op0, label)
2439     enum rtx_code code;
2440     rtx op0, label;
2441{
2442  emit_jump_insn (gen_rtx_SET (VOIDmode,
2443			   pc_rtx,
2444			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2445				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2446						    op0, const0_rtx),
2447				    gen_rtx_LABEL_REF (VOIDmode, label),
2448				    pc_rtx)));
2449}
2450
2451/* Generate a DFmode part of a hard TFmode register.
2452   REG is the TFmode hard register, LOW is 1 for the
2453   low 64bit of the register and 0 otherwise.
2454 */
2455rtx
2456gen_df_reg (reg, low)
2457     rtx reg;
2458     int low;
2459{
2460  int regno = REGNO (reg);
2461
2462  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2463    regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2464  return gen_rtx_REG (DFmode, regno);
2465}
2466
2467/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
2468   Unlike normal calls, TFmode operands are passed by reference.  It is
2469   assumed that no more than 3 operands are required.  */
2470
static void
emit_soft_tfmode_libcall (func_name, nargs, operands)
     const char *func_name;
     int nargs;
     rtx *operands;
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  if (nargs < 2 || nargs > 3)
    abort ();

  /* Marshal each operand: TFmode values are passed by address, so
     materialize them in memory first when necessary.  */
  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  if (GET_CODE (this_arg) == MEM)
	    /* Already in memory: pass its address directly.  */
	    this_arg = XEXP (this_arg, 0);
	  else if (CONSTANT_P (this_arg))
	    {
	      /* Constants go in the constant pool; pass that address.  */
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      /* Otherwise spill to a stack temporary.  */
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result: the callee writes through the address in arg[0],
	 so the libcall itself has a VOID result.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      /* If the result went via a stack temporary, copy it back out.  */
      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      /* Non-TFmode result (conversions to int/float): only the
	 two-operand form is meaningful here.  */
      if (nargs != 2)
	abort ();

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
2548
2549/* Expand soft-float TFmode calls to sparc abi routines.  */
2550
2551static void
2552emit_soft_tfmode_binop (code, operands)
2553     enum rtx_code code;
2554     rtx *operands;
2555{
2556  const char *func;
2557
2558  switch (code)
2559    {
2560    case PLUS:
2561      func = "_Qp_add";
2562      break;
2563    case MINUS:
2564      func = "_Qp_sub";
2565      break;
2566    case MULT:
2567      func = "_Qp_mul";
2568      break;
2569    case DIV:
2570      func = "_Qp_div";
2571      break;
2572    default:
2573      abort ();
2574    }
2575
2576  emit_soft_tfmode_libcall (func, 3, operands);
2577}
2578
2579static void
2580emit_soft_tfmode_unop (code, operands)
2581     enum rtx_code code;
2582     rtx *operands;
2583{
2584  const char *func;
2585
2586  switch (code)
2587    {
2588    case SQRT:
2589      func = "_Qp_sqrt";
2590      break;
2591    default:
2592      abort ();
2593    }
2594
2595  emit_soft_tfmode_libcall (func, 2, operands);
2596}
2597
static void
emit_soft_tfmode_cvt (code, operands)
     enum rtx_code code;
     rtx *operands;
{
  const char *func;

  /* Select the sparc ABI quad-float conversion routine from the
     conversion direction (CODE) and the narrow operand's mode.  */
  switch (code)
    {
    /* Widening a narrower float to TFmode: keyed off the source mode.  */
    case FLOAT_EXTEND:
      switch (GET_MODE (operands[1]))
	{
	case SFmode:
	  func = "_Qp_stoq";
	  break;
	case DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  abort ();
	}
      break;

    /* Narrowing TFmode to a smaller float: keyed off the result mode.  */
    case FLOAT_TRUNCATE:
      switch (GET_MODE (operands[0]))
	{
	case SFmode:
	  func = "_Qp_qtos";
	  break;
	case DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  abort ();
	}
      break;

    /* Signed integer -> TFmode.  */
    case FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_itoq";
	  break;
	case DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  abort ();
	}
      break;

    /* Unsigned integer -> TFmode.  */
    case UNSIGNED_FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_uitoq";
	  break;
	case DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  abort ();
	}
      break;

    /* TFmode -> signed integer.  */
    case FIX:
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoi";
	  break;
	case DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  abort ();
	}
      break;

    /* TFmode -> unsigned integer.  */
    case UNSIGNED_FIX:
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoui";
	  break;
	case DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  abort ();
	}
      break;

    default:
      abort ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
2697
2698/* Expand a hard-float tfmode operation.  All arguments must be in
2699   registers.  */
2700
2701static void
2702emit_hard_tfmode_operation (code, operands)
2703     enum rtx_code code;
2704     rtx *operands;
2705{
2706  rtx op, dest;
2707
2708  if (GET_RTX_CLASS (code) == '1')
2709    {
2710      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2711      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2712    }
2713  else
2714    {
2715      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2716      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2717      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2718			   operands[1], operands[2]);
2719    }
2720
2721  if (register_operand (operands[0], VOIDmode))
2722    dest = operands[0];
2723  else
2724    dest = gen_reg_rtx (GET_MODE (operands[0]));
2725
2726  emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2727
2728  if (dest != operands[0])
2729    emit_move_insn (operands[0], dest);
2730}
2731
2732void
2733emit_tfmode_binop (code, operands)
2734     enum rtx_code code;
2735     rtx *operands;
2736{
2737  if (TARGET_HARD_QUAD)
2738    emit_hard_tfmode_operation (code, operands);
2739  else
2740    emit_soft_tfmode_binop (code, operands);
2741}
2742
2743void
2744emit_tfmode_unop (code, operands)
2745     enum rtx_code code;
2746     rtx *operands;
2747{
2748  if (TARGET_HARD_QUAD)
2749    emit_hard_tfmode_operation (code, operands);
2750  else
2751    emit_soft_tfmode_unop (code, operands);
2752}
2753
2754void
2755emit_tfmode_cvt (code, operands)
2756     enum rtx_code code;
2757     rtx *operands;
2758{
2759  if (TARGET_HARD_QUAD)
2760    emit_hard_tfmode_operation (code, operands);
2761  else
2762    emit_soft_tfmode_cvt (code, operands);
2763}
2764
2765/* Return nonzero if a return peephole merging return with
2766   setting of output register is ok.  */
2767int
2768leaf_return_peephole_ok ()
2769{
2770  return (actual_fsize == 0);
2771}
2772
2773/* Return nonzero if a branch/jump/call instruction will be emitting
2774   nop into its delay slot.  */
2775
2776int
2777empty_delay_slot (insn)
2778     rtx insn;
2779{
2780  rtx seq;
2781
2782  /* If no previous instruction (should not happen), return true.  */
2783  if (PREV_INSN (insn) == NULL)
2784    return 1;
2785
2786  seq = NEXT_INSN (PREV_INSN (insn));
2787  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2788    return 0;
2789
2790  return 1;
2791}
2792
2793/* Return nonzero if TRIAL can go into the function epilogue's
2794   delay slot.  SLOT is the slot we are trying to fill.  */
2795
int
eligible_for_epilogue_delay (trial, slot)
     rtx trial;
     int slot;
{
  rtx pat, src;

  /* Only a single delay slot exists.  */
  if (slot >= 1)
    return 0;

  /* Only simple single-SET insns are candidates.  */
  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  /* The insn must fit the slot: exactly one word long.  */
  if (get_attr_length (trial) != 1)
    return 0;

  /* If there are any call-saved registers, we should scan TRIAL if it
     does not reference them.  For now just make it easy.  */
  if (num_gfregs)
    return 0;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (current_function_calls_eh_return)
    return 0;

  /* In the case of a true leaf function, anything can go into the delay slot.
     A delay slot only exists however if the frame size is zero, otherwise
     we will put an insn to adjust the stack after the return.  */
  if (current_function_uses_only_leaf_regs)
    {
      if (leaf_return_peephole_ok ())
	return ((get_attr_in_uncond_branch_delay (trial)
		 == IN_BRANCH_DELAY_TRUE));
      return 0;
    }

  pat = PATTERN (trial);

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.
     NOTE(review): hard regs 24 and up appear to be the %i registers
     followed by the FP registers -- confirm against this backend's
     register numbering.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (REGNO (SET_DEST (pat)) >= 32)
    {
      if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
	  && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
	return 1;
      return 0;
    }

  /* The set of insns matched here must agree precisely with the set of
     patterns paired with a RETURN in sparc.md.  */

  src = SET_SRC (pat);

  /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* This matches "*return_di".  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* This matches "*return_sf_no_fpu".  */
  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
	   && register_operand (src, SFmode))
    return 1;

  /* If we have return instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
	   && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
    return 1;

  /* This matches "*return_addsi".  */
  else if (GET_CODE (src) == PLUS
	   && arith_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode)
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 1), SImode)))
    return 1;

  /* This matches "*return_adddi".  */
  else if (GET_CODE (src) == PLUS
	   && arith_double_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode)
	   && (register_operand (XEXP (src, 0), DImode)
	       || register_operand (XEXP (src, 1), DImode)))
    return 1;

  /* This can match "*return_losum_[sd]i".
     Catch only some cases, so that return_losum* don't have
     to be too big.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
	        && immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
2920
2921/* Return nonzero if TRIAL can go into the sibling call
2922   delay slot.  */
2923
int
eligible_for_sibcall_delay (trial)
     rtx trial;
{
  rtx pat, src;

  /* Only simple single-SET insns are candidates.  */
  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  /* The insn must fit the slot: exactly one word long.  */
  if (get_attr_length (trial) != 1)
    return 0;

  pat = PATTERN (trial);

  if (current_function_uses_only_leaf_regs)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if ((TARGET_ARCH64 && ! TARGET_CM_MEDLOW) || flag_pic)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.
     NOTE(review): the 24..31 range appears to be the %i registers --
     confirm against this backend's register numbering.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) < 24
      || REGNO (SET_DEST (pat)) >= 32)
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  /* The remaining tests mirror the restore-compatible source patterns
     checked by eligible_for_epilogue_delay above.  */
  src = SET_SRC (pat);

  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
	   && register_operand (src, SFmode))
    return 1;

  else if (GET_CODE (src) == PLUS
	   && arith_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode)
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 1), SImode)))
    return 1;

  else if (GET_CODE (src) == PLUS
	   && arith_double_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode)
	   && (register_operand (XEXP (src, 0), DImode)
	       || register_operand (XEXP (src, 1), DImode)))
    return 1;

  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
	        && immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
3014
3015static int
3016check_return_regs (x)
3017     rtx x;
3018{
3019  switch (GET_CODE (x))
3020    {
3021    case REG:
3022      return IN_OR_GLOBAL_P (x);
3023
3024    case CONST_INT:
3025    case CONST_DOUBLE:
3026    case CONST:
3027    case SYMBOL_REF:
3028    case LABEL_REF:
3029    return 1;
3030
3031    case SET:
3032    case IOR:
3033    case AND:
3034    case XOR:
3035    case PLUS:
3036    case MINUS:
3037      if (check_return_regs (XEXP (x, 1)) == 0)
3038  return 0;
3039    case NOT:
3040    case NEG:
3041    case MEM:
3042      return check_return_regs (XEXP (x, 0));
3043
3044    default:
3045      return 0;
3046    }
3047
3048}
3049
3050/* Return 1 if TRIAL references only in and global registers.  */
3051int
3052eligible_for_return_delay (trial)
3053     rtx trial;
3054{
3055  if (GET_CODE (PATTERN (trial)) != SET)
3056    return 0;
3057
3058  return check_return_regs (PATTERN (trial));
3059}
3060
int
short_branch (uid1, uid2)
     int uid1, uid2;
{
  int offset = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);

  /* Accept anything within [-1023, 1022], keeping a few words of
     slop below the real hardware limit.  */
  return offset >= -1023 && offset <= 1022;
}
3073
3074/* Return non-zero if REG is not used after INSN.
3075   We assume REG is a reload reg, and therefore does
3076   not live past labels or calls or jumps.  */
3077int
3078reg_unused_after (reg, insn)
3079     rtx reg;
3080     rtx insn;
3081{
3082  enum rtx_code code, prev_code = UNKNOWN;
3083
3084  while ((insn = NEXT_INSN (insn)))
3085    {
3086      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
3087	return 1;
3088
3089      code = GET_CODE (insn);
3090      if (GET_CODE (insn) == CODE_LABEL)
3091	return 1;
3092
3093      if (GET_RTX_CLASS (code) == 'i')
3094	{
3095	  rtx set = single_set (insn);
3096	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
3097	  if (set && in_src)
3098	    return 0;
3099	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
3100	    return 1;
3101	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
3102	    return 0;
3103	}
3104      prev_code = code;
3105    }
3106  return 1;
3107}
3108
/* The table we use to reference PIC data.  */
static rtx global_offset_table;

/* The function we use to get at it.  */
static rtx get_pc_symbol;
/* Assembler name of that function; presumably filled in where the
   symbol is created (not visible in this chunk).  */
static char get_pc_symbol_name[256];
3115
3116/* Ensure that we are not using patterns that are not OK with PIC.  */
3117
int
check_pic (i)
     int i;
{
  switch (flag_pic)
    {
    case 1:
      /* Under -fpic, a bare SYMBOL_REF operand -- or a CONST that is
	 not the global offset table biased by a constant -- must never
	 reach recognition, since it would bypass the PIC register.  */
      if (GET_CODE (recog_data.operand[i]) == SYMBOL_REF
	  || (GET_CODE (recog_data.operand[i]) == CONST
	      && ! (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
		    && (XEXP (XEXP (recog_data.operand[i], 0), 0)
			== global_offset_table)
		    && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
			== CONST))))
	abort ();
      /* FALLTHRU */
    case 2:
    default:
      return 1;
    }
}
3138
3139/* Return true if X is an address which needs a temporary register when
3140   reloaded while generating PIC code.  */
3141
3142int
3143pic_address_needs_scratch (x)
3144     rtx x;
3145{
3146  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
3147  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3148      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3149      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3150      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3151    return 1;
3152
3153  return 0;
3154}
3155
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if non zero, otherwise we allocate register(s) as
   necessary.  Emits instructions as a side effect.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx reg;
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      /* A symbol: fetch its address from the GOT, i.e. load from
	 MEM (pic_offset_table_rtx + ADDRESS).  */
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
	{
	  /* New pseudos may not be created once reload has started.  */
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = ((reload_in_progress || reload_completed)
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (Pmode == SImode)
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	}
      else
	/* -fpic: the GOT offset fits in the load displacement.  */
	address = orig;

      pic_ref = gen_rtx_MEM (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
      current_function_uses_pic_offset_table = 1;
      /* The GOT entry itself never changes during a run.  */
      RTX_UNCHANGING_P (pic_ref) = 1;
      insn = emit_move_insn (reg, pic_ref);
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
				  REG_NOTES (insn));
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already in the pic_reg + X form: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both halves of (const (plus base offset)).  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else
	abort ();

      if (GET_CODE (offset) == CONST_INT)
	{
	  /* A small constant offset can stay an immediate; a large one
	     must be forced into a register.  */
	  if (SMALL_INT (offset))
	    return plus_constant (base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    abort ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? Why do we do this?  */
    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
       the register is live instead, in case it is eliminated.  */
    current_function_uses_pic_offset_table = 1;

  /* Anything else is assumed to already be position-independent.  */
  return orig;
}
3264
/* Emit special PIC prologues.  Loads the PIC register via the get_pc
   helper, emitting that helper into the text section the first time
   this is reached in a compilation.  */

void
load_pic_register ()
{
  /* Labels to get the PC in the prologue of this function.  */
  int orig_flag_pic = flag_pic;

  if (! flag_pic)
    abort ();

  /* If we haven't emitted the special get_pc helper function, do so now.  */
  if (get_pc_symbol_name[0] == 0)
    {
      int align;

      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
      text_section ();

      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
      if (align > 0)
	ASM_OUTPUT_ALIGN (asm_out_file, align);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
      /* Helper body: return, folding the caller's PC (%o7) into %l7
	 in the delay slot.  */
      fputs ("\tretl\n\tadd\t%o7, %l7, %l7\n", asm_out_file);
    }

  /* Initialize every time through, since we can't easily
     know this to be permanent.  */
  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
  /* NOTE(review): flag_pic is cleared around the get_pc expansion,
     presumably so the moves it generates are not themselves subjected
     to PIC legitimization -- confirm.  */
  flag_pic = 0;

  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
			 get_pc_symbol));

  flag_pic = orig_flag_pic;

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
}
3308
/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  NOTE(review): the mask arithmetic
   below assumes DESIRED is a power of two -- confirm at call sites.  */

int
mem_min_alignment (mem, desired)
     rtx mem;
     int desired;
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Try to decompose the address into base register + constant offset.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer based: only the (stack-bias adjusted)
	     constant offset decides the alignment.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
3383
3384
3385/* Vectors to keep interesting information about registers where it can easily
3386   be got.  We use to use the actual mode value as the bit number, but there
3387   are more than 32 modes now.  Instead we use two tables: one indexed by
3388   hard register number, and one indexed by mode.  */
3389
3390/* The purpose of sparc_mode_class is to shrink the range of modes so that
3391   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
3392   mapped into one sparc_mode_class mode.  */
3393
enum sparc_mode_class {
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

/* Modes for quad-float and double-float quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)

/* Modes for quad-float pair only quantities.  */
#define OF_ONLY_MODES (1 << (int) OF_MODE)

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | OF_ONLY_MODES)

/* Modes for quad-float pairs and double-floats (no single-floats).  */
#define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
3443
3444/* Value is 1 if register/mode pair is acceptable on sparc.
3445   The funny mixture of D and T modes is because integer operations
3446   do not specially operate on tetra quantities, so non-quad-aligned
3447   registers can hold quadword quantities (except %o4 and %i4 because
3448   they cross fixed registers).  */
3449
/* This points to either the 32 bit or the 64 bit version; selected
   by sparc_init_modes.  */
const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  /* Integer regs 0-31 (the %g, %o, %l and %i groups, one row each).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
3477
static const int hard_64bit_mode_classes[] = {
  /* Integer regs 0-31: on v9 every integer reg can hold a doubleword
     (D_MODES minimum).  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
3502
/* Per-mode sparc_mode_class bit, filled in by sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Hard register number to register class map, filled in by
   sparc_init_modes; backs REGNO_REG_CLASS.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3506
/* Initialize sparc_mode_class[], hard_regno_mode_classes and
   sparc_regno_reg_class[] according to the current target flags.  */

static void
sparc_init_modes ()
{
  int i;

  /* Map every machine mode to a sparc_mode_class bit, keyed off its
     mode class and size; unhandled sizes get 0 (never allowed).  */
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  if (GET_MODE_SIZE (i) <= 4)
	    sparc_mode_class[i] = 1 << (int) S_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) D_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) T_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  if (GET_MODE_SIZE (i) <= 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_CC:
	default:
	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
	     we must explicitly check for them here.  */
	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
	  else if (i == (int) CCmode || i == (int) CC_NOOVmode
		   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
	    sparc_mode_class[i] = 1 << (int) CC_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	}
    }

  /* Choose the per-register mode-class table for this word size.  */
  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
	sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32 || i == FRAME_POINTER_REGNUM)
	sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
	sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
	sparc_regno_reg_class[i] = FPCC_REGS;
      else
	sparc_regno_reg_class[i] = NO_REGS;
    }
}
3580
/* Save non call used registers from LOW to HIGH at BASE+OFFSET.
   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
   v9 int regs as it simplifies the code.  Returns the updated slot count.
   REAL_OFFSET is the frame-relative offset used for the dwarf2 CFI
   records.  */

static int
save_regs (file, low, high, base, offset, n_regs, real_offset)
     FILE *file;
     int low, high;
     const char *base;
     int offset;
     int n_regs;
     int real_offset;
{
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      /* v9 integer regs: one 64-bit stx per live register, still
	 counted as two 4-byte slots.  */
      for (i = low; i < high; i++)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      fprintf (file, "\tstx\t%s, [%s+%d]\n",
		       reg_names[i], base, offset + 4 * n_regs);
	      if (dwarf2out_do_frame ())
		dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
	      n_regs += 2;
	    }
	}
    }
  else
    {
      /* Work on even/odd pairs so std can be used when both members
	 of a pair are live.  A pair with either member live always
	 consumes both save slots.  */
      for (i = low; i < high; i += 2)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  /* Both live: one doubleword store, two CFI records
		     sharing a label.  */
		  fprintf (file, "\tstd\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    {
		      char *l = dwarf2out_cfi_label ();
		      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
		      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
		    }
		  n_regs += 2;
		}
	      else
		{
		  /* Only the even register of the pair is live.  */
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
		  n_regs += 2;
		}
	    }
	  else
	    {
	      /* Only the odd register of the pair is live; it is stored
		 into the second slot of the pair.  */
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i+1], base, offset + 4 * n_regs + 4);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
		  n_regs += 2;
		}
	    }
	}
    }
  return n_regs;
}
3652
3653/* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
3654
3655   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
3656   v9 int regs as it simplifies the code.  */
3657
3658static int
3659restore_regs (file, low, high, base, offset, n_regs)
3660     FILE *file;
3661     int low, high;
3662     const char *base;
3663     int offset;
3664     int n_regs;
3665{
3666  int i;
3667
3668  if (TARGET_ARCH64 && high <= 32)
3669    {
3670      for (i = low; i < high; i++)
3671	{
3672	  if (regs_ever_live[i] && ! call_used_regs[i])
3673	    fprintf (file, "\tldx\t[%s+%d], %s\n",
3674	      base, offset + 4 * n_regs, reg_names[i]),
3675	    n_regs += 2;
3676	}
3677    }
3678  else
3679    {
3680      for (i = low; i < high; i += 2)
3681	{
3682	  if (regs_ever_live[i] && ! call_used_regs[i])
3683	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3684	      fprintf (file, "\tldd\t[%s+%d], %s\n",
3685		       base, offset + 4 * n_regs, reg_names[i]),
3686	      n_regs += 2;
3687	    else
3688	      fprintf (file, "\tld\t[%s+%d], %s\n",
3689		       base, offset + 4 * n_regs, reg_names[i]),
3690	      n_regs += 2;
3691	  else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3692	    fprintf (file, "\tld\t[%s+%d], %s\n",
3693		     base, offset + 4 * n_regs + 4, reg_names[i+1]),
3694	    n_regs += 2;
3695	}
3696    }
3697  return n_regs;
3698}
3699
/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by output_function_prologue().
   Also sets num_gfregs, apparent_fsize and actual_fsize as side effects;
   returns the stack-aligned total frame size.  */

int
compute_frame_size (size, leaf_function)
     int size;
     int leaf_function;
{
  int n_regs = 0, i;
  int outgoing_args_size = (current_function_outgoing_args_size
			    + REG_PARM_STACK_SPACE (current_function_decl));

  /* N_REGS is the number of 4-byte regs saved thus far.  This applies
     even to v9 int regs to be consistent with save_regs/restore_regs.  */

  if (TARGET_ARCH64)
    {
      /* Each live call-saved global reg costs a full doubleword slot.  */
      for (i = 0; i < 8; i++)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  n_regs += 2;
    }
  else
    {
      /* Pairs are saved together (see save_regs), so a pair with
	 either member live costs both slots.  */
      for (i = 0; i < 8; i += 2)
	if ((regs_ever_live[i] && ! call_used_regs[i])
	    || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	  n_regs += 2;
    }

  /* Float regs 32..63 (plus 64..95 on v9) are likewise counted
     in pairs.  */
  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
    if ((regs_ever_live[i] && ! call_used_regs[i])
	|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
      n_regs += 2;

  /* Set up values for use in `function_epilogue'.  */
  num_gfregs = n_regs;

  if (leaf_function && n_regs == 0
      && size == 0 && current_function_outgoing_args_size == 0)
    {
      /* A leaf function with no locals, saved regs or outgoing args
	 needs no frame at all.  */
      actual_fsize = apparent_fsize = 0;
    }
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
      apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
      apparent_fsize += n_regs * 4;
      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
    }

  /* Make sure nothing can clobber our register windows.
     If a SAVE must be done, or there is a stack-local variable,
     the register window area must be allocated.
     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
  if (leaf_function == 0 || size > 0)
    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);

  return SPARC_STACK_ALIGN (actual_fsize);
}
3759
3760/* Build a (32 bit) big number in a register.  */
3761/* ??? We may be able to use the set macro here too.  */
3762
3763static void
3764build_big_number (file, num, reg)
3765     FILE *file;
3766     int num;
3767     const char *reg;
3768{
3769  if (num >= 0 || ! TARGET_ARCH64)
3770    {
3771      fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3772      if ((num & 0x3ff) != 0)
3773	fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3774    }
3775  else /* num < 0 && TARGET_ARCH64 */
3776    {
3777      /* Sethi does not sign extend, so we must use a little trickery
3778	 to use it for negative numbers.  Invert the constant before
3779	 loading it in, then use xor immediate to invert the loaded bits
3780	 (along with the upper 32 bits) to the desired constant.  This
3781	 works because the sethi and immediate fields overlap.  */
3782      int asize = num;
3783      int inv = ~asize;
3784      int low = -0x400 + (asize & 0x3FF);
3785
3786      fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3787	       inv, reg, reg, low, reg);
3788    }
3789}
3790
3791/* Output any necessary .register pseudo-ops.  */
3792void
3793sparc_output_scratch_registers (file)
3794     FILE *file ATTRIBUTE_UNUSED;
3795{
3796#ifdef HAVE_AS_REGISTER_PSEUDO_OP
3797  int i;
3798
3799  if (TARGET_ARCH32)
3800    return;
3801
3802  /* Check if %g[2367] were used without
3803     .register being printed for them already.  */
3804  for (i = 2; i < 8; i++)
3805    {
3806      if (regs_ever_live [i]
3807	  && ! sparc_hard_reg_printed [i])
3808	{
3809	  sparc_hard_reg_printed [i] = 1;
3810	  fprintf (file, "\t.register\t%%g%d, #scratch\n", i);
3811	}
3812      if (i == 3) i = 5;
3813    }
3814#endif
3815}
3816
3817/* This function generates the assembly code for function entry.
3818   FILE is a stdio stream to output the code to.
3819   SIZE is an int: how many units of temporary storage to allocate.
3820   Refer to the array `regs_ever_live' to determine which registers
3821   to save; `regs_ever_live[I]' is nonzero if register number I
3822   is ever used in the function.  This macro is responsible for
3823   knowing which registers should not be saved even if used.  */
3824
3825/* On SPARC, move-double insns between fpu and cpu need an 8-byte block
3826   of memory.  If any fpu reg is used in the function, we allocate
3827   such a block here, at the bottom of the frame, just in case it's needed.
3828
3829   If this function is a leaf procedure, then we may choose not
3830   to do a "save" insn.  The decision about whether or not
3831   to do this is made in regclass.c.  */
3832
3833static void
3834sparc_output_function_prologue (file, size)
3835     FILE *file;
3836     HOST_WIDE_INT size;
3837{
3838  if (TARGET_FLAT)
3839    sparc_flat_function_prologue (file, size);
3840  else
3841    sparc_nonflat_function_prologue (file, size,
3842				     current_function_uses_only_leaf_regs);
3843}
3844
/* Output code for the function prologue: allocate the frame, emit
   dwarf2 frame info, and save the call-saved registers.  */

static void
sparc_nonflat_function_prologue (file, size, leaf_function)
     FILE *file;
     HOST_WIDE_INT size;
     int leaf_function;
{
  sparc_output_scratch_registers (file);

  /* Need to use actual_fsize, since we are also allocating
     space for our callee (and our own register save area).  */
  actual_fsize = compute_frame_size (size, leaf_function);

  /* Leaf functions (no save insn) address the frame from %sp;
     others from %fp.  */
  if (leaf_function)
    {
      frame_base_name = "%sp";
      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
    }
  else
    {
      frame_base_name = "%fp";
      frame_base_offset = SPARC_STACK_BIAS;
    }

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);

  /* Allocate the frame: one immediate up to 4096, two instructions up
     to 8192, otherwise build the size in %g1 first.  */
  if (actual_fsize == 0)
    /* do nothing.  */ ;
  else if (! leaf_function)
    {
      if (actual_fsize <= 4096)
	fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
	}
    }
  else /* leaf function */
    {
      if (actual_fsize <= 4096)
	fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
	{
	  fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
	}
      else
	{
	  build_big_number (file, -actual_fsize, "%g1");
	  fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
	}
    }

  /* Describe the frame to the dwarf2 unwinder.  */
  if (dwarf2out_do_frame () && actual_fsize)
    {
      char *label = dwarf2out_cfi_label ();

      /* The canonical frame address refers to the top of the frame.  */
      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
				 : HARD_FRAME_POINTER_REGNUM),
			 frame_base_offset);

      if (! leaf_function)
	{
	  /* Note the register window save.  This tells the unwinder that
	     it needs to restore the window registers from the previous
	     frame's window save area at 0(cfa).  */
	  dwarf2out_window_save (label);

	  /* The return address (-8) is now in %i7.  */
	  dwarf2out_return_reg (label, 31);
	}
    }

  /* If doing anything with PIC, do it now.  */
  if (! flag_pic)
    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);

  /* Call saved registers are saved just above the outgoing argument area.  */
  if (num_gfregs)
    {
      int offset, real_offset, n_regs;
      const char *base;

      real_offset = -apparent_fsize;
      offset = -apparent_fsize + frame_base_offset;
      /* If the save area is out of range of a 13-bit signed immediate,
	 form its address in %g1 and save relative to that.  */
      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
	{
	  /* ??? This might be optimized a little as %g1 might already have a
	     value close enough that a single add insn will do.  */
	  /* ??? Although, all of this is probably only a temporary fix
	     because if %g1 can hold a function result, then
	     output_function_epilogue will lose (the result will get
	     clobbered).  */
	  build_big_number (file, offset, "%g1");
	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
	  base = "%g1";
	  offset = 0;
	}
      else
	{
	  base = frame_base_name;
	}

      n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
		 real_offset);
    }
}
3962
3963/* Output code to restore any call saved registers.  */
3964
3965static void
3966output_restore_regs (file, leaf_function)
3967     FILE *file;
3968     int leaf_function ATTRIBUTE_UNUSED;
3969{
3970  int offset, n_regs;
3971  const char *base;
3972
3973  offset = -apparent_fsize + frame_base_offset;
3974  if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
3975    {
3976      build_big_number (file, offset, "%g1");
3977      fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3978      base = "%g1";
3979      offset = 0;
3980    }
3981  else
3982    {
3983      base = frame_base_name;
3984    }
3985
3986  n_regs = restore_regs (file, 0, 8, base, offset, 0);
3987  restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
3988}
3989
3990/* This function generates the assembly code for function exit,
3991   on machines that need it.
3992
3993   The function epilogue should not depend on the current stack pointer!
3994   It should use the frame pointer only.  This is mandatory because
3995   of alloca; we also take advantage of it to omit stack adjustments
3996   before returning.  */
3997
3998static void
3999sparc_output_function_epilogue (file, size)
4000     FILE *file;
4001     HOST_WIDE_INT size;
4002{
4003  if (TARGET_FLAT)
4004    sparc_flat_function_epilogue (file, size);
4005  else
4006    sparc_nonflat_function_epilogue (file, size,
4007				     current_function_uses_only_leaf_regs);
4008}
4009
4010/* Output code for the function epilogue.  */
4011
4012static void
4013sparc_nonflat_function_epilogue (file, size, leaf_function)
4014     FILE *file;
4015     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4016     int leaf_function;
4017{
4018  const char *ret;
4019
4020  if (current_function_epilogue_delay_list == 0)
4021    {
4022      /* If code does not drop into the epilogue, we need
4023	 do nothing except output pending case vectors.
4024
4025	 We have to still output a dummy nop for the sake of
4026	 sane backtraces.  Otherwise, if the last two instructions
4027	 of a function were call foo; dslot; this can make the return
4028	 PC of foo (ie. address of call instruction plus 8) point to
4029	 the first instruction in the next function.  */
4030      rtx insn;
4031
4032      fputs("\tnop\n", file);
4033
4034      insn = get_last_insn ();
4035      if (GET_CODE (insn) == NOTE)
4036	      insn = prev_nonnote_insn (insn);
4037      if (insn && GET_CODE (insn) == BARRIER)
4038	      goto output_vectors;
4039    }
4040
4041  if (num_gfregs)
4042    output_restore_regs (file, leaf_function);
4043
4044  /* Work out how to skip the caller's unimp instruction if required.  */
4045  if (leaf_function)
4046    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
4047  else
4048    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");
4049
4050  if (! leaf_function)
4051    {
4052      if (current_function_calls_eh_return)
4053	{
4054	  if (current_function_epilogue_delay_list)
4055	    abort ();
4056	  if (SKIP_CALLERS_UNIMP_P)
4057	    abort ();
4058
4059	  fputs ("\trestore\n\tretl\n\tadd\t%sp, %g1, %sp\n", file);
4060	}
4061      /* If we wound up with things in our delay slot, flush them here.  */
4062      else if (current_function_epilogue_delay_list)
4063	{
4064	  rtx delay = PATTERN (XEXP (current_function_epilogue_delay_list, 0));
4065
4066	  if (TARGET_V9 && ! epilogue_renumber (&delay, 1))
4067	    {
4068	      epilogue_renumber (&delay, 0);
4069	      fputs (SKIP_CALLERS_UNIMP_P
4070		     ? "\treturn\t%i7+12\n"
4071		     : "\treturn\t%i7+8\n", file);
4072	      final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
4073			       file, 1, 0, 0);
4074	    }
4075	  else
4076	    {
4077	      rtx insn, src;
4078
4079	      if (GET_CODE (delay) != SET)
4080		abort();
4081
4082	      src = SET_SRC (delay);
4083	      if (GET_CODE (src) == ASHIFT)
4084		{
4085		  if (XEXP (src, 1) != const1_rtx)
4086		    abort();
4087		  SET_SRC (delay)
4088		    = gen_rtx_PLUS (GET_MODE (src), XEXP (src, 0),
4089				    XEXP (src, 0));
4090		}
4091
4092	      insn = gen_rtx_PARALLEL (VOIDmode,
4093				       gen_rtvec (2, delay,
4094						  gen_rtx_RETURN (VOIDmode)));
4095	      insn = emit_jump_insn (insn);
4096
4097	      sparc_emitting_epilogue = true;
4098	      final_scan_insn (insn, file, 1, 0, 1);
4099	      sparc_emitting_epilogue = false;
4100	    }
4101	}
4102      else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
4103	fputs ("\treturn\t%i7+8\n\tnop\n", file);
4104      else
4105	fprintf (file, "\t%s\n\trestore\n", ret);
4106    }
4107  /* All of the following cases are for leaf functions.  */
4108  else if (current_function_calls_eh_return)
4109    abort ();
4110  else if (current_function_epilogue_delay_list)
4111    {
4112      /* eligible_for_epilogue_delay_slot ensures that if this is a
4113	 leaf function, then we will only have insn in the delay slot
4114	 if the frame size is zero, thus no adjust for the stack is
4115	 needed here.  */
4116      if (actual_fsize != 0)
4117	abort ();
4118      fprintf (file, "\t%s\n", ret);
4119      final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
4120		       file, 1, 0, 1);
4121    }
4122  /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
4123	 avoid generating confusing assembly language output.  */
4124  else if (actual_fsize == 0)
4125    fprintf (file, "\t%s\n\tnop\n", ret);
4126  else if (actual_fsize <= 4096)
4127    fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
4128  else if (actual_fsize <= 8192)
4129    fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
4130	     ret, actual_fsize - 4096);
4131  else if ((actual_fsize & 0x3ff) == 0)
4132    fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
4133	     actual_fsize, ret);
4134  else
4135    fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
4136	     actual_fsize, actual_fsize, ret);
4137
4138 output_vectors:
4139  sparc_output_deferred_case_vectors ();
4140}
4141
4142/* Output a sibling call.  */
4143
4144const char *
4145output_sibcall (insn, call_operand)
4146     rtx insn, call_operand;
4147{
4148  int leaf_regs = current_function_uses_only_leaf_regs;
4149  rtx operands[3];
4150  int delay_slot = dbr_sequence_length () > 0;
4151
4152  if (num_gfregs)
4153    {
4154      /* Call to restore global regs might clobber
4155	 the delay slot. Instead of checking for this
4156	 output the delay slot now.  */
4157      if (delay_slot)
4158	{
4159	  rtx delay = NEXT_INSN (insn);
4160
4161	  if (! delay)
4162	    abort ();
4163
4164	  final_scan_insn (delay, asm_out_file, 1, 0, 1);
4165	  PATTERN (delay) = gen_blockage ();
4166	  INSN_CODE (delay) = -1;
4167	  delay_slot = 0;
4168	}
4169      output_restore_regs (asm_out_file, leaf_regs);
4170    }
4171
4172  operands[0] = call_operand;
4173
4174  if (leaf_regs)
4175    {
4176#ifdef HAVE_AS_RELAX_OPTION
4177      /* If as and ld are relaxing tail call insns into branch always,
4178	 use or %o7,%g0,X; call Y; or X,%g0,%o7 always, so that it can
4179	 be optimized.  With sethi/jmpl as nor ld has no easy way how to
4180	 find out if somebody does not branch between the sethi and jmpl.  */
4181      int spare_slot = 0;
4182#else
4183      int spare_slot = ((TARGET_ARCH32 || TARGET_CM_MEDLOW) && ! flag_pic);
4184#endif
4185      int size = 0;
4186
4187      if ((actual_fsize || ! spare_slot) && delay_slot)
4188	{
4189	  rtx delay = NEXT_INSN (insn);
4190
4191	  if (! delay)
4192	    abort ();
4193
4194	  final_scan_insn (delay, asm_out_file, 1, 0, 1);
4195	  PATTERN (delay) = gen_blockage ();
4196	  INSN_CODE (delay) = -1;
4197	  delay_slot = 0;
4198	}
4199      if (actual_fsize)
4200	{
4201	  if (actual_fsize <= 4096)
4202	    size = actual_fsize;
4203	  else if (actual_fsize <= 8192)
4204	    {
4205	      fputs ("\tsub\t%sp, -4096, %sp\n", asm_out_file);
4206	      size = actual_fsize - 4096;
4207	    }
4208	  else if ((actual_fsize & 0x3ff) == 0)
4209	    fprintf (asm_out_file,
4210		     "\tsethi\t%%hi(%d), %%g1\n\tadd\t%%sp, %%g1, %%sp\n",
4211		     actual_fsize);
4212	  else
4213	    {
4214	      fprintf (asm_out_file,
4215		       "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n",
4216		       actual_fsize, actual_fsize);
4217	      fputs ("\tadd\t%%sp, %%g1, %%sp\n", asm_out_file);
4218	    }
4219	}
4220      if (spare_slot)
4221	{
4222	  output_asm_insn ("sethi\t%%hi(%a0), %%g1", operands);
4223	  output_asm_insn ("jmpl\t%%g1 + %%lo(%a0), %%g0", operands);
4224	  if (size)
4225	    fprintf (asm_out_file, "\t sub\t%%sp, -%d, %%sp\n", size);
4226	  else if (! delay_slot)
4227	    fputs ("\t nop\n", asm_out_file);
4228	}
4229      else
4230	{
4231	  if (size)
4232	    fprintf (asm_out_file, "\tsub\t%%sp, -%d, %%sp\n", size);
4233	  /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4234	     it into branch if possible.  */
4235	  output_asm_insn ("or\t%%o7, %%g0, %%g1", operands);
4236	  output_asm_insn ("call\t%a0, 0", operands);
4237	  output_asm_insn (" or\t%%g1, %%g0, %%o7", operands);
4238	}
4239      return "";
4240    }
4241
4242  output_asm_insn ("call\t%a0, 0", operands);
4243  if (delay_slot)
4244    {
4245      rtx delay = NEXT_INSN (insn), pat;
4246
4247      if (! delay)
4248	abort ();
4249
4250      pat = PATTERN (delay);
4251      if (GET_CODE (pat) != SET)
4252	abort ();
4253
4254      operands[0] = SET_DEST (pat);
4255      pat = SET_SRC (pat);
4256      switch (GET_CODE (pat))
4257	{
4258	case PLUS:
4259	  operands[1] = XEXP (pat, 0);
4260	  operands[2] = XEXP (pat, 1);
4261	  output_asm_insn (" restore %r1, %2, %Y0", operands);
4262	  break;
4263	case LO_SUM:
4264	  operands[1] = XEXP (pat, 0);
4265	  operands[2] = XEXP (pat, 1);
4266	  output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4267	  break;
4268	case ASHIFT:
4269	  operands[1] = XEXP (pat, 0);
4270	  output_asm_insn (" restore %r1, %r1, %Y0", operands);
4271	  break;
4272	default:
4273	  operands[1] = pat;
4274	  output_asm_insn (" restore %%g0, %1, %Y0", operands);
4275	  break;
4276	}
4277      PATTERN (delay) = gen_blockage ();
4278      INSN_CODE (delay) = -1;
4279    }
4280  else
4281    fputs ("\t restore\n", asm_out_file);
4282  return "";
4283}
4284
4285/* Functions for handling argument passing.
4286
4287   For v8 the first six args are normally in registers and the rest are
4288   pushed.  Any arg that starts within the first 6 words is at least
4289   partially passed in a register unless its data type forbids.
4290
4291   For v9, the argument registers are laid out as an array of 16 elements
4292   and arguments are added sequentially.  The first 6 int args and up to the
4293   first 16 fp args (depending on size) are passed in regs.
4294
4295   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
4296   ----    -----   --------   -----   ------------------   ------   -----------
4297    15   [SP+248]              %f31       %f30,%f31         %d30
4298    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
4299    13   [SP+232]              %f27       %f26,%f27         %d26
4300    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
4301    11   [SP+216]              %f23       %f22,%f23         %d22
4302    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
4303     9   [SP+200]              %f19       %f18,%f19         %d18
4304     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
4305     7   [SP+184]              %f15       %f14,%f15         %d14
4306     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
4307     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
4308     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
4309     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
4310     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
4311     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
4312     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
4313
4314   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4315
4316   Integral arguments are always passed as 64 bit quantities appropriately
4317   extended.
4318
4319   Passing of floating point values is handled as follows.
4320   If a prototype is in scope:
4321     If the value is in a named argument (i.e. not a stdarg function or a
4322     value not part of the `...') then the value is passed in the appropriate
4323     fp reg.
4324     If the value is part of the `...' and is passed in one of the first 6
4325     slots then the value is passed in the appropriate int reg.
4326     If the value is part of the `...' and is not passed in one of the first 6
4327     slots then the value is passed in memory.
4328   If a prototype is not in scope:
4329     If the value is one of the first 6 arguments the value is passed in the
4330     appropriate integer reg and the appropriate fp reg.
4331     If the value is not one of the first 6 arguments the value is passed in
4332     the appropriate fp reg and in memory.
4333   */
4334
/* Maximum number of int regs for args.  */
#define SPARC_INT_ARG_MAX 6
/* Maximum number of fp regs for args.  */
#define SPARC_FP_ARG_MAX 16

/* Round SIZE (in bytes) up to a whole number of argument words.  */
#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4341
4342/* Handle the INIT_CUMULATIVE_ARGS macro.
4343   Initialize a variable CUM of type CUMULATIVE_ARGS
4344   for a call to a function whose data type is FNTYPE.
4345   For a library call, FNTYPE is 0.  */
4346
4347void
4348init_cumulative_args (cum, fntype, libname, indirect)
4349     CUMULATIVE_ARGS *cum;
4350     tree fntype;
4351     rtx libname ATTRIBUTE_UNUSED;
4352     int indirect ATTRIBUTE_UNUSED;
4353{
4354  cum->words = 0;
4355  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4356  cum->libcall_p = fntype == 0;
4357}
4358
4359/* Compute the slot number to pass an argument in.
4360   Returns the slot number or -1 if passing on the stack.
4361
4362   CUM is a variable of type CUMULATIVE_ARGS which gives info about
4363    the preceding args and about the function being called.
4364   MODE is the argument's machine mode.
4365   TYPE is the data type of the argument (as a tree).
4366    This is null for libcalls where that information may
4367    not be available.
4368   NAMED is nonzero if this argument is a named parameter
4369    (otherwise it is an extra parameter matching an ellipsis).
4370   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4371   *PREGNO records the register number to use if scalar type.
4372   *PPADDING records the amount of padding needed in words.  */
4373
static int
function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming_p;
     int *pregno;
     int *ppadding;
{
  /* Incoming args are seen from the callee's register window, so they
     use a different base register than outgoing args.  */
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  int regno;

  *ppadding = 0;

  /* An addressable type must live in memory so its address can be taken.  */
  if (type != 0 && TREE_ADDRESSABLE (type))
    return -1;
  /* v8: an under-aligned BLKmode argument cannot go in registers.  */
  if (TARGET_ARCH32
      && type != 0 && mode == BLKmode
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  switch (mode)
    {
    case VOIDmode :
      /* MODE is VOIDmode when generating the actual call.
	 See emit_call_1.  */
      return -1;

    case QImode : case CQImode :
    case HImode : case CHImode :
    case SImode : case CSImode :
    case DImode : case CDImode :
    case TImode : case CTImode :
      /* Integral modes always use the int argument registers.  */
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;
      regno = regbase + slotno;
      break;

    case SFmode : case SCmode :
    case DFmode : case DCmode :
    case TFmode : case TCmode :
      if (TARGET_ARCH32)
	{
	  /* v8 passes fp values in the int registers.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else
	{
	  /* v9: 16-byte fp values must start on an even slot; record the
	     padding so the caller can advance past it.  */
	  if ((mode == TFmode || mode == TCmode)
	      && (slotno & 1) != 0)
	    slotno++, *ppadding = 1;
	  if (TARGET_FPU && named)
	    {
	      if (slotno >= SPARC_FP_ARG_MAX)
		return -1;
	      /* "* 2" because fp reg numbers count 4-byte quantities.  */
	      regno = SPARC_FP_ARG_FIRST + slotno * 2;
	      /* A single float sits in the second (odd) half of the slot.  */
	      if (mode == SFmode)
		regno++;
	    }
	  else
	    {
	      /* Unnamed (ellipsis) values or soft-float go in int regs.  */
	      if (slotno >= SPARC_INT_ARG_MAX)
		return -1;
	      regno = regbase + slotno;
	    }
	}
      break;

    case BLKmode :
      /* For sparc64, objects requiring 16 byte alignment get it.  */
      if (TARGET_ARCH64)
	{
	  if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
	    slotno++, *ppadding = 1;
	}

      if (TARGET_ARCH32
	  || (type && TREE_CODE (type) == UNION_TYPE))
	{
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else
	{
	  tree field;
	  int intregs_p = 0, fpregs_p = 0;
	  /* The ABI obviously doesn't specify how packed
	     structures are passed.  These are defined to be passed
	     in int regs if possible, otherwise memory.  */
	  int packed_p = 0;

	  /* First see what kinds of registers we need.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
		      && TARGET_FPU)
		    fpregs_p = 1;
		  else
		    intregs_p = 1;
		  if (DECL_PACKED (field))
		    packed_p = 1;
		}
	    }
	  if (packed_p || !named)
	    fpregs_p = 0, intregs_p = 1;

	  /* If all arg slots are filled, then must pass on stack.  */
	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
	    return -1;
	  /* If there are only int args and all int arg slots are filled,
	     then must pass on stack.  */
	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  /* Note that even if all int arg slots are filled, fp members may
	     still be passed in regs if such regs are available.
	     *PREGNO isn't set because there may be more than one, it's up
	     to the caller to compute them.  */
	  return slotno;
	}
      break;

    default :
      abort ();
    }

  *pregno = regno;
  return slotno;
}
4510
4511/* Handle recursive register counting for structure field layout.  */
4512
struct function_arg_record_value_parms
{
  rtx ret;			/* PARALLEL being filled in; NULL_RTX during
				   the initial register-counting pass.  */
  int slotno, named, regbase;	/* Starting arg slot, named-argument flag,
				   and first int arg register number.  */
  unsigned int nregs;		/* Registers counted/assigned so far.  */
  int intoffset;		/* Bit offset where the current run of
				   integer data began, or -1 if no run
				   is pending.  */
};
4520
4521static void function_arg_record_value_3
4522	PARAMS ((HOST_WIDE_INT, struct function_arg_record_value_parms *));
4523static void function_arg_record_value_2
4524	PARAMS ((tree, HOST_WIDE_INT,
4525		 struct function_arg_record_value_parms *));
4526static void function_arg_record_value_1
4527        PARAMS ((tree, HOST_WIDE_INT,
4528		 struct function_arg_record_value_parms *));
4529static rtx function_arg_record_value
4530	PARAMS ((tree, enum machine_mode, int, int, int));
4531
/* A subroutine of function_arg_record_value.  Traverse the structure
   recursively and determine how many registers will be required.  */
4534
static void
function_arg_record_value_1 (type, startbitpos, parms)
     tree type;
     HOST_WIDE_INT startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;

  /* The ABI obviously doesn't specify how packed structures are
     passed.  These are defined to be passed in int regs if possible,
     otherwise memory.  */
  int packed_p = 0;

  /* We need to compute how many registers are needed so we can
     allocate the PARALLEL but before we can do that we need to know
     whether there are any packed fields.  If there are, int regs are
     used regardless of whether there are fp values present.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  /* Compute how many registers we need.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0
	      && host_integerp (bit_position (field), 1))
	    bitpos += int_bit_position (field);

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
	  else if ((TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
		    || (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE
			&& (TREE_CODE (TREE_TYPE (TREE_TYPE (field)))
			    == REAL_TYPE)))
	           && TARGET_FPU
	           && ! packed_p
	           && parms->named)
	    {
	      /* An fp field ends any pending run of integer data; count
		 the int registers that run would occupy.  */
	      if (parms->intoffset != -1)
		{
		  int intslots, this_slotno;

		  intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
		    / BITS_PER_WORD;
		  this_slotno = parms->slotno + parms->intoffset
		    / BITS_PER_WORD;

		  /* Clamp to the available int arg slots.  */
		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
		  intslots = MAX (intslots, 0);
		  parms->nregs += intslots;
		  parms->intoffset = -1;
		}

	      /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
		 If it wasn't true we wouldn't be here.  */
	      parms->nregs += 1;
	      /* A complex value needs a second fp register for the
		 imaginary part.  */
	      if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
		parms->nregs += 1;
	    }
	  else
	    {
	      /* Integer data: remember where the run started, if it
		 hasn't already.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
4613
4614/* A subroutine of function_arg_record_value.  Assign the bits of the
4615   structure between parms->intoffset and bitpos to integer registers.  */
4616
static void
function_arg_record_value_3 (bitpos, parms)
     HOST_WIDE_INT bitpos;
     struct function_arg_record_value_parms *parms;
{
  enum machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_slotno, intslots, intoffset;
  rtx reg;

  /* No pending integer run: nothing to do.  */
  if (parms->intoffset == -1)
    return;

  intoffset = parms->intoffset;
  parms->intoffset = -1;

  /* Round the run out to word boundaries on both ends and convert it
     into a slot count.  */
  startbit = intoffset & -BITS_PER_WORD;
  endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
  intslots = (endbit - startbit) / BITS_PER_WORD;
  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;

  /* Anything beyond the last int arg slot goes on the stack.  */
  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
  if (intslots <= 0)
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
			  MODE_INT, 0);
  else
    mode = word_mode;

  /* Switch from bit to byte offsets for the EXPR_LIST entries.  */
  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = parms->regbase + this_slotno;
      reg = gen_rtx_REG (mode, regno);
      XVECEXP (parms->ret, 0, parms->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_slotno += 1;
      /* Advance to the next word boundary.  */
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      parms->nregs += 1;
      intslots -= 1;
    }
  while (intslots > 0);
}
4669
4670/* A subroutine of function_arg_record_value.  Traverse the structure
4671   recursively and assign bits to floating point registers.  Track which
4672   bits in between need integer registers; invoke function_arg_record_value_3
4673   to make that happen.  */
4674
static void
function_arg_record_value_2 (type, startbitpos, parms)
     tree type;
     HOST_WIDE_INT startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;
  int packed_p = 0;

  /* Packed structures use int regs only; detect that before assigning
     anything (mirrors function_arg_record_value_1).  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0
	      && host_integerp (bit_position (field), 1))
	    bitpos += int_bit_position (field);

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
	  else if ((TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
		    || (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE
			&& (TREE_CODE (TREE_TYPE (TREE_TYPE (field)))
			    == REAL_TYPE)))
	           && TARGET_FPU
	           && ! packed_p
	           && parms->named)
	    {
	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
	      int regno;
	      enum machine_mode mode = DECL_MODE (field);
	      rtx reg;

	      /* Flush any pending integer run before this fp field.  */
	      function_arg_record_value_3 (bitpos, parms);
	      /* "* 2" because fp reg numbers count 4-byte quantities; a
		 4-byte value in the second half of a slot takes the odd
		 register.  */
	      regno = SPARC_FP_ARG_FIRST + this_slotno * 2
		      + ((mode == SFmode || mode == SCmode)
			 && (bitpos & 32) != 0);
	      /* Complex values are emitted as two registers of the
		 component mode.  */
	      switch (mode)
		{
		case SCmode: mode = SFmode; break;
		case DCmode: mode = DFmode; break;
		case TCmode: mode = TFmode; break;
		default: break;
		}
	      reg = gen_rtx_REG (mode, regno);
	      XVECEXP (parms->ret, 0, parms->nregs)
		= gen_rtx_EXPR_LIST (VOIDmode, reg,
			   GEN_INT (bitpos / BITS_PER_UNIT));
	      parms->nregs += 1;
	      if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
		{
		  /* Imaginary part goes in the following fp register.  */
		  regno += GET_MODE_SIZE (mode) / 4;
	  	  reg = gen_rtx_REG (mode, regno);
		  XVECEXP (parms->ret, 0, parms->nregs)
		    = gen_rtx_EXPR_LIST (VOIDmode, reg,
			GEN_INT ((bitpos + GET_MODE_BITSIZE (mode))
				 / BITS_PER_UNIT));
		  parms->nregs += 1;
		}
	    }
	  else
	    {
	      /* Integer data: start a new run if none is pending.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
4755
4756/* Used by function_arg and function_value to implement the complex
4757   Sparc64 structure calling conventions.  */
4758
static rtx
function_arg_record_value (type, mode, slotno, named, regbase)
     tree type;
     enum machine_mode mode;
     int slotno, named, regbase;
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  struct function_arg_record_value_parms parms;
  unsigned int nregs;

  parms.ret = NULL_RTX;
  parms.slotno = slotno;
  parms.named = named;
  parms.regbase = regbase;

  /* Compute how many registers we need.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_1 (type, 0, &parms);

  /* Account for integer data trailing the last fp field.  */
  if (parms.intoffset != -1)
    {
      unsigned int startbit, endbit;
      int intslots, this_slotno;

      startbit = parms.intoffset & -BITS_PER_WORD;
      endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
      intslots = (endbit - startbit) / BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;

      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
      intslots = MAX (intslots, 0);

      parms.nregs += intslots;
    }
  nregs = parms.nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}
      else
	{
	  /* ??? C++ has structures with no fields, and yet a size.  Give up
	     for now and pass everything back in integer registers.  */
	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	}
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }
  if (nregs == 0)
    abort ();

  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));

  /* Fill in the entries.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_2 (type, 0, &parms);
  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);

  /* The counting and filling passes must agree exactly.  */
  if (parms.nregs != nregs)
    abort ();

  return parms.ret;
}
4833
4834/* Handle the FUNCTION_ARG macro.
4835   Determine where to put an argument to a function.
4836   Value is zero to push the argument on the stack,
4837   or a hard register in which to store the argument.
4838
4839   CUM is a variable of type CUMULATIVE_ARGS which gives info about
4840    the preceding args and about the function being called.
4841   MODE is the argument's machine mode.
4842   TYPE is the data type of the argument (as a tree).
4843    This is null for libcalls where that information may
4844    not be available.
4845   NAMED is nonzero if this argument is a named parameter
4846    (otherwise it is an extra parameter matching an ellipsis).
4847   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
4848
4849rtx
4850function_arg (cum, mode, type, named, incoming_p)
4851     const CUMULATIVE_ARGS *cum;
4852     enum machine_mode mode;
4853     tree type;
4854     int named;
4855     int incoming_p;
4856{
4857  int regbase = (incoming_p
4858		 ? SPARC_INCOMING_INT_ARG_FIRST
4859		 : SPARC_OUTGOING_INT_ARG_FIRST);
4860  int slotno, regno, padding;
4861  rtx reg;
4862
4863  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
4864				&regno, &padding);
4865
4866  if (slotno == -1)
4867    return 0;
4868
4869  if (TARGET_ARCH32)
4870    {
4871      reg = gen_rtx_REG (mode, regno);
4872      return reg;
4873    }
4874
4875  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
4876     but also have the slot allocated for them.
4877     If no prototype is in scope fp values in register slots get passed
4878     in two places, either fp regs and int regs or fp regs and memory.  */
4879  if ((GET_MODE_CLASS (mode) == MODE_FLOAT
4880       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4881      && SPARC_FP_REG_P (regno))
4882    {
4883      reg = gen_rtx_REG (mode, regno);
4884      if (cum->prototype_p || cum->libcall_p)
4885	{
4886	  /* "* 2" because fp reg numbers are recorded in 4 byte
4887	     quantities.  */
4888#if 0
4889	  /* ??? This will cause the value to be passed in the fp reg and
4890	     in the stack.  When a prototype exists we want to pass the
4891	     value in the reg but reserve space on the stack.  That's an
4892	     optimization, and is deferred [for a bit].  */
4893	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
4894	    return gen_rtx_PARALLEL (mode,
4895			    gen_rtvec (2,
4896				       gen_rtx_EXPR_LIST (VOIDmode,
4897						NULL_RTX, const0_rtx),
4898				       gen_rtx_EXPR_LIST (VOIDmode,
4899						reg, const0_rtx)));
4900	  else
4901#else
4902	  /* ??? It seems that passing back a register even when past
4903	     the area declared by REG_PARM_STACK_SPACE will allocate
4904	     space appropriately, and will not copy the data onto the
4905	     stack, exactly as we desire.
4906
4907	     This is due to locate_and_pad_parm being called in
4908	     expand_call whenever reg_parm_stack_space > 0, which
4909	     while benefical to our example here, would seem to be
4910	     in error from what had been intended.  Ho hum...  -- r~ */
4911#endif
4912	    return reg;
4913	}
4914      else
4915	{
4916	  rtx v0, v1;
4917
4918	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
4919	    {
4920	      int intreg;
4921
4922	      /* On incoming, we don't need to know that the value
4923		 is passed in %f0 and %i0, and it confuses other parts
4924		 causing needless spillage even on the simplest cases.  */
4925	      if (incoming_p)
4926		return reg;
4927
4928	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
4929			+ (regno - SPARC_FP_ARG_FIRST) / 2);
4930
4931	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
4932	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
4933				      const0_rtx);
4934	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
4935	    }
4936	  else
4937	    {
4938	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
4939	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
4940	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
4941	    }
4942	}
4943    }
4944  else if (type && TREE_CODE (type) == RECORD_TYPE)
4945    {
4946      /* Structures up to 16 bytes in size are passed in arg slots on the
4947	 stack and are promoted to registers where possible.  */
4948
4949      if (int_size_in_bytes (type) > 16)
4950	abort (); /* shouldn't get here */
4951
4952      return function_arg_record_value (type, mode, slotno, named, regbase);
4953    }
4954  else if (type && TREE_CODE (type) == UNION_TYPE)
4955    {
4956      enum machine_mode mode;
4957      int bytes = int_size_in_bytes (type);
4958
4959      if (bytes > 16)
4960	abort ();
4961
4962      mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4963      reg = gen_rtx_REG (mode, regno);
4964    }
4965  else
4966    {
4967      /* Scalar or complex int.  */
4968      reg = gen_rtx_REG (mode, regno);
4969    }
4970
4971  return reg;
4972}
4973
4974/* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
4975   For an arg passed partly in registers and partly in memory,
4976   this is the number of registers used.
4977   For args passed entirely in registers or entirely in memory, zero.
4978
4979   Any arg that starts in the first 6 regs but won't entirely fit in them
4980   needs partial registers on v8.  On v9, structures with integer
4981   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4982   values that begin in the last fp reg [where "last fp reg" varies with the
4983   mode] will be split between that reg and memory.  */
4984
int
function_arg_partial_nregs (cum, mode, type, named)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int slotno, regno, padding;

  /* We pass 0 for incoming_p here, it doesn't matter.  */
  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);

  /* Entirely on the stack: no partial registers.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* v8: an arg whose words run past the last int reg slot is split
	 between registers and memory.  */
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > NPARM_REGS (SImode))
	return NPARM_REGS (SImode) - slotno;
      return 0;
    }
  else
    {
      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);
	  int align = TYPE_ALIGN (type);

	  /* 16-byte-aligned aggregates start on an even slot.  */
	  if (align == 16)
	    slotno += slotno & 1;
	  /* A 9..16 byte aggregate starting in the last int slot splits
	     between that register and memory.  */
	  if (size > 8 && size <= 16
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return 1;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! TARGET_FPU))
	{
	  /* Complex ints (and complex fp without an FPU) use int regs.  */
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    {
	      slotno += slotno & 1;
	      if (slotno == SPARC_INT_ARG_MAX - 2)
		return 1;
	    }
	  else
	    {
	      if (slotno == SPARC_INT_ARG_MAX - 1)
		return 1;
	    }
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    slotno += slotno & 1;
	  /* A complex fp value beginning in the last fp reg splits
	     between that reg and memory.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return 1;
	}
      return 0;
    }
}
5049
5050/* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
5051   !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
5052   quad-precision floats by invisible reference.
5053   v9: Aggregates greater than 16 bytes are passed by reference.
5054   For Pascal, also pass arrays by reference.  */
5055
5056int
5057function_arg_pass_by_reference (cum, mode, type, named)
5058     const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
5059     enum machine_mode mode;
5060     tree type;
5061     int named ATTRIBUTE_UNUSED;
5062{
5063  if (TARGET_ARCH32)
5064    {
5065      return ((type && AGGREGATE_TYPE_P (type))
5066	      || mode == TFmode || mode == TCmode);
5067    }
5068  else
5069    {
5070      return ((type && TREE_CODE (type) == ARRAY_TYPE)
5071	      /* Consider complex values as aggregates, so care for TCmode.  */
5072	      || GET_MODE_SIZE (mode) > 16
5073	      || (type
5074		  && AGGREGATE_TYPE_P (type)
5075		  && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16));
5076    }
5077}
5078
5079/* Handle the FUNCTION_ARG_ADVANCE macro.
5080   Update the data in CUM to advance over an argument
5081   of mode MODE and data type TYPE.
5082   TYPE is null for libcalls where that information may not be available.  */
5083
5084void
5085function_arg_advance (cum, mode, type, named)
5086     CUMULATIVE_ARGS *cum;
5087     enum machine_mode mode;
5088     tree type;
5089     int named;
5090{
5091  int slotno, regno, padding;
5092
5093  /* We pass 0 for incoming_p here, it doesn't matter.  */
5094  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5095
5096  /* If register required leading padding, add it.  */
5097  if (slotno != -1)
5098    cum->words += padding;
5099
5100  if (TARGET_ARCH32)
5101    {
5102      cum->words += (mode != BLKmode
5103		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5104		     : ROUND_ADVANCE (int_size_in_bytes (type)));
5105    }
5106  else
5107    {
5108      if (type && AGGREGATE_TYPE_P (type))
5109	{
5110	  int size = int_size_in_bytes (type);
5111
5112	  if (size <= 8)
5113	    ++cum->words;
5114	  else if (size <= 16)
5115	    cum->words += 2;
5116	  else /* passed by reference */
5117	    ++cum->words;
5118	}
5119      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5120	{
5121	  cum->words += 2;
5122	}
5123      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5124	{
5125	  cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
5126	}
5127      else
5128	{
5129	  cum->words += (mode != BLKmode
5130			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5131			 : ROUND_ADVANCE (int_size_in_bytes (type)));
5132	}
5133    }
5134}
5135
5136/* Handle the FUNCTION_ARG_PADDING macro.
5137   For the 64 bit ABI structs are always stored left shifted in their
5138   argument slot.  */
5139
5140enum direction
5141function_arg_padding (mode, type)
5142     enum machine_mode mode;
5143     tree type;
5144{
5145  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5146    return upward;
5147
5148  /* This is the default definition.  */
5149  return (! BYTES_BIG_ENDIAN
5150	  ? upward
5151	  : ((mode == BLKmode
5152	      ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5153		 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
5154	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5155	     ? downward : upward));
5156}
5157
5158/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5159   For v9, function return values are subject to the same rules as arguments,
5160   except that up to 32-bytes may be returned in registers.  */
5161
rtx
function_value (type, mode, incoming_p)
     tree type;
     enum machine_mode mode;
     int incoming_p;
{
  int regno;
  /* The bases look crossed, but they are not: because of the register
     windows, the value coming back *in* to the caller appears in the
     caller's %o registers (the callee's %i registers seen through the
     window), and vice versa -- NOTE(review): confirm against the
     FUNCTION_VALUE / FUNCTION_OUTGOING_VALUE macro definitions.  */
  int regbase = (incoming_p
		 ? SPARC_OUTGOING_INT_ARG_FIRST
		 : SPARC_INCOMING_INT_ARG_FIRST);

  if (TARGET_ARCH64 && type)
    {
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* Structures up to 32 bytes in size are passed in registers,
	     promoted to fp registers where possible.  */

	  if (int_size_in_bytes (type) > 32)
	    abort (); /* shouldn't get here */

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT bytes = int_size_in_bytes (type);

	  if (bytes > 32)
	    abort ();

	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  /* v9 widens sub-word scalar return values to a full 64-bit word.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
      && type && ! AGGREGATE_TYPE_P (type))
    mode = DImode;

  if (incoming_p)
    regno = BASE_RETURN_VALUE_REG (mode);
  else
    regno = BASE_OUTGOING_VALUE_REG (mode);

  return gen_rtx_REG (mode, regno);
}
5211
5212/* Do what is necessary for `va_start'.  We look at the current function
5213   to determine if stdarg or varargs is used and return the address of
5214   the first unnamed parameter.  */
5215
5216rtx
5217sparc_builtin_saveregs ()
5218{
5219  int first_reg = current_function_args_info.words;
5220  rtx address;
5221  int regno;
5222
5223  for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
5224    emit_move_insn (gen_rtx_MEM (word_mode,
5225				 gen_rtx_PLUS (Pmode,
5226					       frame_pointer_rtx,
5227					       GEN_INT (FIRST_PARM_OFFSET (0)
5228							+ (UNITS_PER_WORD
5229							   * regno)))),
5230		    gen_rtx_REG (word_mode,
5231				 BASE_INCOMING_ARG_REG (word_mode) + regno));
5232
5233  address = gen_rtx_PLUS (Pmode,
5234			  frame_pointer_rtx,
5235			  GEN_INT (FIRST_PARM_OFFSET (0)
5236				   + UNITS_PER_WORD * first_reg));
5237
5238  return address;
5239}
5240
5241/* Implement `va_start' for varargs and stdarg.  */
5242
void
sparc_va_start (stdarg_p, valist, nextarg)
     int stdarg_p ATTRIBUTE_UNUSED;
     tree valist;
     rtx nextarg;
{
  /* Ignore the incoming NEXTARG: point the va_list at the register
     save area filled in by sparc_builtin_saveregs.  The stdarg flag
     is passed as 1 unconditionally, regardless of STDARG_P.  */
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (1, valist, nextarg);
}
5252
5253/* Implement `va_arg'.  */
5254
rtx
sparc_va_arg (valist, type)
     tree valist, type;
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  rtx addr_rtx;
  int indirect = 0;

  /* Round up sizeof(type) to a word.  */
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
  align = 0;

  if (TARGET_ARCH64)
    {
      /* Types requiring 16-byte alignment get a doubleword-pair slot.  */
      if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	align = 2 * UNITS_PER_WORD;

      if (AGGREGATE_TYPE_P (type))
	{
	  /* Aggregates larger than 16 bytes are passed by reference, so
	     what sits in the argument slot is a pointer.  */
	  if ((unsigned HOST_WIDE_INT) size > 16)
	    {
	      indirect = 1;
	      size = rsize = UNITS_PER_WORD;
	    }
	  /* SPARC v9 ABI states that structures up to 8 bytes in size are
	     given one 8 byte slot.  */
	  else if (size == 0)
	    size = rsize = UNITS_PER_WORD;
	  else
	    size = rsize;
	}
    }
  else
    {
      /* v8 passes aggregates and quad floats by invisible reference.  */
      if (AGGREGATE_TYPE_P (type)
	  || TYPE_MODE (type) == TFmode
	  || TYPE_MODE (type) == TCmode)
	{
	  indirect = 1;
	  size = rsize = UNITS_PER_WORD;
	}
    }

  /* Round the va_list pointer up to the slot alignment, if any.  */
  incr = valist;
  if (align)
    {
      incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
			 build_int_2 (align - 1, 0)));
      incr = fold (build (BIT_AND_EXPR, ptr_type_node, incr,
			  build_int_2 (-align, -1)));
    }

  /* ADDR is where the object itself lives; on big endian an object
     smaller than its slot sits at the slot's high-address end.  */
  addr = incr = save_expr (incr);
  if (BYTES_BIG_ENDIAN && size < rsize)
    {
      addr = fold (build (PLUS_EXPR, ptr_type_node, incr,
			  build_int_2 (rsize - size, 0)));
    }
  /* Advance the va_list pointer past the slot, with side effects.  */
  incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
		      build_int_2 (rsize, 0)));

  incr = build (MODIFY_EXPR, ptr_type_node, valist, incr);
  TREE_SIDE_EFFECTS (incr) = 1;
  expand_expr (incr, const0_rtx, VOIDmode, EXPAND_NORMAL);

  addr_rtx = expand_expr (addr, NULL, Pmode, EXPAND_NORMAL);

  /* If the address isn't aligned properly for the type,
     we may need to copy to a temporary.
     FIXME: This is inefficient.  Usually we can do this
     in registers.  */
  if (align == 0
      && TYPE_ALIGN (type) > BITS_PER_WORD
      && !indirect)
    {
      /* FIXME: We really need to specify that the temporary is live
	 for the whole function because expand_builtin_va_arg wants
	 the alias set to be get_varargs_alias_set (), but in this
	 case the alias set is that for TYPE and if the memory gets
	 reused it will be reused with alias set TYPE.  */
      rtx tmp = assign_temp (type, 0, 1, 0);
      rtx dest_addr;

      addr_rtx = force_reg (Pmode, addr_rtx);
      addr_rtx = gen_rtx_MEM (BLKmode, addr_rtx);
      set_mem_alias_set (addr_rtx, get_varargs_alias_set ());
      set_mem_align (addr_rtx, BITS_PER_WORD);
      tmp = shallow_copy_rtx (tmp);
      PUT_MODE (tmp, BLKmode);
      set_mem_alias_set (tmp, 0);

      dest_addr = emit_block_move (tmp, addr_rtx, GEN_INT (rsize));
      if (dest_addr != NULL_RTX)
	addr_rtx = dest_addr;
      else
	addr_rtx = XCEXP (tmp, 0, MEM);
    }

  /* For a by-reference argument the slot holds a pointer: return a MEM
     that loads through it.  */
  if (indirect)
    {
      addr_rtx = force_reg (Pmode, addr_rtx);
      addr_rtx = gen_rtx_MEM (Pmode, addr_rtx);
      set_mem_alias_set (addr_rtx, get_varargs_alias_set ());
    }

  return addr_rtx;
}
5364
5365/* Return the string to output a conditional branch to LABEL, which is
5366   the operand number of the label.  OP is the conditional expression.
5367   XEXP (OP, 0) is assumed to be a condition code register (integer or
5368   floating point) and its mode specifies what kind of comparison we made.
5369
5370   REVERSED is non-zero if we should reverse the sense of the comparison.
5371
5372   ANNUL is non-zero if we should generate an annulling branch.
5373
5374   NOOP is non-zero if we have to follow this branch by a noop.
5375
5376   INSN, if set, is the insn.  */
5377
char *
output_cbranch (op, dest, label, reversed, annul, noop, insn)
     rtx op, dest;
     int label;
     int reversed, annul, noop;
     rtx insn;
{
  /* Assembled into this static buffer -- the function is therefore
     not reentrant; each call overwrites the previous result.  */
  static char string[50];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
     nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
     nop
     ba .LC29  */

  far = get_attr_length (insn) >= 3;
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;

	default:
	  abort ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  branch = "bne";
	  break;
	case EQ:
	  branch = "be";
	  break;
	case GE:
	  /* With no-overflow CC modes the sign bit alone decides.  */
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;

	default:
	  abort ();
	}
      strcpy (string, branch);
    }
  /* SPACES tracks whether the operand column still lines up; once the
     mnemonic plus suffixes reach 8 chars we fall back to one space.  */
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (! TARGET_V9)
    labelno = "";
  else
    {
      rtx note;
      /* V8 is set when the target is out of v9 branch range and we must
	 fall back to the v8 form (no cc operand, no prediction).  */
      int v8 = 0;

      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  static char v9_fcc_labelno[] = "%%fccX, ";
	  /* Set the char indicating the number of the fcc reg to use.  */
	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	  labelno = v9_fcc_labelno;
	  if (v8)
	    {
	      /* v8 only has %fcc0.  */
	      if (REGNO (cc_reg) == SPARC_FCC_REG)
		labelno = "";
	      else
		abort ();
	    }
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	{
	  /* 64-bit compares have no v8 equivalent at all.  */
	  labelno = "%%xcc, ";
	  if (v8)
	    abort ();
	}
      else
	{
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	}

      /* Emit a ,pt/,pn prediction hint from the branch probability note;
	 the sense flips when the branch itself was reversed for FAR.  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      strcpy (p, ".+12\n\tnop\n\tb\t");
      /* An annul bit or trailing nop makes the sequence one insn
	 longer, so the short branch must skip 16 bytes, not 12.  */
      if (annul || noop)
        p[3] = '6';
      p += 13;
    }
  *p++ = '%';
  *p++ = 'l';
  /* Set the char indicating the number of the operand containing the
     label_ref.  */
  *p++ = label + '0';
  *p = '\0';
  if (noop)
    strcpy (p, "\n\tnop");

  return string;
}
5620
5621/* Emit a library call comparison between floating point X and Y.
5622   COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).
5623   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
5624   values as arguments instead of the TFmode registers themselves,
5625   that's why we cannot call emit_float_lib_cmp.  */
void
sparc_emit_float_lib_cmp (x, y, comparison)
     rtx x, y;
     enum rtx_code comparison;
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2;
  enum machine_mode mode;

  /* Pick the library routine: direct predicates for the ordered
     comparisons, the general three-way _Q[p]_cmp for the rest.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64) ? "_Qp_feq" : "_Q_feq";
      break;

    case NE:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fne" : "_Q_fne";
      break;

    case GT:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fgt" : "_Q_fgt";
      break;

    case GE:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fge" : "_Q_fge";
      break;

    case LT:
      qpfunc = (TARGET_ARCH64) ? "_Qp_flt" : "_Q_flt";
      break;

    case LE:
      qpfunc = (TARGET_ARCH64) ? "_Qp_fle" : "_Q_fle";
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64) ? "_Qp_cmp" : "_Q_cmp";
      break;

    default:
      abort();
      break;
    }

  if (TARGET_ARCH64)
    {
      /* The _Qp_* routines take pointers to TFmode values, so spill
	 any non-MEM operand to a stack slot first.  */
      if (GET_CODE (x) != MEM)
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
	  emit_insn (gen_rtx_SET (VOIDmode, slot0, x));
	}
      else
	slot0 = x;

      if (GET_CODE (y) != MEM)
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
	  emit_insn (gen_rtx_SET (VOIDmode, slot1, y));
	}
      else
	slot1 = y;

      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
			 DImode, 2,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);

      mode = DImode;
    }
  else
    {
      /* The _Q_* routines take the TFmode values themselves.  */
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
			 SImode, 2,
			 x, TFmode, y, TFmode);

      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode));

  /* Decode the result.  The direct predicates return non-zero for true;
     _Q[p]_cmp returns a three-way code -- the tests below are consistent
     with 0 = equal, 1 = less, 2 = greater, 3 = unordered (presumably the
     SPARC quad-fp emulation convention; confirm against libgcc).  */
  switch (comparison)
    {
    default:
      emit_cmp_insn (result, const0_rtx, NE, NULL_RTX, mode, 0);
      break;
    case ORDERED:
    case UNORDERED:
      /* Unordered iff result == 3.  */
      emit_cmp_insn (result, GEN_INT(3), comparison == UNORDERED ? EQ : NE,
		     NULL_RTX, mode, 0);
      break;
    case UNGT:
    case UNGE:
      /* UNGT: result > 1 (greater or unordered); UNGE: result != 1.  */
      emit_cmp_insn (result, const1_rtx,
		     comparison == UNGT ? GT : NE, NULL_RTX, mode, 0);
      break;
    case UNLE:
      /* Anything but "greater".  */
      emit_cmp_insn (result, const2_rtx, NE, NULL_RTX, mode, 0);
      break;
    case UNLT:
      /* Low bit set: result is 1 (less) or 3 (unordered).  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      emit_cmp_insn (tem, const0_rtx, NE, NULL_RTX, mode, 0);
      break;
    case UNEQ:
    case LTGT:
      /* (result + 1) & 2 is non-zero exactly for results 1 and 2,
	 i.e. for LTGT; UNEQ is its complement.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      emit_cmp_insn (tem2, const0_rtx, comparison == UNEQ ? EQ : NE,
		     NULL_RTX, mode, 0);
      break;
    }
}
5761
5762/* Generate an unsigned DImode to FP conversion.  This is the same code
5763   optabs would emit if we didn't have TFmode patterns.  */
5764
void
sparc_emit_floatunsdi (operands)
     rtx operands[2];
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  f0 = gen_reg_rtx (mode);

  /* If IN has its top bit clear, a plain signed conversion is exact.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Otherwise compute out = (fp)((in >> 1) | (in & 1)) * 2: halving
     brings the value into signed range, keeping the low bit or'd in
     preserves correct rounding, and the final add doubles it back.  */
  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
  emit_insn (gen_anddi3 (i1, in, const1_rtx));
  emit_insn (gen_iordi3 (i0, i0, i1));
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
5797
5798/* Return the string to output a conditional branch to LABEL, testing
5799   register REG.  LABEL is the operand number of the label; REG is the
5800   operand number of the reg.  OP is the conditional expression.  The mode
5801   of REG says what kind of comparison we made.
5802
5803   REVERSED is non-zero if we should reverse the sense of the comparison.
5804
5805   ANNUL is non-zero if we should generate an annulling branch.
5806
5807   NOOP is non-zero if we have to follow this branch by a noop.  */
5808
char *
output_v9branch (op, dest, reg, label, reversed, annul, noop, insn)
     rtx op, dest;
     int reg, label;
     int reversed, annul, noop;
     rtx insn;
{
  /* Assembled into this static buffer -- not reentrant.  */
  static char string[50];
  enum rtx_code code = GET_CODE (op);
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
     nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
     nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64 bit versions of these instructions exist.  */
  if (mode != DImode)
    abort ();

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      abort ();
    }

  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Prediction hint from the branch probability note; the sense flips
     when the branch was reversed for the FAR rewrite.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Tab-align the operands unless the mnemonic already fills 8 cols.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\tnop\n\t");
      /* An annul bit or trailing nop lengthens the sequence, so the
	 short branch must skip 16 bytes instead of 12.  */
      if (annul || noop)
        p[3] = '6';
      p += 11;
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p = '\0';

  if (noop)
    strcpy (p, "\n\tnop");

  return string;
}
5947
5948/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
5949   Such instructions cannot be used in the delay slot of return insn on v9.
5950   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
5951 */
5952
static int
epilogue_renumber (where, test)
     register rtx *where;
     int test;
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx (REG, GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* fall through -- a (possibly renumbered) register is a leaf.  */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_DOUBLE:
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
                       (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse into every sub-rtx of this expression.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
6026
6027/* Leaf functions and non-leaf functions have different needs.  */
6028
/* Allocation order used when the current function is a leaf.  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Allocation order for ordinary (non-leaf) functions.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by "is non-leaf": [0] = leaf order, [1] = non-leaf order.  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
6038
6039void
6040order_regs_for_local_alloc ()
6041{
6042  static int last_order_nonleaf = 1;
6043
6044  if (regs_ever_live[15] != last_order_nonleaf)
6045    {
6046      last_order_nonleaf = !last_order_nonleaf;
6047      memcpy ((char *) reg_alloc_order,
6048	      (const char *) reg_alloc_orders[last_order_nonleaf],
6049	      FIRST_PSEUDO_REGISTER * sizeof (int));
6050    }
6051}
6052
6053/* Return 1 if REG and MEM are legitimate enough to allow the various
6054   mem<-->reg splits to be run.  */
6055
6056int
6057sparc_splitdi_legitimate (reg, mem)
6058     rtx reg;
6059     rtx mem;
6060{
6061  /* Punt if we are here by mistake.  */
6062  if (! reload_completed)
6063    abort ();
6064
6065  /* We must have an offsettable memory reference.  */
6066  if (! offsettable_memref_p (mem))
6067    return 0;
6068
6069  /* If we have legitimate args for ldd/std, we do not want
6070     the split to happen.  */
6071  if ((REGNO (reg) % 2) == 0
6072      && mem_min_alignment (mem, 8))
6073    return 0;
6074
6075  /* Success.  */
6076  return 1;
6077}
6078
6079/* Return 1 if x and y are some kind of REG and they refer to
6080   different hard registers.  This test is guarenteed to be
6081   run after reload.  */
6082
6083int
6084sparc_absnegfloat_split_legitimate (x, y)
6085     rtx x, y;
6086{
6087  if (GET_CODE (x) != REG)
6088    return 0;
6089  if (GET_CODE (y) != REG)
6090    return 0;
6091  if (REGNO (x) == REGNO (y))
6092    return 0;
6093  return 1;
6094}
6095
6096/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6097   This makes them candidates for using ldd and std insns.
6098
6099   Note reg1 and reg2 *must* be hard registers.  */
6100
6101int
6102registers_ok_for_ldd_peep (reg1, reg2)
6103     rtx reg1, reg2;
6104{
6105  /* We might have been passed a SUBREG.  */
6106  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6107    return 0;
6108
6109  if (REGNO (reg1) % 2 != 0)
6110    return 0;
6111
6112  /* Integer ldd is deprecated in SPARC V9 */
6113  if (TARGET_V9 && REGNO (reg1) < 32)
6114    return 0;
6115
6116  return (REGNO (reg1) == REGNO (reg2) - 1);
6117}
6118
6119/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6120   an ldd or std insn.
6121
6122   This can only happen when addr1 and addr2, the addresses in mem1
6123   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6124   addr1 must also be aligned on a 64-bit boundary.
6125
6126   Also iff dependent_reg_rtx is not null it should not be used to
6127   compute the address for mem1, i.e. we cannot optimize a sequence
6128   like:
6129   	ld [%o0], %o0
6130	ld [%o0 + 4], %o1
6131   to
6132   	ldd [%o0], %o0
6133   nor:
6134	ld [%g3 + 4], %g3
6135	ld [%g3], %g2
6136   to
6137        ldd [%g3], %g2
6138
6139   But, note that the transformation from:
6140	ld [%g2 + 4], %g3
6141        ld [%g2], %g2
6142   to
6143	ldd [%g2], %g2
6144   is perfectly fine.  Thus, the peephole2 patterns always pass us
6145   the destination register of the first load, never the second one.
6146
6147   For stores we don't have a similar problem, so dependent_reg_rtx is
6148   NULL_RTX.  */
6149
6150int
6151mems_ok_for_ldd_peep (mem1, mem2, dependent_reg_rtx)
6152      rtx mem1, mem2, dependent_reg_rtx;
6153{
6154  rtx addr1, addr2;
6155  unsigned int reg1;
6156  int offset1;
6157
6158  /* The mems cannot be volatile.  */
6159  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6160    return 0;
6161
6162  /* MEM1 should be aligned on a 64-bit boundary.  */
6163  if (MEM_ALIGN (mem1) < 64)
6164    return 0;
6165
6166  addr1 = XEXP (mem1, 0);
6167  addr2 = XEXP (mem2, 0);
6168
6169  /* Extract a register number and offset (if used) from the first addr.  */
6170  if (GET_CODE (addr1) == PLUS)
6171    {
6172      /* If not a REG, return zero.  */
6173      if (GET_CODE (XEXP (addr1, 0)) != REG)
6174	return 0;
6175      else
6176	{
6177          reg1 = REGNO (XEXP (addr1, 0));
6178	  /* The offset must be constant!  */
6179	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6180            return 0;
6181          offset1 = INTVAL (XEXP (addr1, 1));
6182	}
6183    }
6184  else if (GET_CODE (addr1) != REG)
6185    return 0;
6186  else
6187    {
6188      reg1 = REGNO (addr1);
6189      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
6190      offset1 = 0;
6191    }
6192
6193  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
6194  if (GET_CODE (addr2) != PLUS)
6195    return 0;
6196
6197  if (GET_CODE (XEXP (addr2, 0)) != REG
6198      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6199    return 0;
6200
6201  if (reg1 != REGNO (XEXP (addr2, 0)))
6202    return 0;
6203
6204  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6205    return 0;
6206
6207  /* The first offset must be evenly divisible by 8 to ensure the
6208     address is 64 bit aligned.  */
6209  if (offset1 % 8 != 0)
6210    return 0;
6211
6212  /* The offset for the second addr must be 4 more than the first addr.  */
6213  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6214    return 0;
6215
6216  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
6217     instructions.  */
6218  return 1;
6219}
6220
6221/* Return 1 if reg is a pseudo, or is the first register in
6222   a hard register pair.  This makes it a candidate for use in
6223   ldd and std insns.  */
6224
6225int
6226register_ok_for_ldd (reg)
6227     rtx reg;
6228{
6229  /* We might have been passed a SUBREG.  */
6230  if (GET_CODE (reg) != REG)
6231    return 0;
6232
6233  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6234    return (REGNO (reg) % 2 == 0);
6235  else
6236    return 1;
6237}
6238
6239/* Print operand X (an rtx) in assembler syntax to file FILE.
6240   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6241   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
6242
6243void
6244print_operand (file, x, code)
6245     FILE *file;
6246     rtx x;
6247     int code;
6248{
6249  switch (code)
6250    {
6251    case '#':
6252      /* Output a 'nop' if there's nothing for the delay slot.  */
6253      if (dbr_sequence_length () == 0)
6254	fputs ("\n\t nop", file);
6255      return;
6256    case '*':
6257      /* Output an annul flag if there's nothing for the delay slot and we
6258	 are optimizing.  This is always used with '(' below.  */
6259      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
6260	 this is a dbx bug.  So, we only do this when optimizing.  */
6261      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
6262	 Always emit a nop in case the next instruction is a branch.  */
6263      if (dbr_sequence_length () == 0
6264	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
6265	fputs (",a", file);
6266      return;
6267    case '(':
6268      /* Output a 'nop' if there's nothing for the delay slot and we are
6269	 not optimizing.  This is always used with '*' above.  */
6270      if (dbr_sequence_length () == 0
6271	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
6272	fputs ("\n\t nop", file);
6273      return;
6274    case '_':
6275      /* Output the Embedded Medium/Anywhere code model base register.  */
6276      fputs (EMBMEDANY_BASE_REG, file);
6277      return;
6278    case '@':
6279      /* Print out what we are using as the frame pointer.  This might
6280	 be %fp, or might be %sp+offset.  */
6281      /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
6282      fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
6283      return;
6284    case 'Y':
6285      /* Adjust the operand to take into account a RESTORE operation.  */
6286      if (GET_CODE (x) == CONST_INT)
6287	break;
6288      else if (GET_CODE (x) != REG)
6289	output_operand_lossage ("invalid %%Y operand");
6290      else if (REGNO (x) < 8)
6291	fputs (reg_names[REGNO (x)], file);
6292      else if (REGNO (x) >= 24 && REGNO (x) < 32)
6293	fputs (reg_names[REGNO (x)-16], file);
6294      else
6295	output_operand_lossage ("invalid %%Y operand");
6296      return;
6297    case 'L':
6298      /* Print out the low order register name of a register pair.  */
6299      if (WORDS_BIG_ENDIAN)
6300	fputs (reg_names[REGNO (x)+1], file);
6301      else
6302	fputs (reg_names[REGNO (x)], file);
6303      return;
6304    case 'H':
6305      /* Print out the high order register name of a register pair.  */
6306      if (WORDS_BIG_ENDIAN)
6307	fputs (reg_names[REGNO (x)], file);
6308      else
6309	fputs (reg_names[REGNO (x)+1], file);
6310      return;
6311    case 'R':
6312      /* Print out the second register name of a register pair or quad.
6313	 I.e., R (%o0) => %o1.  */
6314      fputs (reg_names[REGNO (x)+1], file);
6315      return;
6316    case 'S':
6317      /* Print out the third register name of a register quad.
6318	 I.e., S (%o0) => %o2.  */
6319      fputs (reg_names[REGNO (x)+2], file);
6320      return;
6321    case 'T':
6322      /* Print out the fourth register name of a register quad.
6323	 I.e., T (%o0) => %o3.  */
6324      fputs (reg_names[REGNO (x)+3], file);
6325      return;
6326    case 'x':
6327      /* Print a condition code register.  */
6328      if (REGNO (x) == SPARC_ICC_REG)
6329	{
6330	  /* We don't handle CC[X]_NOOVmode because they're not supposed
6331	     to occur here.  */
6332	  if (GET_MODE (x) == CCmode)
6333	    fputs ("%icc", file);
6334	  else if (GET_MODE (x) == CCXmode)
6335	    fputs ("%xcc", file);
6336	  else
6337	    abort ();
6338	}
6339      else
6340	/* %fccN register */
6341	fputs (reg_names[REGNO (x)], file);
6342      return;
6343    case 'm':
6344      /* Print the operand's address only.  */
6345      output_address (XEXP (x, 0));
6346      return;
6347    case 'r':
6348      /* In this case we need a register.  Use %g0 if the
6349	 operand is const0_rtx.  */
6350      if (x == const0_rtx
6351	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
6352	{
6353	  fputs ("%g0", file);
6354	  return;
6355	}
6356      else
6357	break;
6358
6359    case 'A':
6360      switch (GET_CODE (x))
6361	{
6362	case IOR: fputs ("or", file); break;
6363	case AND: fputs ("and", file); break;
6364	case XOR: fputs ("xor", file); break;
6365	default: output_operand_lossage ("invalid %%A operand");
6366	}
6367      return;
6368
6369    case 'B':
6370      switch (GET_CODE (x))
6371	{
6372	case IOR: fputs ("orn", file); break;
6373	case AND: fputs ("andn", file); break;
6374	case XOR: fputs ("xnor", file); break;
6375	default: output_operand_lossage ("invalid %%B operand");
6376	}
6377      return;
6378
6379      /* These are used by the conditional move instructions.  */
6380    case 'c' :
6381    case 'C':
6382      {
6383	enum rtx_code rc = GET_CODE (x);
6384
6385	if (code == 'c')
6386	  {
6387	    enum machine_mode mode = GET_MODE (XEXP (x, 0));
6388	    if (mode == CCFPmode || mode == CCFPEmode)
6389	      rc = reverse_condition_maybe_unordered (GET_CODE (x));
6390	    else
6391	      rc = reverse_condition (GET_CODE (x));
6392	  }
6393	switch (rc)
6394	  {
6395	  case NE: fputs ("ne", file); break;
6396	  case EQ: fputs ("e", file); break;
6397	  case GE: fputs ("ge", file); break;
6398	  case GT: fputs ("g", file); break;
6399	  case LE: fputs ("le", file); break;
6400	  case LT: fputs ("l", file); break;
6401	  case GEU: fputs ("geu", file); break;
6402	  case GTU: fputs ("gu", file); break;
6403	  case LEU: fputs ("leu", file); break;
6404	  case LTU: fputs ("lu", file); break;
6405	  case LTGT: fputs ("lg", file); break;
6406	  case UNORDERED: fputs ("u", file); break;
6407	  case ORDERED: fputs ("o", file); break;
6408	  case UNLT: fputs ("ul", file); break;
6409	  case UNLE: fputs ("ule", file); break;
6410	  case UNGT: fputs ("ug", file); break;
6411	  case UNGE: fputs ("uge", file); break;
6412	  case UNEQ: fputs ("ue", file); break;
6413	  default: output_operand_lossage (code == 'c'
6414					   ? "invalid %%c operand"
6415					   : "invalid %%C operand");
6416	  }
6417	return;
6418      }
6419
6420      /* These are used by the movr instruction pattern.  */
6421    case 'd':
6422    case 'D':
6423      {
6424	enum rtx_code rc = (code == 'd'
6425			    ? reverse_condition (GET_CODE (x))
6426			    : GET_CODE (x));
6427	switch (rc)
6428	  {
6429	  case NE: fputs ("ne", file); break;
6430	  case EQ: fputs ("e", file); break;
6431	  case GE: fputs ("gez", file); break;
6432	  case LT: fputs ("lz", file); break;
6433	  case LE: fputs ("lez", file); break;
6434	  case GT: fputs ("gz", file); break;
6435	  default: output_operand_lossage (code == 'd'
6436					   ? "invalid %%d operand"
6437					   : "invalid %%D operand");
6438	  }
6439	return;
6440      }
6441
6442    case 'b':
6443      {
6444	/* Print a sign-extended character.  */
6445	int i = trunc_int_for_mode (INTVAL (x), QImode);
6446	fprintf (file, "%d", i);
6447	return;
6448      }
6449
6450    case 'f':
6451      /* Operand must be a MEM; write its address.  */
6452      if (GET_CODE (x) != MEM)
6453	output_operand_lossage ("invalid %%f operand");
6454      output_address (XEXP (x, 0));
6455      return;
6456
6457    case 0:
6458      /* Do nothing special.  */
6459      break;
6460
6461    default:
6462      /* Undocumented flag.  */
6463      output_operand_lossage ("invalid operand output code");
6464    }
6465
6466  if (GET_CODE (x) == REG)
6467    fputs (reg_names[REGNO (x)], file);
6468  else if (GET_CODE (x) == MEM)
6469    {
6470      fputc ('[', file);
6471	/* Poor Sun assembler doesn't understand absolute addressing.  */
6472      if (CONSTANT_P (XEXP (x, 0)))
6473	fputs ("%g0+", file);
6474      output_address (XEXP (x, 0));
6475      fputc (']', file);
6476    }
6477  else if (GET_CODE (x) == HIGH)
6478    {
6479      fputs ("%hi(", file);
6480      output_addr_const (file, XEXP (x, 0));
6481      fputc (')', file);
6482    }
6483  else if (GET_CODE (x) == LO_SUM)
6484    {
6485      print_operand (file, XEXP (x, 0), 0);
6486      if (TARGET_CM_MEDMID)
6487	fputs ("+%l44(", file);
6488      else
6489	fputs ("+%lo(", file);
6490      output_addr_const (file, XEXP (x, 1));
6491      fputc (')', file);
6492    }
6493  else if (GET_CODE (x) == CONST_DOUBLE
6494	   && (GET_MODE (x) == VOIDmode
6495	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
6496    {
6497      if (CONST_DOUBLE_HIGH (x) == 0)
6498	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
6499      else if (CONST_DOUBLE_HIGH (x) == -1
6500	       && CONST_DOUBLE_LOW (x) < 0)
6501	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
6502      else
6503	output_operand_lossage ("long long constant not a valid immediate operand");
6504    }
6505  else if (GET_CODE (x) == CONST_DOUBLE)
6506    output_operand_lossage ("floating point constant not a valid immediate operand");
6507  else { output_addr_const (file, x); }
6508}
6509
6510/* Target hook for assembling integer objects.  The sparc version has
6511   special handling for aligned DI-mode objects.  */
6512
6513static bool
6514sparc_assemble_integer (x, size, aligned_p)
6515     rtx x;
6516     unsigned int size;
6517     int aligned_p;
6518{
6519  /* ??? We only output .xword's for symbols and only then in environments
6520     where the assembler can handle them.  */
6521  if (aligned_p && size == 8
6522      && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
6523    {
6524      if (TARGET_V9)
6525	{
6526	  assemble_integer_with_op ("\t.xword\t", x);
6527	  return true;
6528	}
6529      else
6530	{
6531	  assemble_aligned_integer (4, const0_rtx);
6532	  assemble_aligned_integer (4, x);
6533	  return true;
6534	}
6535    }
6536  return default_assemble_integer (x, size, aligned_p);
6537}
6538
6539/* Return the value of a code used in the .proc pseudo-op that says
6540   what kind of result this function returns.  For non-C types, we pick
6541   the closest C type.  */
6542
6543#ifndef CHAR_TYPE_SIZE
6544#define CHAR_TYPE_SIZE BITS_PER_UNIT
6545#endif
6546
6547#ifndef SHORT_TYPE_SIZE
6548#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
6549#endif
6550
6551#ifndef INT_TYPE_SIZE
6552#define INT_TYPE_SIZE BITS_PER_WORD
6553#endif
6554
6555#ifndef LONG_TYPE_SIZE
6556#define LONG_TYPE_SIZE BITS_PER_WORD
6557#endif
6558
6559#ifndef LONG_LONG_TYPE_SIZE
6560#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
6561#endif
6562
6563#ifndef FLOAT_TYPE_SIZE
6564#define FLOAT_TYPE_SIZE BITS_PER_WORD
6565#endif
6566
6567#ifndef DOUBLE_TYPE_SIZE
6568#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
6569#endif
6570
6571#ifndef LONG_DOUBLE_TYPE_SIZE
6572#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
6573#endif
6574
6575unsigned long
6576sparc_type_code (type)
6577     register tree type;
6578{
6579  register unsigned long qualifiers = 0;
6580  register unsigned shift;
6581
6582  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
6583     setting more, since some assemblers will give an error for this.  Also,
6584     we must be careful to avoid shifts of 32 bits or more to avoid getting
6585     unpredictable results.  */
6586
6587  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
6588    {
6589      switch (TREE_CODE (type))
6590	{
6591	case ERROR_MARK:
6592	  return qualifiers;
6593
6594	case ARRAY_TYPE:
6595	  qualifiers |= (3 << shift);
6596	  break;
6597
6598	case FUNCTION_TYPE:
6599	case METHOD_TYPE:
6600	  qualifiers |= (2 << shift);
6601	  break;
6602
6603	case POINTER_TYPE:
6604	case REFERENCE_TYPE:
6605	case OFFSET_TYPE:
6606	  qualifiers |= (1 << shift);
6607	  break;
6608
6609	case RECORD_TYPE:
6610	  return (qualifiers | 8);
6611
6612	case UNION_TYPE:
6613	case QUAL_UNION_TYPE:
6614	  return (qualifiers | 9);
6615
6616	case ENUMERAL_TYPE:
6617	  return (qualifiers | 10);
6618
6619	case VOID_TYPE:
6620	  return (qualifiers | 16);
6621
6622	case INTEGER_TYPE:
6623	  /* If this is a range type, consider it to be the underlying
6624	     type.  */
6625	  if (TREE_TYPE (type) != 0)
6626	    break;
6627
6628	  /* Carefully distinguish all the standard types of C,
6629	     without messing up if the language is not C.  We do this by
6630	     testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
6631	     look at both the names and the above fields, but that's redundant.
6632	     Any type whose size is between two C types will be considered
6633	     to be the wider of the two types.  Also, we do not have a
6634	     special code to use for "long long", so anything wider than
6635	     long is treated the same.  Note that we can't distinguish
6636	     between "int" and "long" in this code if they are the same
6637	     size, but that's fine, since neither can the assembler.  */
6638
6639	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
6640	    return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
6641
6642	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
6643	    return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
6644
6645	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
6646	    return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
6647
6648	  else
6649	    return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
6650
6651	case REAL_TYPE:
6652	  /* If this is a range type, consider it to be the underlying
6653	     type.  */
6654	  if (TREE_TYPE (type) != 0)
6655	    break;
6656
6657	  /* Carefully distinguish all the standard types of C,
6658	     without messing up if the language is not C.  */
6659
6660	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
6661	    return (qualifiers | 6);
6662
6663	  else
6664	    return (qualifiers | 7);
6665
6666	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
6667	  /* ??? We need to distinguish between double and float complex types,
6668	     but I don't know how yet because I can't reach this code from
6669	     existing front-ends.  */
6670	  return (qualifiers | 7);	/* Who knows? */
6671
6672	case CHAR_TYPE:		/* GNU Pascal CHAR type.  Not used in C.  */
6673	case BOOLEAN_TYPE:	/* GNU Fortran BOOLEAN type.  */
6674	case FILE_TYPE:		/* GNU Pascal FILE type.  */
6675	case SET_TYPE:		/* GNU Pascal SET type.  */
6676	case LANG_TYPE:		/* ? */
6677	  return qualifiers;
6678
6679	default:
6680	  abort ();		/* Not a type! */
6681        }
6682    }
6683
6684  return qualifiers;
6685}
6686
6687/* Nested function support.  */
6688
6689/* Emit RTL insns to initialize the variable parts of a trampoline.
6690   FNADDR is an RTX for the address of the function's pure code.
6691   CXT is an RTX for the static chain value for the function.
6692
6693   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
6694   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
6695   (to store insns).  This is a bit excessive.  Perhaps a different
6696   mechanism would be better here.
6697
6698   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
6699
6700void
6701sparc_initialize_trampoline (tramp, fnaddr, cxt)
6702     rtx tramp, fnaddr, cxt;
6703{
6704  /* SPARC 32 bit trampoline:
6705
6706 	sethi	%hi(fn), %g1
6707 	sethi	%hi(static), %g2
6708 	jmp	%g1+%lo(fn)
6709 	or	%g2, %lo(static), %g2
6710
6711    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
6712    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
6713   */
6714#ifdef TRANSFER_FROM_TRAMPOLINE
6715  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
6716                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
6717#endif
6718
6719  emit_move_insn
6720    (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
6721     expand_binop (SImode, ior_optab,
6722		   expand_shift (RSHIFT_EXPR, SImode, fnaddr,
6723				 size_int (10), 0, 1),
6724		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
6725		   NULL_RTX, 1, OPTAB_DIRECT));
6726
6727  emit_move_insn
6728    (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
6729     expand_binop (SImode, ior_optab,
6730		   expand_shift (RSHIFT_EXPR, SImode, cxt,
6731				 size_int (10), 0, 1),
6732		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
6733		   NULL_RTX, 1, OPTAB_DIRECT));
6734
6735  emit_move_insn
6736    (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
6737     expand_binop (SImode, ior_optab,
6738		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
6739		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
6740		   NULL_RTX, 1, OPTAB_DIRECT));
6741
6742  emit_move_insn
6743    (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
6744     expand_binop (SImode, ior_optab,
6745		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
6746		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
6747		   NULL_RTX, 1, OPTAB_DIRECT));
6748
6749  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
6750     aligned on a 16 byte boundary so one flush clears it all.  */
6751  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
6752  if (sparc_cpu != PROCESSOR_ULTRASPARC)
6753    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
6754						     plus_constant (tramp, 8)))));
6755}
6756
6757/* The 64 bit version is simpler because it makes more sense to load the
6758   values as "immediate" data out of the trampoline.  It's also easier since
6759   we can read the PC without clobbering a register.  */
6760
6761void
6762sparc64_initialize_trampoline (tramp, fnaddr, cxt)
6763     rtx tramp, fnaddr, cxt;
6764{
6765#ifdef TRANSFER_FROM_TRAMPOLINE
6766  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
6767                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
6768#endif
6769
6770  /*
6771	rd	%pc, %g1
6772	ldx	[%g1+24], %g5
6773	jmp	%g5
6774	ldx	[%g1+16], %g5
6775	+16 bytes data
6776   */
6777
6778  emit_move_insn (gen_rtx_MEM (SImode, tramp),
6779		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
6780  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
6781		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
6782  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
6783		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
6784  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
6785		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
6786  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
6787  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
6788  emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
6789
6790  if (sparc_cpu != PROCESSOR_ULTRASPARC)
6791    emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
6792}
6793
6794/* Subroutines to support a flat (single) register window calling
6795   convention.  */
6796
6797/* Single-register window sparc stack frames look like:
6798
6799             Before call		        After call
6800        +-----------------------+	+-----------------------+
6801   high |		        |	|			|
6802   mem  |  caller's temps.    	|       |  caller's temps.    	|
6803	|       		|       |       	        |
6804        +-----------------------+	+-----------------------+
6805 	|       		|	|		        |
6806        |  arguments on stack.  |	|  arguments on stack.  |
6807	|       		|      	|			|
6808        +-----------------------+FP+92->+-----------------------+
6809 	|  6 words to save     	|	|  6 words to save	|
6810	|  arguments passed	|	|  arguments passed	|
6811	|  in registers, even	|	|  in registers, even	|
6812       	|  if not passed.       |      	|  if not passed.	|
6813 SP+68->+-----------------------+FP+68->+-----------------------+
6814        | 1 word struct addr	|      	| 1 word struct addr	|
6815        +-----------------------+FP+64->+-----------------------+
6816        |			|	|			|
6817        | 16 word reg save area	|	| 16 word reg save area |
6818       	|                       |      	|			|
6819    SP->+-----------------------+   FP->+-----------------------+
6820				        | 4 word area for	|
6821				       	| fp/alu reg moves	|
6822				 FP-16->+-----------------------+
6823				        |			|
6824				        |  local variables	|
6825				        |			|
6826				        +-----------------------+
6827				        |		        |
6828                                        |  fp register save     |
6829				        |			|
6830				        +-----------------------+
6831				        |		        |
6832                                        |  gp register save     |
6833                                        |       		|
6834				        +-----------------------+
6835				        |			|
6836                                        |  alloca allocations   |
6837        			        |			|
6838				        +-----------------------+
6839				        |			|
6840                                        |  arguments on stack   |
6841        			       	|		        |
6842				 SP+92->+-----------------------+
6843                                        |  6 words to save      |
6844				        |  arguments passed     |
6845                                        |  in registers, even   |
6846   low                                 	|  if not passed.       |
6847   memory        		 SP+68->+-----------------------+
6848				       	| 1 word struct addr	|
6849				 SP+64->+-----------------------+
6850				        |			|
6851				        I 16 word reg save area |
6852				       	|			|
6853				    SP->+-----------------------+  */
6854
6855/* Structure to be filled in by sparc_flat_compute_frame_size with register
6856   save masks, and offsets for the current function.  */
6857
6858struct sparc_frame_info
6859{
6860  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
6861  unsigned long var_size;	/* # bytes that variables take up.  */
6862  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
6863  unsigned long extra_size;	/* # bytes of extra gunk.  */
6864  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
6865  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
6866  unsigned long gmask;		/* Mask of saved gp registers.  */
6867  unsigned long fmask;		/* Mask of saved fp registers.  */
6868  unsigned long reg_offset;	/* Offset from new sp to store regs.  */
6869  int		initialized;	/* Nonzero if frame size already calculated.  */
6870};
6871
6872/* Current frame information calculated by sparc_flat_compute_frame_size.  */
6873struct sparc_frame_info current_frame_info;
6874
6875/* Zero structure to initialize current_frame_info.  */
6876struct sparc_frame_info zero_frame_info;
6877
6878/* Tell prologue and epilogue if register REGNO should be saved / restored.  */
6879
6880#define RETURN_ADDR_REGNUM 15
6881#define HARD_FRAME_POINTER_MASK (1 << (HARD_FRAME_POINTER_REGNUM))
6882#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
6883
6884#define MUST_SAVE_REGISTER(regno) \
6885 ((regs_ever_live[regno] && !call_used_regs[regno])			\
6886  || (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)	\
6887  || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
6888
6889/* Return the bytes needed to compute the frame pointer from the current
6890   stack pointer.  */
6891
6892unsigned long
6893sparc_flat_compute_frame_size (size)
6894     int size;			/* # of var. bytes allocated.  */
6895{
6896  int regno;
6897  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
6898  unsigned long var_size;	/* # bytes that variables take up.  */
6899  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
6900  unsigned long extra_size;	/* # extra bytes.  */
6901  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
6902  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
6903  unsigned long gmask;		/* Mask of saved gp registers.  */
6904  unsigned long fmask;		/* Mask of saved fp registers.  */
6905  unsigned long reg_offset;	/* Offset to register save area.  */
6906  int           need_aligned_p;	/* 1 if need the save area 8 byte aligned.  */
6907
6908  /* This is the size of the 16 word reg save area, 1 word struct addr
6909     area, and 4 word fp/alu register copy area.  */
6910  extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
6911  var_size = size;
6912  gp_reg_size = 0;
6913  fp_reg_size = 0;
6914  gmask = 0;
6915  fmask = 0;
6916  reg_offset = 0;
6917  need_aligned_p = 0;
6918
6919  args_size = 0;
6920  if (!leaf_function_p ())
6921    {
6922      /* Also include the size needed for the 6 parameter registers.  */
6923      args_size = current_function_outgoing_args_size + 24;
6924    }
6925  total_size = var_size + args_size;
6926
6927  /* Calculate space needed for gp registers.  */
6928  for (regno = 1; regno <= 31; regno++)
6929    {
6930      if (MUST_SAVE_REGISTER (regno))
6931	{
6932	  /* If we need to save two regs in a row, ensure there's room to bump
6933	     up the address to align it to a doubleword boundary.  */
6934	  if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
6935	    {
6936	      if (gp_reg_size % 8 != 0)
6937		gp_reg_size += 4;
6938	      gp_reg_size += 2 * UNITS_PER_WORD;
6939	      gmask |= 3 << regno;
6940	      regno++;
6941	      need_aligned_p = 1;
6942	    }
6943	  else
6944	    {
6945	      gp_reg_size += UNITS_PER_WORD;
6946	      gmask |= 1 << regno;
6947	    }
6948	}
6949    }
6950
6951  /* Calculate space needed for fp registers.  */
6952  for (regno = 32; regno <= 63; regno++)
6953    {
6954      if (regs_ever_live[regno] && !call_used_regs[regno])
6955	{
6956	  fp_reg_size += UNITS_PER_WORD;
6957	  fmask |= 1 << (regno - 32);
6958	}
6959    }
6960
6961  if (gmask || fmask)
6962    {
6963      int n;
6964      reg_offset = FIRST_PARM_OFFSET(0) + args_size;
6965      /* Ensure save area is 8 byte aligned if we need it.  */
6966      n = reg_offset % 8;
6967      if (need_aligned_p && n != 0)
6968	{
6969	  total_size += 8 - n;
6970	  reg_offset += 8 - n;
6971	}
6972      total_size += gp_reg_size + fp_reg_size;
6973    }
6974
6975  /* If we must allocate a stack frame at all, we must also allocate
6976     room for register window spillage, so as to be binary compatible
6977     with libraries and operating systems that do not use -mflat.  */
6978  if (total_size > 0)
6979    total_size += extra_size;
6980  else
6981    extra_size = 0;
6982
6983  total_size = SPARC_STACK_ALIGN (total_size);
6984
6985  /* Save other computed information.  */
6986  current_frame_info.total_size  = total_size;
6987  current_frame_info.var_size    = var_size;
6988  current_frame_info.args_size   = args_size;
6989  current_frame_info.extra_size  = extra_size;
6990  current_frame_info.gp_reg_size = gp_reg_size;
6991  current_frame_info.fp_reg_size = fp_reg_size;
6992  current_frame_info.gmask	 = gmask;
6993  current_frame_info.fmask	 = fmask;
6994  current_frame_info.reg_offset	 = reg_offset;
6995  current_frame_info.initialized = reload_completed;
6996
6997  /* Ok, we're done.  */
6998  return total_size;
6999}
7000
7001/* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
7002   OFFSET.
7003
7004   BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
7005   appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
7006   [BASE_REG+OFFSET] will always be a valid address.
7007
7008   WORD_OP is either "st" for save, "ld" for restore.
7009   DOUBLEWORD_OP is either "std" for save, "ldd" for restore.  */
7010
7011void
7012sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
7013			 doubleword_op, base_offset)
7014     FILE *file;
7015     const char *base_reg;
7016     unsigned int offset;
7017     unsigned long gmask;
7018     unsigned long fmask;
7019     const char *word_op;
7020     const char *doubleword_op;
7021     unsigned long base_offset;
7022{
7023  int regno;
7024
7025  if (gmask == 0 && fmask == 0)
7026    return;
7027
7028  /* Save registers starting from high to low.  We've already saved the
7029     previous frame pointer and previous return address for the debugger's
7030     sake.  The debugger allows us to not need a nop in the epilog if at least
7031     one register is reloaded in addition to return address.  */
7032
7033  if (gmask)
7034    {
7035      for (regno = 1; regno <= 31; regno++)
7036	{
7037	  if ((gmask & (1L << regno)) != 0)
7038	    {
7039	      if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
7040		{
7041		  /* We can save two registers in a row.  If we're not at a
7042		     double word boundary, move to one.
7043		     sparc_flat_compute_frame_size ensures there's room to do
7044		     this.  */
7045		  if (offset % 8 != 0)
7046		    offset += UNITS_PER_WORD;
7047
7048		  if (word_op[0] == 's')
7049		    {
7050		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
7051			       doubleword_op, reg_names[regno],
7052			       base_reg, offset);
7053		      if (dwarf2out_do_frame ())
7054			{
7055			  char *l = dwarf2out_cfi_label ();
7056			  dwarf2out_reg_save (l, regno, offset + base_offset);
7057			  dwarf2out_reg_save
7058			    (l, regno+1, offset+base_offset + UNITS_PER_WORD);
7059			}
7060		    }
7061		  else
7062		    fprintf (file, "\t%s\t[%s+%d], %s\n",
7063			     doubleword_op, base_reg, offset,
7064			     reg_names[regno]);
7065
7066		  offset += 2 * UNITS_PER_WORD;
7067		  regno++;
7068		}
7069	      else
7070		{
7071		  if (word_op[0] == 's')
7072		    {
7073		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
7074			       word_op, reg_names[regno],
7075			       base_reg, offset);
7076		      if (dwarf2out_do_frame ())
7077			dwarf2out_reg_save ("", regno, offset + base_offset);
7078		    }
7079		  else
7080		    fprintf (file, "\t%s\t[%s+%d], %s\n",
7081			     word_op, base_reg, offset, reg_names[regno]);
7082
7083		  offset += UNITS_PER_WORD;
7084		}
7085	    }
7086	}
7087    }
7088
7089  if (fmask)
7090    {
7091      for (regno = 32; regno <= 63; regno++)
7092	{
7093	  if ((fmask & (1L << (regno - 32))) != 0)
7094	    {
7095	      if (word_op[0] == 's')
7096		{
7097		  fprintf (file, "\t%s\t%s, [%s+%d]\n",
7098			   word_op, reg_names[regno],
7099			   base_reg, offset);
7100		  if (dwarf2out_do_frame ())
7101		    dwarf2out_reg_save ("", regno, offset + base_offset);
7102		}
7103	      else
7104		fprintf (file, "\t%s\t[%s+%d], %s\n",
7105			 word_op, base_reg, offset, reg_names[regno]);
7106
7107	      offset += UNITS_PER_WORD;
7108	    }
7109	}
7110    }
7111}
7112
/* Set up the stack and frame (if desired) for the function.

   Emits the -mflat prologue directly to FILE as assembly text.  SIZE is
   the pre-alignment frame size; the real layout comes from
   sparc_flat_compute_frame_size / current_frame_info.  Also emits dwarf2
   call-frame information when enabled.  */

static void
sparc_flat_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *sp_str = reg_names[STACK_POINTER_REGNUM];
  unsigned long gmask = current_frame_info.gmask;

  sparc_output_scratch_registers (file);

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
  fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
	   ASM_COMMENT_START,
	   current_frame_info.var_size,
	   current_frame_info.gp_reg_size / 4,
	   current_frame_info.fp_reg_size / 4,
	   current_function_outgoing_args_size,
	   current_frame_info.extra_size);

  /* Compute the frame layout, reusing a previously cached one if the
     frame info has already been initialized.  */
  size = SPARC_STACK_ALIGN (size);
  size = (! current_frame_info.initialized
	  ? sparc_flat_compute_frame_size (size)
	  : current_frame_info.total_size);

  /* These cases shouldn't happen.  Catch them now.  */
  if (size == 0 && (gmask || current_frame_info.fmask))
    abort ();

  /* Allocate our stack frame by decrementing %sp.
     At present, the only algorithm gdb can use to determine if this is a
     flat frame is if we always set %i7 if we set %sp.  This can be optimized
     in the future by putting in some sort of debugging information that says
     this is a `flat' function.  However, there is still the case of debugging
     code without such debugging information (including cases where most fns
     have such info, but there is one that doesn't).  So, always do this now
     so we don't get a lot of code out there that gdb can't handle.
     If the frame pointer isn't needed then that's ok - gdb won't be able to
     distinguish us from a non-flat function but there won't (and shouldn't)
     be any differences anyway.  The return pc is saved (if necessary) right
     after %i7 so gdb won't have to look too far to find it.  */
  if (size > 0)
    {
      unsigned int reg_offset = current_frame_info.reg_offset;
      const char *const fp_str = reg_names[HARD_FRAME_POINTER_REGNUM];
      static const char *const t1_str = "%g1";

      /* Things get a little tricky if local variables take up more than ~4096
	 bytes and outgoing arguments take up more than ~4096 bytes.  When that
	 happens, the register save area can't be accessed from either end of
	 the frame.  Handle this by decrementing %sp to the start of the gp
	 register save area, save the regs, update %i7, and then set %sp to its
	 final value.  Given that we only have one scratch register to play
	 with it is the cheapest solution, and it helps gdb out as it won't
	 slow down recognition of flat functions.
	 Don't change the order of insns emitted here without checking with
	 the gdb folk first.  */

      /* Is the entire register save area offsettable from %sp?  */
      if (reg_offset < 4096 - 64 * (unsigned) UNITS_PER_WORD)
	{
	  /* The 4096 threshold matches the range of the 13-bit signed
	     immediate field in the add/sub instructions emitted below.  */
	  if (size <= 4096)
	    {
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, (int) -size, sp_str);
	      if (gmask & HARD_FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   sp_str, (int) -size, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  else
	    {
	      /* SIZE doesn't fit in an immediate; build it in %g1 first.  */
	      fprintf (file, "\tset\t");
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, size);
	      fprintf (file, ", %s\n\tsub\t%s, %s, %s\n",
		       t1_str, sp_str, t1_str, sp_str);
	      if (gmask & HARD_FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   sp_str, t1_str, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  /* Emit dwarf2 CFI: where the frame pointer was saved, and what
	     the CFA is relative to.  */
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & HARD_FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, HARD_FRAME_POINTER_REGNUM,
				      reg_offset - 4 - size);
		  dwarf2out_def_cfa (l, HARD_FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
	    }
	  /* Save the return address right after %i7 (see gdb note above).  */
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
	      if (dwarf2out_do_frame ())
		dwarf2out_return_save ("", reg_offset - size);
	      reg_offset += 4;
	    }
	  /* Save the remaining registers; %i7 and the return address were
	     handled above, so mask them out here.  */
	  sparc_flat_save_restore (file, sp_str, reg_offset,
				   gmask & ~(HARD_FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size);
	}
      else
	{
	  /* Subtract %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to decrement %sp by, the first time.  */
	  unsigned HOST_WIDE_INT size1 = ((size - reg_offset + 64) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  unsigned HOST_WIDE_INT offset = size1 - (size - reg_offset);

	  if (size1 <= 4096)
	    {
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, (int) -size1, sp_str);
	      if (gmask & HARD_FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, (int) offset, sp_str, (int) -size1,
			   fp_str, ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  else
	    {
	      /* SIZE1 doesn't fit in an immediate; build it in %g1 first.  */
	      fprintf (file, "\tset\t");
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, size1);
	      fprintf (file, ", %s\n\tsub\t%s, %s, %s\n",
		       t1_str, sp_str, t1_str, sp_str);
	      if (gmask & HARD_FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, (int) offset, sp_str, t1_str,
			   fp_str, ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & HARD_FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, HARD_FRAME_POINTER_REGNUM,
				      offset - 4 - size1);
		  dwarf2out_def_cfa (l, HARD_FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, (int) offset);
	      if (dwarf2out_do_frame ())
		/* offset - size1 == reg_offset - size
		   if reg_offset were updated above like offset.  */
		dwarf2out_return_save ("", offset - size1);
	      offset += 4;
	    }
	  sparc_flat_save_restore (file, sp_str, offset,
				   gmask & ~(HARD_FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size1);
	  /* Second step: drop %sp the rest of the way to its final value.  */
	  fprintf (file, "\tset\t");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, size - size1);
	  fprintf (file, ", %s\n\tsub\t%s, %s, %s\n",
		   t1_str, sp_str, t1_str, sp_str);
	  if (dwarf2out_do_frame ())
	    if (! (gmask & HARD_FRAME_POINTER_MASK))
	      dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
	}
    }

  fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
}
7302
/* Do any necessary cleanup after a function to restore stack, frame,
   and regs.

   Emits the -mflat epilogue directly to FILE as assembly text.  SIZE is
   the pre-alignment frame size; the real layout comes from
   current_frame_info, which is reset at the end so the next function
   starts fresh.  */

static void
sparc_flat_function_epilogue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  rtx epilogue_delay = current_function_epilogue_delay_list;
  int noepilogue = FALSE;

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);

  /* The epilogue depends on no registers other than the stack pointer
     and frame pointer, so we assume that if we have 1 pending nop, it
     can be ignored, and 2 it must be filled (2 nops occur for integer
     multiply and divide).  */

  size = SPARC_STACK_ALIGN (size);
  size = (!current_frame_info.initialized
	   ? sparc_flat_compute_frame_size (size)
	   : current_frame_info.total_size);

  /* With no frame and no delay list, the return may already have been
     emitted; look for a BARRIER to find out.  */
  if (size == 0 && epilogue_delay == 0)
    {
      rtx insn = get_last_insn ();

      /* If the last insn was a BARRIER, we don't have to write any code
	 because a jump (aka return) was put there.  */
      if (GET_CODE (insn) == NOTE)
	insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
	noepilogue = TRUE;
    }

  if (!noepilogue)
    {
      unsigned HOST_WIDE_INT reg_offset = current_frame_info.reg_offset;
      unsigned HOST_WIDE_INT size1;
      const char *const sp_str = reg_names[STACK_POINTER_REGNUM];
      const char *const fp_str = reg_names[HARD_FRAME_POINTER_REGNUM];
      static const char *const t1_str = "%g1";

      /* In the reload sequence, we don't need to fill the load delay
	 slots for most of the loads, also see if we can fill the final
	 delay slot if not otherwise filled by the reload sequence.  */

      /* SIZE won't fit in a 13-bit immediate; stage it in %g1.  */
      if (size > 4095)
        {
	  fprintf (file, "\tset\t");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, size);
	  fprintf (file, ", %s\n", t1_str);
	}

      /* Recompute %sp from the frame pointer in case %sp was changed
	 (e.g. by alloca) during the function body.  */
      if (frame_pointer_needed)
	{
	  if (size > 4095)
	    fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
		     fp_str, t1_str, sp_str, ASM_COMMENT_START);
	  else
	    fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
		     fp_str, (int) size, sp_str, ASM_COMMENT_START);
	}

      /* Is the entire register save area offsettable from %sp?  */
      if (reg_offset < 4096 - 64 * (unsigned) UNITS_PER_WORD)
	{
	  size1 = 0;
	}
      else
	{
	  /* Restore %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to increment %sp by, the first time.  */
	  size1 = ((reg_offset - 64 - 16) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  reg_offset = size1 - reg_offset;

	  fprintf (file, "\tset\t");
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, size1);
	  fprintf (file, ", %s\n\tadd\t%s, %s, %s\n",
		   t1_str, sp_str, t1_str, sp_str);
	}

      /* We must restore the frame pointer and return address reg first
	 because they are treated specially by the prologue output code.  */
      if (current_frame_info.gmask & HARD_FRAME_POINTER_MASK)
	{
	  fprintf (file, "\tld\t[%s+%d], %s\n",
		   sp_str, (int) reg_offset, fp_str);
	  reg_offset += 4;
	}
      if (current_frame_info.gmask & RETURN_ADDR_MASK)
	{
	  fprintf (file, "\tld\t[%s+%d], %s\n",
		   sp_str, (int) reg_offset, reg_names[RETURN_ADDR_REGNUM]);
	  reg_offset += 4;
	}

      /* Restore any remaining saved registers.  */
      sparc_flat_save_restore (file, sp_str, reg_offset,
			       current_frame_info.gmask & ~(HARD_FRAME_POINTER_MASK | RETURN_ADDR_MASK),
			       current_frame_info.fmask,
			       "ld", "ldd", 0);

      /* If we had to increment %sp in two steps, record it so the second
	 restoration in the epilogue finishes up.  */
      if (size1 > 0)
	{
	  size -= size1;
	  if (size > 4095)
	    {
	      fprintf (file, "\tset\t");
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, size);
	      fprintf (file, ", %s\n", t1_str);
	    }
	}

      /* jmp %o7+12 skips over the caller's struct-return unimp insn;
	 see SKIP_CALLERS_UNIMP_P.  */
      if (current_function_returns_struct)
	fprintf (file, "\tjmp\t%%o7+12\n");
      else
	fprintf (file, "\tretl\n");

      /* If the only register saved is the return address, we need a
	 nop, unless we have an instruction to put into it.  Otherwise
	 we don't since reloading multiple registers doesn't reference
	 the register being loaded.  */

      /* Fill the return's delay slot: a scheduled insn, the final %sp
	 restore, or a nop.  */
      if (epilogue_delay)
	{
	  if (size)
	    abort ();
	  final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
	}

      else if (size > 4095)
	fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);

      else if (size > 0)
	fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, (int) size, sp_str);

      else
	fprintf (file, "\tnop\n");
    }

  /* Reset state info for each function.  */
  current_frame_info = zero_frame_info;

  sparc_output_deferred_case_vectors ();
}
7454
7455/* Define the number of delay slots needed for the function epilogue.
7456
7457   On the sparc, we need a slot if either no stack has been allocated,
7458   or the only register saved is the return register.  */
7459
7460int
7461sparc_flat_epilogue_delay_slots ()
7462{
7463  if (!current_frame_info.initialized)
7464    (void) sparc_flat_compute_frame_size (get_frame_size ());
7465
7466  if (current_frame_info.total_size == 0)
7467    return 1;
7468
7469  return 0;
7470}
7471
7472/* Return true if TRIAL is a valid insn for the epilogue delay slot.
7473   Any single length instruction which doesn't reference the stack or frame
7474   pointer is OK.  */
7475
7476int
7477sparc_flat_eligible_for_epilogue_delay (trial, slot)
7478     rtx trial;
7479     int slot ATTRIBUTE_UNUSED;
7480{
7481  rtx pat = PATTERN (trial);
7482
7483  if (get_attr_length (trial) != 1)
7484    return 0;
7485
7486  if (! reg_mentioned_p (stack_pointer_rtx, pat)
7487      && ! reg_mentioned_p (frame_pointer_rtx, pat))
7488    return 1;
7489
7490  return 0;
7491}
7492
7493/* Adjust the cost of a scheduling dependency.  Return the new cost of
7494   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7495
7496static int
7497supersparc_adjust_cost (insn, link, dep_insn, cost)
7498     rtx insn;
7499     rtx link;
7500     rtx dep_insn;
7501     int cost;
7502{
7503  enum attr_type insn_type;
7504
7505  if (! recog_memoized (insn))
7506    return 0;
7507
7508  insn_type = get_attr_type (insn);
7509
7510  if (REG_NOTE_KIND (link) == 0)
7511    {
7512      /* Data dependency; DEP_INSN writes a register that INSN reads some
7513	 cycles later.  */
7514
7515      /* if a load, then the dependence must be on the memory address;
7516	 add an extra "cycle".  Note that the cost could be two cycles
7517	 if the reg was written late in an instruction group; we ca not tell
7518	 here.  */
7519      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7520	return cost + 3;
7521
7522      /* Get the delay only if the address of the store is the dependence.  */
7523      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7524	{
7525	  rtx pat = PATTERN(insn);
7526	  rtx dep_pat = PATTERN (dep_insn);
7527
7528	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7529	    return cost;  /* This should not happen!  */
7530
7531	  /* The dependency between the two instructions was on the data that
7532	     is being stored.  Assume that this implies that the address of the
7533	     store is not dependent.  */
7534	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7535	    return cost;
7536
7537	  return cost + 3;  /* An approximation.  */
7538	}
7539
7540      /* A shift instruction cannot receive its data from an instruction
7541	 in the same cycle; add a one cycle penalty.  */
7542      if (insn_type == TYPE_SHIFT)
7543	return cost + 3;   /* Split before cascade into shift.  */
7544    }
7545  else
7546    {
7547      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7548	 INSN writes some cycles later.  */
7549
7550      /* These are only significant for the fpu unit; writing a fp reg before
7551         the fpu has finished with it stalls the processor.  */
7552
7553      /* Reusing an integer register causes no problems.  */
7554      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7555	return 0;
7556    }
7557
7558  return cost;
7559}
7560
/* Adjust the cost COST of the dependency LINK between DEP_INSN and INSN
   for the HyperSPARC / SPARClite86x pipelines.  Returns the new cost.  */

static int
hypersparc_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link;
     rtx dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave the cost alone for insns we cannot classify.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  /* Dependence on the stored data, not the address: no penalty.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      /* Load after store to the same address: full penalty.  */
	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
	break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
        return 0;
      break;

    default:
      break;
    }

  return cost;
}
7641
/* Adjust the cost COST of the dependency LINK between DEP_INSN and INSN
   for the UltraSPARC pipeline.  Returns the new cost.  */

static int
ultrasparc_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link;
     rtx dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave the cost alone for insns we cannot classify.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  /* Nothing issues in parallel with integer multiplies, so
     mark as zero cost since the scheduler can not do anything
     about it.  */
  if (insn_type == TYPE_IMUL || insn_type == TYPE_IDIV)
    return 0;

#define SLOW_FP(dep_type) \
(dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
 dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      if (dep_type == TYPE_CMOVE)
	{
	  /* Instructions that read the result of conditional moves cannot
	     be in the same group or the following group.  */
	  return cost + 1;
	}

      switch (insn_type)
	{
	  /* UltraSPARC can dual issue a store and an instruction setting
	     the value stored, except for divide and square root.  */
	case TYPE_FPSTORE:
	  if (! SLOW_FP (dep_type))
	    return 0;
	  return cost;

	case TYPE_STORE:
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    /* The dependency between the two instructions is on the data
	       that is being stored.  Assume that the address of the store
	       is not also dependent.  */
	    return 0;
	  return cost;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* A load does not return data until at least 11 cycles after
	     a store to the same location.  3 cycles are accounted for
	     in the load latency; add the other 8 here.  */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      /* If the addresses are not equal this may be a false
		 dependency because pointer aliasing could not be
		 determined.  Add only 2 cycles in that case.  2 is
		 an arbitrary compromise between 8, which would cause
		 the scheduler to generate worse code elsewhere to
		 compensate for a dependency which might not really
		 exist, and 0.  */
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
				    XEXP (SET_DEST (dep_pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  return cost;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  return cost;

	case TYPE_FPCMOVE:
	  /* FMOVR class instructions can not issue in the same cycle
	     or the cycle after an instruction which writes any
	     integer register.  Model this as cost 2 for dependent
	     instructions.  */
	  if (dep_type == TYPE_IALU
	      && cost < 2)
	    return 2;
	  /* Otherwise check as for integer conditional moves.  */
	  /* Fall through to TYPE_CMOVE -- intentional.  */

	case TYPE_CMOVE:
	  /* Conditional moves involving integer registers wait until
	     3 cycles after loads return data.  The interlock applies
	     to all loads, not just dependent loads, but that is hard
	     to model.  */
	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
	    return cost + 3;
	  return cost;

	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Divide and square root lock destination registers for full latency.  */
      if (! SLOW_FP (dep_type))
	return 0;
      break;

    case REG_DEP_OUTPUT:
      /* IEU and FPU instruction that have the same destination
	 register cannot be grouped together.  */
      return cost + 1;

    default:
      break;
    }

  /* Other costs not accounted for:
     - Single precision floating point loads lock the other half of
       the even/odd register pair.
     - Several hazards associated with ldd/std are ignored because these
       instructions are rarely generated for V9.
     - The floating point pipeline can not have both a single and double
       precision operation active at the same time.  Format conversions
       and graphics instructions are given honorary double precision status.
     - call and jmpl are always the first instruction in a group.  */

  return cost;

#undef SLOW_FP
}
7792
7793static int
7794sparc_adjust_cost(insn, link, dep, cost)
7795     rtx insn;
7796     rtx link;
7797     rtx dep;
7798     int cost;
7799{
7800  switch (sparc_cpu)
7801    {
7802    case PROCESSOR_SUPERSPARC:
7803      cost = supersparc_adjust_cost (insn, link, dep, cost);
7804      break;
7805    case PROCESSOR_HYPERSPARC:
7806    case PROCESSOR_SPARCLITE86X:
7807      cost = hypersparc_adjust_cost (insn, link, dep, cost);
7808      break;
7809    case PROCESSOR_ULTRASPARC:
7810      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
7811      break;
7812    default:
7813      break;
7814    }
7815  return cost;
7816}
7817
/* This describes the state of the UltraSPARC pipeline during
   instruction scheduling.  */

/* TMASK builds a bit mask from an insn `type' attribute value; UMASK
   does the same for a function unit code.  The expansions are identical,
   the two names just document intent.  */
#define TMASK(__x)	((unsigned)1 << ((int)(__x)))
#define UMASK(__x)	((unsigned)1 << ((int)(__x)))

enum ultra_code { NONE=0, /* no insn at all				*/
		  IEU0,   /* shifts and conditional moves		*/
		  IEU1,   /* condition code setting insns, calls+jumps	*/
		  IEUN,   /* all other single cycle ieu insns		*/
		  LSU,    /* loads and stores				*/
		  CTI,    /* branches					*/
		  FPM,    /* FPU pipeline 1, multiplies and divides	*/
		  FPA,    /* FPU pipeline 2, all other operations	*/
		  SINGLE, /* single issue instructions			*/
		  NUM_ULTRA_CODES };

static enum ultra_code ultra_code_from_mask PARAMS ((int));
static void ultra_schedule_insn PARAMS ((rtx *, rtx *, int, enum ultra_code));

/* Printable names for the codes above, indexed by enum ultra_code;
   keep in sync with the enum.  */
static const char *const ultra_code_names[NUM_ULTRA_CODES] = {
  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
  "FPM", "FPA", "SINGLE" };

struct ultrasparc_pipeline_state {
  /* The insns in this group.  */
  rtx group[4];

  /* The code for each insn.  */
  enum ultra_code codes[4];

  /* Which insns in this group have been committed by the
     scheduler.  This is how we determine how many more
     can issue this cycle.  */
  char commit[4];

  /* How many insns in this group.  */
  char group_size;

  /* Mask of free slots still in this group.  */
  char free_slot_mask;

  /* The slotter uses the following to determine what other
     insn types can still make their way into this group.  */
  char contents [NUM_ULTRA_CODES];
  char num_ieu_insns;
};

/* Ring buffer of the last ULTRA_NUM_HIST issue groups (a power of two
   so indices can wrap with a mask), the index of the current group,
   and the number of cycles simulated so far.  */
#define ULTRA_NUM_HIST	8
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
static int ultra_cur_hist;
static int ultra_cycles_elapsed;

/* The pipeline state for the group currently being filled.  */
#define ultra_pipe	(ultra_pipe_hist[ultra_cur_hist])
7873/* Given TYPE_MASK compute the ultra_code it has.  */
7874static enum ultra_code
7875ultra_code_from_mask (type_mask)
7876     int type_mask;
7877{
7878  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
7879    return IEU0;
7880  else if (type_mask & (TMASK (TYPE_COMPARE) |
7881			TMASK (TYPE_CALL) |
7882			TMASK (TYPE_SIBCALL) |
7883			TMASK (TYPE_UNCOND_BRANCH)))
7884    return IEU1;
7885  else if (type_mask & TMASK (TYPE_IALU))
7886    return IEUN;
7887  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
7888			TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
7889			TMASK (TYPE_FPSTORE)))
7890    return LSU;
7891  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
7892			TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
7893			TMASK (TYPE_FPSQRTD)))
7894    return FPM;
7895  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
7896			TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
7897    return FPA;
7898  else if (type_mask & TMASK (TYPE_BRANCH))
7899    return CTI;
7900
7901  return SINGLE;
7902}
7903
7904/* Check INSN (a conditional move) and make sure that it's
7905   results are available at this cycle.  Return 1 if the
7906   results are in fact ready.  */
7907static int
7908ultra_cmove_results_ready_p (insn)
7909     rtx insn;
7910{
7911  struct ultrasparc_pipeline_state *up;
7912  int entry, slot;
7913
7914  /* If this got dispatched in the previous
7915     group, the results are not ready.  */
7916  entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
7917  up = &ultra_pipe_hist[entry];
7918  slot = 4;
7919  while (--slot >= 0)
7920    if (up->group[slot] == insn)
7921      return 0;
7922
7923  return 1;
7924}
7925
7926/* Walk backwards in pipeline history looking for FPU
7927   operations which use a mode different than FPMODE and
7928   will create a stall if an insn using FPMODE were to be
7929   dispatched this cycle.  */
7930static int
7931ultra_fpmode_conflict_exists (fpmode)
7932     enum machine_mode fpmode;
7933{
7934  int hist_ent;
7935  int hist_lim;
7936
7937  hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
7938  if (ultra_cycles_elapsed < 4)
7939    hist_lim = ultra_cycles_elapsed;
7940  else
7941    hist_lim = 4;
7942  while (hist_lim > 0)
7943    {
7944      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
7945      int slot = 4;
7946
7947      while (--slot >= 0)
7948	{
7949	  rtx insn = up->group[slot];
7950	  enum machine_mode this_mode;
7951	  rtx pat;
7952
7953	  if (! insn
7954	      || GET_CODE (insn) != INSN
7955	      || (pat = PATTERN (insn)) == 0
7956	      || GET_CODE (pat) != SET)
7957	    continue;
7958
7959	  this_mode = GET_MODE (SET_DEST (pat));
7960	  if ((this_mode != SFmode
7961	       && this_mode != DFmode)
7962	      || this_mode == fpmode)
7963	    continue;
7964
7965	  /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
7966	     we will get a stall.  Loads and stores are independent
7967	     of these rules.  */
7968	  if (GET_CODE (SET_SRC (pat)) != ABS
7969	      && GET_CODE (SET_SRC (pat)) != NEG
7970	      && ((TMASK (get_attr_type (insn)) &
7971		   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
7972		    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
7973		    TMASK (TYPE_FPSQRTD) |
7974                    TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
7975	    return 1;
7976	}
7977      hist_lim--;
7978      hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
7979    }
7980
7981  /* No conflicts, safe to dispatch.  */
7982  return 0;
7983}
7984
/* Find an instruction in LIST which has one of the
   type attributes enumerated in TYPE_MASK.  START
   says where to begin the search.

   Returns a pointer into LIST, or zero if no suitable insn exists.

   NOTE: This scheme depends upon the fact that we
         have less than 32 distinct type attributes.  */

/* Bit mask of the type attributes present in the current ready list;
   maintained elsewhere (see ultra_build_types_avail) so searches can
   bail out early.  */
static int ultra_types_avail;

static rtx *
ultra_find_type (type_mask, list, start)
     int type_mask;
     rtx *list;
     int start;
{
  int i;

  /* Short circuit if no such insn exists in the ready
     at the moment.  */
  if ((type_mask & ultra_types_avail) == 0)
    return 0;

  /* Scan from START downward, so later entries are preferred.  */
  for (i = start; i >= 0; i--)
    {
      rtx insn = list[i];

      if (recog_memoized (insn) >= 0
	  && (TMASK(get_attr_type (insn)) & type_mask))
	{
	  enum machine_mode fpmode = SFmode;
	  rtx pat = 0;
	  int slot;
	  int check_depend = 0;
	  int check_fpmode_conflict = 0;

	  /* For simple SET insns (but not stores) decide whether we must
	     also check register dependencies and FP precision conflicts
	     against the insns already slotted in this group.  */
	  if (GET_CODE (insn) == INSN
	      && (pat = PATTERN(insn)) != 0
	      && GET_CODE (pat) == SET
	      && !(type_mask & (TMASK (TYPE_STORE) |
				TMASK (TYPE_FPSTORE))))
	    {
	      check_depend = 1;
	      if (GET_MODE (SET_DEST (pat)) == SFmode
		  || GET_MODE (SET_DEST (pat)) == DFmode)
		{
		  fpmode = GET_MODE (SET_DEST (pat));
		  check_fpmode_conflict = 1;
		}
	    }

	  slot = 4;
	  while(--slot >= 0)
	    {
	      rtx slot_insn = ultra_pipe.group[slot];
	      rtx slot_pat;

	      /* Already issued, bad dependency, or FPU
		 mode conflict.  */
	      if (slot_insn != 0
		  && (slot_pat = PATTERN (slot_insn)) != 0
		  && ((insn == slot_insn)
		      || (check_depend == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && ((GET_CODE (SET_DEST (slot_pat)) == REG
			       && GET_CODE (SET_SRC (pat)) == REG
			       && REGNO (SET_DEST (slot_pat)) ==
			            REGNO (SET_SRC (pat)))
			      || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
				  && GET_CODE (SET_SRC (pat)) == SUBREG
				  && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
				       REGNO (SUBREG_REG (SET_SRC (pat)))
				  && SUBREG_BYTE (SET_DEST (slot_pat)) ==
				       SUBREG_BYTE (SET_SRC (pat)))))
		      || (check_fpmode_conflict == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && (GET_MODE (SET_DEST (slot_pat)) == SFmode
			      || GET_MODE (SET_DEST (slot_pat)) == DFmode)
			  && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
		goto next;
	    }

	  /* Check for peculiar result availability and dispatch
	     interference situations.  */
	  if (pat != 0
	      && ultra_cycles_elapsed > 0)
	    {
	      rtx link;

	      /* An insn reading a cmove result dispatched last cycle
		 cannot go in this group yet.  */
	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
		{
		  rtx link_insn = XEXP (link, 0);
		  if (GET_CODE (link_insn) == INSN
		      && recog_memoized (link_insn) >= 0
		      && (TMASK (get_attr_type (link_insn)) &
			  (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
		      && ! ultra_cmove_results_ready_p (link_insn))
		    goto next;
		}

	      if (check_fpmode_conflict
		  && ultra_fpmode_conflict_exists (fpmode))
		goto next;
	    }

	  return &list[i];
	}
    next:
      ;
    }
  return 0;
}
8098
8099static void
8100ultra_build_types_avail (ready, n_ready)
8101  rtx *ready;
8102  int n_ready;
8103{
8104  int i = n_ready - 1;
8105
8106  ultra_types_avail = 0;
8107  while(i >= 0)
8108    {
8109      rtx insn = ready[i];
8110
8111      if (recog_memoized (insn) >= 0)
8112	ultra_types_avail |= TMASK (get_attr_type (insn));
8113
8114      i -= 1;
8115    }
8116}
8117
/* Place the insn pointed to by IP into the pipeline.
   Make element THIS of READY be that insn if it
   is not already.  TYPE indicates the pipeline class
   this insn falls into.  */
8122static void
8123ultra_schedule_insn (ip, ready, this, type)
8124     rtx *ip;
8125     rtx *ready;
8126     int this;
8127     enum ultra_code type;
8128{
8129  int pipe_slot;
8130  char mask = ultra_pipe.free_slot_mask;
8131  rtx temp;
8132
8133  /* Obtain free slot.  */
8134  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
8135    if ((mask & (1 << pipe_slot)) != 0)
8136      break;
8137  if (pipe_slot == 4)
8138    abort ();
8139
8140  /* In it goes, and it hasn't been committed yet.  */
8141  ultra_pipe.group[pipe_slot] = *ip;
8142  ultra_pipe.codes[pipe_slot] = type;
8143  ultra_pipe.contents[type] = 1;
8144  if (UMASK (type) &
8145      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
8146    ultra_pipe.num_ieu_insns += 1;
8147
8148  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
8149  ultra_pipe.group_size += 1;
8150  ultra_pipe.commit[pipe_slot] = 0;
8151
8152  /* Update ready list.  */
8153  temp = *ip;
8154  while (ip != &ready[this])
8155    {
8156      ip[0] = ip[1];
8157      ++ip;
8158    }
8159  *ip = temp;
8160}
8161
8162/* Advance to the next pipeline group.  */
8163static void
8164ultra_flush_pipeline ()
8165{
8166  ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
8167  ultra_cycles_elapsed += 1;
8168  memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
8169  ultra_pipe.free_slot_mask = 0xf;
8170}
8171
8172/* Init our data structures for this current block.  */
8173static void
8174ultrasparc_sched_init ()
8175{
8176  memset ((char *) ultra_pipe_hist, 0, sizeof ultra_pipe_hist);
8177  ultra_cur_hist = 0;
8178  ultra_cycles_elapsed = 0;
8179  ultra_pipe.free_slot_mask = 0xf;
8180}
8181
8182static void
8183sparc_sched_init (dump, sched_verbose, max_ready)
8184     FILE *dump ATTRIBUTE_UNUSED;
8185     int sched_verbose ATTRIBUTE_UNUSED;
8186     int max_ready ATTRIBUTE_UNUSED;
8187{
8188  if (sparc_cpu == PROCESSOR_ULTRASPARC)
8189    ultrasparc_sched_init ();
8190}
8191
8192/* INSN has been scheduled, update pipeline commit state
8193   and return how many instructions are still to be
8194   scheduled in this group.  */
8195static int
8196ultrasparc_variable_issue (insn)
8197     rtx insn;
8198{
8199  struct ultrasparc_pipeline_state *up = &ultra_pipe;
8200  int i, left_to_fire;
8201
8202  left_to_fire = 0;
8203  for (i = 0; i < 4; i++)
8204    {
8205      if (up->group[i] == 0)
8206	continue;
8207
8208      if (up->group[i] == insn)
8209	{
8210	  up->commit[i] = 1;
8211	}
8212      else if (! up->commit[i])
8213	left_to_fire++;
8214    }
8215
8216  return left_to_fire;
8217}
8218
8219static int
8220sparc_variable_issue (dump, sched_verbose, insn, cim)
8221     FILE *dump ATTRIBUTE_UNUSED;
8222     int sched_verbose ATTRIBUTE_UNUSED;
8223     rtx insn;
8224     int cim;
8225{
8226  if (sparc_cpu == PROCESSOR_ULTRASPARC)
8227    return ultrasparc_variable_issue (insn);
8228  else
8229    return cim - 1;
8230}
8231
8232/* In actual_hazard_this_instance, we may have yanked some
8233   instructions from the ready list due to conflict cost
8234   adjustments.  If so, and such an insn was in our pipeline
8235   group, remove it and update state.  */
8236static void
8237ultra_rescan_pipeline_state (ready, n_ready)
8238     rtx *ready;
8239     int n_ready;
8240{
8241  struct ultrasparc_pipeline_state *up = &ultra_pipe;
8242  int i;
8243
8244  for (i = 0; i < 4; i++)
8245    {
8246      rtx insn = up->group[i];
8247      int j;
8248
8249      if (! insn)
8250	continue;
8251
8252      /* If it has been committed, then it was removed from
8253	 the ready list because it was actually scheduled,
8254	 and that is not the case we are searching for here.  */
8255      if (up->commit[i] != 0)
8256	continue;
8257
8258      for (j = n_ready - 1; j >= 0; j--)
8259	if (ready[j] == insn)
8260	  break;
8261
8262      /* If we didn't find it, toss it.  */
8263      if (j < 0)
8264	{
8265	  enum ultra_code ucode = up->codes[i];
8266
8267	  up->group[i] = 0;
8268	  up->codes[i] = NONE;
8269	  up->contents[ucode] = 0;
8270	  if (UMASK (ucode) &
8271	      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
8272	    up->num_ieu_insns -= 1;
8273
8274	  up->free_slot_mask |= (1 << i);
8275	  up->group_size -= 1;
8276	  up->commit[i] = 0;
8277	}
8278    }
8279}
8280
/* Scheduler hook: fill the simulated UltraSPARC dispatch group from
   the ready list READY (N_READY entries), moving each insn chosen
   for dispatch to the tail end of READY via ultra_schedule_insn.
   DUMP and SCHED_VERBOSE control debug output.  */
static void
ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
     FILE *dump;
     int sched_verbose;
     rtx *ready;
     int n_ready;
{
  struct ultrasparc_pipeline_state *up = &ultra_pipe;
  int i, this_insn;

  if (sched_verbose)
    {
      int n;

      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
      for (n = n_ready - 1; n >= 0; n--)
	{
	  rtx insn = ready[n];
	  enum ultra_code ucode;

	  if (recog_memoized (insn) < 0)
	    continue;
	  ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
	  if (n != 0)
	    fprintf (dump, "%s(%d) ",
		     ultra_code_names[ucode],
		     INSN_UID (insn));
	  else
	    fprintf (dump, "%s(%d)",
		     ultra_code_names[ucode],
		     INSN_UID (insn));
	}
      fprintf (dump, "]\n");
    }

  this_insn = n_ready - 1;

  /* Skip over junk we don't understand.  */
  while ((this_insn >= 0)
	 && recog_memoized (ready[this_insn]) < 0)
    this_insn--;

  ultra_build_types_avail (ready, this_insn + 1);

  while (this_insn >= 0) {
    int old_group_size = up->group_size;

    if (up->group_size != 0)
      {
	int num_committed;

	num_committed = (up->commit[0] + up->commit[1] +
			 up->commit[2] + up->commit[3]);
	/* If nothing has been committed from our group, or all of
	   them have.  Clear out the (current cycle's) pipeline
	   state and start afresh.  */
	if (num_committed == 0
	    || num_committed == up->group_size)
	  {
	    ultra_flush_pipeline ();
	    up = &ultra_pipe;
	    old_group_size = 0;
	  }
	else
	  {
	    /* OK, some ready list insns got requeued and thus removed
	       from the ready list.  Account for this fact.  */
	    ultra_rescan_pipeline_state (ready, n_ready);

	    /* Something "changed", make this look like a newly
	       formed group so the code at the end of the loop
	       knows that progress was in fact made.  */
	    if (up->group_size != old_group_size)
	      old_group_size = 0;
	  }
      }

    if (up->group_size == 0)
      {
	/* If the pipeline is (still) empty and we have any single
	   group insns, get them out now as this is a good time.  */
	rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_IDIV) |
				    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
				    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
				   ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, SINGLE);
	    break;
	  }

	/* If we are not in the process of emptying out the pipe, try to
	   obtain an instruction which must be the first in its group.  */
	ip = ultra_find_type ((TMASK (TYPE_CALL) |
			       TMASK (TYPE_SIBCALL) |
			       TMASK (TYPE_CALL_NO_DELAY_SLOT) |
			       TMASK (TYPE_UNCOND_BRANCH)),
			      ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, IEU1);
	    this_insn--;
	  }
	else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
					 TMASK (TYPE_FPDIVD) |
					 TMASK (TYPE_FPSQRTS) |
					 TMASK (TYPE_FPSQRTD)),
					ready, this_insn)) != 0)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPM);
	    this_insn--;
	  }
      }

    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
       operation.  We can't do more IEU operations if the first 3 slots are
       all full or we have dispatched two IEU insns already.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2
	&& up->contents[IEU0] == 0
	&& up->contents[IEUN] == 0)
      {
	rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, IEU0);
	    this_insn--;
	  }
      }

    /* If we can, try to find an IEU1 specific or an unnamed
       IEU instruction.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) |
				    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
				   ready, this_insn);
	if (ip)
	  {
	    rtx insn = *ip;

	    /* A compare goes to IEU1 when that unit is still free;
	       anything else is an unnamed IEU insn.  */
	    ultra_schedule_insn (ip, ready, this_insn,
				 (!up->contents[IEU1]
				  && get_attr_type (insn) == TYPE_COMPARE)
				 ? IEU1 : IEUN);
	    this_insn--;
	  }
      }

    /* If only one IEU insn has been found, try to find another unnamed
       IEU operation or an IEU1 specific one.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2)
      {
	rtx *ip;
	int tmask = TMASK (TYPE_IALU);

	if (!up->contents[IEU1])
	  tmask |= TMASK (TYPE_COMPARE);
	ip = ultra_find_type (tmask, ready, this_insn);
	if (ip)
	  {
	    rtx insn = *ip;

	    ultra_schedule_insn (ip, ready, this_insn,
				 (!up->contents[IEU1]
				  && get_attr_type (insn) == TYPE_COMPARE)
				 ? IEU1 : IEUN);
	    this_insn--;
	  }
      }

    /* Try for a load or store, but such an insn can only be issued
       if it is within one of the first 3 slots.  */
    if ((up->free_slot_mask & 0x7) != 0
        && up->contents[LSU] == 0)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
				   TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
				   TMASK (TYPE_FPSTORE)), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, LSU);
	    this_insn--;
	  }
      }

    /* Now find FPU operations, first FPM class.  But not divisions or
       square-roots because those will break the group up.  Unlike all
       the previous types, these can go in any slot.  */
    if (up->free_slot_mask != 0
	&& up->contents[FPM] == 0)
      {
	rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPM);
	    this_insn--;
	  }
      }

    /* Continue on with FPA class if we have not filled the group already.  */
    if (up->free_slot_mask != 0
	&& up->contents[FPA] == 0)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
				    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
				   ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPA);
	    this_insn--;
	  }
      }

    /* Finally, maybe stick a branch in here.  */
    if (up->free_slot_mask != 0
	&& up->contents[CTI] == 0)
      {
	rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);

	/* Try to slip in a branch only if it is one of the
	   next 2 in the ready list.  */
	if (ip && ((&ready[this_insn] - ip) < 2))
	  {
	    ultra_schedule_insn (ip, ready, this_insn, CTI);
	    this_insn--;
	  }
      }

    /* Recount the group size from the free-slot mask.  */
    up->group_size = 0;
    for (i = 0; i < 4; i++)
      if ((up->free_slot_mask & (1 << i)) == 0)
	up->group_size++;

    /* See if we made any progress...  */
    if (old_group_size != up->group_size)
      break;

    /* Clean out the (current cycle's) pipeline state
       and try once more.  If we placed no instructions
       into the pipeline at all, it means a real hard
       conflict exists with some earlier issued instruction
       so we must advance to the next cycle to clear it up.  */
    if (up->group_size == 0)
      {
	ultra_flush_pipeline ();
	up = &ultra_pipe;
      }
    else
      {
	memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
	ultra_pipe.free_slot_mask = 0xf;
      }
  }

  if (sched_verbose)
    {
      int n, gsize;

      fprintf (dump, ";;\tUltraSPARC Launched   [");
      gsize = up->group_size;
      for (n = 0; n < 4; n++)
	{
	  rtx insn = up->group[n];

	  if (! insn)
	    continue;

	  gsize -= 1;
	  if (gsize != 0)
	    fprintf (dump, "%s(%d) ",
		     ultra_code_names[up->codes[n]],
		     INSN_UID (insn));
	  else
	    fprintf (dump, "%s(%d)",
		     ultra_code_names[up->codes[n]],
		     INSN_UID (insn));
	}
      fprintf (dump, "]\n");
    }
}
8564
8565static int
8566sparc_sched_reorder (dump, sched_verbose, ready, n_readyp, clock)
8567     FILE *dump;
8568     int sched_verbose;
8569     rtx *ready;
8570     int *n_readyp;
8571     int clock ATTRIBUTE_UNUSED;
8572{
8573  if (sparc_cpu == PROCESSOR_ULTRASPARC)
8574    ultrasparc_sched_reorder (dump, sched_verbose, ready, *n_readyp);
8575  return sparc_issue_rate ();
8576}
8577
8578static int
8579sparc_issue_rate ()
8580{
8581  switch (sparc_cpu)
8582    {
8583    default:
8584      return 1;
8585    case PROCESSOR_V9:
8586      /* Assume V9 processors are capable of at least dual-issue.  */
8587      return 2;
8588    case PROCESSOR_SUPERSPARC:
8589      return 3;
8590    case PROCESSOR_HYPERSPARC:
8591    case PROCESSOR_SPARCLITE86X:
8592      return 2;
8593    case PROCESSOR_ULTRASPARC:
8594      return 4;
8595    }
8596}
8597
/* Examine INSN, known to be a single SET, and classify the state of
   the high bits of its destination: return 1 if they are known to be
   zero, -1 if the value is known sign-extended from SImode, and 0 if
   nothing is known.  Used by sparc_check_64.  */
static int
set_extends (insn)
     rtx insn;
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* AND with a non-negative constant forces the high bits clear;
	   otherwise the result is zero-extended if either register
	   operand is known zero-extended.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* IOR/XOR preserve zero-extension only when both operands are
	   known zero-extended (a negative constant would set high bits).  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case ASHIFT:
    case LSHIFTRT:
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_DOUBLE:
      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
    case CONST_INT:
      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* -1 (sign extended) for SImode, 0 (unknown) otherwise.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
8662
8663/* We _ought_ to have only one kind per function, but...  */
8664static rtx sparc_addr_diff_list;
8665static rtx sparc_addr_list;
8666
8667void
8668sparc_defer_case_vector (lab, vec, diff)
8669     rtx lab, vec;
8670     int diff;
8671{
8672  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
8673  if (diff)
8674    sparc_addr_diff_list
8675      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
8676  else
8677    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
8678}
8679
/* Output one deferred absolute-address jump table.  VEC is the
   (label . ADDR_VEC body) pair built by sparc_defer_case_vector.  */
static void
sparc_output_addr_vec (vec)
     rtx vec;
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's own label.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One absolute-address entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
8708
/* Output one deferred relative-address jump table.  VEC is the
   (label . ADDR_DIFF_VEC body) pair built by sparc_defer_case_vector;
   each entry is emitted relative to the table's base label.  */
static void
sparc_output_addr_diff_vec (vec)
     rtx vec;
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's own label.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One base-relative entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
8741
8742static void
8743sparc_output_deferred_case_vectors ()
8744{
8745  rtx t;
8746  int align;
8747
8748  if (sparc_addr_list == NULL_RTX
8749      && sparc_addr_diff_list == NULL_RTX)
8750    return;
8751
8752  /* Align to cache line in the function's code section.  */
8753  function_section (current_function_decl);
8754
8755  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
8756  if (align > 0)
8757    ASM_OUTPUT_ALIGN (asm_out_file, align);
8758
8759  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
8760    sparc_output_addr_vec (XEXP (t, 0));
8761  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
8762    sparc_output_addr_diff_vec (XEXP (t, 0));
8763
8764  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
8765}
8766
8767/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
8768   unknown.  Return 1 if the high bits are zero, -1 if the register is
8769   sign extended.  */
int
sparc_check_64 (x, insn)
     rtx x, insn;
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  if (GET_CODE (x) != REG)
    abort ();

  /* For a DImode register, also track its SImode low-word half.  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  if (flag_expensive_optimizations
      && REG_N_SETS (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Scan backwards from INSN for the insn that sets X (or Y).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* A label, call, or unrecognized insn ends the walk unless
	     the register is known to have exactly one set.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    /* Found the defining set: classify it.  */
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap clobbers what we know.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
8828
/* Output assembly for a v8plus 64-bit shift whose base operation is
   OPCODE; the returned template performs the shift and splits the
   64-bit result back into a high/low register pair.
   NOTE(review): operand roles appear to be 0 = destination,
   1 = 64-bit source, 2 = shift count, 3 = scratch register --
   confirm against the sparc.md patterns that use this routine.  */
char *
sparc_v8plus_shift (operands, insn, opcode)
     rtx *operands;
     rtx insn;
     const char *opcode;
{
  static char asm_code[60];

  if (GET_CODE (operands[3]) == SCRATCH)
    operands[3] = operands[0];
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      /* Assemble the full 64-bit source in the scratch register:
	 high word shifted up 32, low word (cleared of any stray high
	 bits unless known zero-extended) or'd in.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  /* Build the final template: OPCODE followed by the result split.  */
  strcpy(asm_code, opcode);
  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
8857
8858/* Output rtl to increment the profiler label LABELNO
8859   for profiling a function entry.  */
8860
8861void
8862sparc_profile_hook (labelno)
8863     int labelno;
8864{
8865  char buf[32];
8866  rtx lab, fun;
8867
8868  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8869  lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
8870  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
8871
8872  emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
8873}
8874
/* Mark ARG, which is really a struct ultrasparc_pipeline_state *, for
   GC.  */
8877
8878static void
8879mark_ultrasparc_pipeline_state (arg)
8880     void *arg;
8881{
8882  struct ultrasparc_pipeline_state *ups;
8883  size_t i;
8884
8885  ups = (struct ultrasparc_pipeline_state *) arg;
8886  for (i = 0; i < sizeof (ups->group) / sizeof (rtx); ++i)
8887    ggc_mark_rtx (ups->group[i]);
8888}
8889
8890/* Called to register all of our global variables with the garbage
8891   collector.  */
8892
static void
sparc_add_gc_roots ()
{
  /* Single-rtx roots.  */
  ggc_add_rtx_root (&sparc_compare_op0, 1);
  ggc_add_rtx_root (&sparc_compare_op1, 1);
  ggc_add_rtx_root (&global_offset_table, 1);
  ggc_add_rtx_root (&get_pc_symbol, 1);
  ggc_add_rtx_root (&sparc_addr_diff_list, 1);
  ggc_add_rtx_root (&sparc_addr_list, 1);
  /* The pipeline history array holds rtx's inside a struct, so it
     needs a custom marking routine.  */
  ggc_add_root (ultra_pipe_hist, ARRAY_SIZE (ultra_pipe_hist),
		sizeof (ultra_pipe_hist[0]), &mark_ultrasparc_pipeline_state);
}
8905
8906#ifdef OBJECT_FORMAT_ELF
/* Emit a .section directive for section NAME, expressing FLAGS with
   the Solaris-style #alloc/#write/#execinstr attribute syntax.  */
static void
sparc_elf_asm_named_section (name, flags)
     const char *name;
     unsigned int flags;
{
  if (flags & SECTION_MERGE)
    {
      /* entsize cannot be expressed in this section attributes
	 encoding style.  */
      default_elf_asm_named_section (name, flags);
      return;
    }

  fprintf (asm_out_file, "\t.section\t\"%s\"", name);

  /* Debug sections are not allocated at run time.  */
  if (!(flags & SECTION_DEBUG))
    fputs (",#alloc", asm_out_file);
  if (flags & SECTION_WRITE)
    fputs (",#write", asm_out_file);
  if (flags & SECTION_CODE)
    fputs (",#execinstr", asm_out_file);

  /* ??? Handle SECTION_BSS.  */

  fputc ('\n', asm_out_file);
}
8933#endif /* OBJECT_FORMAT_ELF */
8934
8935int
8936sparc_extra_constraint_check (op, c, strict)
8937     rtx op;
8938     int c;
8939     int strict;
8940{
8941  int reload_ok_mem;
8942
8943  if (TARGET_ARCH64
8944      && (c == 'T' || c == 'U'))
8945    return 0;
8946
8947  switch (c)
8948    {
8949    case 'Q':
8950      return fp_sethi_p (op);
8951
8952    case 'R':
8953      return fp_mov_p (op);
8954
8955    case 'S':
8956      return fp_high_losum_p (op);
8957
8958    case 'U':
8959      if (! strict
8960	  || (GET_CODE (op) == REG
8961	      && (REGNO (op) < FIRST_PSEUDO_REGISTER
8962		  || reg_renumber[REGNO (op)] >= 0)))
8963	return register_ok_for_ldd (op);
8964
8965      return 0;
8966
8967    case 'W':
8968    case 'T':
8969      break;
8970
8971    default:
8972      return 0;
8973    }
8974
8975  /* Our memory extra constraints have to emulate the
8976     behavior of 'm' and 'o' in order for reload to work
8977     correctly.  */
8978  if (GET_CODE (op) == MEM)
8979    {
8980      reload_ok_mem = 0;
8981      if ((TARGET_ARCH64 || mem_min_alignment (op, 8))
8982	  && (! strict
8983	      || strict_memory_address_p (Pmode, XEXP (op, 0))))
8984	reload_ok_mem = 1;
8985    }
8986  else
8987    {
8988      reload_ok_mem = (reload_in_progress
8989		       && GET_CODE (op) == REG
8990		       && REGNO (op) >= FIRST_PSEUDO_REGISTER
8991		       && reg_renumber [REGNO (op)] < 0);
8992    }
8993
8994  return reload_ok_mem;
8995}
8996