1/* Subroutines for insn-output.c for Sun SPARC.
2   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3   1999, 2000 Free Software Foundation, Inc.
4   Contributed by Michael Tiemann (tiemann@cygnus.com)
5   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
6   at Cygnus Support.
7
8This file is part of GNU CC.
9
10GNU CC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 2, or (at your option)
13any later version.
14
15GNU CC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GNU CC; see the file COPYING.  If not, write to
22the Free Software Foundation, 59 Temple Place - Suite 330,
23Boston, MA 02111-1307, USA.  */
24
25#include "config.h"
26#include "system.h"
27#include "tree.h"
28#include "rtl.h"
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
34#include "insn-flags.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "expr.h"
39#include "recog.h"
40#include "toplev.h"
41
/* 1 if the caller has placed an "unimp" insn immediately after the call.
   This is used in v8 code when calling a function that returns a structure.
   v9 doesn't have this.  Be careful to have this test be the same as that
   used on the call.

   The macro is true only when all of the following hold: the target is not
   TARGET_ARCH64, the current function returns a structure, and the
   DECL_SIZE of the function's DECL_RESULT is an INTEGER_CST that is not
   zero.  Note that integer_zerop is applied before the INTEGER_CST check.  */

#define SKIP_CALLERS_UNIMP_P  \
(!TARGET_ARCH64 && current_function_returns_struct			\
 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
     == INTEGER_CST))
52
53/* Global variables for machine-dependent things.  */
54
/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used in insn
   scheduling (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.
   Both variables are private to this file.  */
static int apparent_fsize;
static int actual_fsize;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  These two are not static, so other
   translation units can reference them.  */

rtx sparc_compare_op0, sparc_compare_op1;

/* We may need an epilogue if we spill too many registers.
   If this is non-zero, then we branch here for the epilogue.  */
static rtx leaf_label;
72
#ifdef LEAF_REGISTERS

/* Vector to say how input registers are mapped to output
   registers.  FRAME_POINTER_REGNUM cannot be remapped by
   this function to eliminate it.  You must use -fomit-frame-pointer
   to get that.

   The table is indexed by register number and holds 101 entries
   (indices 0 through 100).  An entry is the register number to use
   instead, or -1 when the table provides no replacement for that index.
   NOTE(review): the element type is plain `char', whose signedness is
   implementation-defined; confirm that users of the table compare the
   -1 entries in a way that is safe where `char' is unsigned.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,		/* 0-7: unchanged.  */
  -1, -1, -1, -1, -1, -1, 14, -1,	/* 8-15: only 14 is kept.  */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* 16-23: no replacement.  */
  8, 9, 10, 11, 12, 13, -1, 15,		/* 24-29 -> 8-13, 31 -> 15.  */

  /* 32-100: every entry maps to itself.  */
  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};

#endif
96
/* Name of where we pretend to think the frame pointer points.
   Normally, this is "%fp", but if we are in a leaf procedure,
   this is "%sp+something".  We record "something" separately as it may be
   too big for reg+constant addressing.

   FRAME_BASE_NAME holds the register name and FRAME_BASE_OFFSET the
   separately recorded "something"; both are private to this file.  */

static const char *frame_base_name;
static int frame_base_offset;
104
/* Forward declarations of file-local (static) functions.  PROTO wraps the
   parameter lists so that the declarations also work with compilers that
   do not accept prototypes (presumably; the macro is defined elsewhere).  */

static rtx pic_setup_code	PROTO((void));
static void sparc_init_modes	PROTO((void));
static int save_regs		PROTO((FILE *, int, int, const char *,
				       int, int, int));
static int restore_regs		PROTO((FILE *, int, int, const char *, int, int));
static void build_big_number	PROTO((FILE *, int, const char *));
static int function_arg_slotno	PROTO((const CUMULATIVE_ARGS *,
				       enum machine_mode, tree, int, int,
				       int *, int *));

/* Per-processor cost adjustment helpers; all three share one signature.  */
static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));

/* Case vector output helpers.  */
static void sparc_output_addr_vec PROTO((rtx));
static void sparc_output_addr_diff_vec PROTO((rtx));
static void sparc_output_deferred_case_vectors PROTO((void));


#ifdef DWARF2_DEBUGGING_INFO
/* Declared here without a parameter list; the definition lives outside
   this file.  */
extern char *dwarf2out_cfi_label ();
#endif
127
/* Option handling.  */

/* Code model option as passed by user.  Left null when no -mcmodel=
   value was given; sparc_override_options tests it against NULL.  */
const char *sparc_cmodel_string;
/* Parsed value.  */
enum cmodel sparc_cmodel;

/* Record alignment options as passed by user.  A null pointer means the
   corresponding option was not given.  */
const char *sparc_align_loops_string;
const char *sparc_align_jumps_string;
const char *sparc_align_funcs_string;

/* Parsed values, as a power of two.  */
int sparc_align_loops;
int sparc_align_jumps;
int sparc_align_funcs;

/* CPU selection table walked by sparc_override_options.  Each row gives
   the value string for the switch (null until one is supplied), the
   switch name used in diagnostics, and two flags saying whether the row
   sets the tuning processor and whether it sets the architecture flags.
   The row with a null name terminates the table.  */
struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;
156
157/* Validate and override various options, and do some machine dependent
158   initialization.  */
159
160void
161sparc_override_options ()
162{
163  static struct code_model {
164    const char *name;
165    int value;
166  } cmodels[] = {
167    { "32", CM_32 },
168    { "medlow", CM_MEDLOW },
169    { "medmid", CM_MEDMID },
170    { "medany", CM_MEDANY },
171    { "embmedany", CM_EMBMEDANY },
172    { 0, 0 }
173  };
174  struct code_model *cmodel;
175  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
176  static struct cpu_default {
177    int cpu;
178    const char *name;
179  } cpu_default[] = {
180    /* There must be one entry here for each TARGET_CPU value.  */
181    { TARGET_CPU_sparc, "cypress" },
182    { TARGET_CPU_sparclet, "tsc701" },
183    { TARGET_CPU_sparclite, "f930" },
184    { TARGET_CPU_v8, "v8" },
185    { TARGET_CPU_hypersparc, "hypersparc" },
186    { TARGET_CPU_sparclite86x, "sparclite86x" },
187    { TARGET_CPU_supersparc, "supersparc" },
188    { TARGET_CPU_v9, "v9" },
189    { TARGET_CPU_ultrasparc, "ultrasparc" },
190    { 0, 0 }
191  };
192  struct cpu_default *def;
193  /* Table of values for -m{cpu,tune}=.  */
194  static struct cpu_table {
195    const char *name;
196    enum processor_type processor;
197    int disable;
198    int enable;
199  } cpu_table[] = {
200    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
201    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
202    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
203    /* TI TMS390Z55 supersparc */
204    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
205    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
206    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
207       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
208    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
209    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
210    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
211    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, MASK_V8 },
212    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
213    /* TEMIC sparclet */
214    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
215    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
216    /* TI ultrasparc */
217    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
218    { 0, 0, 0, 0 }
219  };
220  struct cpu_table *cpu;
221  struct sparc_cpu_select *sel;
222  int fpu;
223
224#ifndef SPARC_BI_ARCH
225  /* Check for unsupported architecture size.  */
226  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
227    {
228      error ("%s is not supported by this configuration",
229	     DEFAULT_ARCH32_P ? "-m64" : "-m32");
230    }
231#endif
232
233  /* At the moment we don't allow different pointer size and architecture */
234  if (! TARGET_64BIT != ! TARGET_PTR64)
235    {
236      error ("-mptr%d not allowed on -m%d",
237      	     TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
238      if (TARGET_64BIT)
239    	target_flags |= MASK_PTR64;
240      else
241        target_flags &= ~MASK_PTR64;
242    }
243
244  /* Code model selection.  */
245  sparc_cmodel = SPARC_DEFAULT_CMODEL;
246
247#ifdef SPARC_BI_ARCH
248  if (TARGET_ARCH32)
249    sparc_cmodel = CM_32;
250#endif
251
252  if (sparc_cmodel_string != NULL)
253    {
254      if (TARGET_ARCH64)
255	{
256	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
257	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
258	      break;
259	  if (cmodel->name == NULL)
260	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
261	  else
262	    sparc_cmodel = cmodel->value;
263	}
264      else
265	error ("-mcmodel= is not supported on 32 bit systems");
266    }
267
268  fpu = TARGET_FPU; /* save current -mfpu status */
269
270  /* Set the default CPU.  */
271  for (def = &cpu_default[0]; def->name; ++def)
272    if (def->cpu == TARGET_CPU_DEFAULT)
273      break;
274  if (! def->name)
275    abort ();
276  sparc_select[0].string = def->name;
277
278  for (sel = &sparc_select[0]; sel->name; ++sel)
279    {
280      if (sel->string)
281	{
282	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
283	    if (! strcmp (sel->string, cpu->name))
284	      {
285		if (sel->set_tune_p)
286		  sparc_cpu = cpu->processor;
287
288		if (sel->set_arch_p)
289		  {
290		    target_flags &= ~cpu->disable;
291		    target_flags |= cpu->enable;
292		  }
293		break;
294	      }
295
296	  if (! cpu->name)
297	    error ("bad value (%s) for %s switch", sel->string, sel->name);
298	}
299    }
300
301  /* If -mfpu or -mno-fpu was explicitly used, don't override with
302     the processor default.  */
303  if (TARGET_FPU_SET)
304    target_flags = (target_flags & ~MASK_FPU) | fpu;
305
306  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
307  if (TARGET_V9 && TARGET_ARCH32)
308    target_flags |= MASK_DEPRECATED_V8_INSNS;
309
310  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
311  if (! TARGET_V9 || TARGET_ARCH64)
312    target_flags &= ~MASK_V8PLUS;
313
314  /* Don't use stack biasing in 32 bit mode.  */
315  if (TARGET_ARCH32)
316    target_flags &= ~MASK_STACK_BIAS;
317
318  /* Don't allow -mvis if FPU is disabled.  */
319  if (! TARGET_FPU)
320    target_flags &= ~MASK_VIS;
321
322  /* Validate -malign-loops= value, or provide default.  */
323  if (sparc_align_loops_string)
324    {
325      sparc_align_loops = exact_log2 (atoi (sparc_align_loops_string));
326      if (sparc_align_loops < 2 || sparc_align_loops > 7)
327	fatal ("-malign-loops=%s is not between 4 and 128 or is not a power of two",
328	       sparc_align_loops_string);
329    }
330  else
331    {
332      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
333	 its 0.  This sounds a bit kludgey.  */
334      sparc_align_loops = 0;
335    }
336
337  /* Validate -malign-jumps= value, or provide default.  */
338  if (sparc_align_jumps_string)
339    {
340      sparc_align_jumps = exact_log2 (atoi (sparc_align_jumps_string));
341      if (sparc_align_jumps < 2 || sparc_align_loops > 7)
342	fatal ("-malign-jumps=%s is not between 4 and 128 or is not a power of two",
343	       sparc_align_jumps_string);
344    }
345  else
346    {
347      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
348	 its 0.  This sounds a bit kludgey.  */
349      sparc_align_jumps = 0;
350    }
351
352  /* Validate -malign-functions= value, or provide default. */
353  if (sparc_align_funcs_string)
354    {
355      sparc_align_funcs = exact_log2 (atoi (sparc_align_funcs_string));
356      if (sparc_align_funcs < 2 || sparc_align_loops > 7)
357	fatal ("-malign-functions=%s is not between 4 and 128 or is not a power of two",
358	       sparc_align_funcs_string);
359    }
360  else
361    sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;
362
363  /* Validate PCC_STRUCT_RETURN.  */
364  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
365    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
366
367  /* Do various machine dependent initializations.  */
368  sparc_init_modes ();
369
370  if ((profile_flag || profile_block_flag)
371      && sparc_cmodel != CM_MEDLOW)
372    {
373      error ("profiling does not support code models other than medlow");
374    }
375}
376
377/* Miscellaneous utilities.  */
378
379/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
380   or branch on register contents instructions.  */
381
382int
383v9_regcmp_p (code)
384     enum rtx_code code;
385{
386  return (code == EQ || code == NE || code == GE || code == LT
387	  || code == LE || code == GT);
388}
389
390
391/* Operand constraints.  */
392
393/* Return non-zero only if OP is a register of mode MODE,
394   or const0_rtx.  Don't allow const0_rtx if TARGET_LIVE_G0 because
395   %g0 may contain anything.  */
396
397int
398reg_or_0_operand (op, mode)
399     rtx op;
400     enum machine_mode mode;
401{
402  if (register_operand (op, mode))
403    return 1;
404  if (TARGET_LIVE_G0)
405    return 0;
406  if (op == const0_rtx)
407    return 1;
408  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
409      && CONST_DOUBLE_HIGH (op) == 0
410      && CONST_DOUBLE_LOW (op) == 0)
411    return 1;
412  if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
413      && GET_CODE (op) == CONST_DOUBLE
414      && fp_zero_operand (op))
415    return 1;
416  return 0;
417}
418
419/* Nonzero if OP is a floating point value with value 0.0.  */
420
421int
422fp_zero_operand (op)
423     rtx op;
424{
425  REAL_VALUE_TYPE r;
426
427  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
428  return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
429}
430
431/* Nonzero if OP is an integer register.  */
432
433int
434intreg_operand (op, mode)
435     rtx op;
436     enum machine_mode mode ATTRIBUTE_UNUSED;
437{
438  return (register_operand (op, SImode)
439	  || (TARGET_ARCH64 && register_operand (op, DImode)));
440}
441
442/* Nonzero if OP is a floating point condition code register.  */
443
444int
445fcc_reg_operand (op, mode)
446     rtx op;
447     enum machine_mode mode;
448{
449  /* This can happen when recog is called from combine.  Op may be a MEM.
450     Fail instead of calling abort in this case.  */
451  if (GET_CODE (op) != REG)
452    return 0;
453
454  if (mode != VOIDmode && mode != GET_MODE (op))
455    return 0;
456  if (mode == VOIDmode
457      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
458    return 0;
459
460#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
461  if (reg_renumber == 0)
462    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
463  return REGNO_OK_FOR_CCFP_P (REGNO (op));
464#else
465  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
466#endif
467}
468
469/* Nonzero if OP is an integer or floating point condition code register.  */
470
471int
472icc_or_fcc_reg_operand (op, mode)
473     rtx op;
474     enum machine_mode mode;
475{
476  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
477    {
478      if (mode != VOIDmode && mode != GET_MODE (op))
479	return 0;
480      if (mode == VOIDmode
481	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
482	return 0;
483      return 1;
484    }
485
486  return fcc_reg_operand (op, mode);
487}
488
489/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
490int
491restore_operand (op, mode)
492     rtx op;
493     enum machine_mode mode;
494{
495  return (GET_CODE (op) == REG && GET_MODE (op) == mode
496	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
497}
498
499/* Call insn on SPARC can take a PC-relative constant address, or any regular
500   memory address.  */
501
502int
503call_operand (op, mode)
504     rtx op;
505     enum machine_mode mode;
506{
507  if (GET_CODE (op) != MEM)
508    abort ();
509  op = XEXP (op, 0);
510  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
511}
512
513int
514call_operand_address (op, mode)
515     rtx op;
516     enum machine_mode mode;
517{
518  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
519}
520
521/* Returns 1 if OP is either a symbol reference or a sum of a symbol
522   reference and a constant.  */
523
524int
525symbolic_operand (op, mode)
526     register rtx op;
527     enum machine_mode mode;
528{
529  switch (GET_CODE (op))
530    {
531    case SYMBOL_REF:
532    case LABEL_REF:
533      return 1;
534
535    case CONST:
536      op = XEXP (op, 0);
537      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
538	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
539	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
540
541      /* ??? This clause seems to be irrelevant.  */
542    case CONST_DOUBLE:
543      return GET_MODE (op) == mode;
544
545    default:
546      return 0;
547    }
548}
549
550/* Return truth value of statement that OP is a symbolic memory
551   operand of mode MODE.  */
552
553int
554symbolic_memory_operand (op, mode)
555     rtx op;
556     enum machine_mode mode ATTRIBUTE_UNUSED;
557{
558  if (GET_CODE (op) == SUBREG)
559    op = SUBREG_REG (op);
560  if (GET_CODE (op) != MEM)
561    return 0;
562  op = XEXP (op, 0);
563  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
564	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
565}
566
567/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */
568
569int
570label_ref_operand (op, mode)
571     rtx op;
572     enum machine_mode mode;
573{
574  if (GET_CODE (op) != LABEL_REF)
575    return 0;
576  if (GET_MODE (op) != mode)
577    return 0;
578  return 1;
579}
580
581/* Return 1 if the operand is an argument used in generating pic references
582   in either the medium/low or medium/anywhere code models of sparc64.  */
583
584int
585sp64_medium_pic_operand (op, mode)
586     rtx op;
587     enum machine_mode mode ATTRIBUTE_UNUSED;
588{
589  /* Check for (const (minus (symbol_ref:GOT)
590                             (const (minus (label) (pc))))).  */
591  if (GET_CODE (op) != CONST)
592    return 0;
593  op = XEXP (op, 0);
594  if (GET_CODE (op) != MINUS)
595    return 0;
596  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
597    return 0;
598  /* ??? Ensure symbol is GOT.  */
599  if (GET_CODE (XEXP (op, 1)) != CONST)
600    return 0;
601  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
602    return 0;
603  return 1;
604}
605
606/* Return 1 if the operand is a data segment reference.  This includes
607   the readonly data segment, or in other words anything but the text segment.
608   This is needed in the medium/anywhere code model on v9.  These values
609   are accessed with EMBMEDANY_BASE_REG.  */
610
611int
612data_segment_operand (op, mode)
613     rtx op;
614     enum machine_mode mode ATTRIBUTE_UNUSED;
615{
616  switch (GET_CODE (op))
617    {
618    case SYMBOL_REF :
619      return ! SYMBOL_REF_FLAG (op);
620    case PLUS :
621      /* Assume canonical format of symbol + constant.
622	 Fall through.  */
623    case CONST :
624      return data_segment_operand (XEXP (op, 0));
625    default :
626      return 0;
627    }
628}
629
630/* Return 1 if the operand is a text segment reference.
631   This is needed in the medium/anywhere code model on v9.  */
632
633int
634text_segment_operand (op, mode)
635     rtx op;
636     enum machine_mode mode ATTRIBUTE_UNUSED;
637{
638  switch (GET_CODE (op))
639    {
640    case LABEL_REF :
641      return 1;
642    case SYMBOL_REF :
643      return SYMBOL_REF_FLAG (op);
644    case PLUS :
645      /* Assume canonical format of symbol + constant.
646	 Fall through.  */
647    case CONST :
648      return text_segment_operand (XEXP (op, 0));
649    default :
650      return 0;
651    }
652}
653
654/* Return 1 if the operand is either a register or a memory operand that is
655   not symbolic.  */
656
657int
658reg_or_nonsymb_mem_operand (op, mode)
659    register rtx op;
660    enum machine_mode mode;
661{
662  if (register_operand (op, mode))
663    return 1;
664
665  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
666    return 1;
667
668  return 0;
669}
670
671int
672splittable_symbolic_memory_operand (op, mode)
673     rtx op;
674     enum machine_mode mode ATTRIBUTE_UNUSED;
675{
676  if (GET_CODE (op) != MEM)
677    return 0;
678  if (! symbolic_operand (XEXP (op, 0), Pmode))
679    return 0;
680  return 1;
681}
682
683int
684splittable_immediate_memory_operand (op, mode)
685     rtx op;
686     enum machine_mode mode ATTRIBUTE_UNUSED;
687{
688  if (GET_CODE (op) != MEM)
689    return 0;
690  if (! immediate_operand (XEXP (op, 0), Pmode))
691    return 0;
692  return 1;
693}
694
695/* Return truth value of whether OP is EQ or NE.  */
696
697int
698eq_or_neq (op, mode)
699     rtx op;
700     enum machine_mode mode ATTRIBUTE_UNUSED;
701{
702  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
703}
704
705/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
706   or LTU for non-floating-point.  We handle those specially.  */
707
708int
709normal_comp_operator (op, mode)
710     rtx op;
711     enum machine_mode mode ATTRIBUTE_UNUSED;
712{
713  enum rtx_code code = GET_CODE (op);
714
715  if (GET_RTX_CLASS (code) != '<')
716    return 0;
717
718  if (GET_MODE (XEXP (op, 0)) == CCFPmode
719      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
720    return 1;
721
722  return (code != NE && code != EQ && code != GEU && code != LTU);
723}
724
725/* Return 1 if this is a comparison operator.  This allows the use of
726   MATCH_OPERATOR to recognize all the branch insns.  */
727
728int
729noov_compare_op (op, mode)
730    register rtx op;
731    enum machine_mode mode ATTRIBUTE_UNUSED;
732{
733  enum rtx_code code = GET_CODE (op);
734
735  if (GET_RTX_CLASS (code) != '<')
736    return 0;
737
738  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
739    /* These are the only branches which work with CC_NOOVmode.  */
740    return (code == EQ || code == NE || code == GE || code == LT);
741  return 1;
742}
743
744/* Nonzero if OP is a comparison operator suitable for use in v9
745   conditional move or branch on register contents instructions.  */
746
747int
748v9_regcmp_op (op, mode)
749     register rtx op;
750     enum machine_mode mode ATTRIBUTE_UNUSED;
751{
752  enum rtx_code code = GET_CODE (op);
753
754  if (GET_RTX_CLASS (code) != '<')
755    return 0;
756
757  return v9_regcmp_p (code);
758}
759
760/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
761
762int
763extend_op (op, mode)
764     rtx op;
765     enum machine_mode mode ATTRIBUTE_UNUSED;
766{
767  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
768}
769
770/* Return nonzero if OP is an operator of mode MODE which can set
771   the condition codes explicitly.  We do not include PLUS and MINUS
772   because these require CC_NOOVmode, which we handle explicitly.  */
773
774int
775cc_arithop (op, mode)
776     rtx op;
777     enum machine_mode mode ATTRIBUTE_UNUSED;
778{
779  if (GET_CODE (op) == AND
780      || GET_CODE (op) == IOR
781      || GET_CODE (op) == XOR)
782    return 1;
783
784  return 0;
785}
786
787/* Return nonzero if OP is an operator of mode MODE which can bitwise
788   complement its second operand and set the condition codes explicitly.  */
789
790int
791cc_arithopn (op, mode)
792     rtx op;
793     enum machine_mode mode ATTRIBUTE_UNUSED;
794{
795  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
796     and (xor ... (not ...)) to (not (xor ...)).   */
797  return (GET_CODE (op) == AND
798	  || GET_CODE (op) == IOR);
799}
800
801/* Return true if OP is a register, or is a CONST_INT that can fit in a
802   signed 13 bit immediate field.  This is an acceptable SImode operand for
803   most 3 address instructions.  */
804
805int
806arith_operand (op, mode)
807     rtx op;
808     enum machine_mode mode;
809{
810  int val;
811  if (register_operand (op, mode))
812    return 1;
813  if (GET_CODE (op) != CONST_INT)
814    return 0;
815  val = INTVAL (op) & 0xffffffff;
816  return SPARC_SIMM13_P (val);
817}
818
819/* Return true if OP is a constant 4096  */
820
821int
822arith_4096_operand (op, mode)
823     rtx op;
824     enum machine_mode mode ATTRIBUTE_UNUSED;
825{
826  int val;
827  if (GET_CODE (op) != CONST_INT)
828    return 0;
829  val = INTVAL (op) & 0xffffffff;
830  return val == 4096;
831}
832
833/* Return true if OP is suitable as second operand for add/sub */
834
835int
836arith_add_operand (op, mode)
837     rtx op;
838     enum machine_mode mode;
839{
840  return arith_operand (op, mode) || arith_4096_operand (op, mode);
841}
842
843/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
844   immediate field of OR and XOR instructions.  Used for 64-bit
845   constant formation patterns.  */
846int
847const64_operand (op, mode)
848     rtx op;
849     enum machine_mode mode ATTRIBUTE_UNUSED;
850{
851  return ((GET_CODE (op) == CONST_INT
852	   && SPARC_SIMM13_P (INTVAL (op)))
853#if HOST_BITS_PER_WIDE_INT != 64
854	  || (GET_CODE (op) == CONST_DOUBLE
855	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
856	      && (CONST_DOUBLE_HIGH (op) ==
857		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
858		   (HOST_WIDE_INT)0xffffffff : 0)))
859#endif
860	  );
861}
862
863/* The same, but only for sethi instructions.  */
864int
865const64_high_operand (op, mode)
866     rtx op;
867     enum machine_mode mode ATTRIBUTE_UNUSED;
868{
869  return ((GET_CODE (op) == CONST_INT
870	   && (INTVAL (op) & 0xfffffc00) != 0
871	   && SPARC_SETHI_P (INTVAL (op))
872#if HOST_BITS_PER_WIDE_INT != 64
873	   /* Must be positive on non-64bit host else the
874	      optimizer is fooled into thinking that sethi
875	      sign extends, even though it does not.  */
876	   && INTVAL (op) >= 0
877#endif
878	   )
879	  || (GET_CODE (op) == CONST_DOUBLE
880	      && CONST_DOUBLE_HIGH (op) == 0
881	      && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
882	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
883}
884
885/* Return true if OP is a register, or is a CONST_INT that can fit in a
886   signed 11 bit immediate field.  This is an acceptable SImode operand for
887   the movcc instructions.  */
888
889int
890arith11_operand (op, mode)
891     rtx op;
892     enum machine_mode mode;
893{
894  return (register_operand (op, mode)
895	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
896}
897
898/* Return true if OP is a register, or is a CONST_INT that can fit in a
899   signed 10 bit immediate field.  This is an acceptable SImode operand for
900   the movrcc instructions.  */
901
902int
903arith10_operand (op, mode)
904     rtx op;
905     enum machine_mode mode;
906{
907  return (register_operand (op, mode)
908	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
909}
910
911/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
912   immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
913   immediate field.
914   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
915   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
916   for most 3 address instructions.  */
917
918int
919arith_double_operand (op, mode)
920     rtx op;
921     enum machine_mode mode;
922{
923  return (register_operand (op, mode)
924	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
925	  || (! TARGET_ARCH64
926	      && GET_CODE (op) == CONST_DOUBLE
927	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
928	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
929	  || (TARGET_ARCH64
930	      && GET_CODE (op) == CONST_DOUBLE
931	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
932	      && ((CONST_DOUBLE_HIGH (op) == -1
933		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
934		  || (CONST_DOUBLE_HIGH (op) == 0
935		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
936}
937
938/* Return true if OP is a constant 4096 for DImode on ARCH64 */
939
940int
941arith_double_4096_operand (op, mode)
942     rtx op;
943     enum machine_mode mode ATTRIBUTE_UNUSED;
944{
945  return (TARGET_ARCH64 &&
946  	  ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
947  	   (GET_CODE (op) == CONST_DOUBLE &&
948  	    CONST_DOUBLE_LOW (op) == 4096 &&
949  	    CONST_DOUBLE_HIGH (op) == 0)));
950}
951
952/* Return true if OP is suitable as second operand for add/sub in DImode */
953
954int
955arith_double_add_operand (op, mode)
956     rtx op;
957     enum machine_mode mode;
958{
959  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
960}
961
962/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
963   can fit in an 11 bit immediate field.  This is an acceptable DImode
964   operand for the movcc instructions.  */
965/* ??? Replace with arith11_operand?  */
966
967int
968arith11_double_operand (op, mode)
969     rtx op;
970     enum machine_mode mode;
971{
972  return (register_operand (op, mode)
973	  || (GET_CODE (op) == CONST_DOUBLE
974	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
975	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
976	      && ((CONST_DOUBLE_HIGH (op) == -1
977		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
978		  || (CONST_DOUBLE_HIGH (op) == 0
979		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
980	  || (GET_CODE (op) == CONST_INT
981	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
982	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
983}
984
985/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
986   can fit in an 10 bit immediate field.  This is an acceptable DImode
987   operand for the movrcc instructions.  */
988/* ??? Replace with arith10_operand?  */
989
990int
991arith10_double_operand (op, mode)
992     rtx op;
993     enum machine_mode mode;
994{
995  return (register_operand (op, mode)
996	  || (GET_CODE (op) == CONST_DOUBLE
997	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
998	      && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
999	      && ((CONST_DOUBLE_HIGH (op) == -1
1000		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
1001		  || (CONST_DOUBLE_HIGH (op) == 0
1002		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
1003	  || (GET_CODE (op) == CONST_INT
1004	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1005	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
1006}
1007
1008/* Return truth value of whether OP is a integer which fits the
1009   range constraining immediate operands in most three-address insns,
1010   which have a 13 bit immediate field.  */
1011
1012int
1013small_int (op, mode)
1014     rtx op;
1015     enum machine_mode mode ATTRIBUTE_UNUSED;
1016{
1017  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
1018}
1019
1020int
1021small_int_or_double (op, mode)
1022     rtx op;
1023     enum machine_mode mode ATTRIBUTE_UNUSED;
1024{
1025  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
1026	  || (GET_CODE (op) == CONST_DOUBLE
1027	      && CONST_DOUBLE_HIGH (op) == 0
1028	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
1029}
1030
1031/* Recognize operand values for the umul instruction.  That instruction sign
1032   extends immediate values just like all other sparc instructions, but
1033   interprets the extended result as an unsigned number.  */
1034
1035int
1036uns_small_int (op, mode)
1037     rtx op;
1038     enum machine_mode mode ATTRIBUTE_UNUSED;
1039{
1040#if HOST_BITS_PER_WIDE_INT > 32
1041  /* All allowed constants will fit a CONST_INT.  */
1042  return (GET_CODE (op) == CONST_INT
1043	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
1044	      || (INTVAL (op) >= 0xFFFFF000
1045                  && INTVAL (op) < 0x100000000)));
1046#else
1047  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
1048	  || (GET_CODE (op) == CONST_DOUBLE
1049	      && CONST_DOUBLE_HIGH (op) == 0
1050	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
1051#endif
1052}
1053
1054int
1055uns_arith_operand (op, mode)
1056     rtx op;
1057     enum machine_mode mode;
1058{
1059  return register_operand (op, mode) || uns_small_int (op, mode);
1060}
1061
1062/* Return truth value of statement that OP is a call-clobbered register.  */
1063int
1064clobbered_register (op, mode)
1065     rtx op;
1066     enum machine_mode mode ATTRIBUTE_UNUSED;
1067{
1068  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1069}
1070
1071/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns.  */
1072
1073int
1074zero_operand (op, mode)
1075     rtx op;
1076     enum machine_mode mode ATTRIBUTE_UNUSED;
1077{
1078  return op == const0_rtx;
1079}
1080
1081/* Return 1 if OP is a valid operand for the source of a move insn.  */
1082
1083int
1084input_operand (op, mode)
1085     rtx op;
1086     enum machine_mode mode;
1087{
1088  /* If both modes are non-void they must be the same.  */
1089  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
1090    return 0;
1091
1092  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
1093  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
1094    return 1;
1095
1096  /* Allow any one instruction integer constant, and all CONST_INT
1097     variants when we are working in DImode and !arch64.  */
1098  if (GET_MODE_CLASS (mode) == MODE_INT
1099      && ((GET_CODE (op) == CONST_INT
1100	   && ((SPARC_SETHI_P (INTVAL (op))
1101		&& (! TARGET_ARCH64
1102		    || (INTVAL (op) >= 0)
1103		    || mode == SImode))
1104	       || SPARC_SIMM13_P (INTVAL (op))
1105	       || (mode == DImode
1106		   && ! TARGET_ARCH64)))
1107	  || (TARGET_ARCH64
1108	      && GET_CODE (op) == CONST_DOUBLE
1109	      && ((CONST_DOUBLE_HIGH (op) == 0
1110		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
1111		  ||
1112#if HOST_BITS_PER_WIDE_INT == 64
1113		  (CONST_DOUBLE_HIGH (op) == 0
1114		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
1115#else
1116		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
1117		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
1118			&& CONST_DOUBLE_HIGH (op) == 0)
1119		       || (CONST_DOUBLE_HIGH (op) == -1)))
1120#endif
1121		  ))))
1122    return 1;
1123
1124  /* If !arch64 and this is a DImode const, allow it so that
1125     the splits can be generated.  */
1126  if (! TARGET_ARCH64
1127      && mode == DImode
1128      && GET_CODE (op) == CONST_DOUBLE)
1129    return 1;
1130
1131  if (register_operand (op, mode))
1132    return 1;
1133
1134  /* If this is a SUBREG, look inside so that we handle
1135     paradoxical ones.  */
1136  if (GET_CODE (op) == SUBREG)
1137    op = SUBREG_REG (op);
1138
1139  /* Check for valid MEM forms.  */
1140  if (GET_CODE (op) == MEM)
1141    {
1142      rtx inside = XEXP (op, 0);
1143
1144      if (GET_CODE (inside) == LO_SUM)
1145	{
1146	  /* We can't allow these because all of the splits
1147	     (eventually as they trickle down into DFmode
1148	     splits) require offsettable memory references.  */
1149	  if (! TARGET_V9
1150	      && GET_MODE (op) == TFmode)
1151	    return 0;
1152
1153	  return (register_operand (XEXP (inside, 0), Pmode)
1154		  && CONSTANT_P (XEXP (inside, 1)));
1155	}
1156      return memory_address_p (mode, inside);
1157    }
1158
1159  return 0;
1160}
1161
1162
1163/* We know it can't be done in one insn when we get here,
1164   the movsi expander guarentees this.  */
1165void
1166sparc_emit_set_const32 (op0, op1)
1167     rtx op0;
1168     rtx op1;
1169{
1170  enum machine_mode mode = GET_MODE (op0);
1171  rtx temp;
1172
1173  if (GET_CODE (op1) == CONST_INT)
1174    {
1175      HOST_WIDE_INT value = INTVAL (op1);
1176
1177      if (SPARC_SETHI_P (value)
1178	  || SPARC_SIMM13_P (value))
1179	abort ();
1180    }
1181
1182  /* Full 2-insn decomposition is needed.  */
1183  if (reload_in_progress || reload_completed)
1184    temp = op0;
1185  else
1186    temp = gen_reg_rtx (mode);
1187
1188  if (GET_CODE (op1) == CONST_INT)
1189    {
1190      /* Emit them as real moves instead of a HIGH/LO_SUM,
1191	 this way CSE can see everything and reuse intermediate
1192	 values if it wants.  */
1193      if (TARGET_ARCH64
1194	  && HOST_BITS_PER_WIDE_INT != 64
1195	  && (INTVAL (op1) & 0x80000000) != 0)
1196	{
1197	  emit_insn (gen_rtx_SET (VOIDmode,
1198				  temp,
1199				  gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
1200							INTVAL (op1) & 0xfffffc00, 0)));
1201	}
1202      else
1203	{
1204	  emit_insn (gen_rtx_SET (VOIDmode,
1205				  temp,
1206				  GEN_INT (INTVAL (op1) & 0xfffffc00)));
1207	}
1208      emit_insn (gen_rtx_SET (VOIDmode,
1209			      op0,
1210			      gen_rtx_IOR (mode,
1211					   temp,
1212					   GEN_INT (INTVAL (op1) & 0x3ff))));
1213    }
1214  else
1215    {
1216      /* A symbol, emit in the traditional way.  */
1217      emit_insn (gen_rtx_SET (VOIDmode,
1218			      temp,
1219			      gen_rtx_HIGH (mode,
1220					    op1)));
1221      emit_insn (gen_rtx_SET (VOIDmode,
1222			      op0,
1223			      gen_rtx_LO_SUM (mode,
1224					      temp,
1225					      op1)));
1226
1227    }
1228}
1229
1230
1231/* Sparc-v9 code-model support. */
1232void
1233sparc_emit_set_symbolic_const64 (op0, op1, temp1)
1234     rtx op0;
1235     rtx op1;
1236     rtx temp1;
1237{
1238  switch (sparc_cmodel)
1239    {
1240    case CM_MEDLOW:
1241      /* The range spanned by all instructions in the object is less
1242	 than 2^31 bytes (2GB) and the distance from any instruction
1243	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1244	 than 2^31 bytes (2GB).
1245
1246	 The executable must be in the low 4TB of the virtual address
1247	 space.
1248
1249	 sethi	%hi(symbol), %temp
1250	 or	%temp, %lo(symbol), %reg  */
1251      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1252      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1253      break;
1254
1255    case CM_MEDMID:
1256      /* The range spanned by all instructions in the object is less
1257	 than 2^31 bytes (2GB) and the distance from any instruction
1258	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1259	 than 2^31 bytes (2GB).
1260
1261	 The executable must be in the low 16TB of the virtual address
1262	 space.
1263
1264	 sethi	%h44(symbol), %temp1
1265	 or	%temp1, %m44(symbol), %temp2
1266	 sllx	%temp2, 12, %temp3
1267	 or	%temp3, %l44(symbol), %reg  */
1268      emit_insn (gen_seth44 (op0, op1));
1269      emit_insn (gen_setm44 (op0, op0, op1));
1270      emit_insn (gen_rtx_SET (VOIDmode, temp1,
1271			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
1272      emit_insn (gen_setl44 (op0, temp1, op1));
1273      break;
1274
1275    case CM_MEDANY:
1276      /* The range spanned by all instructions in the object is less
1277	 than 2^31 bytes (2GB) and the distance from any instruction
1278	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1279	 than 2^31 bytes (2GB).
1280
1281	 The executable can be placed anywhere in the virtual address
1282	 space.
1283
1284	 sethi	%hh(symbol), %temp1
1285	 sethi	%lm(symbol), %temp2
1286	 or	%temp1, %hm(symbol), %temp3
1287	 or	%temp2, %lo(symbol), %temp4
1288	 sllx	%temp3, 32, %temp5
1289	 or	%temp4, %temp5, %reg  */
1290
1291      /* Getting this right wrt. reloading is really tricky.
1292	 We _MUST_ have a seperate temporary at this point,
1293	 if we don't barf immediately instead of generating
1294	 incorrect code.  */
1295      if (temp1 == op0)
1296	abort ();
1297
1298      emit_insn (gen_sethh (op0, op1));
1299      emit_insn (gen_setlm (temp1, op1));
1300      emit_insn (gen_sethm (op0, op0, op1));
1301      emit_insn (gen_rtx_SET (VOIDmode, op0,
1302			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
1303      emit_insn (gen_rtx_SET (VOIDmode, op0,
1304			      gen_rtx_PLUS (DImode, op0, temp1)));
1305      emit_insn (gen_setlo (op0, op0, op1));
1306      break;
1307
1308    case CM_EMBMEDANY:
1309      /* Old old old backwards compatibility kruft here.
1310	 Essentially it is MEDLOW with a fixed 64-bit
1311	 virtual base added to all data segment addresses.
1312	 Text-segment stuff is computed like MEDANY, we can't
1313	 reuse the code above because the relocation knobs
1314	 look different.
1315
1316	 Data segment:	sethi	%hi(symbol), %temp1
1317			or	%temp1, %lo(symbol), %temp2
1318			add	%temp2, EMBMEDANY_BASE_REG, %reg
1319
1320	 Text segment:	sethi	%uhi(symbol), %temp1
1321			sethi	%hi(symbol), %temp2
1322			or	%temp1, %ulo(symbol), %temp3
1323			or	%temp2, %lo(symbol), %temp4
1324			sllx	%temp3, 32, %temp5
1325			or	%temp4, %temp5, %reg  */
1326      if (data_segment_operand (op1, GET_MODE (op1)))
1327	{
1328	  emit_insn (gen_embmedany_sethi (temp1, op1));
1329	  emit_insn (gen_embmedany_brsum (op0, temp1));
1330	  emit_insn (gen_embmedany_losum (op0, op0, op1));
1331	}
1332      else
1333	{
1334	  /* Getting this right wrt. reloading is really tricky.
1335	     We _MUST_ have a seperate temporary at this point,
1336	     so we barf immediately instead of generating
1337	     incorrect code.  */
1338	  if (temp1 == op0)
1339	    abort ();
1340
1341	  emit_insn (gen_embmedany_textuhi (op0, op1));
1342	  emit_insn (gen_embmedany_texthi  (temp1, op1));
1343	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
1344	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1345				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
1346	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1347				  gen_rtx_PLUS (DImode, op0, temp1)));
1348	  emit_insn (gen_embmedany_textlo  (op0, op0, op1));
1349	}
1350      break;
1351
1352    default:
1353      abort();
1354    }
1355}
1356
1357/* These avoid problems when cross compiling.  If we do not
1358   go through all this hair then the optimizer will see
1359   invalid REG_EQUAL notes or in some cases none at all.  */
1360static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
1361static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
1362static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
1363static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));
1364
1365#if HOST_BITS_PER_WIDE_INT == 64
1366#define GEN_HIGHINT64(__x)		GEN_INT ((__x) & 0xfffffc00)
1367#define GEN_INT64(__x)			GEN_INT (__x)
1368#else
1369#define GEN_HIGHINT64(__x) \
1370	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
1371			      (__x) & 0xfffffc00, 0)
1372#define GEN_INT64(__x) \
1373	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
1374			      (__x) & 0xffffffff, \
1375			      ((__x) & 0x80000000 \
1376			       ? 0xffffffff : 0))
1377#endif
1378
1379/* The optimizer is not to assume anything about exactly
1380   which bits are set for a HIGH, they are unspecified.
1381   Unfortunately this leads to many missed optimizations
1382   during CSE.  We mask out the non-HIGH bits, and matches
1383   a plain movdi, to alleviate this problem.  */
1384static void
1385sparc_emit_set_safe_HIGH64 (dest, val)
1386     rtx dest;
1387     HOST_WIDE_INT val;
1388{
1389  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
1390}
1391
1392static rtx
1393gen_safe_SET64 (dest, val)
1394     rtx dest;
1395     HOST_WIDE_INT val;
1396{
1397  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
1398}
1399
1400static rtx
1401gen_safe_OR64 (src, val)
1402     rtx src;
1403     HOST_WIDE_INT val;
1404{
1405  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
1406}
1407
1408static rtx
1409gen_safe_XOR64 (src, val)
1410     rtx src;
1411     HOST_WIDE_INT val;
1412{
1413  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
1414}
1415
1416/* Worker routines for 64-bit constant formation on arch64.
1417   One of the key things to be doing in these emissions is
1418   to create as many temp REGs as possible.  This makes it
1419   possible for half-built constants to be used later when
1420   such values are similar to something required later on.
1421   Without doing this, the optimizer cannot see such
1422   opportunities.  */
1423
1424static void sparc_emit_set_const64_quick1
1425	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));
1426
1427static void
1428sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
1429  rtx op0;
1430  rtx temp;
1431  unsigned HOST_WIDE_INT low_bits;
1432  int is_neg;
1433{
1434  unsigned HOST_WIDE_INT high_bits;
1435
1436  if (is_neg)
1437    high_bits = (~low_bits) & 0xffffffff;
1438  else
1439    high_bits = low_bits;
1440
1441  sparc_emit_set_safe_HIGH64 (temp, high_bits);
1442  if (!is_neg)
1443    {
1444      emit_insn (gen_rtx_SET (VOIDmode, op0,
1445			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1446    }
1447  else
1448    {
1449      /* If we are XOR'ing with -1, then we should emit a one's complement
1450	 instead.  This way the combiner will notice logical operations
1451	 such as ANDN later on and substitute.  */
1452      if ((low_bits & 0x3ff) == 0x3ff)
1453	{
1454	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1455				  gen_rtx_NOT (DImode, temp)));
1456	}
1457      else
1458	{
1459	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1460				  gen_safe_XOR64 (temp,
1461						  (-0x400 | (low_bits & 0x3ff)))));
1462	}
1463    }
1464}
1465
1466static void sparc_emit_set_const64_quick2
1467	PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
1468	       unsigned HOST_WIDE_INT, int));
1469
1470static void
1471sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
1472  rtx op0;
1473  rtx temp;
1474  unsigned HOST_WIDE_INT high_bits;
1475  unsigned HOST_WIDE_INT low_immediate;
1476  int shift_count;
1477{
1478  rtx temp2 = op0;
1479
1480  if ((high_bits & 0xfffffc00) != 0)
1481    {
1482      sparc_emit_set_safe_HIGH64 (temp, high_bits);
1483      if ((high_bits & ~0xfffffc00) != 0)
1484	emit_insn (gen_rtx_SET (VOIDmode, op0,
1485				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1486      else
1487	temp2 = temp;
1488    }
1489  else
1490    {
1491      emit_insn (gen_safe_SET64 (temp, high_bits));
1492      temp2 = temp;
1493    }
1494
1495  /* Now shift it up into place. */
1496  emit_insn (gen_rtx_SET (VOIDmode, op0,
1497			  gen_rtx_ASHIFT (DImode, temp2,
1498					  GEN_INT (shift_count))));
1499
1500  /* If there is a low immediate part piece, finish up by
1501     putting that in as well.  */
1502  if (low_immediate != 0)
1503    emit_insn (gen_rtx_SET (VOIDmode, op0,
1504			    gen_safe_OR64 (op0, low_immediate)));
1505}
1506
1507static void sparc_emit_set_const64_longway
1508	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1509
1510/* Full 64-bit constant decomposition.  Even though this is the
1511   'worst' case, we still optimize a few things away.  */
1512static void
1513sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
1514     rtx op0;
1515     rtx temp;
1516     unsigned HOST_WIDE_INT high_bits;
1517     unsigned HOST_WIDE_INT low_bits;
1518{
1519  rtx sub_temp;
1520
1521  if (reload_in_progress || reload_completed)
1522    sub_temp = op0;
1523  else
1524    sub_temp = gen_reg_rtx (DImode);
1525
1526  if ((high_bits & 0xfffffc00) != 0)
1527    {
1528      sparc_emit_set_safe_HIGH64 (temp, high_bits);
1529      if ((high_bits & ~0xfffffc00) != 0)
1530	emit_insn (gen_rtx_SET (VOIDmode,
1531				sub_temp,
1532				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1533      else
1534	sub_temp = temp;
1535    }
1536  else
1537    {
1538      emit_insn (gen_safe_SET64 (temp, high_bits));
1539      sub_temp = temp;
1540    }
1541
1542  if (!reload_in_progress && !reload_completed)
1543    {
1544      rtx temp2 = gen_reg_rtx (DImode);
1545      rtx temp3 = gen_reg_rtx (DImode);
1546      rtx temp4 = gen_reg_rtx (DImode);
1547
1548      emit_insn (gen_rtx_SET (VOIDmode, temp4,
1549			      gen_rtx_ASHIFT (DImode, sub_temp,
1550					      GEN_INT (32))));
1551
1552      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
1553      if ((low_bits & ~0xfffffc00) != 0)
1554	{
1555	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
1556				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1557	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1558				  gen_rtx_PLUS (DImode, temp4, temp3)));
1559	}
1560      else
1561	{
1562	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1563				  gen_rtx_PLUS (DImode, temp4, temp2)));
1564	}
1565    }
1566  else
1567    {
1568      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
1569      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
1570      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1571      int to_shift = 12;
1572
1573      /* We are in the middle of reload, so this is really
1574	 painful.  However we do still make an attempt to
1575	 avoid emitting truly stupid code.  */
1576      if (low1 != const0_rtx)
1577	{
1578	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1579				  gen_rtx_ASHIFT (DImode, sub_temp,
1580						  GEN_INT (to_shift))));
1581	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1582				  gen_rtx_IOR (DImode, op0, low1)));
1583	  sub_temp = op0;
1584	  to_shift = 12;
1585	}
1586      else
1587	{
1588	  to_shift += 12;
1589	}
1590      if (low2 != const0_rtx)
1591	{
1592	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1593				  gen_rtx_ASHIFT (DImode, sub_temp,
1594						  GEN_INT (to_shift))));
1595	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1596				  gen_rtx_IOR (DImode, op0, low2)));
1597	  sub_temp = op0;
1598	  to_shift = 8;
1599	}
1600      else
1601	{
1602	  to_shift += 8;
1603	}
1604      emit_insn (gen_rtx_SET (VOIDmode, op0,
1605			      gen_rtx_ASHIFT (DImode, sub_temp,
1606					      GEN_INT (to_shift))));
1607      if (low3 != const0_rtx)
1608	emit_insn (gen_rtx_SET (VOIDmode, op0,
1609				gen_rtx_IOR (DImode, op0, low3)));
1610      /* phew... */
1611    }
1612}
1613
1614/* Analyze a 64-bit constant for certain properties. */
1615static void analyze_64bit_constant
1616	PROTO((unsigned HOST_WIDE_INT,
1617	       unsigned HOST_WIDE_INT,
1618	       int *, int *, int *));
1619
1620static void
1621analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
1622     unsigned HOST_WIDE_INT high_bits, low_bits;
1623     int *hbsp, *lbsp, *abbasp;
1624{
1625  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1626  int i;
1627
1628  lowest_bit_set = highest_bit_set = -1;
1629  i = 0;
1630  do
1631    {
1632      if ((lowest_bit_set == -1)
1633	  && ((low_bits >> i) & 1))
1634	lowest_bit_set = i;
1635      if ((highest_bit_set == -1)
1636	  && ((high_bits >> (32 - i - 1)) & 1))
1637	highest_bit_set = (64 - i - 1);
1638    }
1639  while (++i < 32
1640	 && ((highest_bit_set == -1)
1641	     || (lowest_bit_set == -1)));
1642  if (i == 32)
1643    {
1644      i = 0;
1645      do
1646	{
1647	  if ((lowest_bit_set == -1)
1648	      && ((high_bits >> i) & 1))
1649	    lowest_bit_set = i + 32;
1650	  if ((highest_bit_set == -1)
1651	      && ((low_bits >> (32 - i - 1)) & 1))
1652	    highest_bit_set = 32 - i - 1;
1653	}
1654      while (++i < 32
1655	     && ((highest_bit_set == -1)
1656		 || (lowest_bit_set == -1)));
1657    }
1658  /* If there are no bits set this should have gone out
1659     as one instruction!  */
1660  if (lowest_bit_set == -1
1661      || highest_bit_set == -1)
1662    abort ();
1663  all_bits_between_are_set = 1;
1664  for (i = lowest_bit_set; i <= highest_bit_set; i++)
1665    {
1666      if (i < 32)
1667	{
1668	  if ((low_bits & (1 << i)) != 0)
1669	    continue;
1670	}
1671      else
1672	{
1673	  if ((high_bits & (1 << (i - 32))) != 0)
1674	    continue;
1675	}
1676      all_bits_between_are_set = 0;
1677      break;
1678    }
1679  *hbsp = highest_bit_set;
1680  *lbsp = lowest_bit_set;
1681  *abbasp = all_bits_between_are_set;
1682}
1683
1684static int const64_is_2insns
1685	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1686
1687static int
1688const64_is_2insns (high_bits, low_bits)
1689     unsigned HOST_WIDE_INT high_bits, low_bits;
1690{
1691  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1692
1693  if (high_bits == 0
1694      || high_bits == 0xffffffff)
1695    return 1;
1696
1697  analyze_64bit_constant (high_bits, low_bits,
1698			  &highest_bit_set, &lowest_bit_set,
1699			  &all_bits_between_are_set);
1700
1701  if ((highest_bit_set == 63
1702       || lowest_bit_set == 0)
1703      && all_bits_between_are_set != 0)
1704    return 1;
1705
1706  if ((highest_bit_set - lowest_bit_set) < 21)
1707    return 1;
1708
1709  return 0;
1710}
1711
1712static unsigned HOST_WIDE_INT create_simple_focus_bits
1713	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1714	       int, int));
1715
1716static unsigned HOST_WIDE_INT
1717create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
1718     unsigned HOST_WIDE_INT high_bits, low_bits;
1719     int lowest_bit_set, shift;
1720{
1721  HOST_WIDE_INT hi, lo;
1722
1723  if (lowest_bit_set < 32)
1724    {
1725      lo = (low_bits >> lowest_bit_set) << shift;
1726      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1727    }
1728  else
1729    {
1730      lo = 0;
1731      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1732    }
1733  if (hi & lo)
1734    abort ();
1735  return (hi | lo);
1736}
1737
1738/* Here we are sure to be arch64 and this is an integer constant
1739   being loaded into a register.  Emit the most efficient
1740   insn sequence possible.  Detection of all the 1-insn cases
1741   has been done already.  */
1742void
1743sparc_emit_set_const64 (op0, op1)
1744     rtx op0;
1745     rtx op1;
1746{
1747  unsigned HOST_WIDE_INT high_bits, low_bits;
1748  int lowest_bit_set, highest_bit_set;
1749  int all_bits_between_are_set;
1750  rtx temp;
1751
1752  /* Sanity check that we know what we are working with.  */
1753  if (! TARGET_ARCH64
1754      || GET_CODE (op0) != REG
1755      || (REGNO (op0) >= SPARC_FIRST_FP_REG
1756	  && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
1757    abort ();
1758
1759  if (reload_in_progress || reload_completed)
1760    temp = op0;
1761  else
1762    temp = gen_reg_rtx (DImode);
1763
1764  if (GET_CODE (op1) != CONST_DOUBLE
1765      && GET_CODE (op1) != CONST_INT)
1766    {
1767      sparc_emit_set_symbolic_const64 (op0, op1, temp);
1768      return;
1769    }
1770
1771  if (GET_CODE (op1) == CONST_DOUBLE)
1772    {
1773#if HOST_BITS_PER_WIDE_INT == 64
1774      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
1775      low_bits  = CONST_DOUBLE_LOW (op1) & 0xffffffff;
1776#else
1777      high_bits = CONST_DOUBLE_HIGH (op1);
1778      low_bits = CONST_DOUBLE_LOW (op1);
1779#endif
1780    }
1781  else
1782    {
1783#if HOST_BITS_PER_WIDE_INT == 64
1784      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1785      low_bits = (INTVAL (op1) & 0xffffffff);
1786#else
1787      high_bits = ((INTVAL (op1) < 0) ?
1788		   0xffffffff :
1789		   0x00000000);
1790      low_bits = INTVAL (op1);
1791#endif
1792    }
1793
1794  /* low_bits	bits 0  --> 31
1795     high_bits	bits 32 --> 63  */
1796
1797  analyze_64bit_constant (high_bits, low_bits,
1798			  &highest_bit_set, &lowest_bit_set,
1799			  &all_bits_between_are_set);
1800
1801  /* First try for a 2-insn sequence.  */
1802
1803  /* These situations are preferred because the optimizer can
1804   * do more things with them:
1805   * 1) mov	-1, %reg
1806   *    sllx	%reg, shift, %reg
1807   * 2) mov	-1, %reg
1808   *    srlx	%reg, shift, %reg
1809   * 3) mov	some_small_const, %reg
1810   *    sllx	%reg, shift, %reg
1811   */
1812  if (((highest_bit_set == 63
1813	|| lowest_bit_set == 0)
1814       && all_bits_between_are_set != 0)
1815      || ((highest_bit_set - lowest_bit_set) < 12))
1816    {
1817      HOST_WIDE_INT the_const = -1;
1818      int shift = lowest_bit_set;
1819
1820      if ((highest_bit_set != 63
1821	   && lowest_bit_set != 0)
1822	  || all_bits_between_are_set == 0)
1823	{
1824	  the_const =
1825	    create_simple_focus_bits (high_bits, low_bits,
1826				      lowest_bit_set, 0);
1827	}
1828      else if (lowest_bit_set == 0)
1829	shift = -(63 - highest_bit_set);
1830
1831      if (! SPARC_SIMM13_P (the_const))
1832	abort ();
1833
1834      emit_insn (gen_safe_SET64 (temp, the_const));
1835      if (shift > 0)
1836	emit_insn (gen_rtx_SET (VOIDmode,
1837				op0,
1838				gen_rtx_ASHIFT (DImode,
1839						temp,
1840						GEN_INT (shift))));
1841      else if (shift < 0)
1842	emit_insn (gen_rtx_SET (VOIDmode,
1843				op0,
1844				gen_rtx_LSHIFTRT (DImode,
1845						  temp,
1846						  GEN_INT (-shift))));
1847      else
1848	abort ();
1849      return;
1850    }
1851
1852  /* Now a range of 22 or less bits set somewhere.
1853   * 1) sethi	%hi(focus_bits), %reg
1854   *    sllx	%reg, shift, %reg
1855   * 2) sethi	%hi(focus_bits), %reg
1856   *    srlx	%reg, shift, %reg
1857   */
1858  if ((highest_bit_set - lowest_bit_set) < 21)
1859    {
1860      unsigned HOST_WIDE_INT focus_bits =
1861	create_simple_focus_bits (high_bits, low_bits,
1862				  lowest_bit_set, 10);
1863
1864      if (! SPARC_SETHI_P (focus_bits))
1865	 abort ();
1866
1867      sparc_emit_set_safe_HIGH64 (temp, focus_bits);
1868
1869      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1870      if (lowest_bit_set < 10)
1871	emit_insn (gen_rtx_SET (VOIDmode,
1872				op0,
1873				gen_rtx_LSHIFTRT (DImode, temp,
1874						  GEN_INT (10 - lowest_bit_set))));
1875      else if (lowest_bit_set > 10)
1876	emit_insn (gen_rtx_SET (VOIDmode,
1877				op0,
1878				gen_rtx_ASHIFT (DImode, temp,
1879						GEN_INT (lowest_bit_set - 10))));
1880      else
1881	abort ();
1882      return;
1883    }
1884
1885  /* 1) sethi	%hi(low_bits), %reg
1886   *    or	%reg, %lo(low_bits), %reg
1887   * 2) sethi	%hi(~low_bits), %reg
1888   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1889   */
1890  if (high_bits == 0
1891      || high_bits == 0xffffffff)
1892    {
1893      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1894				     (high_bits == 0xffffffff));
1895      return;
1896    }
1897
1898  /* Now, try 3-insn sequences.  */
1899
1900  /* 1) sethi	%hi(high_bits), %reg
1901   *    or	%reg, %lo(high_bits), %reg
1902   *    sllx	%reg, 32, %reg
1903   */
1904  if (low_bits == 0)
1905    {
1906      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1907      return;
1908    }
1909
1910  /* We may be able to do something quick
1911     when the constant is negated, so try that.  */
1912  if (const64_is_2insns ((~high_bits) & 0xffffffff,
1913			 (~low_bits) & 0xfffffc00))
1914    {
1915      /* NOTE: The trailing bits get XOR'd so we need the
1916	 non-negated bits, not the negated ones.  */
1917      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1918
1919      if ((((~high_bits) & 0xffffffff) == 0
1920	   && ((~low_bits) & 0x80000000) == 0)
1921	  || (((~high_bits) & 0xffffffff) == 0xffffffff
1922	      && ((~low_bits) & 0x80000000) != 0))
1923	{
1924	  int fast_int = (~low_bits & 0xffffffff);
1925
1926	  if ((SPARC_SETHI_P (fast_int)
1927	       && (~high_bits & 0xffffffff) == 0)
1928	      || SPARC_SIMM13_P (fast_int))
1929	    emit_insn (gen_safe_SET64 (temp, fast_int));
1930	  else
1931	    sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
1932	}
1933      else
1934	{
1935	  rtx negated_const;
1936#if HOST_BITS_PER_WIDE_INT == 64
1937	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1938				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1939#else
1940	  negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
1941						(~low_bits) & 0xfffffc00,
1942						(~high_bits) & 0xffffffff);
1943#endif
1944	  sparc_emit_set_const64 (temp, negated_const);
1945	}
1946
1947      /* If we are XOR'ing with -1, then we should emit a one's complement
1948	 instead.  This way the combiner will notice logical operations
1949	 such as ANDN later on and substitute.  */
1950      if (trailing_bits == 0x3ff)
1951	{
1952	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1953				  gen_rtx_NOT (DImode, temp)));
1954	}
1955      else
1956	{
1957	  emit_insn (gen_rtx_SET (VOIDmode,
1958				  op0,
1959				  gen_safe_XOR64 (temp,
1960						  (-0x400 | trailing_bits))));
1961	}
1962      return;
1963    }
1964
1965  /* 1) sethi	%hi(xxx), %reg
1966   *    or	%reg, %lo(xxx), %reg
1967   *	sllx	%reg, yyy, %reg
1968   *
1969   * ??? This is just a generalized version of the low_bits==0
1970   * thing above, FIXME...
1971   */
1972  if ((highest_bit_set - lowest_bit_set) < 32)
1973    {
1974      unsigned HOST_WIDE_INT focus_bits =
1975	create_simple_focus_bits (high_bits, low_bits,
1976				  lowest_bit_set, 0);
1977
1978      /* We can't get here in this state.  */
1979      if (highest_bit_set < 32
1980	  || lowest_bit_set >= 32)
1981	abort ();
1982
1983      /* So what we know is that the set bits straddle the
1984	 middle of the 64-bit word.  */
1985      sparc_emit_set_const64_quick2 (op0, temp,
1986				     focus_bits, 0,
1987				     lowest_bit_set);
1988      return;
1989    }
1990
1991  /* 1) sethi	%hi(high_bits), %reg
1992   *    or	%reg, %lo(high_bits), %reg
1993   *    sllx	%reg, 32, %reg
1994   *	or	%reg, low_bits, %reg
1995   */
1996  if (SPARC_SIMM13_P(low_bits)
1997      && ((int)low_bits > 0))
1998    {
1999      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2000      return;
2001    }
2002
2003  /* The easiest way when all else fails, is full decomposition. */
2004#if 0
2005  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
2006	  high_bits, low_bits, ~high_bits, ~low_bits);
2007#endif
2008  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2009}
2010
2011/* X and Y are two things to compare using CODE.  Emit the compare insn and
2012   return the rtx for the cc reg in the proper mode.  */
2013
2014rtx
2015gen_compare_reg (code, x, y)
2016     enum rtx_code code;
2017     rtx x, y;
2018{
2019  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
2020  rtx cc_reg;
2021
2022  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2023     fcc regs (cse can't tell they're really call clobbered regs and will
2024     remove a duplicate comparison even if there is an intervening function
2025     call - it will then try to reload the cc reg via an int reg which is why
2026     we need the movcc patterns).  It is possible to provide the movcc
2027     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2028     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2029     to tell cse that CCFPE mode registers (even pseudos) are call
2030     clobbered.  */
2031
2032  /* ??? This is an experiment.  Rather than making changes to cse which may
2033     or may not be easy/clean, we do our own cse.  This is possible because
2034     we will generate hard registers.  Cse knows they're call clobbered (it
2035     doesn't know the same thing about pseudos). If we guess wrong, no big
2036     deal, but if we win, great!  */
2037
2038  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2039#if 1 /* experiment */
2040    {
2041      int reg;
2042      /* We cycle through the registers to ensure they're all exercised.  */
2043      static int next_fcc_reg = 0;
2044      /* Previous x,y for each fcc reg.  */
2045      static rtx prev_args[4][2];
2046
2047      /* Scan prev_args for x,y.  */
2048      for (reg = 0; reg < 4; reg++)
2049	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2050	  break;
2051      if (reg == 4)
2052	{
2053	  reg = next_fcc_reg;
2054	  prev_args[reg][0] = x;
2055	  prev_args[reg][1] = y;
2056	  next_fcc_reg = (next_fcc_reg + 1) & 3;
2057	}
2058      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2059    }
2060#else
2061    cc_reg = gen_reg_rtx (mode);
2062#endif /* ! experiment */
2063  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2064    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2065  else
2066    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2067
2068  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
2069			  gen_rtx_COMPARE (mode, x, y)));
2070
2071  return cc_reg;
2072}
2073
2074/* This function is used for v9 only.
2075   CODE is the code for an Scc's comparison.
2076   OPERANDS[0] is the target of the Scc insn.
2077   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
2078   been generated yet).
2079
2080   This function is needed to turn
2081
2082	   (set (reg:SI 110)
2083	       (gt (reg:CCX 100 %icc)
2084	           (const_int 0)))
2085   into
2086	   (set (reg:SI 110)
2087	       (gt:DI (reg:CCX 100 %icc)
2088	           (const_int 0)))
2089
2090   IE: The instruction recognizer needs to see the mode of the comparison to
2091   find the right instruction. We could use "gt:DI" right in the
2092   define_expand, but leaving it out allows us to handle DI, SI, etc.
2093
2094   We refer to the global sparc compare operands sparc_compare_op0 and
2095   sparc_compare_op1.  */
2096
2097int
2098gen_v9_scc (compare_code, operands)
2099     enum rtx_code compare_code;
2100     register rtx *operands;
2101{
2102  rtx temp, op0, op1;
2103
2104  if (! TARGET_ARCH64
2105      && (GET_MODE (sparc_compare_op0) == DImode
2106	  || GET_MODE (operands[0]) == DImode))
2107    return 0;
2108
2109  /* Handle the case where operands[0] == sparc_compare_op0.
2110     We "early clobber" the result.  */
2111  if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
2112    {
2113      op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
2114      emit_move_insn (op0, sparc_compare_op0);
2115    }
2116  else
2117    op0 = sparc_compare_op0;
2118  /* For consistency in the following.  */
2119  op1 = sparc_compare_op1;
2120
2121  /* Try to use the movrCC insns.  */
2122  if (TARGET_ARCH64
2123      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
2124      && op1 == const0_rtx
2125      && v9_regcmp_p (compare_code))
2126    {
2127      /* Special case for op0 != 0.  This can be done with one instruction if
2128	 operands[0] == sparc_compare_op0.  We don't assume they are equal
2129	 now though.  */
2130
2131      if (compare_code == NE
2132	  && GET_MODE (operands[0]) == DImode
2133	  && GET_MODE (op0) == DImode)
2134	{
2135	  emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
2136	  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2137			      gen_rtx_IF_THEN_ELSE (DImode,
2138				       gen_rtx_fmt_ee (compare_code, DImode,
2139						       op0, const0_rtx),
2140				       const1_rtx,
2141				       operands[0])));
2142	  return 1;
2143	}
2144
2145      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2146      if (GET_MODE (op0) != DImode)
2147	{
2148	  temp = gen_reg_rtx (DImode);
2149	  convert_move (temp, op0, 0);
2150	}
2151      else
2152	temp = op0;
2153      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2154			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2155				   gen_rtx_fmt_ee (compare_code, DImode,
2156						   temp, const0_rtx),
2157				   const1_rtx,
2158				   operands[0])));
2159      return 1;
2160    }
2161  else
2162    {
2163      operands[1] = gen_compare_reg (compare_code, op0, op1);
2164
2165      switch (GET_MODE (operands[1]))
2166	{
2167	  case CCmode :
2168	  case CCXmode :
2169	  case CCFPEmode :
2170	  case CCFPmode :
2171	    break;
2172	  default :
2173	    abort ();
2174	}
2175      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2176      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2177			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2178				   gen_rtx_fmt_ee (compare_code,
2179						   GET_MODE (operands[1]),
2180						   operands[1], const0_rtx),
2181				    const1_rtx, operands[0])));
2182      return 1;
2183    }
2184}
2185
2186/* Emit a conditional jump insn for the v9 architecture using comparison code
2187   CODE and jump target LABEL.
2188   This function exists to take advantage of the v9 brxx insns.  */
2189
2190void
2191emit_v9_brxx_insn (code, op0, label)
2192     enum rtx_code code;
2193     rtx op0, label;
2194{
2195  emit_jump_insn (gen_rtx_SET (VOIDmode,
2196			   pc_rtx,
2197			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2198				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2199						    op0, const0_rtx),
2200				    gen_rtx_LABEL_REF (VOIDmode, label),
2201				    pc_rtx)));
2202}
2203
2204/* Return nonzero if a return peephole merging return with
2205   setting of output register is ok.  */
2206int
2207leaf_return_peephole_ok ()
2208{
2209  return (actual_fsize == 0);
2210}
2211
2212/* Return nonzero if TRIAL can go into the function epilogue's
2213   delay slot.  SLOT is the slot we are trying to fill.  */
2214
2215int
2216eligible_for_epilogue_delay (trial, slot)
2217     rtx trial;
2218     int slot;
2219{
2220  rtx pat, src;
2221
2222  if (slot >= 1)
2223    return 0;
2224
2225  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2226    return 0;
2227
2228  if (get_attr_length (trial) != 1)
2229    return 0;
2230
2231  /* If %g0 is live, there are lots of things we can't handle.
2232     Rather than trying to find them all now, let's punt and only
2233     optimize things as necessary.  */
2234  if (TARGET_LIVE_G0)
2235    return 0;
2236
2237  /* In the case of a true leaf function, anything can go into the delay slot.
2238     A delay slot only exists however if the frame size is zero, otherwise
2239     we will put an insn to adjust the stack after the return.  */
2240  if (current_function_uses_only_leaf_regs)
2241    {
2242      if (leaf_return_peephole_ok ())
2243	return ((get_attr_in_uncond_branch_delay (trial)
2244		 == IN_BRANCH_DELAY_TRUE));
2245      return 0;
2246    }
2247
2248  /* If only trivial `restore' insns work, nothing can go in the
2249     delay slot.  */
2250  else if (TARGET_BROKEN_SAVERESTORE)
2251    return 0;
2252
2253  pat = PATTERN (trial);
2254
2255  /* Otherwise, only operations which can be done in tandem with
2256     a `restore' insn can go into the delay slot.  */
2257  if (GET_CODE (SET_DEST (pat)) != REG
2258      || REGNO (SET_DEST (pat)) >= 32
2259      || REGNO (SET_DEST (pat)) < 24)
2260    return 0;
2261
2262  /* The set of insns matched here must agree precisely with the set of
2263     patterns paired with a RETURN in sparc.md.  */
2264
2265  src = SET_SRC (pat);
2266
2267  /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64.  */
2268  if (arith_operand (src, GET_MODE (src)))
2269    {
2270      if (TARGET_ARCH64)
2271        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2272      else
2273        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2274    }
2275
2276  /* This matches "*return_di".  */
2277  else if (arith_double_operand (src, GET_MODE (src)))
2278    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2279
2280  /* This matches "*return_sf_no_fpu".  */
2281  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
2282	   && register_operand (src, SFmode))
2283    return 1;
2284
2285  /* This matches "*return_addsi".  */
2286  else if (GET_CODE (src) == PLUS
2287	   && arith_operand (XEXP (src, 0), SImode)
2288	   && arith_operand (XEXP (src, 1), SImode)
2289	   && (register_operand (XEXP (src, 0), SImode)
2290	       || register_operand (XEXP (src, 1), SImode)))
2291    return 1;
2292
2293  /* This matches "*return_adddi".  */
2294  else if (GET_CODE (src) == PLUS
2295	   && arith_double_operand (XEXP (src, 0), DImode)
2296	   && arith_double_operand (XEXP (src, 1), DImode)
2297	   && (register_operand (XEXP (src, 0), DImode)
2298	       || register_operand (XEXP (src, 1), DImode)))
2299    return 1;
2300
2301  return 0;
2302}
2303
2304static int
2305check_return_regs (x)
2306     rtx x;
2307{
2308  switch (GET_CODE (x))
2309    {
2310    case REG:
2311      return IN_OR_GLOBAL_P (x);
2312
2313    case CONST_INT:
2314    case CONST_DOUBLE:
2315    case CONST:
2316    case SYMBOL_REF:
2317    case LABEL_REF:
2318    return 1;
2319
2320    case SET:
2321    case IOR:
2322    case AND:
2323    case XOR:
2324    case PLUS:
2325    case MINUS:
2326      if (check_return_regs (XEXP (x, 1)) == 0)
2327  return 0;
2328    case NOT:
2329    case NEG:
2330    case MEM:
2331      return check_return_regs (XEXP (x, 0));
2332
2333    default:
2334      return 0;
2335    }
2336
2337}
2338
2339/* Return 1 if TRIAL references only in and global registers.  */
2340int
2341eligible_for_return_delay (trial)
2342     rtx trial;
2343{
2344  if (GET_CODE (PATTERN (trial)) != SET)
2345    return 0;
2346
2347  return check_return_regs (PATTERN (trial));
2348}
2349
2350int
2351short_branch (uid1, uid2)
2352     int uid1, uid2;
2353{
2354  unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
2355  if (delta + 1024 < 2048)
2356    return 1;
2357  /* warning ("long branch, distance %d", delta); */
2358  return 0;
2359}
2360
2361/* Return non-zero if REG is not used after INSN.
2362   We assume REG is a reload reg, and therefore does
2363   not live past labels or calls or jumps.  */
2364int
2365reg_unused_after (reg, insn)
2366     rtx reg;
2367     rtx insn;
2368{
2369  enum rtx_code code, prev_code = UNKNOWN;
2370
2371  while ((insn = NEXT_INSN (insn)))
2372    {
2373      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2374	return 1;
2375
2376      code = GET_CODE (insn);
2377      if (GET_CODE (insn) == CODE_LABEL)
2378	return 1;
2379
2380      if (GET_RTX_CLASS (code) == 'i')
2381	{
2382	  rtx set = single_set (insn);
2383	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2384	  if (set && in_src)
2385	    return 0;
2386	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2387	    return 1;
2388	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2389	    return 0;
2390	}
2391      prev_code = code;
2392    }
2393  return 1;
2394}
2395
/* The table we use to reference PIC data.  finalize_pic sets this to a
   SYMBOL_REF for "_GLOBAL_OFFSET_TABLE_" each time it runs.  */
static rtx global_offset_table;

/* The function we use to get at it.  get_pc_symbol is a SYMBOL_REF for
   get_pc_symbol_name, created by finalize_pic; get_pc_symbol_name stays an
   empty string until finalize_pic has generated the helper's label.  */
static rtx get_pc_symbol;
static char get_pc_symbol_name[256];
2402
2403/* Ensure that we are not using patterns that are not OK with PIC.  */
2404
2405int
2406check_pic (i)
2407     int i;
2408{
2409  switch (flag_pic)
2410    {
2411    case 1:
2412      if (GET_CODE (recog_operand[i]) == SYMBOL_REF
2413	  || (GET_CODE (recog_operand[i]) == CONST
2414	      && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
2415		    && (XEXP (XEXP (recog_operand[i], 0), 0)
2416			== global_offset_table)
2417		    && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
2418			== CONST))))
2419	abort ();
2420    case 2:
2421    default:
2422      return 1;
2423    }
2424}
2425
2426/* Return true if X is an address which needs a temporary register when
2427   reloaded while generating PIC code.  */
2428
2429int
2430pic_address_needs_scratch (x)
2431     rtx x;
2432{
2433  if (GET_CODE (x) == LABEL_REF)
2434    return 1;
2435
2436  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2437  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2438      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2439      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2440      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2441    return 1;
2442
2443  return 0;
2444}
2445
2446/* Legitimize PIC addresses.  If the address is already position-independent,
2447   we return ORIG.  Newly generated position-independent addresses go into a
2448   reg.  This is REG if non zero, otherwise we allocate register(s) as
2449   necessary.  */
2450
2451rtx
2452legitimize_pic_address (orig, mode, reg)
2453     rtx orig;
2454     enum machine_mode mode ATTRIBUTE_UNUSED;
2455     rtx reg;
2456{
2457  if (GET_CODE (orig) == SYMBOL_REF)
2458    {
2459      rtx pic_ref, address;
2460      rtx insn;
2461
2462      if (reg == 0)
2463	{
2464	  if (reload_in_progress || reload_completed)
2465	    abort ();
2466	  else
2467	    reg = gen_reg_rtx (Pmode);
2468	}
2469
2470      if (flag_pic == 2)
2471	{
2472	  /* If not during reload, allocate another temp reg here for loading
2473	     in the address, so that these instructions can be optimized
2474	     properly.  */
2475	  rtx temp_reg = ((reload_in_progress || reload_completed)
2476			  ? reg : gen_reg_rtx (Pmode));
2477
2478	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
2479	     won't get confused into thinking that these two instructions
2480	     are loading in the true address of the symbol.  If in the
2481	     future a PIC rtx exists, that should be used instead.  */
2482	  if (Pmode == SImode)
2483	    {
2484	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
2485	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
2486	    }
2487	  else
2488	    {
2489	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
2490	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
2491	    }
2492	  address = temp_reg;
2493	}
2494      else
2495	address = orig;
2496
2497      pic_ref = gen_rtx_MEM (Pmode,
2498			 gen_rtx_PLUS (Pmode,
2499				  pic_offset_table_rtx, address));
2500      current_function_uses_pic_offset_table = 1;
2501      RTX_UNCHANGING_P (pic_ref) = 1;
2502      insn = emit_move_insn (reg, pic_ref);
2503      /* Put a REG_EQUAL note on this insn, so that it can be optimized
2504	 by loop.  */
2505      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
2506				  REG_NOTES (insn));
2507      return reg;
2508    }
2509  else if (GET_CODE (orig) == CONST)
2510    {
2511      rtx base, offset;
2512
2513      if (GET_CODE (XEXP (orig, 0)) == PLUS
2514	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
2515	return orig;
2516
2517      if (reg == 0)
2518	{
2519	  if (reload_in_progress || reload_completed)
2520	    abort ();
2521	  else
2522	    reg = gen_reg_rtx (Pmode);
2523	}
2524
2525      if (GET_CODE (XEXP (orig, 0)) == PLUS)
2526	{
2527	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
2528	  offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
2529					 base == reg ? 0 : reg);
2530	}
2531      else
2532	abort ();
2533
2534      if (GET_CODE (offset) == CONST_INT)
2535	{
2536	  if (SMALL_INT (offset))
2537	    return plus_constant_for_output (base, INTVAL (offset));
2538	  else if (! reload_in_progress && ! reload_completed)
2539	    offset = force_reg (Pmode, offset);
2540	  else
2541	    /* If we reach here, then something is seriously wrong.  */
2542	    abort ();
2543	}
2544      return gen_rtx_PLUS (Pmode, base, offset);
2545    }
2546  else if (GET_CODE (orig) == LABEL_REF)
2547    /* ??? Why do we do this?  */
2548    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
2549       the register is live instead, in case it is eliminated.  */
2550    current_function_uses_pic_offset_table = 1;
2551
2552  return orig;
2553}
2554
2555/* Return the RTX for insns to set the PIC register.  */
2556
2557static rtx
2558pic_setup_code ()
2559{
2560  rtx seq;
2561
2562  start_sequence ();
2563  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
2564			 get_pc_symbol));
2565  seq = gen_sequence ();
2566  end_sequence ();
2567
2568  return seq;
2569}
2570
2571/* Emit special PIC prologues and epilogues.  */
2572
2573void
2574finalize_pic ()
2575{
2576  /* Labels to get the PC in the prologue of this function.  */
2577  int orig_flag_pic = flag_pic;
2578  rtx insn;
2579
2580  if (current_function_uses_pic_offset_table == 0)
2581    return;
2582
2583  if (! flag_pic)
2584    abort ();
2585
2586  /* If we havn't emitted the special get_pc helper function, do so now.  */
2587  if (get_pc_symbol_name[0] == 0)
2588    {
2589      int align;
2590
2591      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
2592      text_section ();
2593
2594      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
2595      if (align > 0)
2596	ASM_OUTPUT_ALIGN (asm_out_file, align);
2597      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
2598      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
2599    }
2600
2601  /* Initialize every time through, since we can't easily
2602     know this to be permanent.  */
2603  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2604  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
2605  flag_pic = 0;
2606
2607  emit_insn_after (pic_setup_code (), get_insns ());
2608
2609  /* Insert the code in each nonlocal goto receiver.
2610     If you make changes here or to the nonlocal_goto_receiver
2611     pattern, make sure the unspec_volatile numbers still
2612     match.  */
2613  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2614    if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
2615	&& XINT (PATTERN (insn), 1) == 5)
2616      emit_insn_after (pic_setup_code (), insn);
2617
2618  flag_pic = orig_flag_pic;
2619
2620  /* Need to emit this whether or not we obey regdecls,
2621     since setjmp/longjmp can cause life info to screw up.
2622     ??? In the case where we don't obey regdecls, this is not sufficient
2623     since we may not fall out the bottom.  */
2624  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
2625}
2626
2627/* Return 1 if RTX is a MEM which is known to be aligned to at
2628   least an 8 byte boundary.  */
2629
2630int
2631mem_min_alignment (mem, desired)
2632     rtx mem;
2633     int desired;
2634{
2635  rtx addr, base, offset;
2636
2637  /* If it's not a MEM we can't accept it.  */
2638  if (GET_CODE (mem) != MEM)
2639    return 0;
2640
2641  addr = XEXP (mem, 0);
2642  base = offset = NULL_RTX;
2643  if (GET_CODE (addr) == PLUS)
2644    {
2645      if (GET_CODE (XEXP (addr, 0)) == REG)
2646	{
2647	  base = XEXP (addr, 0);
2648
2649	  /* What we are saying here is that if the base
2650	     REG is aligned properly, the compiler will make
2651	     sure any REG based index upon it will be so
2652	     as well.  */
2653	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
2654	    offset = XEXP (addr, 1);
2655	  else
2656	    offset = const0_rtx;
2657	}
2658    }
2659  else if (GET_CODE (addr) == REG)
2660    {
2661      base = addr;
2662      offset = const0_rtx;
2663    }
2664
2665  if (base != NULL_RTX)
2666    {
2667      int regno = REGNO (base);
2668
2669      if (regno != FRAME_POINTER_REGNUM
2670	  && regno != STACK_POINTER_REGNUM)
2671	{
2672	  /* Check if the compiler has recorded some information
2673	     about the alignment of the base REG.  If reload has
2674	     completed, we already matched with proper alignments.  */
2675	  if (((regno_pointer_align != NULL
2676		&& REGNO_POINTER_ALIGN (regno) >= desired)
2677	       || reload_completed)
2678	      && ((INTVAL (offset) & (desired - 1)) == 0))
2679	    return 1;
2680	}
2681      else
2682	{
2683	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
2684	    return 1;
2685	}
2686    }
2687  else if (! TARGET_UNALIGNED_DOUBLES
2688	   || CONSTANT_P (addr)
2689	   || GET_CODE (addr) == LO_SUM)
2690    {
2691      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
2692	 is true, in which case we can only assume that an access is aligned if
2693	 it is to a constant address, or the address involves a LO_SUM.  */
2694      return 1;
2695    }
2696
2697  /* An obviously unaligned address.  */
2698  return 0;
2699}
2700
2701
/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  */

enum sparc_mode_class {
  /* Integer classes, in increasing size.  */
  S_MODE,
  D_MODE,
  T_MODE,
  O_MODE,
  /* Floating point classes, in increasing size.  */
  SF_MODE,
  DF_MODE,
  TF_MODE,
  OF_MODE,
  /* Condition code classes.  */
  CC_MODE,
  CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

#define DF_MODES64 DF_MODES

/* Modes for double-float only quantities.  */
#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for double-float and larger quantities.  */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
2753
/* Value is 1 if register/mode pair is acceptable on sparc.
   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).

   Both tables below hold, for each hard register number, the set of
   sparc_mode_class bits (the *_MODES masks) allowed in that register.  */

/* This points to either the 32 bit or the 64 bit version; sparc_init_modes
   selects which one according to TARGET_ARCH64.  */
int *hard_regno_mode_classes;

/* Table used when not TARGET_ARCH64.  */
static int hard_32bit_mode_classes[] = {
  /* Registers 0 to 31 (GENERAL_REGS in sparc_init_modes, apart from the
     TARGET_V8PLUS case).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* Registers 32 to 63 (FP_REGS in sparc_init_modes).  */
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

/* Table used when TARGET_ARCH64.  Compared with the 32 bit table, every
   one of registers 0 to 31 allows at least D_MODES.  */
static int hard_64bit_mode_classes[] = {
  /* Registers 0 to 31.  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* Registers 32 to 63 (FP_REGS in sparc_init_modes).  */
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};
2812
/* For each machine mode, a word with the single bit of its
   sparc_mode_class set, or 0 if the mode has no class.  Filled in by
   sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* The register class of each hard register.  Filled in by
   sparc_init_modes, which describes it as the array used by
   REGNO_REG_CLASS.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
2816
2817static void
2818sparc_init_modes ()
2819{
2820  int i;
2821
2822  for (i = 0; i < NUM_MACHINE_MODES; i++)
2823    {
2824      switch (GET_MODE_CLASS (i))
2825	{
2826	case MODE_INT:
2827	case MODE_PARTIAL_INT:
2828	case MODE_COMPLEX_INT:
2829	  if (GET_MODE_SIZE (i) <= 4)
2830	    sparc_mode_class[i] = 1 << (int) S_MODE;
2831	  else if (GET_MODE_SIZE (i) == 8)
2832	    sparc_mode_class[i] = 1 << (int) D_MODE;
2833	  else if (GET_MODE_SIZE (i) == 16)
2834	    sparc_mode_class[i] = 1 << (int) T_MODE;
2835	  else if (GET_MODE_SIZE (i) == 32)
2836	    sparc_mode_class[i] = 1 << (int) O_MODE;
2837	  else
2838	    sparc_mode_class[i] = 0;
2839	  break;
2840	case MODE_FLOAT:
2841	case MODE_COMPLEX_FLOAT:
2842	  if (GET_MODE_SIZE (i) <= 4)
2843	    sparc_mode_class[i] = 1 << (int) SF_MODE;
2844	  else if (GET_MODE_SIZE (i) == 8)
2845	    sparc_mode_class[i] = 1 << (int) DF_MODE;
2846	  else if (GET_MODE_SIZE (i) == 16)
2847	    sparc_mode_class[i] = 1 << (int) TF_MODE;
2848	  else if (GET_MODE_SIZE (i) == 32)
2849	    sparc_mode_class[i] = 1 << (int) OF_MODE;
2850	  else
2851	    sparc_mode_class[i] = 0;
2852	  break;
2853	case MODE_CC:
2854	default:
2855	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
2856	     we must explicitly check for them here.  */
2857	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
2858	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
2859	  else if (i == (int) CCmode || i == (int) CC_NOOVmode
2860		   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
2861	    sparc_mode_class[i] = 1 << (int) CC_MODE;
2862	  else
2863	    sparc_mode_class[i] = 0;
2864	  break;
2865	}
2866    }
2867
2868  if (TARGET_ARCH64)
2869    hard_regno_mode_classes = hard_64bit_mode_classes;
2870  else
2871    hard_regno_mode_classes = hard_32bit_mode_classes;
2872
2873  /* Initialize the array used by REGNO_REG_CLASS.  */
2874  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2875    {
2876      if (i < 16 && TARGET_V8PLUS)
2877	sparc_regno_reg_class[i] = I64_REGS;
2878      else if (i < 32)
2879	sparc_regno_reg_class[i] = GENERAL_REGS;
2880      else if (i < 64)
2881	sparc_regno_reg_class[i] = FP_REGS;
2882      else if (i < 96)
2883	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
2884      else if (i < 100)
2885	sparc_regno_reg_class[i] = FPCC_REGS;
2886      else
2887	sparc_regno_reg_class[i] = NO_REGS;
2888    }
2889}
2890
2891/* Save non call used registers from LOW to HIGH at BASE+OFFSET.
2892   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2893   v9 int regs as it simplifies the code.  */
2894
2895static int
2896save_regs (file, low, high, base, offset, n_regs, real_offset)
2897     FILE *file;
2898     int low, high;
2899     const char *base;
2900     int offset;
2901     int n_regs;
2902     int real_offset;
2903{
2904  int i;
2905
2906  if (TARGET_ARCH64 && high <= 32)
2907    {
2908      for (i = low; i < high; i++)
2909	{
2910	  if (regs_ever_live[i] && ! call_used_regs[i])
2911	    {
2912	      fprintf (file, "\tstx\t%s, [%s+%d]\n",
2913		       reg_names[i], base, offset + 4 * n_regs);
2914	      if (dwarf2out_do_frame ())
2915		dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
2916	      n_regs += 2;
2917	    }
2918	}
2919    }
2920  else
2921    {
2922      for (i = low; i < high; i += 2)
2923	{
2924	  if (regs_ever_live[i] && ! call_used_regs[i])
2925	    {
2926	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2927		{
2928		  fprintf (file, "\tstd\t%s, [%s+%d]\n",
2929			   reg_names[i], base, offset + 4 * n_regs);
2930		  if (dwarf2out_do_frame ())
2931		    {
2932		      char *l = dwarf2out_cfi_label ();
2933		      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
2934		      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
2935		    }
2936		  n_regs += 2;
2937		}
2938	      else
2939		{
2940		  fprintf (file, "\tst\t%s, [%s+%d]\n",
2941			   reg_names[i], base, offset + 4 * n_regs);
2942		  if (dwarf2out_do_frame ())
2943		    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
2944		  n_regs += 2;
2945		}
2946	    }
2947	  else
2948	    {
2949	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2950		{
2951		  fprintf (file, "\tst\t%s, [%s+%d]\n",
2952			   reg_names[i+1], base, offset + 4 * n_regs + 4);
2953		  if (dwarf2out_do_frame ())
2954		    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
2955		  n_regs += 2;
2956		}
2957	    }
2958	}
2959    }
2960  return n_regs;
2961}
2962
2963/* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
2964
2965   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2966   v9 int regs as it simplifies the code.  */
2967
2968static int
2969restore_regs (file, low, high, base, offset, n_regs)
2970     FILE *file;
2971     int low, high;
2972     const char *base;
2973     int offset;
2974     int n_regs;
2975{
2976  int i;
2977
2978  if (TARGET_ARCH64 && high <= 32)
2979    {
2980      for (i = low; i < high; i++)
2981	{
2982	  if (regs_ever_live[i] && ! call_used_regs[i])
2983	    fprintf (file, "\tldx\t[%s+%d], %s\n",
2984	      base, offset + 4 * n_regs, reg_names[i]),
2985	    n_regs += 2;
2986	}
2987    }
2988  else
2989    {
2990      for (i = low; i < high; i += 2)
2991	{
2992	  if (regs_ever_live[i] && ! call_used_regs[i])
2993	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2994	      fprintf (file, "\tldd\t[%s+%d], %s\n",
2995		       base, offset + 4 * n_regs, reg_names[i]),
2996	      n_regs += 2;
2997	    else
2998	      fprintf (file, "\tld\t[%s+%d],%s\n",
2999		       base, offset + 4 * n_regs, reg_names[i]),
3000	      n_regs += 2;
3001	  else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3002	    fprintf (file, "\tld\t[%s+%d],%s\n",
3003		     base, offset + 4 * n_regs + 4, reg_names[i+1]),
3004	    n_regs += 2;
3005	}
3006    }
3007  return n_regs;
3008}
3009
/* Static variables we want to share between prologue and epilogue.  */

/* Number of live general or floating point registers needed to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE; otherwise
   compute_frame_size, which sets this, leaves it at 0.  */
static int num_gfregs;
3015
3016/* Compute the frame size required by the function.  This function is called
3017   during the reload pass and also by output_function_prologue().  */
3018
3019int
3020compute_frame_size (size, leaf_function)
3021     int size;
3022     int leaf_function;
3023{
3024  int n_regs = 0, i;
3025  int outgoing_args_size = (current_function_outgoing_args_size
3026			    + REG_PARM_STACK_SPACE (current_function_decl));
3027
3028  if (TARGET_EPILOGUE)
3029    {
3030      /* N_REGS is the number of 4-byte regs saved thus far.  This applies
3031	 even to v9 int regs to be consistent with save_regs/restore_regs.  */
3032
3033      if (TARGET_ARCH64)
3034	{
3035	  for (i = 0; i < 8; i++)
3036	    if (regs_ever_live[i] && ! call_used_regs[i])
3037	      n_regs += 2;
3038	}
3039      else
3040	{
3041	  for (i = 0; i < 8; i += 2)
3042	    if ((regs_ever_live[i] && ! call_used_regs[i])
3043		|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3044	      n_regs += 2;
3045	}
3046
3047      for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3048	if ((regs_ever_live[i] && ! call_used_regs[i])
3049	    || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3050	  n_regs += 2;
3051    }
3052
3053  /* Set up values for use in `function_epilogue'.  */
3054  num_gfregs = n_regs;
3055
3056  if (leaf_function && n_regs == 0
3057      && size == 0 && current_function_outgoing_args_size == 0)
3058    {
3059      actual_fsize = apparent_fsize = 0;
3060    }
3061  else
3062    {
3063      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
3064         The stack bias (if any) is taken out to undo its effects.  */
3065      apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
3066      apparent_fsize += n_regs * 4;
3067      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3068    }
3069
3070  /* Make sure nothing can clobber our register windows.
3071     If a SAVE must be done, or there is a stack-local variable,
3072     the register window area must be allocated.
3073     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
3074  if (leaf_function == 0 || size > 0)
3075    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);
3076
3077  return SPARC_STACK_ALIGN (actual_fsize);
3078}
3079
3080/* Build a (32 bit) big number in a register.  */
3081/* ??? We may be able to use the set macro here too.  */
3082
3083static void
3084build_big_number (file, num, reg)
3085     FILE *file;
3086     int num;
3087     const char *reg;
3088{
3089  if (num >= 0 || ! TARGET_ARCH64)
3090    {
3091      fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3092      if ((num & 0x3ff) != 0)
3093	fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3094    }
3095  else /* num < 0 && TARGET_ARCH64 */
3096    {
3097      /* Sethi does not sign extend, so we must use a little trickery
3098	 to use it for negative numbers.  Invert the constant before
3099	 loading it in, then use xor immediate to invert the loaded bits
3100	 (along with the upper 32 bits) to the desired constant.  This
3101	 works because the sethi and immediate fields overlap.  */
3102      int asize = num;
3103      int inv = ~asize;
3104      int low = -0x400 + (asize & 0x3FF);
3105
3106      fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3107	       inv, reg, reg, low, reg);
3108    }
3109}
3110
3111/* Output code for the function prologue.  */
3112
3113void
3114output_function_prologue (file, size, leaf_function)
3115     FILE *file;
3116     int size;
3117     int leaf_function;
3118{
3119  /* Need to use actual_fsize, since we are also allocating
3120     space for our callee (and our own register save area).  */
3121  actual_fsize = compute_frame_size (size, leaf_function);
3122
3123  if (leaf_function)
3124    {
3125      frame_base_name = "%sp";
3126      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
3127    }
3128  else
3129    {
3130      frame_base_name = "%fp";
3131      frame_base_offset = SPARC_STACK_BIAS;
3132    }
3133
3134  /* This is only for the human reader.  */
3135  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
3136
3137  if (actual_fsize == 0)
3138    /* do nothing.  */ ;
3139  else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
3140    {
3141      if (actual_fsize <= 4096)
3142	fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
3143      else if (actual_fsize <= 8192)
3144	{
3145	  fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
3146	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3147	}
3148      else
3149	{
3150	  build_big_number (file, -actual_fsize, "%g1");
3151	  fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
3152	}
3153    }
3154  else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
3155    {
3156      /* We assume the environment will properly handle or otherwise avoid
3157	 trouble associated with an interrupt occurring after the `save' or
3158	 trap occurring during it.  */
3159      fprintf (file, "\tsave\n");
3160
3161      if (actual_fsize <= 4096)
3162	fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
3163      else if (actual_fsize <= 8192)
3164	{
3165	  fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
3166	  fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
3167	}
3168      else
3169	{
3170	  build_big_number (file, -actual_fsize, "%g1");
3171	  fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
3172	}
3173    }
3174  else /* leaf function */
3175    {
3176      if (actual_fsize <= 4096)
3177	fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
3178      else if (actual_fsize <= 8192)
3179	{
3180	  fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
3181	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3182	}
3183      else
3184	{
3185	  build_big_number (file, -actual_fsize, "%g1");
3186	  fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
3187	}
3188    }
3189
3190  if (dwarf2out_do_frame () && actual_fsize)
3191    {
3192      char *label = dwarf2out_cfi_label ();
3193
3194      /* The canonical frame address refers to the top of the frame.  */
3195      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
3196				 : FRAME_POINTER_REGNUM),
3197			 frame_base_offset);
3198
3199      if (! leaf_function)
3200	{
3201	  /* Note the register window save.  This tells the unwinder that
3202	     it needs to restore the window registers from the previous
3203	     frame's window save area at 0(cfa).  */
3204	  dwarf2out_window_save (label);
3205
3206	  /* The return address (-8) is now in %i7.  */
3207	  dwarf2out_return_reg (label, 31);
3208	}
3209    }
3210
3211  /* If doing anything with PIC, do it now.  */
3212  if (! flag_pic)
3213    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
3214
3215  /* Call saved registers are saved just above the outgoing argument area.  */
3216  if (num_gfregs)
3217    {
3218      int offset, real_offset, n_regs;
3219      const char *base;
3220
3221      real_offset = -apparent_fsize;
3222      offset = -apparent_fsize + frame_base_offset;
3223      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
3224	{
3225	  /* ??? This might be optimized a little as %g1 might already have a
3226	     value close enough that a single add insn will do.  */
3227	  /* ??? Although, all of this is probably only a temporary fix
3228	     because if %g1 can hold a function result, then
3229	     output_function_epilogue will lose (the result will get
3230	     clobbered).  */
3231	  build_big_number (file, offset, "%g1");
3232	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3233	  base = "%g1";
3234	  offset = 0;
3235	}
3236      else
3237	{
3238	  base = frame_base_name;
3239	}
3240
3241      n_regs = 0;
3242      if (TARGET_EPILOGUE && ! leaf_function)
3243	/* ??? Originally saved regs 0-15 here.  */
3244	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3245      else if (leaf_function)
3246	/* ??? Originally saved regs 0-31 here.  */
3247	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3248      if (TARGET_EPILOGUE)
3249	save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
3250		   real_offset);
3251    }
3252
3253  leaf_label = 0;
3254  if (leaf_function && actual_fsize != 0)
3255    {
3256      /* warning ("leaf procedure with frame size %d", actual_fsize); */
3257      if (! TARGET_EPILOGUE)
3258	leaf_label = gen_label_rtx ();
3259    }
3260}
3261
/* Output code for the function epilogue to FILE.

   SIZE is unused.  LEAF_FUNCTION is nonzero for a leaf function.

   This relies on values left behind by compute_frame_size and
   output_function_prologue: num_gfregs, apparent_fsize, actual_fsize,
   frame_base_name, frame_base_offset and leaf_label.  Any deferred case
   vectors are output last, on every path.  */

void
output_function_epilogue (file, size, leaf_function)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
     int leaf_function;
{
  const char *ret;

  /* The prologue created leaf_label for a leaf function with a nonzero
     frame when ! TARGET_EPILOGUE; place it after the last insn and
     output it.  */
  if (leaf_label)
    {
      emit_label_after (leaf_label, get_last_insn ());
      final_scan_insn (get_last_insn (), file, 0, 0, 1);
    }

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  else if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT(file);
    }
#endif

  else if (current_function_epilogue_delay_list == 0)
    {
      /* If code does not drop into the epilogue, we need
	 do nothing except output pending case vectors.  */
      rtx insn = get_last_insn ();
      if (GET_CODE (insn) == NOTE)
      insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
      goto output_vectors;
    }

  /* Restore any call saved registers.  */
  if (num_gfregs)
    {
      int offset, n_regs;
      const char *base;

      offset = -apparent_fsize + frame_base_offset;
      if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
	{
	  /* The save area is out of reach of an immediate offset from the
	     frame base: compute its address into %g1 and restore relative
	     to that.  */
	  build_big_number (file, offset, "%g1");
	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
	  base = "%g1";
	  offset = 0;
	}
      else
	{
	  base = frame_base_name;
	}

      /* Same register ranges, in the same order, as the save_regs calls
	 in output_function_prologue.  N_REGS carries the slot count from
	 the first call into the second.  */
      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
	/* ??? Originally saved regs 0-15 here.  */
	n_regs = restore_regs (file, 0, 8, base, offset, 0);
      else if (leaf_function)
	/* ??? Originally saved regs 0-31 here.  */
	n_regs = restore_regs (file, 0, 8, base, offset, 0);
      if (TARGET_EPILOGUE)
	restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
    }

  /* Work out how to skip the caller's unimp instruction if required.  */
  if (leaf_function)
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
  else
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");

  if (TARGET_EPILOGUE || leaf_label)
    {
      /* Clear the TARGET_EPILOGUE bit(s) in target_flags while the return
	 sequence is output; they are put back at the end of this block.
	 NOTE(review): presumably this is so that code reached through
	 final_scan_insn sees the flag off -- confirm.  */
      int old_target_epilogue = TARGET_EPILOGUE;
      target_flags &= ~old_target_epilogue;

      if (! leaf_function)
	{
	  /* If we wound up with things in our delay slot, flush them here.  */
	  if (current_function_epilogue_delay_list)
	    {
	      /* Emit a RETURN jump and turn its pattern into a PARALLEL of
		 the delay-list insn's pattern and the RETURN, then output
		 that single insn.  */
	      rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
					       get_last_insn ());
	      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
					gen_rtvec (2,
						   PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
						   PATTERN (insn)));
	      final_scan_insn (insn, file, 1, 0, 1);
	    }
	  else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
	    fputs ("\treturn\t%i7+8\n\tnop\n", file);
	  else
	    fprintf (file, "\t%s\n\trestore\n", ret);
	}
      /* All of the following cases are for leaf functions.  */
      else if (current_function_epilogue_delay_list)
	{
	  /* eligible_for_epilogue_delay_slot ensures that if this is a
	     leaf function, then we will only have insn in the delay slot
	     if the frame size is zero, thus no adjust for the stack is
	     needed here.  */
	  if (actual_fsize != 0)
	    abort ();
	  fprintf (file, "\t%s\n", ret);
	  final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
			   file, 1, 0, 1);
	}
      /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
	 avoid generating confusing assembly language output.  */
      else if (actual_fsize == 0)
	fprintf (file, "\t%s\n\tnop\n", ret);
      /* Otherwise the frame is released by an instruction written right
	 after the return: one immediate for frames up to 4096 bytes, two
	 steps (the first before the return) up to 8192 bytes, and via
	 %g1 for anything larger.  */
      else if (actual_fsize <= 4096)
	fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
      else if (actual_fsize <= 8192)
	fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
		 ret, actual_fsize - 4096);
      /* Low ten bits all zero: sethi alone builds the size.  */
      else if ((actual_fsize & 0x3ff) == 0)
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, ret);
      else
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, actual_fsize, ret);
      target_flags |= old_target_epilogue;
    }

  /* Reached directly when the function never falls into the epilogue.  */
 output_vectors:
  sparc_output_deferred_case_vectors ();
}
3389
3390/* Functions for handling argument passing.
3391
3392   For v8 the first six args are normally in registers and the rest are
3393   pushed.  Any arg that starts within the first 6 words is at least
3394   partially passed in a register unless its data type forbids.
3395
3396   For v9, the argument registers are laid out as an array of 16 elements
3397   and arguments are added sequentially.  The first 6 int args and up to the
3398   first 16 fp args (depending on size) are passed in regs.
3399
3400   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
3401   ----    -----   --------   -----   ------------------   ------   -----------
3402    15   [SP+248]              %f31       %f30,%f31         %d30
3403    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
3404    13   [SP+232]              %f27       %f26,%f27         %d26
3405    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
3406    11   [SP+216]              %f23       %f22,%f23         %d22
3407    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
3408     9   [SP+200]              %f19       %f18,%f19         %d18
3409     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
3410     7   [SP+184]              %f15       %f14,%f15         %d14
3411     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
3412     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
3413     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
3414     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
3415     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
3416     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
3417     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
3418
3419   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
3420
3421   Integral arguments are always passed as 64 bit quantities appropriately
3422   extended.
3423
3424   Passing of floating point values is handled as follows.
3425   If a prototype is in scope:
3426     If the value is in a named argument (i.e. not a stdarg function or a
3427     value not part of the `...') then the value is passed in the appropriate
3428     fp reg.
3429     If the value is part of the `...' and is passed in one of the first 6
3430     slots then the value is passed in the appropriate int reg.
3431     If the value is part of the `...' and is not passed in one of the first 6
3432     slots then the value is passed in memory.
3433   If a prototype is not in scope:
3434     If the value is one of the first 6 arguments the value is passed in the
3435     appropriate integer reg and the appropriate fp reg.
3436     If the value is not one of the first 6 arguments the value is passed in
3437     the appropriate fp reg and in memory.
3438   */
3439
/* Maximum number of int regs for args.  */
#define SPARC_INT_ARG_MAX 6
/* Maximum number of fp regs for args.  */
#define SPARC_FP_ARG_MAX 16

/* Number of words needed to hold SIZE storage units, rounded up.  */
#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3446
3447/* Handle the INIT_CUMULATIVE_ARGS macro.
3448   Initialize a variable CUM of type CUMULATIVE_ARGS
3449   for a call to a function whose data type is FNTYPE.
3450   For a library call, FNTYPE is 0.  */
3451
3452void
3453init_cumulative_args (cum, fntype, libname, indirect)
3454     CUMULATIVE_ARGS *cum;
3455     tree fntype;
3456     tree libname ATTRIBUTE_UNUSED;
3457     int indirect ATTRIBUTE_UNUSED;
3458{
3459  cum->words = 0;
3460  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3461  cum->libcall_p = fntype == 0;
3462}
3463
3464/* Compute the slot number to pass an argument in.
3465   Returns the slot number or -1 if passing on the stack.
3466
3467   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3468    the preceding args and about the function being called.
3469   MODE is the argument's machine mode.
3470   TYPE is the data type of the argument (as a tree).
3471    This is null for libcalls where that information may
3472    not be available.
3473   NAMED is nonzero if this argument is a named parameter
3474    (otherwise it is an extra parameter matching an ellipsis).
3475   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3476   *PREGNO records the register number to use if scalar type.
3477   *PPADDING records the amount of padding needed in words.  */
3478
3479static int
3480function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
3481     const CUMULATIVE_ARGS *cum;
3482     enum machine_mode mode;
3483     tree type;
3484     int named;
3485     int incoming_p;
3486     int *pregno;
3487     int *ppadding;
3488{
3489  int regbase = (incoming_p
3490		 ? SPARC_INCOMING_INT_ARG_FIRST
3491		 : SPARC_OUTGOING_INT_ARG_FIRST);
3492  int slotno = cum->words;
3493  int regno;
3494
3495  *ppadding = 0;
3496
3497  if (type != 0 && TREE_ADDRESSABLE (type))
3498    return -1;
3499  if (TARGET_ARCH32
3500      && type != 0 && mode == BLKmode
3501      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
3502    return -1;
3503
3504  switch (mode)
3505    {
3506    case VOIDmode :
3507      /* MODE is VOIDmode when generating the actual call.
3508	 See emit_call_1.  */
3509      return -1;
3510
3511    case QImode : case CQImode :
3512    case HImode : case CHImode :
3513    case SImode : case CSImode :
3514    case DImode : case CDImode :
3515      if (slotno >= SPARC_INT_ARG_MAX)
3516	return -1;
3517      regno = regbase + slotno;
3518      break;
3519
3520    case SFmode : case SCmode :
3521    case DFmode : case DCmode :
3522    case TFmode : case TCmode :
3523      if (TARGET_ARCH32)
3524	{
3525	  if (slotno >= SPARC_INT_ARG_MAX)
3526	    return -1;
3527	  regno = regbase + slotno;
3528	}
3529      else
3530	{
3531	  if ((mode == TFmode || mode == TCmode)
3532	      && (slotno & 1) != 0)
3533	    slotno++, *ppadding = 1;
3534	  if (TARGET_FPU && named)
3535	    {
3536	      if (slotno >= SPARC_FP_ARG_MAX)
3537		return -1;
3538	      regno = SPARC_FP_ARG_FIRST + slotno * 2;
3539	      if (mode == SFmode)
3540		regno++;
3541	    }
3542	  else
3543	    {
3544	      if (slotno >= SPARC_INT_ARG_MAX)
3545		return -1;
3546	      regno = regbase + slotno;
3547	    }
3548	}
3549      break;
3550
3551    case BLKmode :
3552      /* For sparc64, objects requiring 16 byte alignment get it.  */
3553      if (TARGET_ARCH64)
3554	{
3555	  if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
3556	    slotno++, *ppadding = 1;
3557	}
3558
3559      if (TARGET_ARCH32
3560	  || (type && TREE_CODE (type) == UNION_TYPE))
3561	{
3562	  if (slotno >= SPARC_INT_ARG_MAX)
3563	    return -1;
3564	  regno = regbase + slotno;
3565	}
3566      else
3567	{
3568	  tree field;
3569	  int intregs_p = 0, fpregs_p = 0;
3570	  /* The ABI obviously doesn't specify how packed
3571	     structures are passed.  These are defined to be passed
3572	     in int regs if possible, otherwise memory.  */
3573	  int packed_p = 0;
3574
3575	  /* First see what kinds of registers we need.  */
3576	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3577	    {
3578	      if (TREE_CODE (field) == FIELD_DECL)
3579		{
3580		  if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3581		      && TARGET_FPU)
3582		    fpregs_p = 1;
3583		  else
3584		    intregs_p = 1;
3585		  if (DECL_PACKED (field))
3586		    packed_p = 1;
3587		}
3588	    }
3589	  if (packed_p || !named)
3590	    fpregs_p = 0, intregs_p = 1;
3591
3592	  /* If all arg slots are filled, then must pass on stack.  */
3593	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
3594	    return -1;
3595	  /* If there are only int args and all int arg slots are filled,
3596	     then must pass on stack.  */
3597	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
3598	    return -1;
3599	  /* Note that even if all int arg slots are filled, fp members may
3600	     still be passed in regs if such regs are available.
3601	     *PREGNO isn't set because there may be more than one, it's up
3602	     to the caller to compute them.  */
3603	  return slotno;
3604	}
3605      break;
3606
3607    default :
3608      abort ();
3609    }
3610
3611  *pregno = regno;
3612  return slotno;
3613}
3614
/* State shared by the function_arg_record_value_* routines while they
   walk a structure type recursively, first to count and then to assign
   the registers used to pass it.  */

struct function_arg_record_value_parms
{
  /* The PARALLEL being filled in; NULL_RTX during the counting pass.  */
  rtx ret;
  /* SLOTNO is the argument slot of the start of the structure, NAMED is
     nonzero for a named argument, and REGBASE is the number of the first
     integer argument register.  */
  int slotno, named, regbase;
  /* NREGS is the number of registers counted or assigned so far.
     INTOFFSET is the bit offset at which the current run of fields bound
     for integer registers began, or -1 when no such run is open.  */
  int nregs, intoffset;
};

/* Forward declarations for the helpers that are used before they are
   defined.  */
static void function_arg_record_value_3
	PROTO((int, struct function_arg_record_value_parms *));
static void function_arg_record_value_2
	PROTO((tree, int, struct function_arg_record_value_parms *));
static rtx function_arg_record_value
	PROTO((tree, enum machine_mode, int, int, int));
3630
3631static void
3632function_arg_record_value_1 (type, startbitpos, parms)
3633     tree type;
3634     int startbitpos;
3635     struct function_arg_record_value_parms *parms;
3636{
3637  tree field;
3638
3639  /* The ABI obviously doesn't specify how packed structures are
3640     passed.  These are defined to be passed in int regs if possible,
3641     otherwise memory.  */
3642  int packed_p = 0;
3643
3644  /* We need to compute how many registers are needed so we can
3645     allocate the PARALLEL but before we can do that we need to know
3646     whether there are any packed fields.  If there are, int regs are
3647     used regardless of whether there are fp values present.  */
3648  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3649    {
3650      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3651	{
3652	  packed_p = 1;
3653	  break;
3654	}
3655    }
3656
3657  /* Compute how many registers we need.  */
3658  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3659    {
3660      if (TREE_CODE (field) == FIELD_DECL)
3661	{
3662	  int bitpos = startbitpos;
3663	  if (DECL_FIELD_BITPOS (field))
3664	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3665	  /* ??? FIXME: else assume zero offset.  */
3666
3667	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3668	    {
3669	      function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
3670	    }
3671	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3672	           && TARGET_FPU
3673	           && ! packed_p
3674	           && parms->named)
3675	    {
3676	      if (parms->intoffset != -1)
3677		{
3678		  int intslots, this_slotno;
3679
3680		  intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
3681		    / BITS_PER_WORD;
3682		  this_slotno = parms->slotno + parms->intoffset
3683		    / BITS_PER_WORD;
3684
3685		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3686		  intslots = MAX (intslots, 0);
3687		  parms->nregs += intslots;
3688		  parms->intoffset = -1;
3689		}
3690
3691	      /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
3692		 If it wasn't true we wouldn't be here.  */
3693	      parms->nregs += 1;
3694	    }
3695	  else
3696	    {
3697	      if (parms->intoffset == -1)
3698		parms->intoffset = bitpos;
3699	    }
3700	}
3701    }
3702}
3703
3704/* Handle recursive structure field register assignment.  */
3705
3706static void
3707function_arg_record_value_3 (bitpos, parms)
3708     int bitpos;
3709     struct function_arg_record_value_parms *parms;
3710{
3711  enum machine_mode mode;
3712  int regno, this_slotno, intslots, intoffset;
3713  rtx reg;
3714
3715  if (parms->intoffset == -1)
3716    return;
3717  intoffset = parms->intoffset;
3718  parms->intoffset = -1;
3719
3720  intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
3721  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
3722
3723  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3724  if (intslots <= 0)
3725    return;
3726
3727  /* If this is the trailing part of a word, only load that much into
3728     the register.  Otherwise load the whole register.  Note that in
3729     the latter case we may pick up unwanted bits.  It's not a problem
3730     at the moment but may wish to revisit.  */
3731
3732  if (intoffset % BITS_PER_WORD != 0)
3733    {
3734      mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
3735			    MODE_INT, 0);
3736    }
3737  else
3738    mode = word_mode;
3739
3740  intoffset /= BITS_PER_UNIT;
3741  do
3742    {
3743      regno = parms->regbase + this_slotno;
3744      reg = gen_rtx_REG (mode, regno);
3745      XVECEXP (parms->ret, 0, parms->nregs)
3746	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
3747
3748      this_slotno += 1;
3749      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
3750      parms->nregs += 1;
3751      intslots -= 1;
3752    }
3753  while (intslots > 0);
3754}
3755
3756static void
3757function_arg_record_value_2 (type, startbitpos, parms)
3758     tree type;
3759     int startbitpos;
3760     struct function_arg_record_value_parms *parms;
3761{
3762  tree field;
3763  int packed_p = 0;
3764
3765  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3766    {
3767      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3768	{
3769	  packed_p = 1;
3770	  break;
3771	}
3772    }
3773
3774  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3775    {
3776      if (TREE_CODE (field) == FIELD_DECL)
3777	{
3778	  int bitpos = startbitpos;
3779	  if (DECL_FIELD_BITPOS (field))
3780	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3781	  /* ??? FIXME: else assume zero offset.  */
3782
3783	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3784	    {
3785	      function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
3786	    }
3787	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3788	           && TARGET_FPU
3789	           && ! packed_p
3790	           && parms->named)
3791	    {
3792	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
3793	      rtx reg;
3794
3795	      function_arg_record_value_3 (bitpos, parms);
3796
3797	      reg = gen_rtx_REG (DECL_MODE (field),
3798			         (SPARC_FP_ARG_FIRST + this_slotno * 2
3799			          + (DECL_MODE (field) == SFmode
3800				     && (bitpos & 32) != 0)));
3801	      XVECEXP (parms->ret, 0, parms->nregs)
3802		= gen_rtx_EXPR_LIST (VOIDmode, reg,
3803			   GEN_INT (bitpos / BITS_PER_UNIT));
3804	      parms->nregs += 1;
3805	    }
3806	  else
3807	    {
3808	      if (parms->intoffset == -1)
3809		parms->intoffset = bitpos;
3810	    }
3811	}
3812    }
3813}
3814
3815static rtx
3816function_arg_record_value (type, mode, slotno, named, regbase)
3817     tree type;
3818     enum machine_mode mode;
3819     int slotno, named, regbase;
3820{
3821  HOST_WIDE_INT typesize = int_size_in_bytes (type);
3822  struct function_arg_record_value_parms parms;
3823  int nregs;
3824
3825  parms.ret = NULL_RTX;
3826  parms.slotno = slotno;
3827  parms.named = named;
3828  parms.regbase = regbase;
3829
3830  /* Compute how many registers we need.  */
3831  parms.nregs = 0;
3832  parms.intoffset = 0;
3833  function_arg_record_value_1 (type, 0, &parms);
3834
3835  if (parms.intoffset != -1)
3836    {
3837      int intslots, this_slotno;
3838
3839      intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
3840	/ BITS_PER_WORD;
3841      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
3842
3843      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3844      intslots = MAX (intslots, 0);
3845
3846      parms.nregs += intslots;
3847    }
3848  nregs = parms.nregs;
3849
3850  /* Allocate the vector and handle some annoying special cases.  */
3851  if (nregs == 0)
3852    {
3853      /* ??? Empty structure has no value?  Duh?  */
3854      if (typesize <= 0)
3855	{
3856	  /* Though there's nothing really to store, return a word register
3857	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
3858	     leads to breakage due to the fact that there are zero bytes to
3859	     load.  */
3860	  return gen_rtx_REG (mode, regbase);
3861	}
3862      else
3863	{
3864	  /* ??? C++ has structures with no fields, and yet a size.  Give up
3865	     for now and pass everything back in integer registers.  */
3866	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3867	}
3868      if (nregs + slotno > SPARC_INT_ARG_MAX)
3869	nregs = SPARC_INT_ARG_MAX - slotno;
3870    }
3871  if (nregs == 0)
3872    abort ();
3873
3874  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3875
3876  /* Fill in the entries.  */
3877  parms.nregs = 0;
3878  parms.intoffset = 0;
3879  function_arg_record_value_2 (type, 0, &parms);
3880  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
3881
3882  if (parms.nregs != nregs)
3883    abort ();
3884
3885  return parms.ret;
3886}
3887
3888/* Handle the FUNCTION_ARG macro.
3889   Determine where to put an argument to a function.
3890   Value is zero to push the argument on the stack,
3891   or a hard register in which to store the argument.
3892
3893   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3894    the preceding args and about the function being called.
3895   MODE is the argument's machine mode.
3896   TYPE is the data type of the argument (as a tree).
3897    This is null for libcalls where that information may
3898    not be available.
3899   NAMED is nonzero if this argument is a named parameter
3900    (otherwise it is an extra parameter matching an ellipsis).
3901   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
3902
3903rtx
3904function_arg (cum, mode, type, named, incoming_p)
3905     const CUMULATIVE_ARGS *cum;
3906     enum machine_mode mode;
3907     tree type;
3908     int named;
3909     int incoming_p;
3910{
3911  int regbase = (incoming_p
3912		 ? SPARC_INCOMING_INT_ARG_FIRST
3913		 : SPARC_OUTGOING_INT_ARG_FIRST);
3914  int slotno, regno, padding;
3915  rtx reg;
3916
3917  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
3918				&regno, &padding);
3919
3920  if (slotno == -1)
3921    return 0;
3922
3923  if (TARGET_ARCH32)
3924    {
3925      reg = gen_rtx_REG (mode, regno);
3926      return reg;
3927    }
3928
3929  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
3930     but also have the slot allocated for them.
3931     If no prototype is in scope fp values in register slots get passed
3932     in two places, either fp regs and int regs or fp regs and memory.  */
3933  if ((GET_MODE_CLASS (mode) == MODE_FLOAT
3934       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3935      && SPARC_FP_REG_P (regno))
3936    {
3937      reg = gen_rtx_REG (mode, regno);
3938      if (cum->prototype_p || cum->libcall_p)
3939	{
3940	  /* "* 2" because fp reg numbers are recorded in 4 byte
3941	     quantities.  */
3942#if 0
3943	  /* ??? This will cause the value to be passed in the fp reg and
3944	     in the stack.  When a prototype exists we want to pass the
3945	     value in the reg but reserve space on the stack.  That's an
3946	     optimization, and is deferred [for a bit].  */
3947	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
3948	    return gen_rtx_PARALLEL (mode,
3949			    gen_rtvec (2,
3950				       gen_rtx_EXPR_LIST (VOIDmode,
3951						NULL_RTX, const0_rtx),
3952				       gen_rtx_EXPR_LIST (VOIDmode,
3953						reg, const0_rtx)));
3954	  else
3955#else
3956	  /* ??? It seems that passing back a register even when past
3957	     the area declared by REG_PARM_STACK_SPACE will allocate
3958	     space appropriately, and will not copy the data onto the
3959	     stack, exactly as we desire.
3960
3961	     This is due to locate_and_pad_parm being called in
3962	     expand_call whenever reg_parm_stack_space > 0, which
3963	     while benefical to our example here, would seem to be
3964	     in error from what had been intended.  Ho hum...  -- r~ */
3965#endif
3966	    return reg;
3967	}
3968      else
3969	{
3970	  rtx v0, v1;
3971
3972	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
3973	    {
3974	      int intreg;
3975
3976	      /* On incoming, we don't need to know that the value
3977		 is passed in %f0 and %i0, and it confuses other parts
3978		 causing needless spillage even on the simplest cases.  */
3979	      if (incoming_p)
3980		return reg;
3981
3982	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
3983			+ (regno - SPARC_FP_ARG_FIRST) / 2);
3984
3985	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3986	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
3987				      const0_rtx);
3988	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3989	    }
3990	  else
3991	    {
3992	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
3993	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3994	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3995	    }
3996	}
3997    }
3998  else if (type && TREE_CODE (type) == RECORD_TYPE)
3999    {
4000      /* Structures up to 16 bytes in size are passed in arg slots on the
4001	 stack and are promoted to registers where possible.  */
4002
4003      if (int_size_in_bytes (type) > 16)
4004	abort (); /* shouldn't get here */
4005
4006      return function_arg_record_value (type, mode, slotno, named, regbase);
4007    }
4008  else if (type && TREE_CODE (type) == UNION_TYPE)
4009    {
4010      enum machine_mode mode;
4011      int bytes = int_size_in_bytes (type);
4012
4013      if (bytes > 16)
4014	abort ();
4015
4016      mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4017      reg = gen_rtx_REG (mode, regno);
4018    }
4019  else
4020    {
4021      /* Scalar or complex int.  */
4022      reg = gen_rtx_REG (mode, regno);
4023    }
4024
4025  return reg;
4026}
4027
4028/* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
4029   For an arg passed partly in registers and partly in memory,
4030   this is the number of registers used.
4031   For args passed entirely in registers or entirely in memory, zero.
4032
4033   Any arg that starts in the first 6 regs but won't entirely fit in them
4034   needs partial registers on v8.  On v9, structures with integer
4035   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4036   values that begin in the last fp reg [where "last fp reg" varies with the
4037   mode] will be split between that reg and memory.  */
4038
4039int
4040function_arg_partial_nregs (cum, mode, type, named)
4041     const CUMULATIVE_ARGS *cum;
4042     enum machine_mode mode;
4043     tree type;
4044     int named;
4045{
4046  int slotno, regno, padding;
4047
4048  /* We pass 0 for incoming_p here, it doesn't matter.  */
4049  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4050
4051  if (slotno == -1)
4052    return 0;
4053
4054  if (TARGET_ARCH32)
4055    {
4056      if ((slotno + (mode == BLKmode
4057		     ? ROUND_ADVANCE (int_size_in_bytes (type))
4058		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
4059	  > NPARM_REGS (SImode))
4060	return NPARM_REGS (SImode) - slotno;
4061      return 0;
4062    }
4063  else
4064    {
4065      if (type && AGGREGATE_TYPE_P (type))
4066	{
4067	  int size = int_size_in_bytes (type);
4068	  int align = TYPE_ALIGN (type);
4069
4070	  if (align == 16)
4071	    slotno += slotno & 1;
4072	  if (size > 8 && size <= 16
4073	      && slotno == SPARC_INT_ARG_MAX - 1)
4074	    return 1;
4075	}
4076      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
4077	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4078		   && ! TARGET_FPU))
4079	{
4080	  if (GET_MODE_ALIGNMENT (mode) == 128)
4081	    {
4082	      slotno += slotno & 1;
4083	      if (slotno == SPARC_INT_ARG_MAX - 2)
4084		return 1;
4085	    }
4086	  else
4087	    {
4088	      if (slotno == SPARC_INT_ARG_MAX - 1)
4089		return 1;
4090	    }
4091	}
4092      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4093	{
4094	  if (GET_MODE_ALIGNMENT (mode) == 128)
4095	    slotno += slotno & 1;
4096	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
4097	      > SPARC_FP_ARG_MAX)
4098	    return 1;
4099	}
4100      return 0;
4101    }
4102}
4103
4104/* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4105   !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4106   quad-precision floats by invisible reference.
4107   v9: Aggregates greater than 16 bytes are passed by reference.
4108   For Pascal, also pass arrays by reference.  */
4109
4110int
4111function_arg_pass_by_reference (cum, mode, type, named)
4112     const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4113     enum machine_mode mode;
4114     tree type;
4115     int named ATTRIBUTE_UNUSED;
4116{
4117  if (TARGET_ARCH32)
4118    {
4119      return ((type && AGGREGATE_TYPE_P (type))
4120	      || mode == TFmode || mode == TCmode);
4121    }
4122  else
4123    {
4124      return ((type && TREE_CODE (type) == ARRAY_TYPE)
4125	      /* Consider complex values as aggregates, so care for TCmode. */
4126	      || GET_MODE_SIZE (mode) > 16
4127	      || (type && AGGREGATE_TYPE_P (type)
4128		  && int_size_in_bytes (type) > 16));
4129    }
4130}
4131
4132/* Handle the FUNCTION_ARG_ADVANCE macro.
4133   Update the data in CUM to advance over an argument
4134   of mode MODE and data type TYPE.
4135   TYPE is null for libcalls where that information may not be available.  */
4136
4137void
4138function_arg_advance (cum, mode, type, named)
4139     CUMULATIVE_ARGS *cum;
4140     enum machine_mode mode;
4141     tree type;
4142     int named;
4143{
4144  int slotno, regno, padding;
4145
4146  /* We pass 0 for incoming_p here, it doesn't matter.  */
4147  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4148
4149  /* If register required leading padding, add it.  */
4150  if (slotno != -1)
4151    cum->words += padding;
4152
4153  if (TARGET_ARCH32)
4154    {
4155      cum->words += (mode != BLKmode
4156		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4157		     : ROUND_ADVANCE (int_size_in_bytes (type)));
4158    }
4159  else
4160    {
4161      if (type && AGGREGATE_TYPE_P (type))
4162	{
4163	  int size = int_size_in_bytes (type);
4164
4165	  if (size <= 8)
4166	    ++cum->words;
4167	  else if (size <= 16)
4168	    cum->words += 2;
4169	  else /* passed by reference */
4170	    ++cum->words;
4171	}
4172      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4173	{
4174	  cum->words += 2;
4175	}
4176      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4177	{
4178	  cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4179	}
4180      else
4181	{
4182	  cum->words += (mode != BLKmode
4183			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4184			 : ROUND_ADVANCE (int_size_in_bytes (type)));
4185	}
4186    }
4187}
4188
4189/* Handle the FUNCTION_ARG_PADDING macro.
4190   For the 64 bit ABI structs are always stored left shifted in their
4191   argument slot.  */
4192
4193enum direction
4194function_arg_padding (mode, type)
4195     enum machine_mode mode;
4196     tree type;
4197{
4198  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4199    return upward;
4200
4201  /* This is the default definition.  */
4202  return (! BYTES_BIG_ENDIAN
4203	  ? upward
4204	  : ((mode == BLKmode
4205	      ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4206		 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4207	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4208	     ? downward : upward));
4209}
4210
4211/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
4212   For v9, function return values are subject to the same rules as arguments,
4213   except that up to 32-bytes may be returned in registers.  */
4214
4215rtx
4216function_value (type, mode, incoming_p)
4217     tree type;
4218     enum machine_mode mode;
4219     int incoming_p;
4220{
4221  int regno;
4222  int regbase = (incoming_p
4223		 ? SPARC_OUTGOING_INT_ARG_FIRST
4224		 : SPARC_INCOMING_INT_ARG_FIRST);
4225
4226  if (TARGET_ARCH64 && type)
4227    {
4228      if (TREE_CODE (type) == RECORD_TYPE)
4229	{
4230	  /* Structures up to 32 bytes in size are passed in registers,
4231	     promoted to fp registers where possible.  */
4232
4233	  if (int_size_in_bytes (type) > 32)
4234	    abort (); /* shouldn't get here */
4235
4236	  return function_arg_record_value (type, mode, 0, 1, regbase);
4237	}
4238      else if (TREE_CODE (type) == UNION_TYPE)
4239	{
4240	  int bytes = int_size_in_bytes (type);
4241
4242	  if (bytes > 32)
4243	    abort ();
4244
4245	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4246	}
4247    }
4248
4249  if (TARGET_ARCH64
4250      && GET_MODE_CLASS (mode) == MODE_INT
4251      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
4252      && type && TREE_CODE (type) != UNION_TYPE)
4253    mode = DImode;
4254
4255  if (incoming_p)
4256    regno = BASE_RETURN_VALUE_REG (mode);
4257  else
4258    regno = BASE_OUTGOING_VALUE_REG (mode);
4259
4260  return gen_rtx_REG (mode, regno);
4261}
4262
4263/* Do what is necessary for `va_start'.  The argument is ignored.
4264
4265   We look at the current function to determine if stdarg or varargs
4266   is used and return the address of the first unnamed parameter.  */
4267
4268rtx
4269sparc_builtin_saveregs (arglist)
4270     tree arglist ATTRIBUTE_UNUSED;
4271{
4272  int first_reg = current_function_args_info.words;
4273  rtx address;
4274  int regno;
4275
4276  for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
4277    emit_move_insn (gen_rtx_MEM (word_mode,
4278			     gen_rtx_PLUS (Pmode,
4279				      frame_pointer_rtx,
4280				      GEN_INT (STACK_POINTER_OFFSET
4281					       + UNITS_PER_WORD * regno))),
4282		    gen_rtx_REG (word_mode,
4283			     BASE_INCOMING_ARG_REG (word_mode) + regno));
4284
4285  address = gen_rtx_PLUS (Pmode,
4286		     frame_pointer_rtx,
4287		     GEN_INT (STACK_POINTER_OFFSET
4288			      + UNITS_PER_WORD * first_reg));
4289
4290  if (current_function_check_memory_usage
4291      && first_reg < NPARM_REGS (word_mode))
4292    emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4293		       address, ptr_mode,
4294		       GEN_INT (UNITS_PER_WORD
4295			 	* (NPARM_REGS (word_mode) - first_reg)),
4296		       TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
4297		       TYPE_MODE (integer_type_node));
4298
4299  return address;
4300}
4301
4302/* Return the string to output a conditional branch to LABEL, which is
4303   the operand number of the label.  OP is the conditional expression.
4304   XEXP (OP, 0) is assumed to be a condition code register (integer or
4305   floating point) and its mode specifies what kind of comparison we made.
4306
4307   REVERSED is non-zero if we should reverse the sense of the comparison.
4308
4309   ANNUL is non-zero if we should generate an annulling branch.
4310
4311   NOOP is non-zero if we have to follow this branch by a noop.
4312
4313   INSN, if set, is the insn.  */
4314
4315char *
4316output_cbranch (op, label, reversed, annul, noop, insn)
4317     rtx op;
4318     int label;
4319     int reversed, annul, noop;
4320     rtx insn;
4321{
4322  static char string[32];
4323  enum rtx_code code = GET_CODE (op);
4324  rtx cc_reg = XEXP (op, 0);
4325  enum machine_mode mode = GET_MODE (cc_reg);
4326  static char v8_labelno[] = "%lX";
4327  static char v9_icc_labelno[] = "%%icc, %lX";
4328  static char v9_xcc_labelno[] = "%%xcc, %lX";
4329  static char v9_fcc_labelno[] = "%%fccX, %lY";
4330  char *labelno;
4331  int labeloff, spaces = 8;
4332
4333  /* ??? !v9: FP branches cannot be preceded by another floating point insn.
4334     Because there is currently no concept of pre-delay slots, we can fix
4335     this only by always emitting a nop before a floating point branch.  */
4336
4337  if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
4338    strcpy (string, "nop\n\t");
4339  else
4340    string[0] = '\0';
4341
4342  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
4343  if (reversed
4344      && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
4345    code = reverse_condition (code), reversed = 0;
4346
4347  /* Start by writing the branch condition.  */
4348  switch (code)
4349    {
4350    case NE:
4351      if (mode == CCFPmode || mode == CCFPEmode)
4352	{
4353	  strcat (string, "fbne");
4354	  spaces -= 4;
4355	}
4356      else
4357	{
4358	  strcpy (string, "bne");
4359	  spaces -= 3;
4360	}
4361      break;
4362
4363    case EQ:
4364      if (mode == CCFPmode || mode == CCFPEmode)
4365	{
4366	  strcat (string, "fbe");
4367	  spaces -= 3;
4368	}
4369      else
4370	{
4371	  strcpy (string, "be");
4372	  spaces -= 2;
4373	}
4374      break;
4375
4376    case GE:
4377      if (mode == CCFPmode || mode == CCFPEmode)
4378	{
4379	  if (reversed)
4380	    strcat (string, "fbul");
4381	  else
4382	    strcat (string, "fbge");
4383	  spaces -= 4;
4384	}
4385      else if (mode == CC_NOOVmode)
4386	{
4387	  strcpy (string, "bpos");
4388	  spaces -= 4;
4389	}
4390      else
4391	{
4392	  strcpy (string, "bge");
4393	  spaces -= 3;
4394	}
4395      break;
4396
4397    case GT:
4398      if (mode == CCFPmode || mode == CCFPEmode)
4399	{
4400	  if (reversed)
4401	    {
4402	      strcat (string, "fbule");
4403	      spaces -= 5;
4404	    }
4405	  else
4406	    {
4407	      strcat (string, "fbg");
4408	      spaces -= 3;
4409	    }
4410	}
4411      else
4412	{
4413	  strcpy (string, "bg");
4414	  spaces -= 2;
4415	}
4416      break;
4417
4418    case LE:
4419      if (mode == CCFPmode || mode == CCFPEmode)
4420	{
4421	  if (reversed)
4422	    strcat (string, "fbug");
4423	  else
4424	    strcat (string, "fble");
4425	  spaces -= 4;
4426	}
4427      else
4428	{
4429	  strcpy (string, "ble");
4430	  spaces -= 3;
4431	}
4432      break;
4433
4434    case LT:
4435      if (mode == CCFPmode || mode == CCFPEmode)
4436	{
4437	  if (reversed)
4438	    {
4439	      strcat (string, "fbuge");
4440	      spaces -= 5;
4441	    }
4442	  else
4443	    {
4444	      strcat (string, "fbl");
4445	      spaces -= 3;
4446	    }
4447	}
4448      else if (mode == CC_NOOVmode)
4449	{
4450	  strcpy (string, "bneg");
4451	  spaces -= 4;
4452	}
4453      else
4454	{
4455	  strcpy (string, "bl");
4456	  spaces -= 2;
4457	}
4458      break;
4459
4460    case GEU:
4461      strcpy (string, "bgeu");
4462      spaces -= 4;
4463      break;
4464
4465    case GTU:
4466      strcpy (string, "bgu");
4467      spaces -= 3;
4468      break;
4469
4470    case LEU:
4471      strcpy (string, "bleu");
4472      spaces -= 4;
4473      break;
4474
4475    case LTU:
4476      strcpy (string, "blu");
4477      spaces -= 3;
4478      break;
4479
4480    default:
4481      abort ();
4482    }
4483
4484  /* Now add the annulling, the label, and a possible noop.  */
4485  if (annul)
4486    {
4487      strcat (string, ",a");
4488      spaces -= 2;
4489    }
4490
4491  if (! TARGET_V9)
4492    {
4493      labeloff = 2;
4494      labelno = v8_labelno;
4495    }
4496  else
4497    {
4498      rtx note;
4499
4500      if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4501	{
4502	  strcat (string,
4503		  INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4504	  spaces -= 3;
4505	}
4506
4507      labeloff = 9;
4508      if (mode == CCFPmode || mode == CCFPEmode)
4509	{
4510	  labeloff = 10;
4511	  labelno = v9_fcc_labelno;
4512	  /* Set the char indicating the number of the fcc reg to use.  */
4513	  labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
4514	}
4515      else if (mode == CCXmode || mode == CCX_NOOVmode)
4516	labelno = v9_xcc_labelno;
4517      else
4518	labelno = v9_icc_labelno;
4519    }
4520  /* Set the char indicating the number of the operand containing the
4521     label_ref.  */
4522  labelno[labeloff] = label + '0';
4523  if (spaces > 0)
4524    strcat (string, "\t");
4525  else
4526    strcat (string, " ");
4527  strcat (string, labelno);
4528
4529  if (noop)
4530    strcat (string, "\n\tnop");
4531
4532  return string;
4533}
4534
4535/* Return the string to output a conditional branch to LABEL, testing
4536   register REG.  LABEL is the operand number of the label; REG is the
4537   operand number of the reg.  OP is the conditional expression.  The mode
4538   of REG says what kind of comparison we made.
4539
4540   REVERSED is non-zero if we should reverse the sense of the comparison.
4541
4542   ANNUL is non-zero if we should generate an annulling branch.
4543
4544   NOOP is non-zero if we have to follow this branch by a noop.  */
4545
4546char *
4547output_v9branch (op, reg, label, reversed, annul, noop, insn)
4548     rtx op;
4549     int reg, label;
4550     int reversed, annul, noop;
4551     rtx insn;
4552{
4553  static char string[20];
4554  enum rtx_code code = GET_CODE (op);
4555  enum machine_mode mode = GET_MODE (XEXP (op, 0));
4556  static char labelno[] = "%X, %lX";
4557  rtx note;
4558  int spaces = 8;
4559
4560  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
4561  if (reversed)
4562    code = reverse_condition (code), reversed = 0;
4563
4564  /* Only 64 bit versions of these instructions exist.  */
4565  if (mode != DImode)
4566    abort ();
4567
4568  /* Start by writing the branch condition.  */
4569
4570  switch (code)
4571    {
4572    case NE:
4573      strcpy (string, "brnz");
4574      spaces -= 4;
4575      break;
4576
4577    case EQ:
4578      strcpy (string, "brz");
4579      spaces -= 3;
4580      break;
4581
4582    case GE:
4583      strcpy (string, "brgez");
4584      spaces -= 5;
4585      break;
4586
4587    case LT:
4588      strcpy (string, "brlz");
4589      spaces -= 4;
4590      break;
4591
4592    case LE:
4593      strcpy (string, "brlez");
4594      spaces -= 5;
4595      break;
4596
4597    case GT:
4598      strcpy (string, "brgz");
4599      spaces -= 4;
4600      break;
4601
4602    default:
4603      abort ();
4604    }
4605
4606  /* Now add the annulling, reg, label, and nop.  */
4607  if (annul)
4608    {
4609      strcat (string, ",a");
4610      spaces -= 2;
4611    }
4612
4613  if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4614    {
4615      strcat (string,
4616	      INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4617      spaces -= 3;
4618    }
4619
4620  labelno[1] = reg + '0';
4621  labelno[6] = label + '0';
4622  if (spaces > 0)
4623    strcat (string, "\t");
4624  else
4625    strcat (string, " ");
4626  strcat (string, labelno);
4627
4628  if (noop)
4629    strcat (string, "\n\tnop");
4630
4631  return string;
4632}
4633
4634/* Renumber registers in delay slot.  Replace registers instead of
4635   renumbering because they may be shared.
4636
4637   This does not handle instructions other than move.  */
4638
4639static void
4640epilogue_renumber (where)
4641     rtx *where;
4642{
4643  rtx x = *where;
4644  enum rtx_code code = GET_CODE (x);
4645
4646  switch (code)
4647    {
4648    case MEM:
4649      *where = x = copy_rtx (x);
4650      epilogue_renumber (&XEXP (x, 0));
4651      return;
4652
4653    case REG:
4654      {
4655	int regno = REGNO (x);
4656	if (regno > 8 && regno < 24)
4657	  abort ();
4658	if (regno >= 24 && regno < 32)
4659	  *where = gen_rtx_REG (GET_MODE (x), regno - 16);
4660	return;
4661      }
4662    case CONST_INT:
4663    case CONST_DOUBLE:
4664    case CONST:
4665    case SYMBOL_REF:
4666    case LABEL_REF:
4667      return;
4668
4669    case IOR:
4670    case AND:
4671    case XOR:
4672    case PLUS:
4673    case MINUS:
4674      epilogue_renumber (&XEXP (x, 1));
4675    case NEG:
4676    case NOT:
4677      epilogue_renumber (&XEXP (x, 0));
4678      return;
4679
4680    default:
4681      debug_rtx (*where);
4682      abort ();
4683    }
4684}
4685
4686/* Output assembler code to return from a function.  */
4687
4688const char *
4689output_return (operands)
4690     rtx *operands;
4691{
4692  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
4693
4694  if (leaf_label)
4695    {
4696      operands[0] = leaf_label;
4697      return "b%* %l0%(";
4698    }
4699  else if (current_function_uses_only_leaf_regs)
4700    {
4701      /* No delay slot in a leaf function.  */
4702      if (delay)
4703	abort ();
4704
4705      /* If we didn't allocate a frame pointer for the current function,
4706	 the stack pointer might have been adjusted.  Output code to
4707	 restore it now.  */
4708
4709      operands[0] = GEN_INT (actual_fsize);
4710
4711      /* Use sub of negated value in first two cases instead of add to
4712	 allow actual_fsize == 4096.  */
4713
4714      if (actual_fsize <= 4096)
4715	{
4716	  if (SKIP_CALLERS_UNIMP_P)
4717	    return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4718	  else
4719	    return "retl\n\tsub\t%%sp, -%0, %%sp";
4720	}
4721      else if (actual_fsize <= 8192)
4722	{
4723	  operands[0] = GEN_INT (actual_fsize - 4096);
4724	  if (SKIP_CALLERS_UNIMP_P)
4725	    return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4726	  else
4727	    return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
4728	}
4729      else if (SKIP_CALLERS_UNIMP_P)
4730	{
4731	  if ((actual_fsize & 0x3ff) != 0)
4732	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4733	  else
4734	    return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4735	}
4736      else
4737	{
4738	  if ((actual_fsize & 0x3ff) != 0)
4739	    return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4740	  else
4741	    return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4742	}
4743    }
4744  else if (TARGET_V9)
4745    {
4746      if (delay)
4747	{
4748	  epilogue_renumber (&SET_DEST (PATTERN (delay)));
4749	  epilogue_renumber (&SET_SRC (PATTERN (delay)));
4750	}
4751      if (SKIP_CALLERS_UNIMP_P)
4752	return "return\t%%i7+12%#";
4753      else
4754	return "return\t%%i7+8%#";
4755    }
4756  else
4757    {
4758      if (delay)
4759	abort ();
4760      if (SKIP_CALLERS_UNIMP_P)
4761	return "jmp\t%%i7+12\n\trestore";
4762      else
4763	return "ret\n\trestore";
4764    }
4765}
4766
/* Leaf functions and non-leaf functions have different needs.  */

/* Register allocation order taken from REG_LEAF_ALLOC_ORDER.  */
static int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Register allocation order taken from REG_ALLOC_ORDER.  */
static int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* The two orders above, indexed by 0 (leaf) and 1 (non-leaf); this is
   how order_regs_for_local_alloc selects between them.  */
static int *reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
4778
4779void
4780order_regs_for_local_alloc ()
4781{
4782  static int last_order_nonleaf = 1;
4783
4784  if (regs_ever_live[15] != last_order_nonleaf)
4785    {
4786      last_order_nonleaf = !last_order_nonleaf;
4787      bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
4788	     (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
4789    }
4790}
4791
4792/* Return 1 if REG and MEM are legitimate enough to allow the various
4793   mem<-->reg splits to be run.  */
4794
4795int
4796sparc_splitdi_legitimate (reg, mem)
4797     rtx reg;
4798     rtx mem;
4799{
4800  /* Punt if we are here by mistake.  */
4801  if (! reload_completed)
4802    abort ();
4803
4804  /* We must have an offsettable memory reference.  */
4805  if (! offsettable_memref_p (mem))
4806    return 0;
4807
4808  /* If we have legitimate args for ldd/std, we do not want
4809     the split to happen.  */
4810  if ((REGNO (reg) % 2) == 0
4811      && mem_min_alignment (mem, 8))
4812    return 0;
4813
4814  /* Success.  */
4815  return 1;
4816}
4817
4818/* Return 1 if x and y are some kind of REG and they refer to
4819   different hard registers.  This test is guarenteed to be
4820   run after reload.  */
4821
4822int
4823sparc_absnegfloat_split_legitimate (x, y)
4824     rtx x, y;
4825{
4826  if (GET_CODE (x) == SUBREG)
4827    x = alter_subreg (x);
4828  if (GET_CODE (x) != REG)
4829    return 0;
4830  if (GET_CODE (y) == SUBREG)
4831    y = alter_subreg (y);
4832  if (GET_CODE (y) != REG)
4833    return 0;
4834  if (REGNO (x) == REGNO (y))
4835    return 0;
4836  return 1;
4837}
4838
4839/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
4840   This makes them candidates for using ldd and std insns.
4841
4842   Note reg1 and reg2 *must* be hard registers.  */
4843
4844int
4845registers_ok_for_ldd_peep (reg1, reg2)
4846     rtx reg1, reg2;
4847{
4848  /* We might have been passed a SUBREG.  */
4849  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
4850    return 0;
4851
4852  if (REGNO (reg1) % 2 != 0)
4853    return 0;
4854
4855  /* Integer ldd is deprecated in SPARC V9 */
4856  if (TARGET_V9 && REGNO (reg1) < 32)
4857    return 0;
4858
4859  return (REGNO (reg1) == REGNO (reg2) - 1);
4860}
4861
4862/* Return 1 if addr1 and addr2 are suitable for use in an ldd or
4863   std insn.
4864
4865   This can only happen when addr1 and addr2 are consecutive memory
4866   locations (addr1 + 4 == addr2).  addr1 must also be aligned on a
4867   64 bit boundary (addr1 % 8 == 0).
4868
4869   We know %sp and %fp are kept aligned on a 64 bit boundary.  Other
4870   registers are assumed to *never* be properly aligned and are
4871   rejected.
4872
4873   Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
4874   need only check that the offset for addr1 % 8 == 0.  */
4875
4876int
4877addrs_ok_for_ldd_peep (addr1, addr2)
4878      rtx addr1, addr2;
4879{
4880  int reg1, offset1;
4881
4882  /* Extract a register number and offset (if used) from the first addr.  */
4883  if (GET_CODE (addr1) == PLUS)
4884    {
4885      /* If not a REG, return zero.  */
4886      if (GET_CODE (XEXP (addr1, 0)) != REG)
4887	return 0;
4888      else
4889	{
4890          reg1 = REGNO (XEXP (addr1, 0));
4891	  /* The offset must be constant!  */
4892	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
4893            return 0;
4894          offset1 = INTVAL (XEXP (addr1, 1));
4895	}
4896    }
4897  else if (GET_CODE (addr1) != REG)
4898    return 0;
4899  else
4900    {
4901      reg1 = REGNO (addr1);
4902      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
4903      offset1 = 0;
4904    }
4905
4906  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
4907  if (GET_CODE (addr2) != PLUS)
4908    return 0;
4909
4910  if (GET_CODE (XEXP (addr2, 0)) != REG
4911      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
4912    return 0;
4913
4914  /* Only %fp and %sp are allowed.  Additionally both addresses must
4915     use the same register.  */
4916  if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
4917    return 0;
4918
4919  if (reg1 != REGNO (XEXP (addr2, 0)))
4920    return 0;
4921
4922  /* The first offset must be evenly divisible by 8 to ensure the
4923     address is 64 bit aligned.  */
4924  if (offset1 % 8 != 0)
4925    return 0;
4926
4927  /* The offset for the second addr must be 4 more than the first addr.  */
4928  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
4929    return 0;
4930
4931  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
4932     instructions.  */
4933  return 1;
4934}
4935
4936/* Return 1 if reg is a pseudo, or is the first register in
4937   a hard register pair.  This makes it a candidate for use in
4938   ldd and std insns.  */
4939
4940int
4941register_ok_for_ldd (reg)
4942     rtx reg;
4943{
4944  /* We might have been passed a SUBREG.  */
4945  if (GET_CODE (reg) != REG)
4946    return 0;
4947
4948  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
4949    return (REGNO (reg) % 2 == 0);
4950  else
4951    return 1;
4952}
4953
4954/* Print operand X (an rtx) in assembler syntax to file FILE.
4955   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4956   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4957
4958void
4959print_operand (file, x, code)
4960     FILE *file;
4961     rtx x;
4962     int code;
4963{
4964  switch (code)
4965    {
4966    case '#':
4967      /* Output a 'nop' if there's nothing for the delay slot.  */
4968      if (dbr_sequence_length () == 0)
4969	fputs ("\n\t nop", file);
4970      return;
4971    case '*':
4972      /* Output an annul flag if there's nothing for the delay slot and we
4973	 are optimizing.  This is always used with '(' below.  */
4974      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
4975	 this is a dbx bug.  So, we only do this when optimizing.  */
4976      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
4977	 Always emit a nop in case the next instruction is a branch.  */
4978      if (dbr_sequence_length () == 0
4979	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
4980	fputs (",a", file);
4981      return;
4982    case '(':
4983      /* Output a 'nop' if there's nothing for the delay slot and we are
4984	 not optimizing.  This is always used with '*' above.  */
4985      if (dbr_sequence_length () == 0
4986	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
4987	fputs ("\n\t nop", file);
4988      return;
4989    case '_':
4990      /* Output the Embedded Medium/Anywhere code model base register.  */
4991      fputs (EMBMEDANY_BASE_REG, file);
4992      return;
4993    case '@':
4994      /* Print out what we are using as the frame pointer.  This might
4995	 be %fp, or might be %sp+offset.  */
4996      /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
4997      fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
4998      return;
4999    case 'Y':
5000      /* Adjust the operand to take into account a RESTORE operation.  */
5001      if (GET_CODE (x) == CONST_INT)
5002	break;
5003      else if (GET_CODE (x) != REG)
5004	output_operand_lossage ("Invalid %%Y operand");
5005      else if (REGNO (x) < 8)
5006	fputs (reg_names[REGNO (x)], file);
5007      else if (REGNO (x) >= 24 && REGNO (x) < 32)
5008	fputs (reg_names[REGNO (x)-16], file);
5009      else
5010	output_operand_lossage ("Invalid %%Y operand");
5011      return;
5012    case 'L':
5013      /* Print out the low order register name of a register pair.  */
5014      if (WORDS_BIG_ENDIAN)
5015	fputs (reg_names[REGNO (x)+1], file);
5016      else
5017	fputs (reg_names[REGNO (x)], file);
5018      return;
5019    case 'H':
5020      /* Print out the high order register name of a register pair.  */
5021      if (WORDS_BIG_ENDIAN)
5022	fputs (reg_names[REGNO (x)], file);
5023      else
5024	fputs (reg_names[REGNO (x)+1], file);
5025      return;
5026    case 'R':
5027      /* Print out the second register name of a register pair or quad.
5028	 I.e., R (%o0) => %o1.  */
5029      fputs (reg_names[REGNO (x)+1], file);
5030      return;
5031    case 'S':
5032      /* Print out the third register name of a register quad.
5033	 I.e., S (%o0) => %o2.  */
5034      fputs (reg_names[REGNO (x)+2], file);
5035      return;
5036    case 'T':
5037      /* Print out the fourth register name of a register quad.
5038	 I.e., T (%o0) => %o3.  */
5039      fputs (reg_names[REGNO (x)+3], file);
5040      return;
5041    case 'x':
5042      /* Print a condition code register.  */
5043      if (REGNO (x) == SPARC_ICC_REG)
5044	{
5045	  /* We don't handle CC[X]_NOOVmode because they're not supposed
5046	     to occur here.  */
5047	  if (GET_MODE (x) == CCmode)
5048	    fputs ("%icc", file);
5049	  else if (GET_MODE (x) == CCXmode)
5050	    fputs ("%xcc", file);
5051	  else
5052	    abort ();
5053	}
5054      else
5055	/* %fccN register */
5056	fputs (reg_names[REGNO (x)], file);
5057      return;
5058    case 'm':
5059      /* Print the operand's address only.  */
5060      output_address (XEXP (x, 0));
5061      return;
5062    case 'r':
5063      /* In this case we need a register.  Use %g0 if the
5064	 operand is const0_rtx.  */
5065      if (x == const0_rtx
5066	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5067	{
5068	  fputs ("%g0", file);
5069	  return;
5070	}
5071      else
5072	break;
5073
5074    case 'A':
5075      switch (GET_CODE (x))
5076	{
5077	case IOR: fputs ("or", file); break;
5078	case AND: fputs ("and", file); break;
5079	case XOR: fputs ("xor", file); break;
5080	default: output_operand_lossage ("Invalid %%A operand");
5081	}
5082      return;
5083
5084    case 'B':
5085      switch (GET_CODE (x))
5086	{
5087	case IOR: fputs ("orn", file); break;
5088	case AND: fputs ("andn", file); break;
5089	case XOR: fputs ("xnor", file); break;
5090	default: output_operand_lossage ("Invalid %%B operand");
5091	}
5092      return;
5093
5094      /* These are used by the conditional move instructions.  */
5095    case 'c' :
5096    case 'C':
5097      {
5098	enum rtx_code rc = (code == 'c'
5099			    ? reverse_condition (GET_CODE (x))
5100			    : GET_CODE (x));
5101	switch (rc)
5102	  {
5103	  case NE: fputs ("ne", file); break;
5104	  case EQ: fputs ("e", file); break;
5105	  case GE: fputs ("ge", file); break;
5106	  case GT: fputs ("g", file); break;
5107	  case LE: fputs ("le", file); break;
5108	  case LT: fputs ("l", file); break;
5109	  case GEU: fputs ("geu", file); break;
5110	  case GTU: fputs ("gu", file); break;
5111	  case LEU: fputs ("leu", file); break;
5112	  case LTU: fputs ("lu", file); break;
5113	  default: output_operand_lossage (code == 'c'
5114					   ? "Invalid %%c operand"
5115					   : "Invalid %%C operand");
5116	  }
5117	return;
5118      }
5119
5120      /* These are used by the movr instruction pattern.  */
5121    case 'd':
5122    case 'D':
5123      {
5124	enum rtx_code rc = (code == 'd'
5125			    ? reverse_condition (GET_CODE (x))
5126			    : GET_CODE (x));
5127	switch (rc)
5128	  {
5129	  case NE: fputs ("ne", file); break;
5130	  case EQ: fputs ("e", file); break;
5131	  case GE: fputs ("gez", file); break;
5132	  case LT: fputs ("lz", file); break;
5133	  case LE: fputs ("lez", file); break;
5134	  case GT: fputs ("gz", file); break;
5135	  default: output_operand_lossage (code == 'd'
5136					   ? "Invalid %%d operand"
5137					   : "Invalid %%D operand");
5138	  }
5139	return;
5140      }
5141
5142    case 'b':
5143      {
5144	/* Print a sign-extended character.  */
5145	int i = INTVAL (x) & 0xff;
5146	if (i & 0x80)
5147	  i |= 0xffffff00;
5148	fprintf (file, "%d", i);
5149	return;
5150      }
5151
5152    case 'f':
5153      /* Operand must be a MEM; write its address.  */
5154      if (GET_CODE (x) != MEM)
5155	output_operand_lossage ("Invalid %%f operand");
5156      output_address (XEXP (x, 0));
5157      return;
5158
5159    case 0:
5160      /* Do nothing special.  */
5161      break;
5162
5163    default:
5164      /* Undocumented flag.  */
5165      output_operand_lossage ("invalid operand output code");
5166    }
5167
5168  if (GET_CODE (x) == REG)
5169    fputs (reg_names[REGNO (x)], file);
5170  else if (GET_CODE (x) == MEM)
5171    {
5172      fputc ('[', file);
5173	/* Poor Sun assembler doesn't understand absolute addressing.  */
5174      if (CONSTANT_P (XEXP (x, 0))
5175	  && ! TARGET_LIVE_G0)
5176	fputs ("%g0+", file);
5177      output_address (XEXP (x, 0));
5178      fputc (']', file);
5179    }
5180  else if (GET_CODE (x) == HIGH)
5181    {
5182      fputs ("%hi(", file);
5183      output_addr_const (file, XEXP (x, 0));
5184      fputc (')', file);
5185    }
5186  else if (GET_CODE (x) == LO_SUM)
5187    {
5188      print_operand (file, XEXP (x, 0), 0);
5189      if (TARGET_CM_MEDMID)
5190	fputs ("+%l44(", file);
5191      else
5192	fputs ("+%lo(", file);
5193      output_addr_const (file, XEXP (x, 1));
5194      fputc (')', file);
5195    }
5196  else if (GET_CODE (x) == CONST_DOUBLE
5197	   && (GET_MODE (x) == VOIDmode
5198	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
5199    {
5200      if (CONST_DOUBLE_HIGH (x) == 0)
5201	fprintf (file, "%u", CONST_DOUBLE_LOW (x));
5202      else if (CONST_DOUBLE_HIGH (x) == -1
5203	       && CONST_DOUBLE_LOW (x) < 0)
5204	fprintf (file, "%d", CONST_DOUBLE_LOW (x));
5205      else
5206	output_operand_lossage ("long long constant not a valid immediate operand");
5207    }
5208  else if (GET_CODE (x) == CONST_DOUBLE)
5209    output_operand_lossage ("floating point constant not a valid immediate operand");
5210  else { output_addr_const (file, x); }
5211}
5212
5213/* This function outputs assembler code for VALUE to FILE, where VALUE is
5214   a 64 bit (DImode) value.  */
5215
/* ??? If there is a 64 bit counterpart to .word that the assembler
   understands, then using that would simplify this code greatly.  */
5218/* ??? We only output .xword's for symbols and only then in environments
5219   where the assembler can handle them.  */
5220
5221void
5222output_double_int (file, value)
5223     FILE *file;
5224     rtx value;
5225{
5226  if (GET_CODE (value) == CONST_INT)
5227    {
5228      /* ??? This has endianness issues.  */
5229#if HOST_BITS_PER_WIDE_INT == 64
5230      HOST_WIDE_INT xword = INTVAL (value);
5231      HOST_WIDE_INT high, low;
5232
5233      high = (xword >> 32) & 0xffffffff;
5234      low  = xword & 0xffffffff;
5235      ASM_OUTPUT_INT (file, GEN_INT (high));
5236      ASM_OUTPUT_INT (file, GEN_INT (low));
5237#else
5238      if (INTVAL (value) < 0)
5239	ASM_OUTPUT_INT (file, constm1_rtx);
5240      else
5241	ASM_OUTPUT_INT (file, const0_rtx);
5242      ASM_OUTPUT_INT (file, value);
5243#endif
5244    }
5245  else if (GET_CODE (value) == CONST_DOUBLE)
5246    {
5247      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
5248      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
5249    }
5250  else if (GET_CODE (value) == SYMBOL_REF
5251	   || GET_CODE (value) == CONST
5252	   || GET_CODE (value) == PLUS
5253	   || (TARGET_ARCH64 &&
5254	       (GET_CODE (value) == LABEL_REF
5255		|| GET_CODE (value) == CODE_LABEL
5256		|| GET_CODE (value) == MINUS)))
5257    {
5258      if (! TARGET_V9)
5259	{
5260	  ASM_OUTPUT_INT (file, const0_rtx);
5261	  ASM_OUTPUT_INT (file, value);
5262	}
5263      else
5264	{
5265	  fprintf (file, "\t%s\t", ASM_LONGLONG);
5266	  output_addr_const (file, value);
5267	  fprintf (file, "\n");
5268	}
5269    }
5270  else
5271    abort ();
5272}
5273
5274/* Return the value of a code used in the .proc pseudo-op that says
5275   what kind of result this function returns.  For non-C types, we pick
5276   the closest C type.  */
5277
/* Fallback widths, in bits, for the standard C types.  Each one is only
   defined here when nothing included earlier has already defined it.
   sparc_type_code below compares TYPE_PRECISION against CHAR_TYPE_SIZE,
   SHORT_TYPE_SIZE, INT_TYPE_SIZE and FLOAT_TYPE_SIZE.  */

#ifndef CHAR_TYPE_SIZE
#define CHAR_TYPE_SIZE BITS_PER_UNIT
#endif

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
5309
5310unsigned long
5311sparc_type_code (type)
5312     register tree type;
5313{
5314  register unsigned long qualifiers = 0;
5315  register unsigned shift;
5316
5317  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
5318     setting more, since some assemblers will give an error for this.  Also,
5319     we must be careful to avoid shifts of 32 bits or more to avoid getting
5320     unpredictable results.  */
5321
5322  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
5323    {
5324      switch (TREE_CODE (type))
5325	{
5326	case ERROR_MARK:
5327	  return qualifiers;
5328
5329	case ARRAY_TYPE:
5330	  qualifiers |= (3 << shift);
5331	  break;
5332
5333	case FUNCTION_TYPE:
5334	case METHOD_TYPE:
5335	  qualifiers |= (2 << shift);
5336	  break;
5337
5338	case POINTER_TYPE:
5339	case REFERENCE_TYPE:
5340	case OFFSET_TYPE:
5341	  qualifiers |= (1 << shift);
5342	  break;
5343
5344	case RECORD_TYPE:
5345	  return (qualifiers | 8);
5346
5347	case UNION_TYPE:
5348	case QUAL_UNION_TYPE:
5349	  return (qualifiers | 9);
5350
5351	case ENUMERAL_TYPE:
5352	  return (qualifiers | 10);
5353
5354	case VOID_TYPE:
5355	  return (qualifiers | 16);
5356
5357	case INTEGER_TYPE:
5358	  /* If this is a range type, consider it to be the underlying
5359	     type.  */
5360	  if (TREE_TYPE (type) != 0)
5361	    break;
5362
5363	  /* Carefully distinguish all the standard types of C,
5364	     without messing up if the language is not C.  We do this by
5365	     testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
5366	     look at both the names and the above fields, but that's redundant.
5367	     Any type whose size is between two C types will be considered
5368	     to be the wider of the two types.  Also, we do not have a
5369	     special code to use for "long long", so anything wider than
5370	     long is treated the same.  Note that we can't distinguish
5371	     between "int" and "long" in this code if they are the same
5372	     size, but that's fine, since neither can the assembler.  */
5373
5374	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
5375	    return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
5376
5377	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
5378	    return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
5379
5380	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
5381	    return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
5382
5383	  else
5384	    return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
5385
5386	case REAL_TYPE:
5387	  /* If this is a range type, consider it to be the underlying
5388	     type.  */
5389	  if (TREE_TYPE (type) != 0)
5390	    break;
5391
5392	  /* Carefully distinguish all the standard types of C,
5393	     without messing up if the language is not C.  */
5394
5395	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
5396	    return (qualifiers | 6);
5397
5398	  else
5399	    return (qualifiers | 7);
5400
5401	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
5402	  /* ??? We need to distinguish between double and float complex types,
5403	     but I don't know how yet because I can't reach this code from
5404	     existing front-ends.  */
5405	  return (qualifiers | 7);	/* Who knows? */
5406
5407	case CHAR_TYPE:		/* GNU Pascal CHAR type.  Not used in C.  */
5408	case BOOLEAN_TYPE:	/* GNU Fortran BOOLEAN type.  */
5409	case FILE_TYPE:		/* GNU Pascal FILE type.  */
5410	case SET_TYPE:		/* GNU Pascal SET type.  */
5411	case LANG_TYPE:		/* ? */
5412	  return qualifiers;
5413
5414	default:
5415	  abort ();		/* Not a type! */
5416        }
5417    }
5418
5419  return qualifiers;
5420}
5421
5422/* Nested function support.  */
5423
5424/* Emit RTL insns to initialize the variable parts of a trampoline.
5425   FNADDR is an RTX for the address of the function's pure code.
5426   CXT is an RTX for the static chain value for the function.
5427
5428   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
5429   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
5430   (to store insns).  This is a bit excessive.  Perhaps a different
5431   mechanism would be better here.
5432
5433   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
5434
5435void
5436sparc_initialize_trampoline (tramp, fnaddr, cxt)
5437     rtx tramp, fnaddr, cxt;
5438{
5439  /* SPARC 32 bit trampoline:
5440
5441 	sethi	%hi(fn), %g1
5442 	sethi	%hi(static), %g2
5443 	jmp	%g1+%lo(fn)
5444 	or	%g2, %lo(static), %g2
5445
5446    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
5447    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
5448   */
5449#ifdef TRANSFER_FROM_TRAMPOLINE
5450  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5451                     0, VOIDmode, 1, tramp, Pmode);
5452#endif
5453
5454  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
5455		  expand_binop (SImode, ior_optab,
5456				expand_shift (RSHIFT_EXPR, SImode, fnaddr,
5457					      size_int (10), 0, 1),
5458				GEN_INT (0x03000000),
5459				NULL_RTX, 1, OPTAB_DIRECT));
5460
5461  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5462		  expand_binop (SImode, ior_optab,
5463				expand_shift (RSHIFT_EXPR, SImode, cxt,
5464					      size_int (10), 0, 1),
5465				GEN_INT (0x05000000),
5466				NULL_RTX, 1, OPTAB_DIRECT));
5467
5468  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5469		  expand_binop (SImode, ior_optab,
5470				expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
5471				GEN_INT (0x81c06000),
5472				NULL_RTX, 1, OPTAB_DIRECT));
5473
5474  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5475		  expand_binop (SImode, ior_optab,
5476				expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
5477				GEN_INT (0x8410a000),
5478				NULL_RTX, 1, OPTAB_DIRECT));
5479
5480  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
5481  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
5482     aligned on a 16 byte boundary so one flush clears it all.  */
5483  if (sparc_cpu != PROCESSOR_ULTRASPARC)
5484    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
5485						     plus_constant (tramp, 8)))));
5486}
5487
5488/* The 64 bit version is simpler because it makes more sense to load the
5489   values as "immediate" data out of the trampoline.  It's also easier since
5490   we can read the PC without clobbering a register.  */
5491
5492void
5493sparc64_initialize_trampoline (tramp, fnaddr, cxt)
5494     rtx tramp, fnaddr, cxt;
5495{
5496#ifdef TRANSFER_FROM_TRAMPOLINE
5497  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5498                     0, VOIDmode, 1, tramp, Pmode);
5499#endif
5500
5501  /*
5502	rd	%pc, %g1
5503	ldx	[%g1+24], %g5
5504	jmp	%g5
5505	ldx	[%g1+16], %g5
5506	+16 bytes data
5507   */
5508
5509  emit_move_insn (gen_rtx_MEM (SImode, tramp),
5510		  GEN_INT (0x83414000));
5511  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5512		  GEN_INT (0xca586018));
5513  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5514		  GEN_INT (0x81c14000));
5515  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5516		  GEN_INT (0xca586010));
5517  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
5518  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
5519  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
5520
5521  if (sparc_cpu != PROCESSOR_ULTRASPARC)
5522    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
5523}
5524
5525/* Subroutines to support a flat (single) register window calling
5526   convention.  */
5527
5528/* Single-register window sparc stack frames look like:
5529
5530             Before call		        After call
5531        +-----------------------+	+-----------------------+
5532   high |		        |	|			|
5533   mem  |  caller's temps.    	|       |  caller's temps.    	|
5534	|       		|       |       	        |
5535        +-----------------------+	+-----------------------+
5536 	|       		|	|		        |
5537        |  arguments on stack.  |	|  arguments on stack.  |
5538	|       		|      	|			|
5539        +-----------------------+FP+92->+-----------------------+
5540 	|  6 words to save     	|	|  6 words to save	|
5541	|  arguments passed	|	|  arguments passed	|
5542	|  in registers, even	|	|  in registers, even	|
5543       	|  if not passed.       |      	|  if not passed.	|
5544 SP+68->+-----------------------+FP+68->+-----------------------+
5545        | 1 word struct addr	|      	| 1 word struct addr	|
5546        +-----------------------+FP+64->+-----------------------+
5547        |			|	|			|
5548        | 16 word reg save area	|	| 16 word reg save area |
5549       	|                       |      	|			|
5550    SP->+-----------------------+   FP->+-----------------------+
5551				        | 4 word area for	|
5552				       	| fp/alu reg moves	|
5553				 FP-16->+-----------------------+
5554				        |			|
5555				        |  local variables	|
5556				        |			|
5557				        +-----------------------+
5558				        |		        |
5559                                        |  fp register save     |
5560				        |			|
5561				        +-----------------------+
5562				        |		        |
5563                                        |  gp register save     |
5564                                        |       		|
5565				        +-----------------------+
5566				        |			|
5567                                        |  alloca allocations   |
5568        			        |			|
5569				        +-----------------------+
5570				        |			|
5571                                        |  arguments on stack   |
5572        			       	|		        |
5573				 SP+92->+-----------------------+
5574                                        |  6 words to save      |
5575				        |  arguments passed     |
5576                                        |  in registers, even   |
5577   low                                 	|  if not passed.       |
5578   memory        		 SP+68->+-----------------------+
5579				       	| 1 word struct addr	|
5580				 SP+64->+-----------------------+
5581				        |			|
				        | 16 word reg save area |
5583				       	|			|
5584				    SP->+-----------------------+  */
5585
5586/* Structure to be filled in by sparc_flat_compute_frame_size with register
5587   save masks, and offsets for the current function.  */
5588
struct sparc_frame_info
{
  unsigned long total_size;	/* # bytes that the entire frame takes up,
				   after rounding with SPARC_STACK_ALIGN.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # bytes of extra gunk; forced to 0 when
				   the frame would otherwise be empty.  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers; bit N stands
				   for hard register N.  */
  unsigned long fmask;		/* Mask of saved fp registers; bit N stands
				   for hard register 32 + N.  */
  unsigned long reg_offset;	/* Offset from new sp to store regs.  */
  int		initialized;	/* Nonzero if frame size already calculated;
				   copied from reload_completed each time
				   the frame is computed.  */
};
5602
/* Current frame information calculated by sparc_flat_compute_frame_size.
   Every call to that function overwrites all of the fields; the flat
   prologue code reads them back.  */
struct sparc_frame_info current_frame_info;

/* Zero structure to initialize current_frame_info.  It has no explicit
   initializer, so as a file-scope object it starts out all zero.  */
struct sparc_frame_info zero_frame_info;
5608
/* Hard register number of the return address register, and the gmask
   bits that stand for the frame pointer and the return address
   register.  */

#define RETURN_ADDR_REGNUM 15
#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))

/* Tell prologue and epilogue if register REGNO should be saved / restored.

   A register must be saved when it is live somewhere in the function and
   is not call-used, when it is the frame pointer and a frame pointer is
   needed, or when it is the return address register and that register is
   live.

   REGNO is parenthesized at every use so that any expression can be
   passed, but it is still evaluated more than once and therefore must
   not have side effects.  */

#define MUST_SAVE_REGISTER(regno) \
 ((regs_ever_live[(regno)] && !call_used_regs[(regno)])		\
  || ((regno) == FRAME_POINTER_REGNUM && frame_pointer_needed)	\
  || ((regno) == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
5619
5620/* Return the bytes needed to compute the frame pointer from the current
5621   stack pointer.  */
5622
5623unsigned long
5624sparc_flat_compute_frame_size (size)
5625     int size;			/* # of var. bytes allocated.  */
5626{
5627  int regno;
5628  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
5629  unsigned long var_size;	/* # bytes that variables take up.  */
5630  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
5631  unsigned long extra_size;	/* # extra bytes.  */
5632  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
5633  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
5634  unsigned long gmask;		/* Mask of saved gp registers.  */
5635  unsigned long fmask;		/* Mask of saved fp registers.  */
5636  unsigned long reg_offset;	/* Offset to register save area.  */
5637  int           need_aligned_p;	/* 1 if need the save area 8 byte aligned.  */
5638
5639  /* This is the size of the 16 word reg save area, 1 word struct addr
5640     area, and 4 word fp/alu register copy area.  */
5641  extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
5642  var_size = size;
5643  gp_reg_size = 0;
5644  fp_reg_size = 0;
5645  gmask = 0;
5646  fmask = 0;
5647  reg_offset = 0;
5648  need_aligned_p = 0;
5649
5650  args_size = 0;
5651  if (!leaf_function_p ())
5652    {
5653      /* Also include the size needed for the 6 parameter registers.  */
5654      args_size = current_function_outgoing_args_size + 24;
5655    }
5656  total_size = var_size + args_size;
5657
5658  /* Calculate space needed for gp registers.  */
5659  for (regno = 1; regno <= 31; regno++)
5660    {
5661      if (MUST_SAVE_REGISTER (regno))
5662	{
5663	  /* If we need to save two regs in a row, ensure there's room to bump
5664	     up the address to align it to a doubleword boundary.  */
5665	  if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
5666	    {
5667	      if (gp_reg_size % 8 != 0)
5668		gp_reg_size += 4;
5669	      gp_reg_size += 2 * UNITS_PER_WORD;
5670	      gmask |= 3 << regno;
5671	      regno++;
5672	      need_aligned_p = 1;
5673	    }
5674	  else
5675	    {
5676	      gp_reg_size += UNITS_PER_WORD;
5677	      gmask |= 1 << regno;
5678	    }
5679	}
5680    }
5681
5682  /* Calculate space needed for fp registers.  */
5683  for (regno = 32; regno <= 63; regno++)
5684    {
5685      if (regs_ever_live[regno] && !call_used_regs[regno])
5686	{
5687	  fp_reg_size += UNITS_PER_WORD;
5688	  fmask |= 1 << (regno - 32);
5689	}
5690    }
5691
5692  if (gmask || fmask)
5693    {
5694      int n;
5695      reg_offset = FIRST_PARM_OFFSET(0) + args_size;
5696      /* Ensure save area is 8 byte aligned if we need it.  */
5697      n = reg_offset % 8;
5698      if (need_aligned_p && n != 0)
5699	{
5700	  total_size += 8 - n;
5701	  reg_offset += 8 - n;
5702	}
5703      total_size += gp_reg_size + fp_reg_size;
5704    }
5705
5706  /* If we must allocate a stack frame at all, we must also allocate
5707     room for register window spillage, so as to be binary compatible
5708     with libraries and operating systems that do not use -mflat.  */
5709  if (total_size > 0)
5710    total_size += extra_size;
5711  else
5712    extra_size = 0;
5713
5714  total_size = SPARC_STACK_ALIGN (total_size);
5715
5716  /* Save other computed information.  */
5717  current_frame_info.total_size  = total_size;
5718  current_frame_info.var_size    = var_size;
5719  current_frame_info.args_size   = args_size;
5720  current_frame_info.extra_size  = extra_size;
5721  current_frame_info.gp_reg_size = gp_reg_size;
5722  current_frame_info.fp_reg_size = fp_reg_size;
5723  current_frame_info.gmask	 = gmask;
5724  current_frame_info.fmask	 = fmask;
5725  current_frame_info.reg_offset	 = reg_offset;
5726  current_frame_info.initialized = reload_completed;
5727
5728  /* Ok, we're done.  */
5729  return total_size;
5730}
5731
5732/* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
5733   OFFSET.
5734
5735   BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
5736   appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
5737   [BASE_REG+OFFSET] will always be a valid address.
5738
5739   WORD_OP is either "st" for save, "ld" for restore.
5740   DOUBLEWORD_OP is either "std" for save, "ldd" for restore.  */
5741
5742void
5743sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
5744			 doubleword_op, base_offset)
5745     FILE *file;
5746     char *base_reg;
5747     unsigned int offset;
5748     unsigned long gmask;
5749     unsigned long fmask;
5750     char *word_op;
5751     char *doubleword_op;
5752     unsigned long base_offset;
5753{
5754  int regno;
5755
5756  if (gmask == 0 && fmask == 0)
5757    return;
5758
5759  /* Save registers starting from high to low.  We've already saved the
5760     previous frame pointer and previous return address for the debugger's
5761     sake.  The debugger allows us to not need a nop in the epilog if at least
5762     one register is reloaded in addition to return address.  */
5763
5764  if (gmask)
5765    {
5766      for (regno = 1; regno <= 31; regno++)
5767	{
5768	  if ((gmask & (1L << regno)) != 0)
5769	    {
5770	      if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
5771		{
5772		  /* We can save two registers in a row.  If we're not at a
5773		     double word boundary, move to one.
5774		     sparc_flat_compute_frame_size ensures there's room to do
5775		     this.  */
5776		  if (offset % 8 != 0)
5777		    offset += UNITS_PER_WORD;
5778
5779		  if (word_op[0] == 's')
5780		    {
5781		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
5782			       doubleword_op, reg_names[regno],
5783			       base_reg, offset);
5784		      if (dwarf2out_do_frame ())
5785			{
5786			  char *l = dwarf2out_cfi_label ();
5787			  dwarf2out_reg_save (l, regno, offset + base_offset);
5788			  dwarf2out_reg_save
5789			    (l, regno+1, offset+base_offset + UNITS_PER_WORD);
5790			}
5791		    }
5792		  else
5793		    fprintf (file, "\t%s\t[%s+%d], %s\n",
5794			     doubleword_op, base_reg, offset,
5795			     reg_names[regno]);
5796
5797		  offset += 2 * UNITS_PER_WORD;
5798		  regno++;
5799		}
5800	      else
5801		{
5802		  if (word_op[0] == 's')
5803		    {
5804		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
5805			       word_op, reg_names[regno],
5806			       base_reg, offset);
5807		      if (dwarf2out_do_frame ())
5808			dwarf2out_reg_save ("", regno, offset + base_offset);
5809		    }
5810		  else
5811		    fprintf (file, "\t%s\t[%s+%d], %s\n",
5812			     word_op, base_reg, offset, reg_names[regno]);
5813
5814		  offset += UNITS_PER_WORD;
5815		}
5816	    }
5817	}
5818    }
5819
5820  if (fmask)
5821    {
5822      for (regno = 32; regno <= 63; regno++)
5823	{
5824	  if ((fmask & (1L << (regno - 32))) != 0)
5825	    {
5826	      if (word_op[0] == 's')
5827		{
5828		  fprintf (file, "\t%s\t%s, [%s+%d]\n",
5829			   word_op, reg_names[regno],
5830			   base_reg, offset);
5831		  if (dwarf2out_do_frame ())
5832		    dwarf2out_reg_save ("", regno, offset + base_offset);
5833		}
5834	      else
5835		fprintf (file, "\t%s\t[%s+%d], %s\n",
5836			 word_op, base_reg, offset, reg_names[regno]);
5837
5838	      offset += UNITS_PER_WORD;
5839	    }
5840	}
5841    }
5842}
5843
/* Set up the stack and frame (if desired) for the function.

   Writes the assembly text of the flat-model prologue to FILE.  SIZE is
   rounded up with SPARC_STACK_ALIGN and handed to
   sparc_flat_compute_frame_size when current_frame_info has not been
   marked initialized; otherwise the cached total_size is used.  When the
   resulting frame size is zero, only the two "#PROLOGUE#" marker comments
   and the summary comment are written.  */

void
sparc_flat_output_function_prologue (file, size)
     FILE *file;
     int size;
{
  char *sp_str = reg_names[STACK_POINTER_REGNUM];
  /* NOTE(review): gmask, and the values printed in the summary comment
     below, are read before the frame may be recomputed further down, so
     they show whatever an earlier sparc_flat_compute_frame_size call
     stored.  */
  unsigned long gmask = current_frame_info.gmask;

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
  fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
	   ASM_COMMENT_START,
	   current_frame_info.var_size,
	   current_frame_info.gp_reg_size / 4,
	   current_frame_info.fp_reg_size / 4,
	   current_function_outgoing_args_size,
	   current_frame_info.extra_size);

  /* From here on SIZE is the total frame size, not the size of the
     local variables.  */
  size = SPARC_STACK_ALIGN (size);
  size = (! current_frame_info.initialized
	  ? sparc_flat_compute_frame_size (size)
	  : current_frame_info.total_size);

  /* These cases shouldn't happen.  Catch them now.  */
  if (size == 0 && (gmask || current_frame_info.fmask))
    abort ();

  /* Allocate our stack frame by decrementing %sp.
     At present, the only algorithm gdb can use to determine if this is a
     flat frame is if we always set %i7 if we set %sp.  This can be optimized
     in the future by putting in some sort of debugging information that says
     this is a `flat' function.  However, there is still the case of debugging
     code without such debugging information (including cases where most fns
     have such info, but there is one that doesn't).  So, always do this now
     so we don't get a lot of code out there that gdb can't handle.
     If the frame pointer isn't needed then that's ok - gdb won't be able to
     distinguish us from a non-flat function but there won't (and shouldn't)
     be any differences anyway.  The return pc is saved (if necessary) right
     after %i7 so gdb won't have to look too far to find it.  */
  if (size > 0)
    {
      unsigned int reg_offset = current_frame_info.reg_offset;
      char *fp_str = reg_names[FRAME_POINTER_REGNUM];
      const char *t1_str = "%g1";

      /* Things get a little tricky if local variables take up more than ~4096
	 bytes and outgoing arguments take up more than ~4096 bytes.  When that
	 happens, the register save area can't be accessed from either end of
	 the frame.  Handle this by decrementing %sp to the start of the gp
	 register save area, save the regs, update %i7, and then set %sp to its
	 final value.  Given that we only have one scratch register to play
	 with it is the cheapest solution, and it helps gdb out as it won't
	 slow down recognition of flat functions.
	 Don't change the order of insns emitted here without checking with
	 the gdb folk first.  */

      /* Is the entire register save area offsettable from %sp?  */
      if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
	{
	  if (size <= 4096)
	    {
	      /* Small frame: adjust %sp with an immediate.  */
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, -size, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  /* Store the old frame pointer in the first save slot,
		     then set the frame pointer to the new %sp plus SIZE
		     (written as a subtraction of -SIZE).  */
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   sp_str, -size, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  else
	    {
	      /* Large frame: load SIZE into the scratch register and
		 subtract that from %sp.  */
	      fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
		       size, t1_str, sp_str, t1_str, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   sp_str, t1_str, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & FRAME_POINTER_MASK)
		{
		  /* reg_offset was already advanced past the frame
		     pointer's slot above, hence the - 4.  */
		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
				      reg_offset - 4 - size);
		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
	      if (dwarf2out_do_frame ())
		dwarf2out_return_save ("", reg_offset - size);
	      reg_offset += 4;
	    }
	  /* The frame pointer and return address were stored above, so
	     they are masked out of what remains to be saved.  */
	  sparc_flat_save_restore (file, sp_str, reg_offset,
				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size);
	}
      else
	{
	  /* Subtract %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to decrement %sp by, the first time.  */
	  unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  unsigned int offset = size1 - (size - reg_offset);

	  if (size1 <= 4096)
	    {
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, -size1, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, offset, sp_str, -size1, fp_str,
			   ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  else
	    {
	      fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
		       size1, t1_str, sp_str, t1_str, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, offset, sp_str, t1_str, fp_str,
			   ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
				      offset - 4 - size1);
		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
	      if (dwarf2out_do_frame ())
		/* offset - size1 == reg_offset - size
		   if reg_offset were updated above like offset.  */
		dwarf2out_return_save ("", offset - size1);
	      offset += 4;
	    }
	  sparc_flat_save_restore (file, sp_str, offset,
				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size1);
	  /* Second step: drop %sp by the rest of the frame.  */
	  fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
		   size - size1, t1_str, sp_str, t1_str, sp_str);
	  /* When the CFA is tracked through %sp it has to be redefined
	     now that %sp has moved again.  */
	  if (dwarf2out_do_frame ())
	    if (! (gmask & FRAME_POINTER_MASK))
	      dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
	}
    }

  fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
}
6025
/* Do any necessary cleanup after a function to restore stack, frame,
   and regs.

   FILE is the stream the assembly text is written to.  SIZE is the
   frame size passed in by the caller; it is rounded with
   SPARC_STACK_ALIGN and then replaced by the total frame size, which is
   either computed by sparc_flat_compute_frame_size or taken from
   current_frame_info when that is already initialized.

   Besides emitting the epilogue, this resets current_frame_info to
   zero_frame_info and calls sparc_output_deferred_case_vectors.  */

void
sparc_flat_output_function_epilogue (file, size)
     FILE *file;
     int size;
{
  rtx epilogue_delay = current_function_epilogue_delay_list;
  int noepilogue = FALSE;

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);

  /* The epilogue does not depend on any registers, but the stack
     registers, so we assume that if we have 1 pending nop, it can be
     ignored, and 2 it must be filled (2 nops occur for integer
     multiply and divide).  */

  size = SPARC_STACK_ALIGN (size);
  size = (!current_frame_info.initialized
	   ? sparc_flat_compute_frame_size (size)
	   : current_frame_info.total_size);

  /* With no frame and no insn waiting for the delay slot, the whole
     epilogue can be omitted when the function already ends in a
     BARRIER.  */
  if (size == 0 && epilogue_delay == 0)
    {
      rtx insn = get_last_insn ();

      /* If the last insn was a BARRIER, we don't have to write any code
	 because a jump (aka return) was put there.  */
      if (GET_CODE (insn) == NOTE)
	insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
	noepilogue = TRUE;
    }

  if (!noepilogue)
    {
      /* Offset of the register save area from %sp; rewritten below when
	 %sp is restored in two steps.  */
      unsigned int reg_offset = current_frame_info.reg_offset;
      /* Amount added to %sp before the registers are reloaded; 0 when
	 a single final adjustment is enough.  */
      unsigned int size1;
      char *sp_str = reg_names[STACK_POINTER_REGNUM];
      char *fp_str = reg_names[FRAME_POINTER_REGNUM];
      /* Scratch register used to hold sizes above 4095.  */
      const char *t1_str = "%g1";

      /* In the reload sequence, we don't need to fill the load delay
	 slots for most of the loads, also see if we can fill the final
	 delay slot if not otherwise filled by the reload sequence.  */

      /* A size above 4095 is loaded into the scratch register instead
	 of being used as an immediate operand.  */
      if (size > 4095)
	fprintf (file, "\tset\t%d, %s\n", size, t1_str);

      /* When a frame pointer is in use, recompute %sp as %fp - size
	 rather than relying on its current value.  */
      if (frame_pointer_needed)
	{
	  if (size > 4095)
	    fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
		     fp_str, t1_str, sp_str, ASM_COMMENT_START);
	  else
	    fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
		     fp_str, size, sp_str, ASM_COMMENT_START);
	}

      /* Is the entire register save area offsettable from %sp?  */
      if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
	{
	  size1 = 0;
	}
      else
	{
	  /* Restore %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to increment %sp by, the first time.  */
	  size1 = ((reg_offset - 64 - 16) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  reg_offset = size1 - reg_offset;

	  fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
		   size1, t1_str, sp_str, t1_str, sp_str);
	}

      /* We must restore the frame pointer and return address reg first
	 because they are treated specially by the prologue output code.  */
      if (current_frame_info.gmask & FRAME_POINTER_MASK)
	{
	  fprintf (file, "\tld\t[%s+%d], %s\n",
		   sp_str, reg_offset, fp_str);
	  reg_offset += 4;
	}
      if (current_frame_info.gmask & RETURN_ADDR_MASK)
	{
	  fprintf (file, "\tld\t[%s+%d], %s\n",
		   sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
	  reg_offset += 4;
	}

      /* Restore any remaining saved registers.  */
      sparc_flat_save_restore (file, sp_str, reg_offset,
			       current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
			       current_frame_info.fmask,
			       "ld", "ldd", 0);

      /* If we had to increment %sp in two steps, record it so the second
	 restoration in the epilogue finishes up.  */
      if (size1 > 0)
	{
	  size -= size1;
	  /* The remainder is reloaded into the scratch register when it
	     is still above 4095.  */
	  if (size > 4095)
	    fprintf (file, "\tset\t%d, %s\n",
		     size, t1_str);
	}

      /* NOTE(review): the %o7+12 return presumably skips the "unimp"
	 insn the caller places after the call (see SKIP_CALLERS_UNIMP_P
	 at the top of the file); the condition used here is only
	 current_function_returns_struct -- confirm they agree.  */
      if (current_function_returns_struct)
	fprintf (file, "\tjmp\t%%o7+12\n");
      else
	fprintf (file, "\tretl\n");

      /* If the only register saved is the return address, we need a
	 nop, unless we have an instruction to put into it.  Otherwise
	 we don't since reloading multiple registers doesn't reference
	 the register being loaded.  */

      /* Exactly one insn is emitted after the return: the insn from the
	 epilogue delay list, the final %sp adjustment, or a nop.  A
	 delay-list insn together with a nonzero remaining size is
	 treated as an internal error.  */
      if (epilogue_delay)
	{
	  if (size)
	    abort ();
	  final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
	}

      else if (size > 4095)
	fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);

      else if (size > 0)
	fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);

      else
	fprintf (file, "\tnop\n");
    }

  /* Reset state info for each function.  */
  current_frame_info = zero_frame_info;

  sparc_output_deferred_case_vectors ();
}
6168
6169/* Define the number of delay slots needed for the function epilogue.
6170
6171   On the sparc, we need a slot if either no stack has been allocated,
6172   or the only register saved is the return register.  */
6173
6174int
6175sparc_flat_epilogue_delay_slots ()
6176{
6177  if (!current_frame_info.initialized)
6178    (void) sparc_flat_compute_frame_size (get_frame_size ());
6179
6180  if (current_frame_info.total_size == 0)
6181    return 1;
6182
6183  return 0;
6184}
6185
6186/* Return true is TRIAL is a valid insn for the epilogue delay slot.
6187   Any single length instruction which doesn't reference the stack or frame
6188   pointer is OK.  */
6189
6190int
6191sparc_flat_eligible_for_epilogue_delay (trial, slot)
6192     rtx trial;
6193     int slot ATTRIBUTE_UNUSED;
6194{
6195  rtx pat = PATTERN (trial);
6196
6197  if (get_attr_length (trial) != 1)
6198    return 0;
6199
6200  /* If %g0 is live, there are lots of things we can't handle.
6201     Rather than trying to find them all now, let's punt and only
6202     optimize things as necessary.  */
6203  if (TARGET_LIVE_G0)
6204    return 0;
6205
6206  if (! reg_mentioned_p (stack_pointer_rtx, pat)
6207      && ! reg_mentioned_p (frame_pointer_rtx, pat))
6208    return 1;
6209
6210  return 0;
6211}
6212
6213/* Adjust the cost of a scheduling dependency.  Return the new cost of
6214   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
6215
6216static int
6217supersparc_adjust_cost (insn, link, dep_insn, cost)
6218     rtx insn;
6219     rtx link;
6220     rtx dep_insn;
6221     int cost;
6222{
6223  enum attr_type insn_type;
6224
6225  if (! recog_memoized (insn))
6226    return 0;
6227
6228  insn_type = get_attr_type (insn);
6229
6230  if (REG_NOTE_KIND (link) == 0)
6231    {
6232      /* Data dependency; DEP_INSN writes a register that INSN reads some
6233	 cycles later.  */
6234
6235      /* if a load, then the dependence must be on the memory address;
6236	 add an extra "cycle".  Note that the cost could be two cycles
6237	 if the reg was written late in an instruction group; we ca not tell
6238	 here.  */
6239      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6240	return cost + 3;
6241
6242      /* Get the delay only if the address of the store is the dependence.  */
6243      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6244	{
6245	  rtx pat = PATTERN(insn);
6246	  rtx dep_pat = PATTERN (dep_insn);
6247
6248	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6249	    return cost;  /* This should not happen!  */
6250
6251	  /* The dependency between the two instructions was on the data that
6252	     is being stored.  Assume that this implies that the address of the
6253	     store is not dependent.  */
6254	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6255	    return cost;
6256
6257	  return cost + 3;  /* An approximation.  */
6258	}
6259
6260      /* A shift instruction cannot receive its data from an instruction
6261	 in the same cycle; add a one cycle penalty.  */
6262      if (insn_type == TYPE_SHIFT)
6263	return cost + 3;   /* Split before cascade into shift.  */
6264    }
6265  else
6266    {
6267      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6268	 INSN writes some cycles later.  */
6269
6270      /* These are only significant for the fpu unit; writing a fp reg before
6271         the fpu has finished with it stalls the processor.  */
6272
6273      /* Reusing an integer register causes no problems.  */
6274      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6275	return 0;
6276    }
6277
6278  return cost;
6279}
6280
6281static int
6282hypersparc_adjust_cost (insn, link, dep_insn, cost)
6283     rtx insn;
6284     rtx link;
6285     rtx dep_insn;
6286     int cost;
6287{
6288  enum attr_type insn_type, dep_type;
6289  rtx pat = PATTERN(insn);
6290  rtx dep_pat = PATTERN (dep_insn);
6291
6292  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6293    return cost;
6294
6295  insn_type = get_attr_type (insn);
6296  dep_type = get_attr_type (dep_insn);
6297
6298  switch (REG_NOTE_KIND (link))
6299    {
6300    case 0:
6301      /* Data dependency; DEP_INSN writes a register that INSN reads some
6302	 cycles later.  */
6303
6304      switch (insn_type)
6305	{
6306	case TYPE_STORE:
6307	case TYPE_FPSTORE:
6308	  /* Get the delay iff the address of the store is the dependence. */
6309	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6310	    return cost;
6311
6312	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6313	    return cost;
6314	  return cost + 3;
6315
6316	case TYPE_LOAD:
6317	case TYPE_SLOAD:
6318	case TYPE_FPLOAD:
6319	  /* If a load, then the dependence must be on the memory address.  If
6320	     the addresses aren't equal, then it might be a false dependency */
6321	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6322	    {
6323	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6324		  || GET_CODE (SET_DEST (dep_pat)) != MEM
6325		  || GET_CODE (SET_SRC (pat)) != MEM
6326		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
6327				    XEXP (SET_SRC (pat), 0)))
6328		return cost + 2;
6329
6330	      return cost + 8;
6331	    }
6332	  break;
6333
6334	case TYPE_BRANCH:
6335	  /* Compare to branch latency is 0.  There is no benefit from
6336	     separating compare and branch.  */
6337	  if (dep_type == TYPE_COMPARE)
6338	    return 0;
6339	  /* Floating point compare to branch latency is less than
6340	     compare to conditional move.  */
6341	  if (dep_type == TYPE_FPCMP)
6342	    return cost - 1;
6343	  break;
6344	default:
6345	  break;
6346	}
6347	break;
6348
6349    case REG_DEP_ANTI:
6350      /* Anti-dependencies only penalize the fpu unit. */
6351      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6352        return 0;
6353      break;
6354
6355    default:
6356      break;
6357    }
6358
6359  return cost;
6360}
6361
6362static int
6363ultrasparc_adjust_cost (insn, link, dep_insn, cost)
6364     rtx insn;
6365     rtx link;
6366     rtx dep_insn;
6367     int cost;
6368{
6369  enum attr_type insn_type, dep_type;
6370  rtx pat = PATTERN(insn);
6371  rtx dep_pat = PATTERN (dep_insn);
6372
6373  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6374    return cost;
6375
6376  insn_type = get_attr_type (insn);
6377  dep_type = get_attr_type (dep_insn);
6378
6379  /* Nothing issues in parallel with integer multiplies, so
6380     mark as zero cost since the scheduler can not do anything
6381     about it.  */
6382  if (insn_type == TYPE_IMUL)
6383    return 0;
6384
6385#define SLOW_FP(dep_type) \
6386(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
6387
6388  switch (REG_NOTE_KIND (link))
6389    {
6390    case 0:
6391      /* Data dependency; DEP_INSN writes a register that INSN reads some
6392	 cycles later.  */
6393
6394      if (dep_type == TYPE_CMOVE)
6395	{
6396	  /* Instructions that read the result of conditional moves cannot
6397	     be in the same group or the following group.  */
6398	  return cost + 1;
6399	}
6400
6401      switch (insn_type)
6402	{
6403	  /* UltraSPARC can dual issue a store and an instruction setting
6404	     the value stored, except for divide and square root.  */
6405	case TYPE_FPSTORE:
6406	  if (! SLOW_FP (dep_type))
6407	    return 0;
6408	  return cost;
6409
6410	case TYPE_STORE:
6411	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6412	    return cost;
6413
6414	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6415	    /* The dependency between the two instructions is on the data
6416	       that is being stored.  Assume that the address of the store
6417	       is not also dependent.  */
6418	    return 0;
6419	  return cost;
6420
6421	case TYPE_LOAD:
6422	case TYPE_SLOAD:
6423	case TYPE_FPLOAD:
6424	  /* A load does not return data until at least 11 cycles after
6425	     a store to the same location.  3 cycles are accounted for
6426	     in the load latency; add the other 8 here.  */
6427	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6428	    {
6429	      /* If the addresses are not equal this may be a false
6430		 dependency because pointer aliasing could not be
6431		 determined.  Add only 2 cycles in that case.  2 is
6432		 an arbitrary compromise between 8, which would cause
6433		 the scheduler to generate worse code elsewhere to
6434		 compensate for a dependency which might not really
6435		 exist, and 0.  */
6436	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6437		  || GET_CODE (SET_SRC (pat)) != MEM
6438		  || GET_CODE (SET_DEST (dep_pat)) != MEM
6439		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
6440				    XEXP (SET_DEST (dep_pat), 0)))
6441		return cost + 2;
6442
6443	      return cost + 8;
6444	    }
6445	  return cost;
6446
6447	case TYPE_BRANCH:
6448	  /* Compare to branch latency is 0.  There is no benefit from
6449	     separating compare and branch.  */
6450	  if (dep_type == TYPE_COMPARE)
6451	    return 0;
6452	  /* Floating point compare to branch latency is less than
6453	     compare to conditional move.  */
6454	  if (dep_type == TYPE_FPCMP)
6455	    return cost - 1;
6456	  return cost;
6457
6458	case TYPE_FPCMOVE:
6459	  /* FMOVR class instructions can not issue in the same cycle
6460	     or the cycle after an instruction which writes any
6461	     integer register.  Model this as cost 2 for dependent
6462	     instructions.  */
6463	  if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
6464	       || dep_type == TYPE_BINARY)
6465	      && cost < 2)
6466	    return 2;
6467	  /* Otherwise check as for integer conditional moves. */
6468
6469	case TYPE_CMOVE:
6470	  /* Conditional moves involving integer registers wait until
6471	     3 cycles after loads return data.  The interlock applies
6472	     to all loads, not just dependent loads, but that is hard
6473	     to model.  */
6474	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
6475	    return cost + 3;
6476	  return cost;
6477
6478	default:
6479	  break;
6480	}
6481      break;
6482
6483    case REG_DEP_ANTI:
6484      /* Divide and square root lock destination registers for full latency. */
6485      if (! SLOW_FP (dep_type))
6486	return 0;
6487      break;
6488
6489    case REG_DEP_OUTPUT:
6490      /* IEU and FPU instruction that have the same destination
6491	 register cannot be grouped together.  */
6492      return cost + 1;
6493
6494    default:
6495      break;
6496    }
6497
6498  /* Other costs not accounted for:
6499     - Single precision floating point loads lock the other half of
6500       the even/odd register pair.
6501     - Several hazards associated with ldd/std are ignored because these
6502       instructions are rarely generated for V9.
6503     - The floating point pipeline can not have both a single and double
6504       precision operation active at the same time.  Format conversions
6505       and graphics instructions are given honorary double precision status.
6506     - call and jmpl are always the first instruction in a group.  */
6507
6508  return cost;
6509
6510#undef SLOW_FP
6511}
6512
6513int
6514sparc_adjust_cost(insn, link, dep, cost)
6515     rtx insn;
6516     rtx link;
6517     rtx dep;
6518     int cost;
6519{
6520  switch (sparc_cpu)
6521    {
6522    case PROCESSOR_SUPERSPARC:
6523      cost = supersparc_adjust_cost (insn, link, dep, cost);
6524      break;
6525    case PROCESSOR_HYPERSPARC:
6526    case PROCESSOR_SPARCLITE86X:
6527      cost = hypersparc_adjust_cost (insn, link, dep, cost);
6528      break;
6529    case PROCESSOR_ULTRASPARC:
6530      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
6531      break;
6532    default:
6533      break;
6534    }
6535  return cost;
6536}
6537
/* This describes the state of the UltraSPARC pipeline during
   instruction scheduling.  */

/* Single-bit masks.  Both macros expand to the same expression; in this
   file TMASK is applied to insn type attribute values (TYPE_*) and
   UMASK to enum ultra_code values.  The argument has to be smaller
   than the number of bits in an unsigned int.  */
#define TMASK(__x)	((unsigned)1 << ((int)(__x)))
#define UMASK(__x)	((unsigned)1 << ((int)(__x)))

/* Pipeline class assigned to an insn by ultra_code_from_mask.
   NUM_ULTRA_CODES is not a class; it counts the classes and sizes the
   tables indexed by this enum.  */
enum ultra_code { NONE=0, /* no insn at all				*/
		  IEU0,   /* shifts and conditional moves		*/
		  IEU1,   /* condition code setting insns, calls+jumps	*/
		  IEUN,   /* all other single cycle ieu insns		*/
		  LSU,    /* loads and stores				*/
		  CTI,    /* branches					*/
		  FPM,    /* FPU pipeline 1, multiplies and divides	*/
		  FPA,    /* FPU pipeline 2, all other operations	*/
		  SINGLE, /* single issue instructions			*/
		  NUM_ULTRA_CODES };

/* Printable name of each ultra_code, in enum order; used for the
   scheduler dump output.  */
static const char *ultra_code_names[NUM_ULTRA_CODES] = {
  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
  "FPM", "FPA", "SINGLE" };

/* One dispatch group of up to four insns.  */
struct ultrasparc_pipeline_state {
  /* The insns in this group.  */
  rtx group[4];

  /* The code for each insn.  */
  enum ultra_code codes[4];

  /* Which insns in this group have been committed by the
     scheduler.  This is how we determine how many more
     can issue this cycle.  */
  char commit[4];

  /* How many insns in this group.  */
  char group_size;

  /* Mask of free slots still in this group.  Bit N set means slot N
     is free; an empty group has the value 0xf.  */
  char free_slot_mask;

  /* The slotter uses the following to determine what other
     insn types can still make their way into this group.
     contents is indexed by enum ultra_code; num_ieu_insns counts the
     insns in the group whose code is IEU0, IEU1 or IEUN.  */
  char contents [NUM_ULTRA_CODES];
  char num_ieu_insns;
};

/* Number of entries in the pipeline history array.  */
#define ULTRA_NUM_HIST	8
/* Recent dispatch groups; ultra_cur_hist is the index of the group
   currently being filled.  */
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
static int ultra_cur_hist;
/* Incremented by ultra_flush_pipeline each time a new group is
   started; reset by ultrasparc_sched_init.  */
static int ultra_cycles_elapsed;

/* The group currently being filled.  */
#define ultra_pipe	(ultra_pipe_hist[ultra_cur_hist])
6589
6590/* Given TYPE_MASK compute the ultra_code it has.  */
6591static enum ultra_code
6592ultra_code_from_mask (type_mask)
6593     int type_mask;
6594{
6595  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6596    return IEU0;
6597  else if (type_mask & (TMASK (TYPE_COMPARE) |
6598			TMASK (TYPE_CALL) |
6599			TMASK (TYPE_UNCOND_BRANCH)))
6600    return IEU1;
6601  else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6602			TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6603    return IEUN;
6604  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6605			TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6606			TMASK (TYPE_FPSTORE)))
6607    return LSU;
6608  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6609			TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
6610    return FPM;
6611  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6612			TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6613    return FPA;
6614  else if (type_mask & TMASK (TYPE_BRANCH))
6615    return CTI;
6616
6617  return SINGLE;
6618}
6619
6620/* Check INSN (a conditional move) and make sure that it's
6621   results are available at this cycle.  Return 1 if the
6622   results are in fact ready.  */
6623static int
6624ultra_cmove_results_ready_p (insn)
6625     rtx insn;
6626{
6627  struct ultrasparc_pipeline_state *up;
6628  int entry, slot;
6629
6630  /* If this got dispatched in the previous
6631     group, the results are not ready.  */
6632  entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6633  up = &ultra_pipe_hist[entry];
6634  slot = 4;
6635  while (--slot >= 0)
6636    if (up->group[slot] == insn)
6637      return 0;
6638
6639  return 1;
6640}
6641
6642/* Walk backwards in pipeline history looking for FPU
6643   operations which use a mode different than FPMODE and
6644   will create a stall if an insn using FPMODE were to be
6645   dispatched this cycle.  */
6646static int
6647ultra_fpmode_conflict_exists (fpmode)
6648     enum machine_mode fpmode;
6649{
6650  int hist_ent;
6651  int hist_lim;
6652
6653  hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6654  if (ultra_cycles_elapsed < 4)
6655    hist_lim = ultra_cycles_elapsed;
6656  else
6657    hist_lim = 4;
6658  while (hist_lim > 0)
6659    {
6660      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6661      int slot = 4;
6662
6663      while (--slot >= 0)
6664	{
6665	  rtx insn = up->group[slot];
6666	  enum machine_mode this_mode;
6667	  rtx pat;
6668
6669	  if (! insn
6670	      || GET_CODE (insn) != INSN
6671	      || (pat = PATTERN (insn)) == 0
6672	      || GET_CODE (pat) != SET)
6673	    continue;
6674
6675	  this_mode = GET_MODE (SET_DEST (pat));
6676	  if ((this_mode != SFmode
6677	       && this_mode != DFmode)
6678	      || this_mode == fpmode)
6679	    continue;
6680
6681	  /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
6682	     we will get a stall.  Loads and stores are independant
6683	     of these rules.  */
6684	  if (GET_CODE (SET_SRC (pat)) != ABS
6685	      && GET_CODE (SET_SRC (pat)) != NEG
6686	      && ((TMASK (get_attr_type (insn)) &
6687		   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
6688		    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
6689                    TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
6690	    return 1;
6691	}
6692      hist_lim--;
6693      hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
6694    }
6695
6696  /* No conflicts, safe to dispatch.  */
6697  return 0;
6698}
6699
/* Find an instruction in LIST which has one of the
   type attributes enumerated in TYPE_MASK.  START
   says where to begin the search; the search runs from
   index START down to index 0.

   Return the address of the matching element of LIST, or 0 when no
   element qualifies.  A candidate is skipped when it is already in the
   current group, when its source is the destination of an insn in the
   current group, when its floating point mode differs from one in the
   current group or in recent history, or when it depends on a
   conditional move whose result is not ready yet.

   NOTE: This scheme depends upon the fact that we
         have less than 32 distinct type attributes.  */

/* Union of TMASK bits for the types present in the ready list; set up
   by ultra_build_types_avail.  */
static int ultra_types_avail;

static rtx *
ultra_find_type (type_mask, list, start)
     int type_mask;
     rtx *list;
     int start;
{
  int i;

  /* Short circuit if no such insn exists in the ready
     at the moment.  */
  if ((type_mask & ultra_types_avail) == 0)
    return 0;

  for (i = start; i >= 0; i--)
    {
      rtx insn = list[i];

      if (recog_memoized (insn) >= 0
	  && (TMASK(get_attr_type (insn)) & type_mask))
	{
	  enum machine_mode fpmode = SFmode;
	  rtx pat = 0;
	  int slot;
	  int check_depend = 0;
	  int check_fpmode_conflict = 0;

	  /* The dependency and mode checks below apply only to a plain
	     insn whose pattern is a single SET, and only when TYPE_MASK
	     does not ask for stores.  Note that PAT is assigned as soon
	     as INSN is an INSN, even if the remaining tests fail.  */
	  if (GET_CODE (insn) == INSN
	      && (pat = PATTERN(insn)) != 0
	      && GET_CODE (pat) == SET
	      && !(type_mask & (TMASK (TYPE_STORE) |
				TMASK (TYPE_FPSTORE))))
	    {
	      check_depend = 1;
	      if (GET_MODE (SET_DEST (pat)) == SFmode
		  || GET_MODE (SET_DEST (pat)) == DFmode)
		{
		  fpmode = GET_MODE (SET_DEST (pat));
		  check_fpmode_conflict = 1;
		}
	    }

	  /* Compare the candidate against every insn already placed in
	     the current group.  */
	  slot = 4;
	  while(--slot >= 0)
	    {
	      rtx slot_insn = ultra_pipe.group[slot];
	      rtx slot_pat;

	      /* Already issued, bad dependency, or FPU
		 mode conflict.  SET_SRC (pat) is only evaluated when
		 check_depend is 1, which implies PAT is a SET.  */
	      if (slot_insn != 0
		  && (slot_pat = PATTERN (slot_insn)) != 0
		  && ((insn == slot_insn)
		      || (check_depend == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && ((GET_CODE (SET_DEST (slot_pat)) == REG
			       && GET_CODE (SET_SRC (pat)) == REG
			       && REGNO (SET_DEST (slot_pat)) ==
			            REGNO (SET_SRC (pat)))
			      || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
				  && GET_CODE (SET_SRC (pat)) == SUBREG
				  && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
				       REGNO (SUBREG_REG (SET_SRC (pat)))
				  && SUBREG_WORD (SET_DEST (slot_pat)) ==
				       SUBREG_WORD (SET_SRC (pat)))))
		      || (check_fpmode_conflict == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && (GET_MODE (SET_DEST (slot_pat)) == SFmode
			      || GET_MODE (SET_DEST (slot_pat)) == DFmode)
			  && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
		goto next;
	    }

	  /* Check for peculiar result availability and dispatch
	     interference situations.  These look at earlier groups, so
	     they are skipped until at least one cycle has elapsed.  */
	  if (pat != 0
	      && ultra_cycles_elapsed > 0)
	    {
	      rtx link;

	      /* Reject the candidate if any insn on its LOG_LINKS list
		 is a conditional move whose result is not ready.  */
	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
		{
		  rtx link_insn = XEXP (link, 0);
		  if (GET_CODE (link_insn) == INSN
		      && recog_memoized (link_insn) >= 0
		      && (TMASK (get_attr_type (link_insn)) &
			  (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
		      && ! ultra_cmove_results_ready_p (link_insn))
		    goto next;
		}

	      if (check_fpmode_conflict
		  && ultra_fpmode_conflict_exists (fpmode))
		goto next;
	    }

	  return &list[i];
	}
      /* Candidate rejected; try the next lower index.  */
    next:
      ;
    }
  return 0;
}
6813
6814static void
6815ultra_build_types_avail (ready, n_ready)
6816  rtx *ready;
6817  int n_ready;
6818{
6819  int i = n_ready - 1;
6820
6821  ultra_types_avail = 0;
6822  while(i >= 0)
6823    {
6824      rtx insn = ready[i];
6825
6826      if (recog_memoized (insn) >= 0)
6827	ultra_types_avail |= TMASK (get_attr_type (insn));
6828
6829      i -= 1;
6830    }
6831}
6832
6833/* Place insn pointed to my IP into the pipeline.
6834   Make element THIS of READY be that insn if it
6835   is not already.  TYPE indicates the pipeline class
6836   this insn falls into.  */
6837static void
6838ultra_schedule_insn (ip, ready, this, type)
6839     rtx *ip;
6840     rtx *ready;
6841     int this;
6842     enum ultra_code type;
6843{
6844  int pipe_slot;
6845  char mask = ultra_pipe.free_slot_mask;
6846
6847  /* Obtain free slot.  */
6848  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
6849    if ((mask & (1 << pipe_slot)) != 0)
6850      break;
6851  if (pipe_slot == 4)
6852    abort ();
6853
6854  /* In it goes, and it hasn't been committed yet.  */
6855  ultra_pipe.group[pipe_slot] = *ip;
6856  ultra_pipe.codes[pipe_slot] = type;
6857  ultra_pipe.contents[type] = 1;
6858  if (UMASK (type) &
6859      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6860    ultra_pipe.num_ieu_insns += 1;
6861
6862  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
6863  ultra_pipe.group_size += 1;
6864  ultra_pipe.commit[pipe_slot] = 0;
6865
6866  /* Update ready list.  */
6867  if (ip != &ready[this])
6868    {
6869      rtx temp = *ip;
6870
6871      *ip = ready[this];
6872      ready[this] = temp;
6873    }
6874}
6875
6876/* Advance to the next pipeline group.  */
6877static void
6878ultra_flush_pipeline ()
6879{
6880  ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
6881  ultra_cycles_elapsed += 1;
6882  bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
6883  ultra_pipe.free_slot_mask = 0xf;
6884}
6885
6886static int ultra_reorder_called_this_block;
6887
6888/* Init our data structures for this current block.  */
6889void
6890ultrasparc_sched_init (dump, sched_verbose)
6891     FILE *dump ATTRIBUTE_UNUSED;
6892     int sched_verbose ATTRIBUTE_UNUSED;
6893{
6894  bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
6895  ultra_cur_hist = 0;
6896  ultra_cycles_elapsed = 0;
6897  ultra_reorder_called_this_block = 0;
6898  ultra_pipe.free_slot_mask = 0xf;
6899}
6900
6901/* INSN has been scheduled, update pipeline commit state
6902   and return how many instructions are still to be
6903   scheduled in this group.  */
6904int
6905ultrasparc_variable_issue (insn)
6906     rtx insn;
6907{
6908  struct ultrasparc_pipeline_state *up = &ultra_pipe;
6909  int i, left_to_fire;
6910
6911  left_to_fire = 0;
6912  for (i = 0; i < 4; i++)
6913    {
6914      if (up->group[i] == 0)
6915	continue;
6916
6917      if (up->group[i] == insn)
6918	{
6919	  up->commit[i] = 1;
6920	}
6921      else if (! up->commit[i])
6922	left_to_fire++;
6923    }
6924
6925  return left_to_fire;
6926}
6927
6928/* In actual_hazard_this_instance, we may have yanked some
6929   instructions from the ready list due to conflict cost
6930   adjustments.  If so, and such an insn was in our pipeline
6931   group, remove it and update state.  */
6932static void
6933ultra_rescan_pipeline_state (ready, n_ready)
6934     rtx *ready;
6935     int n_ready;
6936{
6937  struct ultrasparc_pipeline_state *up = &ultra_pipe;
6938  int i;
6939
6940  for (i = 0; i < 4; i++)
6941    {
6942      rtx insn = up->group[i];
6943      int j;
6944
6945      if (! insn)
6946	continue;
6947
6948      /* If it has been committed, then it was removed from
6949	 the ready list because it was actually scheduled,
6950	 and that is not the case we are searching for here.  */
6951      if (up->commit[i] != 0)
6952	continue;
6953
6954      for (j = n_ready - 1; j >= 0; j--)
6955	if (ready[j] == insn)
6956	  break;
6957
6958      /* If we didn't find it, toss it.  */
6959      if (j < 0)
6960	{
6961	  enum ultra_code ucode = up->codes[i];
6962
6963	  up->group[i] = 0;
6964	  up->codes[i] = NONE;
6965	  up->contents[ucode] = 0;
6966	  if (UMASK (ucode) &
6967	      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6968	    up->num_ieu_insns -= 1;
6969
6970	  up->free_slot_mask |= (1 << i);
6971	  up->group_size -= 1;
6972	  up->commit[i] = 0;
6973	}
6974    }
6975}
6976
/* Form the next UltraSPARC issue group from the ready list.

   READY holds N_READY candidate insns; they are examined starting at
   index N_READY - 1 and working downward.  The group being formed is
   tracked in the file-scope state `ultra_pipe', which has four slots
   (see the uses of free_slot_mask, group[] and commit[] below).  When
   SCHED_VERBOSE is nonzero the candidates and the resulting group are
   written to DUMP.

   Insns are placed by ultra_schedule_insn, which is defined elsewhere
   in this file.  NOTE(review): it presumably also moves the chosen
   insn within READY and updates ultra_pipe -- confirm there.  */
void
ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
     FILE *dump;
     int sched_verbose;
     rtx *ready;
     int n_ready;
{
  struct ultrasparc_pipeline_state *up = &ultra_pipe;
  int i, this_insn;

  /* We get called once unnecessarily per block of insns
     scheduled.  */
  if (ultra_reorder_called_this_block == 0)
    {
      ultra_reorder_called_this_block = 1;
      return;
    }

  /* Dump the recognizable candidates, highest index first.  */
  if (sched_verbose)
    {
      int n;

      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
      for (n = n_ready - 1; n >= 0; n--)
	{
	  rtx insn = ready[n];
	  enum ultra_code ucode;

	  if (recog_memoized (insn) < 0)
	    continue;
	  ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
	  /* Entries are separated by a blank; index 0 is printed last
	     and so gets none.  */
	  if (n != 0)
	    fprintf (dump, "%s(%d) ",
		     ultra_code_names[ucode],
		     INSN_UID (insn));
	  else
	    fprintf (dump, "%s(%d)",
		     ultra_code_names[ucode],
		     INSN_UID (insn));
	}
      fprintf (dump, "]\n");
    }

  this_insn = n_ready - 1;

  /* Skip over junk we don't understand.  */
  while ((this_insn >= 0)
	 && recog_memoized (ready[this_insn]) < 0)
    this_insn--;

  /* NOTE(review): presumably records which insn types occur among the
     first this_insn + 1 entries of READY -- see
     ultra_build_types_avail.  */
  ultra_build_types_avail (ready, this_insn + 1);

  while (this_insn >= 0) {
    /* Remembered so that the end of the loop can tell whether this
       pass changed the size of the group.  */
    int old_group_size = up->group_size;

    if (up->group_size != 0)
      {
	int num_committed;

	num_committed = (up->commit[0] + up->commit[1] +
			 up->commit[2] + up->commit[3]);
	/* If nothing has been committed from our group, or all of
	   them have.  Clear out the (current cycle's) pipeline
	   state and start afresh.  */
	if (num_committed == 0
	    || num_committed == up->group_size)
	  {
	    ultra_flush_pipeline ();
	    up = &ultra_pipe;
	    old_group_size = 0;
	  }
	else
	  {
	    /* OK, some ready list insns got requeued and thus removed
	       from the ready list.  Account for this fact.  */
	    ultra_rescan_pipeline_state (ready, n_ready);

	    /* Something "changed", make this look like a newly
	       formed group so the code at the end of the loop
	       knows that progress was in fact made.  */
	    if (up->group_size != old_group_size)
	      old_group_size = 0;
	  }
      }

    if (up->group_size == 0)
      {
	/* If the pipeline is (still) empty and we have any single
	   group insns, get them out now as this is a good time.  */
	rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
				    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
				    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
				   ready, this_insn);
	if (ip)
	  {
	    /* Nothing else is added once a SINGLE insn has been
	       chosen; leave the loop.  */
	    ultra_schedule_insn (ip, ready, this_insn, SINGLE);
	    break;
	  }

	/* If we are not in the process of emptying out the pipe, try to
	   obtain an instruction which must be the first in its group.  */
	ip = ultra_find_type ((TMASK (TYPE_CALL) |
			       TMASK (TYPE_CALL_NO_DELAY_SLOT) |
			       TMASK (TYPE_UNCOND_BRANCH)),
			      ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, IEU1);
	    this_insn--;
	  }
	else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
					 TMASK (TYPE_FPDIVD) |
					 TMASK (TYPE_FPSQRT)),
					ready, this_insn)) != 0)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPM);
	    this_insn--;
	  }
      }

    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
       operation.  We can't do more IEU operations if the first 3 slots are
       all full or we have dispatched two IEU insns already.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2
	&& up->contents[IEU0] == 0
	&& up->contents[IEUN] == 0)
      {
	rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, IEU0);
	    this_insn--;
	  }
      }

    /* If we can, try to find an IEU1 specific or an unnamed
       IEU instruction.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2)
      {
	/* Compares are only candidates while IEU1 is unused.  */
	rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
				    TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
				    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
				   ready, this_insn);
	if (ip)
	  {
	    rtx insn = *ip;

	    /* A compare is placed as IEU1, anything else as IEUN.  */
	    ultra_schedule_insn (ip, ready, this_insn,
				 (!up->contents[IEU1]
				  && get_attr_type (insn) == TYPE_COMPARE)
				 ? IEU1 : IEUN);
	    this_insn--;
	  }
      }

    /* If only one IEU insn has been found, try to find another unnamed
       IEU operation or an IEU1 specific one.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2)
      {
	rtx *ip;
	int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
		     TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));

	if (!up->contents[IEU1])
	  tmask |= TMASK (TYPE_COMPARE);
	ip = ultra_find_type (tmask, ready, this_insn);
	if (ip)
	  {
	    rtx insn = *ip;

	    ultra_schedule_insn (ip, ready, this_insn,
				 (!up->contents[IEU1]
				  && get_attr_type (insn) == TYPE_COMPARE)
				 ? IEU1 : IEUN);
	    this_insn--;
	  }
      }

    /* Try for a load or store, but such an insn can only be issued
       if it is within one of the first 3 slots.  */
    if ((up->free_slot_mask & 0x7) != 0
        && up->contents[LSU] == 0)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
				   TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
				   TMASK (TYPE_FPSTORE)), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, LSU);
	    this_insn--;
	  }
      }

    /* Now find FPU operations, first FPM class.  But not divisions or
       square-roots because those will break the group up.  Unlike all
       the previous types, these can go in any slot.  */
    if (up->free_slot_mask != 0
	&& up->contents[FPM] == 0)
      {
	rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPM);
	    this_insn--;
	  }
      }

    /* Continue on with FPA class if we have not filled the group already.  */
    if (up->free_slot_mask != 0
	&& up->contents[FPA] == 0)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
				    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
				   ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPA);
	    this_insn--;
	  }
      }

    /* Finally, maybe stick a branch in here.  */
    if (up->free_slot_mask != 0
	&& up->contents[CTI] == 0)
      {
	rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);

	/* Try to slip in a branch only if it is one of the
	   next 2 in the ready list.  */
	if (ip && ((&ready[this_insn] - ip) < 2))
	  {
	    ultra_schedule_insn (ip, ready, this_insn, CTI);
	    this_insn--;
	  }
      }

    /* Recompute the group size from the slot mask: a clear bit
       means the slot is occupied.  */
    up->group_size = 0;
    for (i = 0; i < 4; i++)
      if ((up->free_slot_mask & (1 << i)) == 0)
	up->group_size++;

    /* See if we made any progress...  */
    if (old_group_size != up->group_size)
      break;

    /* Clean out the (current cycle's) pipeline state
       and try once more.  If we placed no instructions
       into the pipeline at all, it means a real hard
       conflict exists with some earlier issued instruction
       so we must advance to the next cycle to clear it up.  */
    if (up->group_size == 0)
      {
	ultra_flush_pipeline ();
	up = &ultra_pipe;
      }
    else
      {
	/* The group is non-empty but did not change size; wipe the
	   state by hand and mark all four slots free.  */
	bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
	ultra_pipe.free_slot_mask = 0xf;
      }
  }

  /* Dump the group that was formed, slot 0 first.  */
  if (sched_verbose)
    {
      int n, gsize;

      fprintf (dump, ";;\tUltraSPARC Launched   [");
      /* GSIZE counts the entries still to be printed, so that the
	 last one is written without a trailing blank.  */
      gsize = up->group_size;
      for (n = 0; n < 4; n++)
	{
	  rtx insn = up->group[n];

	  if (! insn)
	    continue;

	  gsize -= 1;
	  if (gsize != 0)
	    fprintf (dump, "%s(%d) ",
		     ultra_code_names[up->codes[n]],
		     INSN_UID (insn));
	  else
	    fprintf (dump, "%s(%d)",
		     ultra_code_names[up->codes[n]],
		     INSN_UID (insn));
	}
      fprintf (dump, "]\n");
    }
}
7268
7269int
7270sparc_issue_rate ()
7271{
7272  switch (sparc_cpu)
7273    {
7274    default:
7275      return 1;
7276    case PROCESSOR_V9:
7277      /* Assume V9 processors are capable of at least dual-issue.  */
7278      return 2;
7279    case PROCESSOR_SUPERSPARC:
7280      return 3;
7281    case PROCESSOR_HYPERSPARC:
7282    case PROCESSOR_SPARCLITE86X:
7283      return 2;
7284    case PROCESSOR_ULTRASPARC:
7285      return 4;
7286    }
7287}
7288
7289static int
7290set_extends(x, insn)
7291     rtx x, insn;
7292{
7293  register rtx pat = PATTERN (insn);
7294
7295  switch (GET_CODE (SET_SRC (pat)))
7296    {
7297      /* Load and some shift instructions zero extend. */
7298    case MEM:
7299    case ZERO_EXTEND:
7300      /* sethi clears the high bits */
7301    case HIGH:
7302      /* LO_SUM is used with sethi.  sethi cleared the high
7303	 bits and the values used with lo_sum are positive */
7304    case LO_SUM:
7305      /* Store flag stores 0 or 1 */
7306    case LT: case LTU:
7307    case GT: case GTU:
7308    case LE: case LEU:
7309    case GE: case GEU:
7310    case EQ:
7311    case NE:
7312      return 1;
7313    case AND:
7314      {
7315	rtx op1 = XEXP (SET_SRC (pat), 1);
7316	if (GET_CODE (op1) == CONST_INT)
7317	  return INTVAL (op1) >= 0;
7318	if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7319	    && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7320	  return 1;
7321	if (GET_CODE (op1) == REG
7322	    && sparc_check_64 ((op1), insn) == 1)
7323	  return 1;
7324      }
7325    case ASHIFT:
7326    case LSHIFTRT:
7327      return GET_MODE (SET_SRC (pat)) == SImode;
7328      /* Positive integers leave the high bits zero. */
7329    case CONST_DOUBLE:
7330      return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7331    case CONST_INT:
7332      return ! (INTVAL (x) & 0x80000000);
7333    case ASHIFTRT:
7334    case SIGN_EXTEND:
7335      return - (GET_MODE (SET_SRC (pat)) == SImode);
7336    default:
7337      return 0;
7338    }
7339}
7340
7341/* We _ought_ to have only one kind per function, but... */
7342static rtx sparc_addr_diff_list;
7343static rtx sparc_addr_list;
7344
7345void
7346sparc_defer_case_vector (lab, vec, diff)
7347     rtx lab, vec;
7348     int diff;
7349{
7350  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7351  if (diff)
7352    sparc_addr_diff_list
7353      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7354  else
7355    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7356}
7357
7358static void
7359sparc_output_addr_vec (vec)
7360     rtx vec;
7361{
7362  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7363  int idx, vlen = XVECLEN (body, 0);
7364
7365#ifdef ASM_OUTPUT_ADDR_VEC_START
7366  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7367#endif
7368
7369#ifdef ASM_OUTPUT_CASE_LABEL
7370  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7371			 NEXT_INSN (lab));
7372#else
7373  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7374#endif
7375
7376  for (idx = 0; idx < vlen; idx++)
7377    {
7378      ASM_OUTPUT_ADDR_VEC_ELT
7379	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7380    }
7381
7382#ifdef ASM_OUTPUT_ADDR_VEC_END
7383  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7384#endif
7385}
7386
7387static void
7388sparc_output_addr_diff_vec (vec)
7389     rtx vec;
7390{
7391  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7392  rtx base = XEXP (XEXP (body, 0), 0);
7393  int idx, vlen = XVECLEN (body, 1);
7394
7395#ifdef ASM_OUTPUT_ADDR_VEC_START
7396  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7397#endif
7398
7399#ifdef ASM_OUTPUT_CASE_LABEL
7400  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7401			 NEXT_INSN (lab));
7402#else
7403  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7404#endif
7405
7406  for (idx = 0; idx < vlen; idx++)
7407    {
7408      ASM_OUTPUT_ADDR_DIFF_ELT
7409        (asm_out_file,
7410         body,
7411         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7412         CODE_LABEL_NUMBER (base));
7413    }
7414
7415#ifdef ASM_OUTPUT_ADDR_VEC_END
7416  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7417#endif
7418}
7419
7420static void
7421sparc_output_deferred_case_vectors ()
7422{
7423  rtx t;
7424  int align;
7425
7426  if (sparc_addr_list == NULL_RTX
7427      && sparc_addr_diff_list == NULL_RTX)
7428    return;
7429
7430  /* Align to cache line in the function's code section.  */
7431  function_section (current_function_decl);
7432
7433  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7434  if (align > 0)
7435    ASM_OUTPUT_ALIGN (asm_out_file, align);
7436
7437  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7438    sparc_output_addr_vec (XEXP (t, 0));
7439  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7440    sparc_output_addr_diff_vec (XEXP (t, 0));
7441
7442  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7443}
7444
7445/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7446   unknown.  Return 1 if the high bits are zero, -1 if the register is
7447   sign extended.  */
7448int
7449sparc_check_64 (x, insn)
7450     rtx x, insn;
7451{
7452  /* If a register is set only once it is safe to ignore insns this
7453     code does not know how to handle.  The loop will either recognize
7454     the single set and return the correct value or fail to recognize
7455     it and return 0.  */
7456  int set_once = 0;
7457
7458  if (GET_CODE (x) == REG
7459      && flag_expensive_optimizations
7460      && REG_N_SETS (REGNO (x)) == 1)
7461    set_once = 1;
7462
7463  if (insn == 0)
7464    {
7465      if (set_once)
7466	insn = get_last_insn_anywhere ();
7467      else
7468	return 0;
7469    }
7470
7471  while ((insn = PREV_INSN (insn)))
7472    {
7473      switch (GET_CODE (insn))
7474	{
7475	case JUMP_INSN:
7476	case NOTE:
7477	  break;
7478	case CODE_LABEL:
7479	case CALL_INSN:
7480	default:
7481	  if (! set_once)
7482	    return 0;
7483	  break;
7484	case INSN:
7485	  {
7486	    rtx pat = PATTERN (insn);
7487	    if (GET_CODE (pat) != SET)
7488	      return 0;
7489	    if (rtx_equal_p (x, SET_DEST (pat)))
7490	      return set_extends (x, insn);
7491	    if (reg_overlap_mentioned_p (SET_DEST (pat), x))
7492	      return 0;
7493	  }
7494	}
7495    }
7496  return 0;
7497}
7498
7499char *
7500sparc_v8plus_shift (operands, insn, opcode)
7501     rtx *operands;
7502     rtx insn;
7503     char *opcode;
7504{
7505  static char asm_code[60];
7506
7507  if (GET_CODE (operands[3]) == SCRATCH)
7508    operands[3] = operands[0];
7509  if (GET_CODE (operands[1]) == CONST_INT)
7510    {
7511      output_asm_insn ("mov %1,%3", operands);
7512    }
7513  else
7514    {
7515      output_asm_insn ("sllx %H1,32,%3", operands);
7516      if (sparc_check_64 (operands[1], insn) <= 0)
7517	output_asm_insn ("srl %L1,0,%L1", operands);
7518      output_asm_insn ("or %L1,%3,%3", operands);
7519    }
7520
7521  strcpy(asm_code, opcode);
7522  if (which_alternative != 2)
7523    return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
7524  else
7525    return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
7526}
7527
7528
7529/* Return 1 if DEST and SRC reference only global and in registers. */
7530
7531int
7532sparc_return_peephole_ok (dest, src)
7533     rtx dest, src;
7534{
7535  if (! TARGET_V9)
7536    return 0;
7537  if (current_function_uses_only_leaf_regs)
7538    return 0;
7539  if (GET_CODE (src) != CONST_INT
7540      && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
7541    return 0;
7542  return IN_OR_GLOBAL_P (dest);
7543}
7544
7545/* Output assembler code to FILE to increment profiler label # LABELNO
7546   for profiling a function entry.
7547
7548   32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
7549   during profiling so we need to save/restore it around the call to mcount.
7550   We're guaranteed that a save has just been done, and we use the space
7551   allocated for intreg/fpreg value passing.  */
7552
7553void
7554sparc_function_profiler (file, labelno)
7555     FILE *file;
7556     int labelno;
7557{
7558  char buf[32];
7559  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7560
7561  if (! TARGET_ARCH64)
7562    fputs ("\tst\t%g2,[%fp-4]\n", file);
7563
7564  fputs ("\tsethi\t%hi(", file);
7565  assemble_name (file, buf);
7566  fputs ("),%o0\n", file);
7567
7568  fputs ("\tcall\t", file);
7569  assemble_name (file, MCOUNT_FUNCTION);
7570  putc ('\n', file);
7571
7572  fputs ("\t or\t%o0,%lo(", file);
7573  assemble_name (file, buf);
7574  fputs ("),%o0\n", file);
7575
7576  if (! TARGET_ARCH64)
7577    fputs ("\tld\t[%fp-4],%g2\n", file);
7578}
7579
7580
7581/* The following macro shall output assembler code to FILE
7582   to initialize basic-block profiling.
7583
7584   If profile_block_flag == 2
7585
7586	Output code to call the subroutine `__bb_init_trace_func'
7587	and pass two parameters to it. The first parameter is
7588	the address of a block allocated in the object module.
7589	The second parameter is the number of the first basic block
7590	of the function.
7591
7592	The name of the block is a local symbol made with this statement:
7593
7594	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7595
7596	Of course, since you are writing the definition of
7597	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7598	can take a short cut in the definition of this macro and use the
7599	name that you know will result.
7600
7601	The number of the first basic block of the function is
7602	passed to the macro in BLOCK_OR_LABEL.
7603
7604	If described in a virtual assembler language the code to be
7605	output looks like:
7606
7607		parameter1 <- LPBX0
7608		parameter2 <- BLOCK_OR_LABEL
7609		call __bb_init_trace_func
7610
7611    else if profile_block_flag != 0
7612
7613	Output code to call the subroutine `__bb_init_func'
7614	and pass one single parameter to it, which is the same
7615	as the first parameter to `__bb_init_trace_func'.
7616
7617	The first word of this parameter is a flag which will be nonzero if
7618	the object module has already been initialized.  So test this word
7619	first, and do not call `__bb_init_func' if the flag is nonzero.
7620	Note: When profile_block_flag == 2 the test need not be done
7621	but `__bb_init_trace_func' *must* be called.
7622
7623	BLOCK_OR_LABEL may be used to generate a label number as a
7624	branch destination in case `__bb_init_func' will not be called.
7625
7626	If described in a virtual assembler language the code to be
7627	output looks like:
7628
7629		cmp (LPBX0),0
7630		jne local_label
7631		parameter1 <- LPBX0
7632		call __bb_init_func
7633	    local_label:
7634
7635*/
7636
7637void
7638sparc_function_block_profiler(file, block_or_label)
7639     FILE *file;
7640     int block_or_label;
7641{
7642  char LPBX[32];
7643  ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7644
7645  if (profile_block_flag == 2)
7646    {
7647      fputs ("\tsethi\t%hi(", file);
7648      assemble_name (file, LPBX);
7649      fputs ("),%o0\n", file);
7650
7651      fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);
7652
7653      fputs ("\tor\t%o0,%lo(", file);
7654      assemble_name (file, LPBX);
7655      fputs ("),%o0\n", file);
7656
7657      fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);
7658
7659      fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
7660    }
7661  else if (profile_block_flag != 0)
7662    {
7663      char LPBY[32];
7664      ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);
7665
7666      fputs ("\tsethi\t%hi(", file);
7667      assemble_name (file, LPBX);
7668      fputs ("),%o0\n", file);
7669
7670      fputs ("\tld\t[%lo(", file);
7671      assemble_name (file, LPBX);
7672      fputs (")+%o0],%o1\n", file);
7673
7674      fputs ("\ttst\t%o1\n", file);
7675
7676      if (TARGET_V9)
7677	{
7678	  fputs ("\tbne,pn\t%icc,", file);
7679	  assemble_name (file, LPBY);
7680	  putc ('\n', file);
7681	}
7682      else
7683	{
7684	  fputs ("\tbne\t", file);
7685	  assemble_name (file, LPBY);
7686	  putc ('\n', file);
7687	}
7688
7689      fputs ("\t or\t%o0,%lo(", file);
7690      assemble_name (file, LPBX);
7691      fputs ("),%o0\n", file);
7692
7693      fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);
7694
7695      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
7696    }
7697}
7698
7699/* The following macro shall output assembler code to FILE
7700   to increment a counter associated with basic block number BLOCKNO.
7701
7702   If profile_block_flag == 2
7703
7704	Output code to initialize the global structure `__bb' and
7705	call the function `__bb_trace_func' which will increment the
7706	counter.
7707
7708	`__bb' consists of two words. In the first word the number
7709	of the basic block has to be stored. In the second word
7710	the address of a block allocated in the object module
7711	has to be stored.
7712
7713	The basic block number is given by BLOCKNO.
7714
7715	The address of the block is given by the label created with
7716
7717	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7718
7719	by FUNCTION_BLOCK_PROFILER.
7720
7721	Of course, since you are writing the definition of
7722	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7723	can take a short cut in the definition of this macro and use the
7724	name that you know will result.
7725
7726	If described in a virtual assembler language the code to be
7727	output looks like:
7728
7729		move BLOCKNO -> (__bb)
7730		move LPBX0 -> (__bb+4)
7731		call __bb_trace_func
7732
7733	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register. To guarantee
	this, you must output code to save and restore registers
7736	either in this macro or in the macros MACHINE_STATE_SAVE
7737	and MACHINE_STATE_RESTORE. The last two macros will be
7738	used in the function `__bb_trace_func', so you must make
7739	sure that the function prologue does not change any
7740	register prior to saving it with MACHINE_STATE_SAVE.
7741
7742   else if profile_block_flag != 0
7743
7744	Output code to increment the counter directly.
7745	Basic blocks are numbered separately from zero within each
7746	compiled object module. The count associated with block number
7747	BLOCKNO is at index BLOCKNO in an array of words; the name of
7748	this array is a local symbol made with this statement:
7749
7750	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
7751
7752	Of course, since you are writing the definition of
7753	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7754	can take a short cut in the definition of this macro and use the
7755	name that you know will result.
7756
7757	If described in a virtual assembler language, the code to be
7758	output looks like:
7759
7760		inc (LPBX2+4*BLOCKNO)
7761
7762*/
7763
7764void
7765sparc_block_profiler(file, blockno)
7766     FILE *file;
7767     int blockno;
7768{
7769  char LPBX[32];
7770
7771  if (profile_block_flag == 2)
7772    {
7773      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7774
7775      fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
7776      fprintf (file, "\tsethi\t%%hi(%d),%%g2\n", blockno);
7777      fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
7778      fprintf (file, "\tor\t%%g2,%%lo(%d),%%g2\n", blockno);
7779
7780      fputs ("\tst\t%g2,[%g1]\n", file);
7781
7782      fputs ("\tsethi\t%hi(", file);
7783      assemble_name (file, LPBX);
7784      fputs ("),%g2\n", file);
7785
7786      fputs ("\tor\t%g2,%lo(", file);
7787      assemble_name (file, LPBX);
7788      fputs ("),%g2\n", file);
7789
7790      fputs ("\tst\t%g2,[%g1+4]\n", file);
7791      fputs ("\tmov\t%o7,%g2\n", file);
7792
7793      fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);
7794
7795      fputs ("\tmov\t%g2,%o7\n", file);
7796    }
7797  else if (profile_block_flag != 0)
7798    {
7799      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);
7800
7801      fputs ("\tsethi\t%hi(", file);
7802      assemble_name (file, LPBX);
7803      fprintf (file, "+%d),%%g1\n", blockno*4);
7804
7805      fputs ("\tld\t[%g1+%lo(", file);
7806      assemble_name (file, LPBX);
7807      fprintf (file, "+%d)],%%g2\n", blockno*4);
7808
7809      fputs ("\tadd\t%g2,1,%g2\n", file);
7810
7811      fputs ("\tst\t%g2,[%g1+%lo(", file);
7812      assemble_name (file, LPBX);
7813      fprintf (file, "+%d)]\n", blockno*4);
7814    }
7815}
7816
7817/* The following macro shall output assembler code to FILE
7818   to indicate a return from function during basic-block profiling.
7819
7820   If profile_block_flag == 2:
7821
7822	Output assembler code to call function `__bb_trace_ret'.
7823
7824	Note that function `__bb_trace_ret' must not change the
	machine state, especially the flag register. To guarantee
	this, you must output code to save and restore registers
7827	either in this macro or in the macros MACHINE_STATE_SAVE_RET
7828	and MACHINE_STATE_RESTORE_RET. The last two macros will be
7829	used in the function `__bb_trace_ret', so you must make
7830	sure that the function prologue does not change any
7831	register prior to saving it with MACHINE_STATE_SAVE_RET.
7832
7833   else if profile_block_flag != 0:
7834
7835	The macro will not be used, so it need not distinguish
7836	these cases.
7837*/
7838
7839void
7840sparc_function_block_profiler_exit(file)
7841     FILE *file;
7842{
7843  if (profile_block_flag == 2)
7844    fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
7845  else
7846    abort ();
7847}
7848