sparc.c revision 52284
1/* Subroutines for insn-output.c for Sun SPARC.
2   Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
3   Contributed by Michael Tiemann (tiemann@cygnus.com)
4   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5   at Cygnus Support.
6
7This file is part of GNU CC.
8
9GNU CC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GNU CC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GNU CC; see the file COPYING.  If not, write to
21the Free Software Foundation, 59 Temple Place - Suite 330,
22Boston, MA 02111-1307, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "tree.h"
27#include "rtl.h"
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
36#include "flags.h"
37#include "expr.h"
38#include "recog.h"
39#include "toplev.h"
40
/* Nonzero when the caller emitted an "unimp" insn right after the call
   insn.  v8 code does this when calling a function that returns a
   structure; v9 has no such convention.  This test must stay identical
   to the one applied at the call site.  */

#define SKIP_CALLERS_UNIMP_P						\
  (! TARGET_ARCH64							\
   && current_function_returns_struct					\
   && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
   && (INTEGER_CST							\
       == TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))))
51
52/* Global variables for machine-dependent things.  */
53
54/* Size of frame.  Need to know this to emit return insns from leaf procedures.
55   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
56   reload pass.  This is important as the value is later used in insn
57   scheduling (to see what can go in a delay slot).
58   APPARENT_FSIZE is the size of the stack less the register save area and less
59   the outgoing argument area.  It is used when saving call preserved regs.  */
60static int apparent_fsize;
61static int actual_fsize;
62
63/* Save the operands last given to a compare for use when we
64   generate a scc or bcc insn.  */
65
66rtx sparc_compare_op0, sparc_compare_op1;
67
68/* We may need an epilogue if we spill too many registers.
69   If this is non-zero, then we branch here for the epilogue.  */
70static rtx leaf_label;
71
#ifdef LEAF_REGISTERS

/* Table describing how input registers are mapped to output registers,
   indexed by register number.  FRAME_POINTER_REGNUM cannot be remapped
   through this table in order to eliminate it; -fomit-frame-pointer is
   required for that.
   NOTE(review): entries of -1 presumably mark registers that have no
   mapping -- confirm against the users of this table.  */
char leaf_reg_remap[] =
{
  /* Registers 0 - 7.  */
   0,  1,  2,  3,  4,  5,  6,  7,
  /* Registers 8 - 15.  */
  -1, -1, -1, -1, -1, -1, 14, -1,
  /* Registers 16 - 23.  */
  -1, -1, -1, -1, -1, -1, -1, -1,
  /* Registers 24 - 31.  */
   8,  9, 10, 11, 12, 13, -1, 15,

  /* Registers 32 - 100 map to themselves.  */
  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100
};

#endif
95
/* Where we pretend the frame pointer points.  Normally the name is
   "%fp"; in a leaf procedure it is "%sp+something".  The "something"
   is kept in FRAME_BASE_OFFSET rather than in the name because it may
   be too big for reg+constant addressing.  */

static int frame_base_offset;
static const char *frame_base_name;
103
104static rtx pic_setup_code	PROTO((void));
105static void sparc_init_modes	PROTO((void));
106static int save_regs		PROTO((FILE *, int, int, const char *,
107				       int, int, int));
108static int restore_regs		PROTO((FILE *, int, int, const char *, int, int));
109static void build_big_number	PROTO((FILE *, int, const char *));
110static int function_arg_slotno	PROTO((const CUMULATIVE_ARGS *,
111				       enum machine_mode, tree, int, int,
112				       int *, int *));
113
114static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
115static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
116static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));
117
118static void sparc_output_addr_vec PROTO((rtx));
119static void sparc_output_addr_diff_vec PROTO((rtx));
120static void sparc_output_deferred_case_vectors PROTO((void));
121
122
/* Declared here only when DWARF 2 debugging output is configured.  */
#if defined (DWARF2_DEBUGGING_INFO)
extern char *dwarf2out_cfi_label ();
#endif
126
127/* Option handling.  */
128
129/* Code model option as passed by user.  */
130const char *sparc_cmodel_string;
131/* Parsed value.  */
132enum cmodel sparc_cmodel;
133
/* Alignment options exactly as the user passed them.  */
const char *sparc_align_funcs_string;
const char *sparc_align_jumps_string;
const char *sparc_align_loops_string;

/* The same options after parsing, each stored as a power of two.  */
int sparc_align_funcs;
int sparc_align_jumps;
int sparc_align_loops;
143
144struct sparc_cpu_select sparc_select[] =
145{
146  /* switch	name,		tune	arch */
147  { (char *)0,	"default",	1,	1 },
148  { (char *)0,	"-mcpu=",	1,	1 },
149  { (char *)0,	"-mtune=",	1,	0 },
150  { 0, 0, 0, 0 }
151};
152
153/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
154enum processor_type sparc_cpu;
155
156/* Validate and override various options, and do some machine dependent
157   initialization.  */
158
159void
160sparc_override_options ()
161{
162  static struct code_model {
163    const char *name;
164    int value;
165  } cmodels[] = {
166    { "32", CM_32 },
167    { "medlow", CM_MEDLOW },
168    { "medmid", CM_MEDMID },
169    { "medany", CM_MEDANY },
170    { "embmedany", CM_EMBMEDANY },
171    { 0, 0 }
172  };
173  struct code_model *cmodel;
174  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
175  static struct cpu_default {
176    int cpu;
177    const char *name;
178  } cpu_default[] = {
179    /* There must be one entry here for each TARGET_CPU value.  */
180    { TARGET_CPU_sparc, "cypress" },
181    { TARGET_CPU_sparclet, "tsc701" },
182    { TARGET_CPU_sparclite, "f930" },
183    { TARGET_CPU_v8, "v8" },
184    { TARGET_CPU_hypersparc, "hypersparc" },
185    { TARGET_CPU_sparclite86x, "sparclite86x" },
186    { TARGET_CPU_supersparc, "supersparc" },
187    { TARGET_CPU_v9, "v9" },
188    { TARGET_CPU_ultrasparc, "ultrasparc" },
189    { 0, 0 }
190  };
191  struct cpu_default *def;
192  /* Table of values for -m{cpu,tune}=.  */
193  static struct cpu_table {
194    const char *name;
195    enum processor_type processor;
196    int disable;
197    int enable;
198  } cpu_table[] = {
199    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
200    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
201    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
202    /* TI TMS390Z55 supersparc */
203    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
204    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
205    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
206       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
207    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
208    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
209    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
210    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, MASK_V8 },
211    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
212    /* TEMIC sparclet */
213    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
214    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
215    /* TI ultrasparc */
216    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
217    { 0, 0, 0, 0 }
218  };
219  struct cpu_table *cpu;
220  struct sparc_cpu_select *sel;
221  int fpu;
222
223#ifndef SPARC_BI_ARCH
224  /* Check for unsupported architecture size.  */
225  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
226    {
227      error ("%s is not supported by this configuration",
228	     DEFAULT_ARCH32_P ? "-m64" : "-m32");
229    }
230#endif
231
232  /* At the moment we don't allow different pointer size and architecture */
233  if (! TARGET_64BIT != ! TARGET_PTR64)
234    {
235      error ("-mptr%d not allowed on -m%d",
236      	     TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
237      if (TARGET_64BIT)
238    	target_flags |= MASK_PTR64;
239      else
240        target_flags &= ~MASK_PTR64;
241    }
242
243  /* Code model selection.  */
244  sparc_cmodel = SPARC_DEFAULT_CMODEL;
245
246#ifdef SPARC_BI_ARCH
247  if (TARGET_ARCH32)
248    sparc_cmodel = CM_32;
249#endif
250
251  if (sparc_cmodel_string != NULL)
252    {
253      if (TARGET_ARCH64)
254	{
255	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
256	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
257	      break;
258	  if (cmodel->name == NULL)
259	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
260	  else
261	    sparc_cmodel = cmodel->value;
262	}
263      else
264	error ("-mcmodel= is not supported on 32 bit systems");
265    }
266
267  fpu = TARGET_FPU; /* save current -mfpu status */
268
269  /* Set the default CPU.  */
270  for (def = &cpu_default[0]; def->name; ++def)
271    if (def->cpu == TARGET_CPU_DEFAULT)
272      break;
273  if (! def->name)
274    abort ();
275  sparc_select[0].string = def->name;
276
277  for (sel = &sparc_select[0]; sel->name; ++sel)
278    {
279      if (sel->string)
280	{
281	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
282	    if (! strcmp (sel->string, cpu->name))
283	      {
284		if (sel->set_tune_p)
285		  sparc_cpu = cpu->processor;
286
287		if (sel->set_arch_p)
288		  {
289		    target_flags &= ~cpu->disable;
290		    target_flags |= cpu->enable;
291		  }
292		break;
293	      }
294
295	  if (! cpu->name)
296	    error ("bad value (%s) for %s switch", sel->string, sel->name);
297	}
298    }
299
300  /* If -mfpu or -mno-fpu was explicitly used, don't override with
301     the processor default.  */
302  if (TARGET_FPU_SET)
303    target_flags = (target_flags & ~MASK_FPU) | fpu;
304
305  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
306  if (TARGET_V9 && TARGET_ARCH32)
307    target_flags |= MASK_DEPRECATED_V8_INSNS;
308
309  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
310  if (! TARGET_V9 || TARGET_ARCH64)
311    target_flags &= ~MASK_V8PLUS;
312
313  /* Don't use stack biasing in 32 bit mode.  */
314  if (TARGET_ARCH32)
315    target_flags &= ~MASK_STACK_BIAS;
316
317  /* Don't allow -mvis if FPU is disabled.  */
318  if (! TARGET_FPU)
319    target_flags &= ~MASK_VIS;
320
321  /* Validate -malign-loops= value, or provide default.  */
322  if (sparc_align_loops_string)
323    {
324      sparc_align_loops = exact_log2 (atoi (sparc_align_loops_string));
325      if (sparc_align_loops < 2 || sparc_align_loops > 7)
326	fatal ("-malign-loops=%s is not between 4 and 128 or is not a power of two",
327	       sparc_align_loops_string);
328    }
329  else
330    {
331      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
332	 its 0.  This sounds a bit kludgey.  */
333      sparc_align_loops = 0;
334    }
335
336  /* Validate -malign-jumps= value, or provide default.  */
337  if (sparc_align_jumps_string)
338    {
339      sparc_align_jumps = exact_log2 (atoi (sparc_align_jumps_string));
340      if (sparc_align_jumps < 2 || sparc_align_loops > 7)
341	fatal ("-malign-jumps=%s is not between 4 and 128 or is not a power of two",
342	       sparc_align_jumps_string);
343    }
344  else
345    {
346      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
347	 its 0.  This sounds a bit kludgey.  */
348      sparc_align_jumps = 0;
349    }
350
351  /* Validate -malign-functions= value, or provide default. */
352  if (sparc_align_funcs_string)
353    {
354      sparc_align_funcs = exact_log2 (atoi (sparc_align_funcs_string));
355      if (sparc_align_funcs < 2 || sparc_align_loops > 7)
356	fatal ("-malign-functions=%s is not between 4 and 128 or is not a power of two",
357	       sparc_align_funcs_string);
358    }
359  else
360    sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;
361
362  /* Validate PCC_STRUCT_RETURN.  */
363  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
364    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
365
366  /* Do various machine dependent initializations.  */
367  sparc_init_modes ();
368
369  if ((profile_flag || profile_block_flag)
370      && sparc_cmodel != CM_MEDLOW)
371    {
372      error ("profiling does not support code models other than medlow");
373    }
374}
375
376/* Miscellaneous utilities.  */
377
378/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
379   or branch on register contents instructions.  */
380
381int
382v9_regcmp_p (code)
383     enum rtx_code code;
384{
385  return (code == EQ || code == NE || code == GE || code == LT
386	  || code == LE || code == GT);
387}
388
389
390/* Operand constraints.  */
391
392/* Return non-zero only if OP is a register of mode MODE,
393   or const0_rtx.  Don't allow const0_rtx if TARGET_LIVE_G0 because
394   %g0 may contain anything.  */
395
396int
397reg_or_0_operand (op, mode)
398     rtx op;
399     enum machine_mode mode;
400{
401  if (register_operand (op, mode))
402    return 1;
403  if (TARGET_LIVE_G0)
404    return 0;
405  if (op == const0_rtx)
406    return 1;
407  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
408      && CONST_DOUBLE_HIGH (op) == 0
409      && CONST_DOUBLE_LOW (op) == 0)
410    return 1;
411  if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
412      && GET_CODE (op) == CONST_DOUBLE
413      && fp_zero_operand (op))
414    return 1;
415  return 0;
416}
417
418/* Nonzero if OP is a floating point value with value 0.0.  */
419
420int
421fp_zero_operand (op)
422     rtx op;
423{
424  REAL_VALUE_TYPE r;
425
426  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
427  return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
428}
429
430/* Nonzero if OP is an integer register.  */
431
432int
433intreg_operand (op, mode)
434     rtx op;
435     enum machine_mode mode ATTRIBUTE_UNUSED;
436{
437  return (register_operand (op, SImode)
438	  || (TARGET_ARCH64 && register_operand (op, DImode)));
439}
440
441/* Nonzero if OP is a floating point condition code register.  */
442
443int
444fcc_reg_operand (op, mode)
445     rtx op;
446     enum machine_mode mode;
447{
448  /* This can happen when recog is called from combine.  Op may be a MEM.
449     Fail instead of calling abort in this case.  */
450  if (GET_CODE (op) != REG)
451    return 0;
452
453  if (mode != VOIDmode && mode != GET_MODE (op))
454    return 0;
455  if (mode == VOIDmode
456      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
457    return 0;
458
459#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
460  if (reg_renumber == 0)
461    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
462  return REGNO_OK_FOR_CCFP_P (REGNO (op));
463#else
464  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
465#endif
466}
467
468/* Nonzero if OP is an integer or floating point condition code register.  */
469
470int
471icc_or_fcc_reg_operand (op, mode)
472     rtx op;
473     enum machine_mode mode;
474{
475  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
476    {
477      if (mode != VOIDmode && mode != GET_MODE (op))
478	return 0;
479      if (mode == VOIDmode
480	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
481	return 0;
482      return 1;
483    }
484
485  return fcc_reg_operand (op, mode);
486}
487
488/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
489int
490restore_operand (op, mode)
491     rtx op;
492     enum machine_mode mode;
493{
494  return (GET_CODE (op) == REG && GET_MODE (op) == mode
495	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
496}
497
498/* Call insn on SPARC can take a PC-relative constant address, or any regular
499   memory address.  */
500
501int
502call_operand (op, mode)
503     rtx op;
504     enum machine_mode mode;
505{
506  if (GET_CODE (op) != MEM)
507    abort ();
508  op = XEXP (op, 0);
509  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
510}
511
512int
513call_operand_address (op, mode)
514     rtx op;
515     enum machine_mode mode;
516{
517  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
518}
519
520/* Returns 1 if OP is either a symbol reference or a sum of a symbol
521   reference and a constant.  */
522
523int
524symbolic_operand (op, mode)
525     register rtx op;
526     enum machine_mode mode;
527{
528  switch (GET_CODE (op))
529    {
530    case SYMBOL_REF:
531    case LABEL_REF:
532      return 1;
533
534    case CONST:
535      op = XEXP (op, 0);
536      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
537	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
538	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
539
540      /* ??? This clause seems to be irrelevant.  */
541    case CONST_DOUBLE:
542      return GET_MODE (op) == mode;
543
544    default:
545      return 0;
546    }
547}
548
549/* Return truth value of statement that OP is a symbolic memory
550   operand of mode MODE.  */
551
552int
553symbolic_memory_operand (op, mode)
554     rtx op;
555     enum machine_mode mode ATTRIBUTE_UNUSED;
556{
557  if (GET_CODE (op) == SUBREG)
558    op = SUBREG_REG (op);
559  if (GET_CODE (op) != MEM)
560    return 0;
561  op = XEXP (op, 0);
562  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
563	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
564}
565
566/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */
567
568int
569label_ref_operand (op, mode)
570     rtx op;
571     enum machine_mode mode;
572{
573  if (GET_CODE (op) != LABEL_REF)
574    return 0;
575  if (GET_MODE (op) != mode)
576    return 0;
577  return 1;
578}
579
580/* Return 1 if the operand is an argument used in generating pic references
581   in either the medium/low or medium/anywhere code models of sparc64.  */
582
583int
584sp64_medium_pic_operand (op, mode)
585     rtx op;
586     enum machine_mode mode ATTRIBUTE_UNUSED;
587{
588  /* Check for (const (minus (symbol_ref:GOT)
589                             (const (minus (label) (pc))))).  */
590  if (GET_CODE (op) != CONST)
591    return 0;
592  op = XEXP (op, 0);
593  if (GET_CODE (op) != MINUS)
594    return 0;
595  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
596    return 0;
597  /* ??? Ensure symbol is GOT.  */
598  if (GET_CODE (XEXP (op, 1)) != CONST)
599    return 0;
600  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
601    return 0;
602  return 1;
603}
604
605/* Return 1 if the operand is a data segment reference.  This includes
606   the readonly data segment, or in other words anything but the text segment.
607   This is needed in the medium/anywhere code model on v9.  These values
608   are accessed with EMBMEDANY_BASE_REG.  */
609
610int
611data_segment_operand (op, mode)
612     rtx op;
613     enum machine_mode mode ATTRIBUTE_UNUSED;
614{
615  switch (GET_CODE (op))
616    {
617    case SYMBOL_REF :
618      return ! SYMBOL_REF_FLAG (op);
619    case PLUS :
620      /* Assume canonical format of symbol + constant.
621	 Fall through.  */
622    case CONST :
623      return data_segment_operand (XEXP (op, 0));
624    default :
625      return 0;
626    }
627}
628
629/* Return 1 if the operand is a text segment reference.
630   This is needed in the medium/anywhere code model on v9.  */
631
632int
633text_segment_operand (op, mode)
634     rtx op;
635     enum machine_mode mode ATTRIBUTE_UNUSED;
636{
637  switch (GET_CODE (op))
638    {
639    case LABEL_REF :
640      return 1;
641    case SYMBOL_REF :
642      return SYMBOL_REF_FLAG (op);
643    case PLUS :
644      /* Assume canonical format of symbol + constant.
645	 Fall through.  */
646    case CONST :
647      return text_segment_operand (XEXP (op, 0));
648    default :
649      return 0;
650    }
651}
652
653/* Return 1 if the operand is either a register or a memory operand that is
654   not symbolic.  */
655
656int
657reg_or_nonsymb_mem_operand (op, mode)
658    register rtx op;
659    enum machine_mode mode;
660{
661  if (register_operand (op, mode))
662    return 1;
663
664  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
665    return 1;
666
667  return 0;
668}
669
670int
671splittable_symbolic_memory_operand (op, mode)
672     rtx op;
673     enum machine_mode mode ATTRIBUTE_UNUSED;
674{
675  if (GET_CODE (op) != MEM)
676    return 0;
677  if (! symbolic_operand (XEXP (op, 0), Pmode))
678    return 0;
679  return 1;
680}
681
682int
683splittable_immediate_memory_operand (op, mode)
684     rtx op;
685     enum machine_mode mode ATTRIBUTE_UNUSED;
686{
687  if (GET_CODE (op) != MEM)
688    return 0;
689  if (! immediate_operand (XEXP (op, 0), Pmode))
690    return 0;
691  return 1;
692}
693
694/* Return truth value of whether OP is EQ or NE.  */
695
696int
697eq_or_neq (op, mode)
698     rtx op;
699     enum machine_mode mode ATTRIBUTE_UNUSED;
700{
701  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
702}
703
704/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
705   or LTU for non-floating-point.  We handle those specially.  */
706
707int
708normal_comp_operator (op, mode)
709     rtx op;
710     enum machine_mode mode ATTRIBUTE_UNUSED;
711{
712  enum rtx_code code = GET_CODE (op);
713
714  if (GET_RTX_CLASS (code) != '<')
715    return 0;
716
717  if (GET_MODE (XEXP (op, 0)) == CCFPmode
718      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
719    return 1;
720
721  return (code != NE && code != EQ && code != GEU && code != LTU);
722}
723
724/* Return 1 if this is a comparison operator.  This allows the use of
725   MATCH_OPERATOR to recognize all the branch insns.  */
726
727int
728noov_compare_op (op, mode)
729    register rtx op;
730    enum machine_mode mode ATTRIBUTE_UNUSED;
731{
732  enum rtx_code code = GET_CODE (op);
733
734  if (GET_RTX_CLASS (code) != '<')
735    return 0;
736
737  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
738    /* These are the only branches which work with CC_NOOVmode.  */
739    return (code == EQ || code == NE || code == GE || code == LT);
740  return 1;
741}
742
743/* Nonzero if OP is a comparison operator suitable for use in v9
744   conditional move or branch on register contents instructions.  */
745
746int
747v9_regcmp_op (op, mode)
748     register rtx op;
749     enum machine_mode mode ATTRIBUTE_UNUSED;
750{
751  enum rtx_code code = GET_CODE (op);
752
753  if (GET_RTX_CLASS (code) != '<')
754    return 0;
755
756  return v9_regcmp_p (code);
757}
758
759/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
760
761int
762extend_op (op, mode)
763     rtx op;
764     enum machine_mode mode ATTRIBUTE_UNUSED;
765{
766  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
767}
768
769/* Return nonzero if OP is an operator of mode MODE which can set
770   the condition codes explicitly.  We do not include PLUS and MINUS
771   because these require CC_NOOVmode, which we handle explicitly.  */
772
773int
774cc_arithop (op, mode)
775     rtx op;
776     enum machine_mode mode ATTRIBUTE_UNUSED;
777{
778  if (GET_CODE (op) == AND
779      || GET_CODE (op) == IOR
780      || GET_CODE (op) == XOR)
781    return 1;
782
783  return 0;
784}
785
786/* Return nonzero if OP is an operator of mode MODE which can bitwise
787   complement its second operand and set the condition codes explicitly.  */
788
789int
790cc_arithopn (op, mode)
791     rtx op;
792     enum machine_mode mode ATTRIBUTE_UNUSED;
793{
794  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
795     and (xor ... (not ...)) to (not (xor ...)).   */
796  return (GET_CODE (op) == AND
797	  || GET_CODE (op) == IOR);
798}
799
800/* Return true if OP is a register, or is a CONST_INT that can fit in a
801   signed 13 bit immediate field.  This is an acceptable SImode operand for
802   most 3 address instructions.  */
803
804int
805arith_operand (op, mode)
806     rtx op;
807     enum machine_mode mode;
808{
809  int val;
810  if (register_operand (op, mode))
811    return 1;
812  if (GET_CODE (op) != CONST_INT)
813    return 0;
814  val = INTVAL (op) & 0xffffffff;
815  return SPARC_SIMM13_P (val);
816}
817
818/* Return true if OP is a constant 4096  */
819
820int
821arith_4096_operand (op, mode)
822     rtx op;
823     enum machine_mode mode ATTRIBUTE_UNUSED;
824{
825  int val;
826  if (GET_CODE (op) != CONST_INT)
827    return 0;
828  val = INTVAL (op) & 0xffffffff;
829  return val == 4096;
830}
831
832/* Return true if OP is suitable as second operand for add/sub */
833
834int
835arith_add_operand (op, mode)
836     rtx op;
837     enum machine_mode mode;
838{
839  return arith_operand (op, mode) || arith_4096_operand (op, mode);
840}
841
842/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
843   immediate field of OR and XOR instructions.  Used for 64-bit
844   constant formation patterns.  */
845int
846const64_operand (op, mode)
847     rtx op;
848     enum machine_mode mode ATTRIBUTE_UNUSED;
849{
850  return ((GET_CODE (op) == CONST_INT
851	   && SPARC_SIMM13_P (INTVAL (op)))
852#if HOST_BITS_PER_WIDE_INT != 64
853	  || (GET_CODE (op) == CONST_DOUBLE
854	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
855	      && (CONST_DOUBLE_HIGH (op) ==
856		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
857		   (HOST_WIDE_INT)0xffffffff : 0)))
858#endif
859	  );
860}
861
862/* The same, but only for sethi instructions.  */
863int
864const64_high_operand (op, mode)
865     rtx op;
866     enum machine_mode mode ATTRIBUTE_UNUSED;
867{
868  return ((GET_CODE (op) == CONST_INT
869	   && (INTVAL (op) & 0xfffffc00) != 0
870	   && SPARC_SETHI_P (INTVAL (op))
871#if HOST_BITS_PER_WIDE_INT != 64
872	   /* Must be positive on non-64bit host else the
873	      optimizer is fooled into thinking that sethi
874	      sign extends, even though it does not.  */
875	   && INTVAL (op) >= 0
876#endif
877	   )
878	  || (GET_CODE (op) == CONST_DOUBLE
879	      && CONST_DOUBLE_HIGH (op) == 0
880	      && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
881	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
882}
883
884/* Return true if OP is a register, or is a CONST_INT that can fit in a
885   signed 11 bit immediate field.  This is an acceptable SImode operand for
886   the movcc instructions.  */
887
888int
889arith11_operand (op, mode)
890     rtx op;
891     enum machine_mode mode;
892{
893  return (register_operand (op, mode)
894	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
895}
896
897/* Return true if OP is a register, or is a CONST_INT that can fit in a
898   signed 10 bit immediate field.  This is an acceptable SImode operand for
899   the movrcc instructions.  */
900
901int
902arith10_operand (op, mode)
903     rtx op;
904     enum machine_mode mode;
905{
906  return (register_operand (op, mode)
907	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
908}
909
910/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
911   immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
912   immediate field.
913   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
914   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
915   for most 3 address instructions.  */
916
917int
918arith_double_operand (op, mode)
919     rtx op;
920     enum machine_mode mode;
921{
922  return (register_operand (op, mode)
923	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
924	  || (! TARGET_ARCH64
925	      && GET_CODE (op) == CONST_DOUBLE
926	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
927	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
928	  || (TARGET_ARCH64
929	      && GET_CODE (op) == CONST_DOUBLE
930	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
931	      && ((CONST_DOUBLE_HIGH (op) == -1
932		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
933		  || (CONST_DOUBLE_HIGH (op) == 0
934		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
935}
936
937/* Return true if OP is a constant 4096 for DImode on ARCH64 */
938
939int
940arith_double_4096_operand (op, mode)
941     rtx op;
942     enum machine_mode mode ATTRIBUTE_UNUSED;
943{
944  return (TARGET_ARCH64 &&
945  	  ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
946  	   (GET_CODE (op) == CONST_DOUBLE &&
947  	    CONST_DOUBLE_LOW (op) == 4096 &&
948  	    CONST_DOUBLE_HIGH (op) == 0)));
949}
950
951/* Return true if OP is suitable as second operand for add/sub in DImode */
952
953int
954arith_double_add_operand (op, mode)
955     rtx op;
956     enum machine_mode mode;
957{
958  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
959}
960
961/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
962   can fit in an 11 bit immediate field.  This is an acceptable DImode
963   operand for the movcc instructions.  */
964/* ??? Replace with arith11_operand?  */
965
966int
967arith11_double_operand (op, mode)
968     rtx op;
969     enum machine_mode mode;
970{
971  return (register_operand (op, mode)
972	  || (GET_CODE (op) == CONST_DOUBLE
973	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
974	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
975	      && ((CONST_DOUBLE_HIGH (op) == -1
976		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
977		  || (CONST_DOUBLE_HIGH (op) == 0
978		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
979	  || (GET_CODE (op) == CONST_INT
980	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
981	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
982}
983
984/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
985   can fit in an 10 bit immediate field.  This is an acceptable DImode
986   operand for the movrcc instructions.  */
987/* ??? Replace with arith10_operand?  */
988
989int
990arith10_double_operand (op, mode)
991     rtx op;
992     enum machine_mode mode;
993{
994  return (register_operand (op, mode)
995	  || (GET_CODE (op) == CONST_DOUBLE
996	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
997	      && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
998	      && ((CONST_DOUBLE_HIGH (op) == -1
999		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
1000		  || (CONST_DOUBLE_HIGH (op) == 0
1001		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
1002	  || (GET_CODE (op) == CONST_INT
1003	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1004	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
1005}
1006
1007/* Return truth value of whether OP is a integer which fits the
1008   range constraining immediate operands in most three-address insns,
1009   which have a 13 bit immediate field.  */
1010
1011int
1012small_int (op, mode)
1013     rtx op;
1014     enum machine_mode mode ATTRIBUTE_UNUSED;
1015{
1016  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
1017}
1018
1019int
1020small_int_or_double (op, mode)
1021     rtx op;
1022     enum machine_mode mode ATTRIBUTE_UNUSED;
1023{
1024  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
1025	  || (GET_CODE (op) == CONST_DOUBLE
1026	      && CONST_DOUBLE_HIGH (op) == 0
1027	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
1028}
1029
1030/* Recognize operand values for the umul instruction.  That instruction sign
1031   extends immediate values just like all other sparc instructions, but
1032   interprets the extended result as an unsigned number.  */
1033
1034int
1035uns_small_int (op, mode)
1036     rtx op;
1037     enum machine_mode mode ATTRIBUTE_UNUSED;
1038{
1039#if HOST_BITS_PER_WIDE_INT > 32
1040  /* All allowed constants will fit a CONST_INT.  */
1041  return (GET_CODE (op) == CONST_INT
1042	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
1043	      || (INTVAL (op) >= 0xFFFFF000
1044                  && INTVAL (op) < 0x100000000)));
1045#else
1046  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
1047	  || (GET_CODE (op) == CONST_DOUBLE
1048	      && CONST_DOUBLE_HIGH (op) == 0
1049	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
1050#endif
1051}
1052
1053int
1054uns_arith_operand (op, mode)
1055     rtx op;
1056     enum machine_mode mode;
1057{
1058  return register_operand (op, mode) || uns_small_int (op, mode);
1059}
1060
1061/* Return truth value of statement that OP is a call-clobbered register.  */
1062int
1063clobbered_register (op, mode)
1064     rtx op;
1065     enum machine_mode mode ATTRIBUTE_UNUSED;
1066{
1067  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1068}
1069
1070/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns.  */
1071
1072int
1073zero_operand (op, mode)
1074     rtx op;
1075     enum machine_mode mode ATTRIBUTE_UNUSED;
1076{
1077  return op == const0_rtx;
1078}
1079
1080/* Return 1 if OP is a valid operand for the source of a move insn.  */
1081
1082int
1083input_operand (op, mode)
1084     rtx op;
1085     enum machine_mode mode;
1086{
1087  /* If both modes are non-void they must be the same.  */
1088  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
1089    return 0;
1090
1091  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
1092  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
1093    return 1;
1094
1095  /* Allow any one instruction integer constant, and all CONST_INT
1096     variants when we are working in DImode and !arch64.  */
1097  if (GET_MODE_CLASS (mode) == MODE_INT
1098      && ((GET_CODE (op) == CONST_INT
1099	   && ((SPARC_SETHI_P (INTVAL (op))
1100		&& (! TARGET_ARCH64
1101		    || (INTVAL (op) >= 0)
1102		    || mode == SImode))
1103	       || SPARC_SIMM13_P (INTVAL (op))
1104	       || (mode == DImode
1105		   && ! TARGET_ARCH64)))
1106	  || (TARGET_ARCH64
1107	      && GET_CODE (op) == CONST_DOUBLE
1108	      && ((CONST_DOUBLE_HIGH (op) == 0
1109		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
1110		  ||
1111#if HOST_BITS_PER_WIDE_INT == 64
1112		  (CONST_DOUBLE_HIGH (op) == 0
1113		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
1114#else
1115		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
1116		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
1117			&& CONST_DOUBLE_HIGH (op) == 0)
1118		       || (CONST_DOUBLE_HIGH (op) == -1)))
1119#endif
1120		  ))))
1121    return 1;
1122
1123  /* If !arch64 and this is a DImode const, allow it so that
1124     the splits can be generated.  */
1125  if (! TARGET_ARCH64
1126      && mode == DImode
1127      && GET_CODE (op) == CONST_DOUBLE)
1128    return 1;
1129
1130  if (register_operand (op, mode))
1131    return 1;
1132
1133  /* If this is a SUBREG, look inside so that we handle
1134     paradoxical ones.  */
1135  if (GET_CODE (op) == SUBREG)
1136    op = SUBREG_REG (op);
1137
1138  /* Check for valid MEM forms.  */
1139  if (GET_CODE (op) == MEM)
1140    {
1141      rtx inside = XEXP (op, 0);
1142
1143      if (GET_CODE (inside) == LO_SUM)
1144	{
1145	  /* We can't allow these because all of the splits
1146	     (eventually as they trickle down into DFmode
1147	     splits) require offsettable memory references.  */
1148	  if (! TARGET_V9
1149	      && GET_MODE (op) == TFmode)
1150	    return 0;
1151
1152	  return (register_operand (XEXP (inside, 0), Pmode)
1153		  && CONSTANT_P (XEXP (inside, 1)));
1154	}
1155      return memory_address_p (mode, inside);
1156    }
1157
1158  return 0;
1159}
1160
1161
1162/* We know it can't be done in one insn when we get here,
1163   the movsi expander guarentees this.  */
1164void
1165sparc_emit_set_const32 (op0, op1)
1166     rtx op0;
1167     rtx op1;
1168{
1169  enum machine_mode mode = GET_MODE (op0);
1170  rtx temp;
1171
1172  if (GET_CODE (op1) == CONST_INT)
1173    {
1174      HOST_WIDE_INT value = INTVAL (op1);
1175
1176      if (SPARC_SETHI_P (value)
1177	  || SPARC_SIMM13_P (value))
1178	abort ();
1179    }
1180
1181  /* Full 2-insn decomposition is needed.  */
1182  if (reload_in_progress || reload_completed)
1183    temp = op0;
1184  else
1185    temp = gen_reg_rtx (mode);
1186
1187  if (GET_CODE (op1) == CONST_INT)
1188    {
1189      /* Emit them as real moves instead of a HIGH/LO_SUM,
1190	 this way CSE can see everything and reuse intermediate
1191	 values if it wants.  */
1192      if (TARGET_ARCH64
1193	  && HOST_BITS_PER_WIDE_INT != 64
1194	  && (INTVAL (op1) & 0x80000000) != 0)
1195	{
1196	  emit_insn (gen_rtx_SET (VOIDmode,
1197				  temp,
1198				  gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
1199							INTVAL (op1) & 0xfffffc00, 0)));
1200	}
1201      else
1202	{
1203	  emit_insn (gen_rtx_SET (VOIDmode,
1204				  temp,
1205				  GEN_INT (INTVAL (op1) & 0xfffffc00)));
1206	}
1207      emit_insn (gen_rtx_SET (VOIDmode,
1208			      op0,
1209			      gen_rtx_IOR (mode,
1210					   temp,
1211					   GEN_INT (INTVAL (op1) & 0x3ff))));
1212    }
1213  else
1214    {
1215      /* A symbol, emit in the traditional way.  */
1216      emit_insn (gen_rtx_SET (VOIDmode,
1217			      temp,
1218			      gen_rtx_HIGH (mode,
1219					    op1)));
1220      emit_insn (gen_rtx_SET (VOIDmode,
1221			      op0,
1222			      gen_rtx_LO_SUM (mode,
1223					      temp,
1224					      op1)));
1225
1226    }
1227}
1228
1229
1230/* Sparc-v9 code-model support. */
1231void
1232sparc_emit_set_symbolic_const64 (op0, op1, temp1)
1233     rtx op0;
1234     rtx op1;
1235     rtx temp1;
1236{
1237  switch (sparc_cmodel)
1238    {
1239    case CM_MEDLOW:
1240      /* The range spanned by all instructions in the object is less
1241	 than 2^31 bytes (2GB) and the distance from any instruction
1242	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1243	 than 2^31 bytes (2GB).
1244
1245	 The executable must be in the low 4TB of the virtual address
1246	 space.
1247
1248	 sethi	%hi(symbol), %temp
1249	 or	%temp, %lo(symbol), %reg  */
1250      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1251      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1252      break;
1253
1254    case CM_MEDMID:
1255      /* The range spanned by all instructions in the object is less
1256	 than 2^31 bytes (2GB) and the distance from any instruction
1257	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1258	 than 2^31 bytes (2GB).
1259
1260	 The executable must be in the low 16TB of the virtual address
1261	 space.
1262
1263	 sethi	%h44(symbol), %temp1
1264	 or	%temp1, %m44(symbol), %temp2
1265	 sllx	%temp2, 12, %temp3
1266	 or	%temp3, %l44(symbol), %reg  */
1267      emit_insn (gen_seth44 (op0, op1));
1268      emit_insn (gen_setm44 (op0, op0, op1));
1269      emit_insn (gen_rtx_SET (VOIDmode, temp1,
1270			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
1271      emit_insn (gen_setl44 (op0, temp1, op1));
1272      break;
1273
1274    case CM_MEDANY:
1275      /* The range spanned by all instructions in the object is less
1276	 than 2^31 bytes (2GB) and the distance from any instruction
1277	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1278	 than 2^31 bytes (2GB).
1279
1280	 The executable can be placed anywhere in the virtual address
1281	 space.
1282
1283	 sethi	%hh(symbol), %temp1
1284	 sethi	%lm(symbol), %temp2
1285	 or	%temp1, %hm(symbol), %temp3
1286	 or	%temp2, %lo(symbol), %temp4
1287	 sllx	%temp3, 32, %temp5
1288	 or	%temp4, %temp5, %reg  */
1289
1290      /* Getting this right wrt. reloading is really tricky.
1291	 We _MUST_ have a seperate temporary at this point,
1292	 if we don't barf immediately instead of generating
1293	 incorrect code.  */
1294      if (temp1 == op0)
1295	abort ();
1296
1297      emit_insn (gen_sethh (op0, op1));
1298      emit_insn (gen_setlm (temp1, op1));
1299      emit_insn (gen_sethm (op0, op0, op1));
1300      emit_insn (gen_rtx_SET (VOIDmode, op0,
1301			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
1302      emit_insn (gen_rtx_SET (VOIDmode, op0,
1303			      gen_rtx_PLUS (DImode, op0, temp1)));
1304      emit_insn (gen_setlo (op0, op0, op1));
1305      break;
1306
1307    case CM_EMBMEDANY:
1308      /* Old old old backwards compatibility kruft here.
1309	 Essentially it is MEDLOW with a fixed 64-bit
1310	 virtual base added to all data segment addresses.
1311	 Text-segment stuff is computed like MEDANY, we can't
1312	 reuse the code above because the relocation knobs
1313	 look different.
1314
1315	 Data segment:	sethi	%hi(symbol), %temp1
1316			or	%temp1, %lo(symbol), %temp2
1317			add	%temp2, EMBMEDANY_BASE_REG, %reg
1318
1319	 Text segment:	sethi	%uhi(symbol), %temp1
1320			sethi	%hi(symbol), %temp2
1321			or	%temp1, %ulo(symbol), %temp3
1322			or	%temp2, %lo(symbol), %temp4
1323			sllx	%temp3, 32, %temp5
1324			or	%temp4, %temp5, %reg  */
1325      if (data_segment_operand (op1, GET_MODE (op1)))
1326	{
1327	  emit_insn (gen_embmedany_sethi (temp1, op1));
1328	  emit_insn (gen_embmedany_brsum (op0, temp1));
1329	  emit_insn (gen_embmedany_losum (op0, op0, op1));
1330	}
1331      else
1332	{
1333	  /* Getting this right wrt. reloading is really tricky.
1334	     We _MUST_ have a seperate temporary at this point,
1335	     so we barf immediately instead of generating
1336	     incorrect code.  */
1337	  if (temp1 == op0)
1338	    abort ();
1339
1340	  emit_insn (gen_embmedany_textuhi (op0, op1));
1341	  emit_insn (gen_embmedany_texthi  (temp1, op1));
1342	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
1343	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1344				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
1345	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1346				  gen_rtx_PLUS (DImode, op0, temp1)));
1347	  emit_insn (gen_embmedany_textlo  (op0, op0, op1));
1348	}
1349      break;
1350
1351    default:
1352      abort();
1353    }
1354}
1355
1356/* These avoid problems when cross compiling.  If we do not
1357   go through all this hair then the optimizer will see
1358   invalid REG_EQUAL notes or in some cases none at all.  */
1359static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
1360static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
1361static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
1362static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));
1363
1364#if HOST_BITS_PER_WIDE_INT == 64
1365#define GEN_HIGHINT64(__x)		GEN_INT ((__x) & 0xfffffc00)
1366#define GEN_INT64(__x)			GEN_INT (__x)
1367#else
1368#define GEN_HIGHINT64(__x) \
1369	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
1370			      (__x) & 0xfffffc00, 0)
1371#define GEN_INT64(__x) \
1372	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
1373			      (__x) & 0xffffffff, \
1374			      ((__x) & 0x80000000 \
1375			       ? 0xffffffff : 0))
1376#endif
1377
1378/* The optimizer is not to assume anything about exactly
1379   which bits are set for a HIGH, they are unspecified.
1380   Unfortunately this leads to many missed optimizations
1381   during CSE.  We mask out the non-HIGH bits, and matches
1382   a plain movdi, to alleviate this problem.  */
1383static void
1384sparc_emit_set_safe_HIGH64 (dest, val)
1385     rtx dest;
1386     HOST_WIDE_INT val;
1387{
1388  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
1389}
1390
1391static rtx
1392gen_safe_SET64 (dest, val)
1393     rtx dest;
1394     HOST_WIDE_INT val;
1395{
1396  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
1397}
1398
1399static rtx
1400gen_safe_OR64 (src, val)
1401     rtx src;
1402     HOST_WIDE_INT val;
1403{
1404  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
1405}
1406
1407static rtx
1408gen_safe_XOR64 (src, val)
1409     rtx src;
1410     HOST_WIDE_INT val;
1411{
1412  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
1413}
1414
1415/* Worker routines for 64-bit constant formation on arch64.
1416   One of the key things to be doing in these emissions is
1417   to create as many temp REGs as possible.  This makes it
1418   possible for half-built constants to be used later when
1419   such values are similar to something required later on.
1420   Without doing this, the optimizer cannot see such
1421   opportunities.  */
1422
1423static void sparc_emit_set_const64_quick1
1424	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));
1425
1426static void
1427sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
1428  rtx op0;
1429  rtx temp;
1430  unsigned HOST_WIDE_INT low_bits;
1431  int is_neg;
1432{
1433  unsigned HOST_WIDE_INT high_bits;
1434
1435  if (is_neg)
1436    high_bits = (~low_bits) & 0xffffffff;
1437  else
1438    high_bits = low_bits;
1439
1440  sparc_emit_set_safe_HIGH64 (temp, high_bits);
1441  if (!is_neg)
1442    {
1443      emit_insn (gen_rtx_SET (VOIDmode, op0,
1444			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1445    }
1446  else
1447    {
1448      /* If we are XOR'ing with -1, then we should emit a one's complement
1449	 instead.  This way the combiner will notice logical operations
1450	 such as ANDN later on and substitute.  */
1451      if ((low_bits & 0x3ff) == 0x3ff)
1452	{
1453	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1454				  gen_rtx_NOT (DImode, temp)));
1455	}
1456      else
1457	{
1458	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1459				  gen_safe_XOR64 (temp,
1460						  (-0x400 | (low_bits & 0x3ff)))));
1461	}
1462    }
1463}
1464
1465static void sparc_emit_set_const64_quick2
1466	PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
1467	       unsigned HOST_WIDE_INT, int));
1468
1469static void
1470sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
1471  rtx op0;
1472  rtx temp;
1473  unsigned HOST_WIDE_INT high_bits;
1474  unsigned HOST_WIDE_INT low_immediate;
1475  int shift_count;
1476{
1477  rtx temp2 = op0;
1478
1479  if ((high_bits & 0xfffffc00) != 0)
1480    {
1481      sparc_emit_set_safe_HIGH64 (temp, high_bits);
1482      if ((high_bits & ~0xfffffc00) != 0)
1483	emit_insn (gen_rtx_SET (VOIDmode, op0,
1484				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1485      else
1486	temp2 = temp;
1487    }
1488  else
1489    {
1490      emit_insn (gen_safe_SET64 (temp, high_bits));
1491      temp2 = temp;
1492    }
1493
1494  /* Now shift it up into place. */
1495  emit_insn (gen_rtx_SET (VOIDmode, op0,
1496			  gen_rtx_ASHIFT (DImode, temp2,
1497					  GEN_INT (shift_count))));
1498
1499  /* If there is a low immediate part piece, finish up by
1500     putting that in as well.  */
1501  if (low_immediate != 0)
1502    emit_insn (gen_rtx_SET (VOIDmode, op0,
1503			    gen_safe_OR64 (op0, low_immediate)));
1504}
1505
1506static void sparc_emit_set_const64_longway
1507	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1508
1509/* Full 64-bit constant decomposition.  Even though this is the
1510   'worst' case, we still optimize a few things away.  */
1511static void
1512sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
1513     rtx op0;
1514     rtx temp;
1515     unsigned HOST_WIDE_INT high_bits;
1516     unsigned HOST_WIDE_INT low_bits;
1517{
1518  rtx sub_temp;
1519
1520  if (reload_in_progress || reload_completed)
1521    sub_temp = op0;
1522  else
1523    sub_temp = gen_reg_rtx (DImode);
1524
1525  if ((high_bits & 0xfffffc00) != 0)
1526    {
1527      sparc_emit_set_safe_HIGH64 (temp, high_bits);
1528      if ((high_bits & ~0xfffffc00) != 0)
1529	emit_insn (gen_rtx_SET (VOIDmode,
1530				sub_temp,
1531				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1532      else
1533	sub_temp = temp;
1534    }
1535  else
1536    {
1537      emit_insn (gen_safe_SET64 (temp, high_bits));
1538      sub_temp = temp;
1539    }
1540
1541  if (!reload_in_progress && !reload_completed)
1542    {
1543      rtx temp2 = gen_reg_rtx (DImode);
1544      rtx temp3 = gen_reg_rtx (DImode);
1545      rtx temp4 = gen_reg_rtx (DImode);
1546
1547      emit_insn (gen_rtx_SET (VOIDmode, temp4,
1548			      gen_rtx_ASHIFT (DImode, sub_temp,
1549					      GEN_INT (32))));
1550
1551      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
1552      if ((low_bits & ~0xfffffc00) != 0)
1553	{
1554	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
1555				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1556	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1557				  gen_rtx_PLUS (DImode, temp4, temp3)));
1558	}
1559      else
1560	{
1561	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1562				  gen_rtx_PLUS (DImode, temp4, temp2)));
1563	}
1564    }
1565  else
1566    {
1567      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
1568      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
1569      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1570      int to_shift = 12;
1571
1572      /* We are in the middle of reload, so this is really
1573	 painful.  However we do still make an attempt to
1574	 avoid emitting truly stupid code.  */
1575      if (low1 != const0_rtx)
1576	{
1577	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1578				  gen_rtx_ASHIFT (DImode, sub_temp,
1579						  GEN_INT (to_shift))));
1580	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1581				  gen_rtx_IOR (DImode, op0, low1)));
1582	  sub_temp = op0;
1583	  to_shift = 12;
1584	}
1585      else
1586	{
1587	  to_shift += 12;
1588	}
1589      if (low2 != const0_rtx)
1590	{
1591	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1592				  gen_rtx_ASHIFT (DImode, sub_temp,
1593						  GEN_INT (to_shift))));
1594	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1595				  gen_rtx_IOR (DImode, op0, low2)));
1596	  sub_temp = op0;
1597	  to_shift = 8;
1598	}
1599      else
1600	{
1601	  to_shift += 8;
1602	}
1603      emit_insn (gen_rtx_SET (VOIDmode, op0,
1604			      gen_rtx_ASHIFT (DImode, sub_temp,
1605					      GEN_INT (to_shift))));
1606      if (low3 != const0_rtx)
1607	emit_insn (gen_rtx_SET (VOIDmode, op0,
1608				gen_rtx_IOR (DImode, op0, low3)));
1609      /* phew... */
1610    }
1611}
1612
1613/* Analyze a 64-bit constant for certain properties. */
1614static void analyze_64bit_constant
1615	PROTO((unsigned HOST_WIDE_INT,
1616	       unsigned HOST_WIDE_INT,
1617	       int *, int *, int *));
1618
1619static void
1620analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
1621     unsigned HOST_WIDE_INT high_bits, low_bits;
1622     int *hbsp, *lbsp, *abbasp;
1623{
1624  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1625  int i;
1626
1627  lowest_bit_set = highest_bit_set = -1;
1628  i = 0;
1629  do
1630    {
1631      if ((lowest_bit_set == -1)
1632	  && ((low_bits >> i) & 1))
1633	lowest_bit_set = i;
1634      if ((highest_bit_set == -1)
1635	  && ((high_bits >> (32 - i - 1)) & 1))
1636	highest_bit_set = (64 - i - 1);
1637    }
1638  while (++i < 32
1639	 && ((highest_bit_set == -1)
1640	     || (lowest_bit_set == -1)));
1641  if (i == 32)
1642    {
1643      i = 0;
1644      do
1645	{
1646	  if ((lowest_bit_set == -1)
1647	      && ((high_bits >> i) & 1))
1648	    lowest_bit_set = i + 32;
1649	  if ((highest_bit_set == -1)
1650	      && ((low_bits >> (32 - i - 1)) & 1))
1651	    highest_bit_set = 32 - i - 1;
1652	}
1653      while (++i < 32
1654	     && ((highest_bit_set == -1)
1655		 || (lowest_bit_set == -1)));
1656    }
1657  /* If there are no bits set this should have gone out
1658     as one instruction!  */
1659  if (lowest_bit_set == -1
1660      || highest_bit_set == -1)
1661    abort ();
1662  all_bits_between_are_set = 1;
1663  for (i = lowest_bit_set; i <= highest_bit_set; i++)
1664    {
1665      if (i < 32)
1666	{
1667	  if ((low_bits & (1 << i)) != 0)
1668	    continue;
1669	}
1670      else
1671	{
1672	  if ((high_bits & (1 << (i - 32))) != 0)
1673	    continue;
1674	}
1675      all_bits_between_are_set = 0;
1676      break;
1677    }
1678  *hbsp = highest_bit_set;
1679  *lbsp = lowest_bit_set;
1680  *abbasp = all_bits_between_are_set;
1681}
1682
1683static int const64_is_2insns
1684	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1685
1686static int
1687const64_is_2insns (high_bits, low_bits)
1688     unsigned HOST_WIDE_INT high_bits, low_bits;
1689{
1690  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1691
1692  if (high_bits == 0
1693      || high_bits == 0xffffffff)
1694    return 1;
1695
1696  analyze_64bit_constant (high_bits, low_bits,
1697			  &highest_bit_set, &lowest_bit_set,
1698			  &all_bits_between_are_set);
1699
1700  if ((highest_bit_set == 63
1701       || lowest_bit_set == 0)
1702      && all_bits_between_are_set != 0)
1703    return 1;
1704
1705  if ((highest_bit_set - lowest_bit_set) < 21)
1706    return 1;
1707
1708  return 0;
1709}
1710
1711static unsigned HOST_WIDE_INT create_simple_focus_bits
1712	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1713	       int, int));
1714
1715static unsigned HOST_WIDE_INT
1716create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
1717     unsigned HOST_WIDE_INT high_bits, low_bits;
1718     int lowest_bit_set, shift;
1719{
1720  HOST_WIDE_INT hi, lo;
1721
1722  if (lowest_bit_set < 32)
1723    {
1724      lo = (low_bits >> lowest_bit_set) << shift;
1725      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1726    }
1727  else
1728    {
1729      lo = 0;
1730      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1731    }
1732  if (hi & lo)
1733    abort ();
1734  return (hi | lo);
1735}
1736
1737/* Here we are sure to be arch64 and this is an integer constant
1738   being loaded into a register.  Emit the most efficient
1739   insn sequence possible.  Detection of all the 1-insn cases
1740   has been done already.  */
1741void
1742sparc_emit_set_const64 (op0, op1)
1743     rtx op0;
1744     rtx op1;
1745{
1746  unsigned HOST_WIDE_INT high_bits, low_bits;
1747  int lowest_bit_set, highest_bit_set;
1748  int all_bits_between_are_set;
1749  rtx temp;
1750
1751  /* Sanity check that we know what we are working with.  */
1752  if (! TARGET_ARCH64
1753      || GET_CODE (op0) != REG
1754      || (REGNO (op0) >= SPARC_FIRST_FP_REG
1755	  && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
1756    abort ();
1757
1758  if (reload_in_progress || reload_completed)
1759    temp = op0;
1760  else
1761    temp = gen_reg_rtx (DImode);
1762
1763  if (GET_CODE (op1) != CONST_DOUBLE
1764      && GET_CODE (op1) != CONST_INT)
1765    {
1766      sparc_emit_set_symbolic_const64 (op0, op1, temp);
1767      return;
1768    }
1769
1770  if (GET_CODE (op1) == CONST_DOUBLE)
1771    {
1772#if HOST_BITS_PER_WIDE_INT == 64
1773      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
1774      low_bits  = CONST_DOUBLE_LOW (op1) & 0xffffffff;
1775#else
1776      high_bits = CONST_DOUBLE_HIGH (op1);
1777      low_bits = CONST_DOUBLE_LOW (op1);
1778#endif
1779    }
1780  else
1781    {
1782#if HOST_BITS_PER_WIDE_INT == 64
1783      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1784      low_bits = (INTVAL (op1) & 0xffffffff);
1785#else
1786      high_bits = ((INTVAL (op1) < 0) ?
1787		   0xffffffff :
1788		   0x00000000);
1789      low_bits = INTVAL (op1);
1790#endif
1791    }
1792
1793  /* low_bits	bits 0  --> 31
1794     high_bits	bits 32 --> 63  */
1795
1796  analyze_64bit_constant (high_bits, low_bits,
1797			  &highest_bit_set, &lowest_bit_set,
1798			  &all_bits_between_are_set);
1799
1800  /* First try for a 2-insn sequence.  */
1801
1802  /* These situations are preferred because the optimizer can
1803   * do more things with them:
1804   * 1) mov	-1, %reg
1805   *    sllx	%reg, shift, %reg
1806   * 2) mov	-1, %reg
1807   *    srlx	%reg, shift, %reg
1808   * 3) mov	some_small_const, %reg
1809   *    sllx	%reg, shift, %reg
1810   */
1811  if (((highest_bit_set == 63
1812	|| lowest_bit_set == 0)
1813       && all_bits_between_are_set != 0)
1814      || ((highest_bit_set - lowest_bit_set) < 12))
1815    {
1816      HOST_WIDE_INT the_const = -1;
1817      int shift = lowest_bit_set;
1818
1819      if ((highest_bit_set != 63
1820	   && lowest_bit_set != 0)
1821	  || all_bits_between_are_set == 0)
1822	{
1823	  the_const =
1824	    create_simple_focus_bits (high_bits, low_bits,
1825				      lowest_bit_set, 0);
1826	}
1827      else if (lowest_bit_set == 0)
1828	shift = -(63 - highest_bit_set);
1829
1830      if (! SPARC_SIMM13_P (the_const))
1831	abort ();
1832
1833      emit_insn (gen_safe_SET64 (temp, the_const));
1834      if (shift > 0)
1835	emit_insn (gen_rtx_SET (VOIDmode,
1836				op0,
1837				gen_rtx_ASHIFT (DImode,
1838						temp,
1839						GEN_INT (shift))));
1840      else if (shift < 0)
1841	emit_insn (gen_rtx_SET (VOIDmode,
1842				op0,
1843				gen_rtx_LSHIFTRT (DImode,
1844						  temp,
1845						  GEN_INT (-shift))));
1846      else
1847	abort ();
1848      return;
1849    }
1850
1851  /* Now a range of 22 or less bits set somewhere.
1852   * 1) sethi	%hi(focus_bits), %reg
1853   *    sllx	%reg, shift, %reg
1854   * 2) sethi	%hi(focus_bits), %reg
1855   *    srlx	%reg, shift, %reg
1856   */
1857  if ((highest_bit_set - lowest_bit_set) < 21)
1858    {
1859      unsigned HOST_WIDE_INT focus_bits =
1860	create_simple_focus_bits (high_bits, low_bits,
1861				  lowest_bit_set, 10);
1862
1863      if (! SPARC_SETHI_P (focus_bits))
1864	 abort ();
1865
1866      sparc_emit_set_safe_HIGH64 (temp, focus_bits);
1867
1868      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1869      if (lowest_bit_set < 10)
1870	emit_insn (gen_rtx_SET (VOIDmode,
1871				op0,
1872				gen_rtx_LSHIFTRT (DImode, temp,
1873						  GEN_INT (10 - lowest_bit_set))));
1874      else if (lowest_bit_set > 10)
1875	emit_insn (gen_rtx_SET (VOIDmode,
1876				op0,
1877				gen_rtx_ASHIFT (DImode, temp,
1878						GEN_INT (lowest_bit_set - 10))));
1879      else
1880	abort ();
1881      return;
1882    }
1883
1884  /* 1) sethi	%hi(low_bits), %reg
1885   *    or	%reg, %lo(low_bits), %reg
1886   * 2) sethi	%hi(~low_bits), %reg
1887   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1888   */
1889  if (high_bits == 0
1890      || high_bits == 0xffffffff)
1891    {
1892      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1893				     (high_bits == 0xffffffff));
1894      return;
1895    }
1896
1897  /* Now, try 3-insn sequences.  */
1898
1899  /* 1) sethi	%hi(high_bits), %reg
1900   *    or	%reg, %lo(high_bits), %reg
1901   *    sllx	%reg, 32, %reg
1902   */
1903  if (low_bits == 0)
1904    {
1905      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1906      return;
1907    }
1908
1909  /* We may be able to do something quick
1910     when the constant is negated, so try that.  */
1911  if (const64_is_2insns ((~high_bits) & 0xffffffff,
1912			 (~low_bits) & 0xfffffc00))
1913    {
1914      /* NOTE: The trailing bits get XOR'd so we need the
1915	 non-negated bits, not the negated ones.  */
1916      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1917
1918      if ((((~high_bits) & 0xffffffff) == 0
1919	   && ((~low_bits) & 0x80000000) == 0)
1920	  || (((~high_bits) & 0xffffffff) == 0xffffffff
1921	      && ((~low_bits) & 0x80000000) != 0))
1922	{
1923	  int fast_int = (~low_bits & 0xffffffff);
1924
1925	  if ((SPARC_SETHI_P (fast_int)
1926	       && (~high_bits & 0xffffffff) == 0)
1927	      || SPARC_SIMM13_P (fast_int))
1928	    emit_insn (gen_safe_SET64 (temp, fast_int));
1929	  else
1930	    sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
1931	}
1932      else
1933	{
1934	  rtx negated_const;
1935#if HOST_BITS_PER_WIDE_INT == 64
1936	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1937				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1938#else
1939	  negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
1940						(~low_bits) & 0xfffffc00,
1941						(~high_bits) & 0xffffffff);
1942#endif
1943	  sparc_emit_set_const64 (temp, negated_const);
1944	}
1945
1946      /* If we are XOR'ing with -1, then we should emit a one's complement
1947	 instead.  This way the combiner will notice logical operations
1948	 such as ANDN later on and substitute.  */
1949      if (trailing_bits == 0x3ff)
1950	{
1951	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1952				  gen_rtx_NOT (DImode, temp)));
1953	}
1954      else
1955	{
1956	  emit_insn (gen_rtx_SET (VOIDmode,
1957				  op0,
1958				  gen_safe_XOR64 (temp,
1959						  (-0x400 | trailing_bits))));
1960	}
1961      return;
1962    }
1963
1964  /* 1) sethi	%hi(xxx), %reg
1965   *    or	%reg, %lo(xxx), %reg
1966   *	sllx	%reg, yyy, %reg
1967   *
1968   * ??? This is just a generalized version of the low_bits==0
1969   * thing above, FIXME...
1970   */
1971  if ((highest_bit_set - lowest_bit_set) < 32)
1972    {
1973      unsigned HOST_WIDE_INT focus_bits =
1974	create_simple_focus_bits (high_bits, low_bits,
1975				  lowest_bit_set, 0);
1976
1977      /* We can't get here in this state.  */
1978      if (highest_bit_set < 32
1979	  || lowest_bit_set >= 32)
1980	abort ();
1981
1982      /* So what we know is that the set bits straddle the
1983	 middle of the 64-bit word.  */
1984      sparc_emit_set_const64_quick2 (op0, temp,
1985				     focus_bits, 0,
1986				     lowest_bit_set);
1987      return;
1988    }
1989
1990  /* 1) sethi	%hi(high_bits), %reg
1991   *    or	%reg, %lo(high_bits), %reg
1992   *    sllx	%reg, 32, %reg
1993   *	or	%reg, low_bits, %reg
1994   */
1995  if (SPARC_SIMM13_P(low_bits)
1996      && ((int)low_bits > 0))
1997    {
1998      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1999      return;
2000    }
2001
2002  /* The easiest way when all else fails, is full decomposition. */
2003#if 0
2004  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
2005	  high_bits, low_bits, ~high_bits, ~low_bits);
2006#endif
2007  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2008}
2009
2010/* X and Y are two things to compare using CODE.  Emit the compare insn and
2011   return the rtx for the cc reg in the proper mode.  */
2012
2013rtx
2014gen_compare_reg (code, x, y)
2015     enum rtx_code code;
2016     rtx x, y;
2017{
2018  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
2019  rtx cc_reg;
2020
2021  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2022     fcc regs (cse can't tell they're really call clobbered regs and will
2023     remove a duplicate comparison even if there is an intervening function
2024     call - it will then try to reload the cc reg via an int reg which is why
2025     we need the movcc patterns).  It is possible to provide the movcc
2026     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2027     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2028     to tell cse that CCFPE mode registers (even pseudos) are call
2029     clobbered.  */
2030
2031  /* ??? This is an experiment.  Rather than making changes to cse which may
2032     or may not be easy/clean, we do our own cse.  This is possible because
2033     we will generate hard registers.  Cse knows they're call clobbered (it
2034     doesn't know the same thing about pseudos). If we guess wrong, no big
2035     deal, but if we win, great!  */
2036
2037  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2038#if 1 /* experiment */
2039    {
2040      int reg;
2041      /* We cycle through the registers to ensure they're all exercised.  */
2042      static int next_fcc_reg = 0;
2043      /* Previous x,y for each fcc reg.  */
2044      static rtx prev_args[4][2];
2045
2046      /* Scan prev_args for x,y.  */
2047      for (reg = 0; reg < 4; reg++)
2048	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2049	  break;
2050      if (reg == 4)
2051	{
2052	  reg = next_fcc_reg;
2053	  prev_args[reg][0] = x;
2054	  prev_args[reg][1] = y;
2055	  next_fcc_reg = (next_fcc_reg + 1) & 3;
2056	}
2057      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2058    }
2059#else
2060    cc_reg = gen_reg_rtx (mode);
2061#endif /* ! experiment */
2062  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2063    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2064  else
2065    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2066
2067  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
2068			  gen_rtx_COMPARE (mode, x, y)));
2069
2070  return cc_reg;
2071}
2072
2073/* This function is used for v9 only.
2074   CODE is the code for an Scc's comparison.
2075   OPERANDS[0] is the target of the Scc insn.
2076   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
2077   been generated yet).
2078
2079   This function is needed to turn
2080
2081	   (set (reg:SI 110)
2082	       (gt (reg:CCX 100 %icc)
2083	           (const_int 0)))
2084   into
2085	   (set (reg:SI 110)
2086	       (gt:DI (reg:CCX 100 %icc)
2087	           (const_int 0)))
2088
2089   IE: The instruction recognizer needs to see the mode of the comparison to
2090   find the right instruction. We could use "gt:DI" right in the
2091   define_expand, but leaving it out allows us to handle DI, SI, etc.
2092
2093   We refer to the global sparc compare operands sparc_compare_op0 and
2094   sparc_compare_op1.  */
2095
2096int
2097gen_v9_scc (compare_code, operands)
2098     enum rtx_code compare_code;
2099     register rtx *operands;
2100{
2101  rtx temp, op0, op1;
2102
2103  if (! TARGET_ARCH64
2104      && (GET_MODE (sparc_compare_op0) == DImode
2105	  || GET_MODE (operands[0]) == DImode))
2106    return 0;
2107
2108  /* Handle the case where operands[0] == sparc_compare_op0.
2109     We "early clobber" the result.  */
2110  if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
2111    {
2112      op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
2113      emit_move_insn (op0, sparc_compare_op0);
2114    }
2115  else
2116    op0 = sparc_compare_op0;
2117  /* For consistency in the following.  */
2118  op1 = sparc_compare_op1;
2119
2120  /* Try to use the movrCC insns.  */
2121  if (TARGET_ARCH64
2122      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
2123      && op1 == const0_rtx
2124      && v9_regcmp_p (compare_code))
2125    {
2126      /* Special case for op0 != 0.  This can be done with one instruction if
2127	 operands[0] == sparc_compare_op0.  We don't assume they are equal
2128	 now though.  */
2129
2130      if (compare_code == NE
2131	  && GET_MODE (operands[0]) == DImode
2132	  && GET_MODE (op0) == DImode)
2133	{
2134	  emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
2135	  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2136			      gen_rtx_IF_THEN_ELSE (DImode,
2137				       gen_rtx_fmt_ee (compare_code, DImode,
2138						       op0, const0_rtx),
2139				       const1_rtx,
2140				       operands[0])));
2141	  return 1;
2142	}
2143
2144      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2145      if (GET_MODE (op0) != DImode)
2146	{
2147	  temp = gen_reg_rtx (DImode);
2148	  convert_move (temp, op0, 0);
2149	}
2150      else
2151	temp = op0;
2152      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2153			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2154				   gen_rtx_fmt_ee (compare_code, DImode,
2155						   temp, const0_rtx),
2156				   const1_rtx,
2157				   operands[0])));
2158      return 1;
2159    }
2160  else
2161    {
2162      operands[1] = gen_compare_reg (compare_code, op0, op1);
2163
2164      switch (GET_MODE (operands[1]))
2165	{
2166	  case CCmode :
2167	  case CCXmode :
2168	  case CCFPEmode :
2169	  case CCFPmode :
2170	    break;
2171	  default :
2172	    abort ();
2173	}
2174      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
2175      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2176			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
2177				   gen_rtx_fmt_ee (compare_code,
2178						   GET_MODE (operands[1]),
2179						   operands[1], const0_rtx),
2180				    const1_rtx, operands[0])));
2181      return 1;
2182    }
2183}
2184
2185/* Emit a conditional jump insn for the v9 architecture using comparison code
2186   CODE and jump target LABEL.
2187   This function exists to take advantage of the v9 brxx insns.  */
2188
2189void
2190emit_v9_brxx_insn (code, op0, label)
2191     enum rtx_code code;
2192     rtx op0, label;
2193{
2194  emit_jump_insn (gen_rtx_SET (VOIDmode,
2195			   pc_rtx,
2196			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2197				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2198						    op0, const0_rtx),
2199				    gen_rtx_LABEL_REF (VOIDmode, label),
2200				    pc_rtx)));
2201}
2202
2203/* Return nonzero if a return peephole merging return with
2204   setting of output register is ok.  */
2205int
2206leaf_return_peephole_ok ()
2207{
2208  return (actual_fsize == 0);
2209}
2210
2211/* Return nonzero if TRIAL can go into the function epilogue's
2212   delay slot.  SLOT is the slot we are trying to fill.  */
2213
2214int
2215eligible_for_epilogue_delay (trial, slot)
2216     rtx trial;
2217     int slot;
2218{
2219  rtx pat, src;
2220
2221  if (slot >= 1)
2222    return 0;
2223
2224  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2225    return 0;
2226
2227  if (get_attr_length (trial) != 1)
2228    return 0;
2229
2230  /* If %g0 is live, there are lots of things we can't handle.
2231     Rather than trying to find them all now, let's punt and only
2232     optimize things as necessary.  */
2233  if (TARGET_LIVE_G0)
2234    return 0;
2235
2236  /* In the case of a true leaf function, anything can go into the delay slot.
2237     A delay slot only exists however if the frame size is zero, otherwise
2238     we will put an insn to adjust the stack after the return.  */
2239  if (current_function_uses_only_leaf_regs)
2240    {
2241      if (leaf_return_peephole_ok ())
2242	return ((get_attr_in_uncond_branch_delay (trial)
2243		 == IN_BRANCH_DELAY_TRUE));
2244      return 0;
2245    }
2246
2247  /* If only trivial `restore' insns work, nothing can go in the
2248     delay slot.  */
2249  else if (TARGET_BROKEN_SAVERESTORE)
2250    return 0;
2251
2252  pat = PATTERN (trial);
2253
2254  /* Otherwise, only operations which can be done in tandem with
2255     a `restore' insn can go into the delay slot.  */
2256  if (GET_CODE (SET_DEST (pat)) != REG
2257      || REGNO (SET_DEST (pat)) >= 32
2258      || REGNO (SET_DEST (pat)) < 24)
2259    return 0;
2260
2261  /* The set of insns matched here must agree precisely with the set of
2262     patterns paired with a RETURN in sparc.md.  */
2263
2264  src = SET_SRC (pat);
2265
2266  /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64.  */
2267  if (arith_operand (src, GET_MODE (src)))
2268    {
2269      if (TARGET_ARCH64)
2270        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2271      else
2272        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2273    }
2274
2275  /* This matches "*return_di".  */
2276  else if (arith_double_operand (src, GET_MODE (src)))
2277    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2278
2279  /* This matches "*return_sf_no_fpu".  */
2280  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
2281	   && register_operand (src, SFmode))
2282    return 1;
2283
2284  /* This matches "*return_addsi".  */
2285  else if (GET_CODE (src) == PLUS
2286	   && arith_operand (XEXP (src, 0), SImode)
2287	   && arith_operand (XEXP (src, 1), SImode)
2288	   && (register_operand (XEXP (src, 0), SImode)
2289	       || register_operand (XEXP (src, 1), SImode)))
2290    return 1;
2291
2292  /* This matches "*return_adddi".  */
2293  else if (GET_CODE (src) == PLUS
2294	   && arith_double_operand (XEXP (src, 0), DImode)
2295	   && arith_double_operand (XEXP (src, 1), DImode)
2296	   && (register_operand (XEXP (src, 0), DImode)
2297	       || register_operand (XEXP (src, 1), DImode)))
2298    return 1;
2299
2300  return 0;
2301}
2302
2303static int
2304check_return_regs (x)
2305     rtx x;
2306{
2307  switch (GET_CODE (x))
2308    {
2309    case REG:
2310      return IN_OR_GLOBAL_P (x);
2311
2312    case CONST_INT:
2313    case CONST_DOUBLE:
2314    case CONST:
2315    case SYMBOL_REF:
2316    case LABEL_REF:
2317    return 1;
2318
2319    case SET:
2320    case IOR:
2321    case AND:
2322    case XOR:
2323    case PLUS:
2324    case MINUS:
2325      if (check_return_regs (XEXP (x, 1)) == 0)
2326  return 0;
2327    case NOT:
2328    case NEG:
2329    case MEM:
2330      return check_return_regs (XEXP (x, 0));
2331
2332    default:
2333      return 0;
2334    }
2335
2336}
2337
2338/* Return 1 if TRIAL references only in and global registers.  */
2339int
2340eligible_for_return_delay (trial)
2341     rtx trial;
2342{
2343  if (GET_CODE (PATTERN (trial)) != SET)
2344    return 0;
2345
2346  return check_return_regs (PATTERN (trial));
2347}
2348
2349int
2350short_branch (uid1, uid2)
2351     int uid1, uid2;
2352{
2353  unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
2354  if (delta + 1024 < 2048)
2355    return 1;
2356  /* warning ("long branch, distance %d", delta); */
2357  return 0;
2358}
2359
2360/* Return non-zero if REG is not used after INSN.
2361   We assume REG is a reload reg, and therefore does
2362   not live past labels or calls or jumps.  */
2363int
2364reg_unused_after (reg, insn)
2365     rtx reg;
2366     rtx insn;
2367{
2368  enum rtx_code code, prev_code = UNKNOWN;
2369
2370  while ((insn = NEXT_INSN (insn)))
2371    {
2372      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2373	return 1;
2374
2375      code = GET_CODE (insn);
2376      if (GET_CODE (insn) == CODE_LABEL)
2377	return 1;
2378
2379      if (GET_RTX_CLASS (code) == 'i')
2380	{
2381	  rtx set = single_set (insn);
2382	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2383	  if (set && in_src)
2384	    return 0;
2385	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2386	    return 1;
2387	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2388	    return 0;
2389	}
2390      prev_code = code;
2391    }
2392  return 1;
2393}
2394
2395/* The table we use to reference PIC data.  */
2396static rtx global_offset_table;
2397
2398/* The function we use to get at it.  */
2399static rtx get_pc_symbol;
2400static char get_pc_symbol_name[256];
2401
2402/* Ensure that we are not using patterns that are not OK with PIC.  */
2403
2404int
2405check_pic (i)
2406     int i;
2407{
2408  switch (flag_pic)
2409    {
2410    case 1:
2411      if (GET_CODE (recog_operand[i]) == SYMBOL_REF
2412	  || (GET_CODE (recog_operand[i]) == CONST
2413	      && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
2414		    && (XEXP (XEXP (recog_operand[i], 0), 0)
2415			== global_offset_table)
2416		    && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
2417			== CONST))))
2418	abort ();
2419    case 2:
2420    default:
2421      return 1;
2422    }
2423}
2424
2425/* Return true if X is an address which needs a temporary register when
2426   reloaded while generating PIC code.  */
2427
2428int
2429pic_address_needs_scratch (x)
2430     rtx x;
2431{
2432  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2433  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2434      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2435      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2436      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2437    return 1;
2438
2439  return 0;
2440}
2441
2442/* Legitimize PIC addresses.  If the address is already position-independent,
2443   we return ORIG.  Newly generated position-independent addresses go into a
2444   reg.  This is REG if non zero, otherwise we allocate register(s) as
2445   necessary.  */
2446
2447rtx
2448legitimize_pic_address (orig, mode, reg)
2449     rtx orig;
2450     enum machine_mode mode ATTRIBUTE_UNUSED;
2451     rtx reg;
2452{
2453  if (GET_CODE (orig) == SYMBOL_REF)
2454    {
2455      rtx pic_ref, address;
2456      rtx insn;
2457
2458      if (reg == 0)
2459	{
2460	  if (reload_in_progress || reload_completed)
2461	    abort ();
2462	  else
2463	    reg = gen_reg_rtx (Pmode);
2464	}
2465
2466      if (flag_pic == 2)
2467	{
2468	  /* If not during reload, allocate another temp reg here for loading
2469	     in the address, so that these instructions can be optimized
2470	     properly.  */
2471	  rtx temp_reg = ((reload_in_progress || reload_completed)
2472			  ? reg : gen_reg_rtx (Pmode));
2473
2474	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
2475	     won't get confused into thinking that these two instructions
2476	     are loading in the true address of the symbol.  If in the
2477	     future a PIC rtx exists, that should be used instead.  */
2478	  if (Pmode == SImode)
2479	    {
2480	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
2481	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
2482	    }
2483	  else
2484	    {
2485	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
2486	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
2487	    }
2488	  address = temp_reg;
2489	}
2490      else
2491	address = orig;
2492
2493      pic_ref = gen_rtx_MEM (Pmode,
2494			 gen_rtx_PLUS (Pmode,
2495				  pic_offset_table_rtx, address));
2496      current_function_uses_pic_offset_table = 1;
2497      RTX_UNCHANGING_P (pic_ref) = 1;
2498      insn = emit_move_insn (reg, pic_ref);
2499      /* Put a REG_EQUAL note on this insn, so that it can be optimized
2500	 by loop.  */
2501      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
2502				  REG_NOTES (insn));
2503      return reg;
2504    }
2505  else if (GET_CODE (orig) == CONST)
2506    {
2507      rtx base, offset;
2508
2509      if (GET_CODE (XEXP (orig, 0)) == PLUS
2510	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
2511	return orig;
2512
2513      if (reg == 0)
2514	{
2515	  if (reload_in_progress || reload_completed)
2516	    abort ();
2517	  else
2518	    reg = gen_reg_rtx (Pmode);
2519	}
2520
2521      if (GET_CODE (XEXP (orig, 0)) == PLUS)
2522	{
2523	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
2524	  offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
2525					 base == reg ? 0 : reg);
2526	}
2527      else
2528	abort ();
2529
2530      if (GET_CODE (offset) == CONST_INT)
2531	{
2532	  if (SMALL_INT (offset))
2533	    return plus_constant_for_output (base, INTVAL (offset));
2534	  else if (! reload_in_progress && ! reload_completed)
2535	    offset = force_reg (Pmode, offset);
2536	  else
2537	    /* If we reach here, then something is seriously wrong.  */
2538	    abort ();
2539	}
2540      return gen_rtx_PLUS (Pmode, base, offset);
2541    }
2542  else if (GET_CODE (orig) == LABEL_REF)
2543    /* ??? Why do we do this?  */
2544    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
2545       the register is live instead, in case it is eliminated.  */
2546    current_function_uses_pic_offset_table = 1;
2547
2548  return orig;
2549}
2550
2551/* Return the RTX for insns to set the PIC register.  */
2552
2553static rtx
2554pic_setup_code ()
2555{
2556  rtx seq;
2557
2558  start_sequence ();
2559  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
2560			 get_pc_symbol));
2561  seq = gen_sequence ();
2562  end_sequence ();
2563
2564  return seq;
2565}
2566
2567/* Emit special PIC prologues and epilogues.  */
2568
2569void
2570finalize_pic ()
2571{
2572  /* Labels to get the PC in the prologue of this function.  */
2573  int orig_flag_pic = flag_pic;
2574  rtx insn;
2575
2576  if (current_function_uses_pic_offset_table == 0)
2577    return;
2578
2579  if (! flag_pic)
2580    abort ();
2581
2582  /* If we havn't emitted the special get_pc helper function, do so now.  */
2583  if (get_pc_symbol_name[0] == 0)
2584    {
2585      int align;
2586
2587      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
2588      text_section ();
2589
2590      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
2591      if (align > 0)
2592	ASM_OUTPUT_ALIGN (asm_out_file, align);
2593      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
2594      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
2595    }
2596
2597  /* Initialize every time through, since we can't easily
2598     know this to be permanent.  */
2599  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2600  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
2601  flag_pic = 0;
2602
2603  emit_insn_after (pic_setup_code (), get_insns ());
2604
2605  /* Insert the code in each nonlocal goto receiver.
2606     If you make changes here or to the nonlocal_goto_receiver
2607     pattern, make sure the unspec_volatile numbers still
2608     match.  */
2609  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2610    if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
2611	&& XINT (PATTERN (insn), 1) == 5)
2612      emit_insn_after (pic_setup_code (), insn);
2613
2614  flag_pic = orig_flag_pic;
2615
2616  /* Need to emit this whether or not we obey regdecls,
2617     since setjmp/longjmp can cause life info to screw up.
2618     ??? In the case where we don't obey regdecls, this is not sufficient
2619     since we may not fall out the bottom.  */
2620  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
2621}
2622
2623/* Return 1 if RTX is a MEM which is known to be aligned to at
2624   least an 8 byte boundary.  */
2625
2626int
2627mem_min_alignment (mem, desired)
2628     rtx mem;
2629     int desired;
2630{
2631  rtx addr, base, offset;
2632
2633  /* If it's not a MEM we can't accept it.  */
2634  if (GET_CODE (mem) != MEM)
2635    return 0;
2636
2637  addr = XEXP (mem, 0);
2638  base = offset = NULL_RTX;
2639  if (GET_CODE (addr) == PLUS)
2640    {
2641      if (GET_CODE (XEXP (addr, 0)) == REG)
2642	{
2643	  base = XEXP (addr, 0);
2644
2645	  /* What we are saying here is that if the base
2646	     REG is aligned properly, the compiler will make
2647	     sure any REG based index upon it will be so
2648	     as well.  */
2649	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
2650	    offset = XEXP (addr, 1);
2651	  else
2652	    offset = const0_rtx;
2653	}
2654    }
2655  else if (GET_CODE (addr) == REG)
2656    {
2657      base = addr;
2658      offset = const0_rtx;
2659    }
2660
2661  if (base != NULL_RTX)
2662    {
2663      int regno = REGNO (base);
2664
2665      if (regno != FRAME_POINTER_REGNUM
2666	  && regno != STACK_POINTER_REGNUM)
2667	{
2668	  /* Check if the compiler has recorded some information
2669	     about the alignment of the base REG.  If reload has
2670	     completed, we already matched with proper alignments.  */
2671	  if (((regno_pointer_align != NULL
2672		&& REGNO_POINTER_ALIGN (regno) >= desired)
2673	       || reload_completed)
2674	      && ((INTVAL (offset) & (desired - 1)) == 0))
2675	    return 1;
2676	}
2677      else
2678	{
2679	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
2680	    return 1;
2681	}
2682    }
2683  else if (! TARGET_UNALIGNED_DOUBLES
2684	   || CONSTANT_P (addr)
2685	   || GET_CODE (addr) == LO_SUM)
2686    {
2687      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
2688	 is true, in which case we can only assume that an access is aligned if
2689	 it is to a constant address, or the address involves a LO_SUM.  */
2690      return 1;
2691    }
2692
2693  /* An obviously unaligned address.  */
2694  return 0;
2695}
2696
2697
/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */
2702
/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  The enumerator values are used
   as bit positions, so their order matters.  */

enum sparc_mode_class {
  /* Integer classes.  */
  S_MODE,
  D_MODE,
  T_MODE,
  O_MODE,
  /* Floating point classes.  */
  SF_MODE,
  DF_MODE,
  TF_MODE,
  OF_MODE,
  /* Condition code classes.  */
  CC_MODE,
  CCFP_MODE
};
2712
/* Each macro below is a bit set over enum sparc_mode_class values.  */

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

#define DF_MODES64 DF_MODES

/* Modes for double-float only quantities.  */
#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for double-float and larger quantities.  */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
/* Value is 1 if register/mode pair is acceptable on sparc.
   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).  */

/* Per hard register set of acceptable mode classes.  sparc_init_modes
   points this at either the 32 bit or the 64 bit table.  */
int *hard_regno_mode_classes;
2758
2759static int hard_32bit_mode_classes[] = {
2760  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
2761  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
2762  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
2763  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
2764
2765  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2766  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2767  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2768  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
2769
2770  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
2771     and none can hold SFmode/SImode values.  */
2772  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2773  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2774  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2775  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2776
2777  /* %fcc[0123] */
2778  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
2779
2780  /* %icc */
2781  CC_MODES
2782};
2783
2784static int hard_64bit_mode_classes[] = {
2785  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2786  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2787  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2788  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
2789
2790  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2791  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2792  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2793  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
2794
2795  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
2796     and none can hold SFmode/SImode values.  */
2797  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2798  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2799  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2800  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
2801
2802  /* %fcc[0123] */
2803  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
2804
2805  /* %icc */
2806  CC_MODES
2807};
2808
2809int sparc_mode_class [NUM_MACHINE_MODES];
2810
2811enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
2812
2813static void
2814sparc_init_modes ()
2815{
2816  int i;
2817
2818  for (i = 0; i < NUM_MACHINE_MODES; i++)
2819    {
2820      switch (GET_MODE_CLASS (i))
2821	{
2822	case MODE_INT:
2823	case MODE_PARTIAL_INT:
2824	case MODE_COMPLEX_INT:
2825	  if (GET_MODE_SIZE (i) <= 4)
2826	    sparc_mode_class[i] = 1 << (int) S_MODE;
2827	  else if (GET_MODE_SIZE (i) == 8)
2828	    sparc_mode_class[i] = 1 << (int) D_MODE;
2829	  else if (GET_MODE_SIZE (i) == 16)
2830	    sparc_mode_class[i] = 1 << (int) T_MODE;
2831	  else if (GET_MODE_SIZE (i) == 32)
2832	    sparc_mode_class[i] = 1 << (int) O_MODE;
2833	  else
2834	    sparc_mode_class[i] = 0;
2835	  break;
2836	case MODE_FLOAT:
2837	case MODE_COMPLEX_FLOAT:
2838	  if (GET_MODE_SIZE (i) <= 4)
2839	    sparc_mode_class[i] = 1 << (int) SF_MODE;
2840	  else if (GET_MODE_SIZE (i) == 8)
2841	    sparc_mode_class[i] = 1 << (int) DF_MODE;
2842	  else if (GET_MODE_SIZE (i) == 16)
2843	    sparc_mode_class[i] = 1 << (int) TF_MODE;
2844	  else if (GET_MODE_SIZE (i) == 32)
2845	    sparc_mode_class[i] = 1 << (int) OF_MODE;
2846	  else
2847	    sparc_mode_class[i] = 0;
2848	  break;
2849	case MODE_CC:
2850	default:
2851	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
2852	     we must explicitly check for them here.  */
2853	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
2854	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
2855	  else if (i == (int) CCmode || i == (int) CC_NOOVmode
2856		   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
2857	    sparc_mode_class[i] = 1 << (int) CC_MODE;
2858	  else
2859	    sparc_mode_class[i] = 0;
2860	  break;
2861	}
2862    }
2863
2864  if (TARGET_ARCH64)
2865    hard_regno_mode_classes = hard_64bit_mode_classes;
2866  else
2867    hard_regno_mode_classes = hard_32bit_mode_classes;
2868
2869  /* Initialize the array used by REGNO_REG_CLASS.  */
2870  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2871    {
2872      if (i < 16 && TARGET_V8PLUS)
2873	sparc_regno_reg_class[i] = I64_REGS;
2874      else if (i < 32)
2875	sparc_regno_reg_class[i] = GENERAL_REGS;
2876      else if (i < 64)
2877	sparc_regno_reg_class[i] = FP_REGS;
2878      else if (i < 96)
2879	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
2880      else if (i < 100)
2881	sparc_regno_reg_class[i] = FPCC_REGS;
2882      else
2883	sparc_regno_reg_class[i] = NO_REGS;
2884    }
2885}
2886
2887/* Save non call used registers from LOW to HIGH at BASE+OFFSET.
2888   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2889   v9 int regs as it simplifies the code.  */
2890
2891static int
2892save_regs (file, low, high, base, offset, n_regs, real_offset)
2893     FILE *file;
2894     int low, high;
2895     const char *base;
2896     int offset;
2897     int n_regs;
2898     int real_offset;
2899{
2900  int i;
2901
2902  if (TARGET_ARCH64 && high <= 32)
2903    {
2904      for (i = low; i < high; i++)
2905	{
2906	  if (regs_ever_live[i] && ! call_used_regs[i])
2907	    {
2908	      fprintf (file, "\tstx\t%s, [%s+%d]\n",
2909		       reg_names[i], base, offset + 4 * n_regs);
2910	      if (dwarf2out_do_frame ())
2911		dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
2912	      n_regs += 2;
2913	    }
2914	}
2915    }
2916  else
2917    {
2918      for (i = low; i < high; i += 2)
2919	{
2920	  if (regs_ever_live[i] && ! call_used_regs[i])
2921	    {
2922	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2923		{
2924		  fprintf (file, "\tstd\t%s, [%s+%d]\n",
2925			   reg_names[i], base, offset + 4 * n_regs);
2926		  if (dwarf2out_do_frame ())
2927		    {
2928		      char *l = dwarf2out_cfi_label ();
2929		      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
2930		      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
2931		    }
2932		  n_regs += 2;
2933		}
2934	      else
2935		{
2936		  fprintf (file, "\tst\t%s, [%s+%d]\n",
2937			   reg_names[i], base, offset + 4 * n_regs);
2938		  if (dwarf2out_do_frame ())
2939		    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
2940		  n_regs += 2;
2941		}
2942	    }
2943	  else
2944	    {
2945	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2946		{
2947		  fprintf (file, "\tst\t%s, [%s+%d]\n",
2948			   reg_names[i+1], base, offset + 4 * n_regs + 4);
2949		  if (dwarf2out_do_frame ())
2950		    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
2951		  n_regs += 2;
2952		}
2953	    }
2954	}
2955    }
2956  return n_regs;
2957}
2958
2959/* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
2960
2961   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2962   v9 int regs as it simplifies the code.  */
2963
2964static int
2965restore_regs (file, low, high, base, offset, n_regs)
2966     FILE *file;
2967     int low, high;
2968     const char *base;
2969     int offset;
2970     int n_regs;
2971{
2972  int i;
2973
2974  if (TARGET_ARCH64 && high <= 32)
2975    {
2976      for (i = low; i < high; i++)
2977	{
2978	  if (regs_ever_live[i] && ! call_used_regs[i])
2979	    fprintf (file, "\tldx\t[%s+%d], %s\n",
2980	      base, offset + 4 * n_regs, reg_names[i]),
2981	    n_regs += 2;
2982	}
2983    }
2984  else
2985    {
2986      for (i = low; i < high; i += 2)
2987	{
2988	  if (regs_ever_live[i] && ! call_used_regs[i])
2989	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2990	      fprintf (file, "\tldd\t[%s+%d], %s\n",
2991		       base, offset + 4 * n_regs, reg_names[i]),
2992	      n_regs += 2;
2993	    else
2994	      fprintf (file, "\tld\t[%s+%d],%s\n",
2995		       base, offset + 4 * n_regs, reg_names[i]),
2996	      n_regs += 2;
2997	  else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2998	    fprintf (file, "\tld\t[%s+%d],%s\n",
2999		     base, offset + 4 * n_regs + 4, reg_names[i+1]),
3000	    n_regs += 2;
3001	}
3002    }
3003  return n_regs;
3004}
3005
/* Static variables we want to share between prologue and epilogue.  */

/* Number of live general or floating point registers needed to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE; the value
   is recomputed by every call to compute_frame_size.  */
static int num_gfregs;
3011
3012/* Compute the frame size required by the function.  This function is called
3013   during the reload pass and also by output_function_prologue().  */
3014
3015int
3016compute_frame_size (size, leaf_function)
3017     int size;
3018     int leaf_function;
3019{
3020  int n_regs = 0, i;
3021  int outgoing_args_size = (current_function_outgoing_args_size
3022			    + REG_PARM_STACK_SPACE (current_function_decl));
3023
3024  if (TARGET_EPILOGUE)
3025    {
3026      /* N_REGS is the number of 4-byte regs saved thus far.  This applies
3027	 even to v9 int regs to be consistent with save_regs/restore_regs.  */
3028
3029      if (TARGET_ARCH64)
3030	{
3031	  for (i = 0; i < 8; i++)
3032	    if (regs_ever_live[i] && ! call_used_regs[i])
3033	      n_regs += 2;
3034	}
3035      else
3036	{
3037	  for (i = 0; i < 8; i += 2)
3038	    if ((regs_ever_live[i] && ! call_used_regs[i])
3039		|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3040	      n_regs += 2;
3041	}
3042
3043      for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3044	if ((regs_ever_live[i] && ! call_used_regs[i])
3045	    || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3046	  n_regs += 2;
3047    }
3048
3049  /* Set up values for use in `function_epilogue'.  */
3050  num_gfregs = n_regs;
3051
3052  if (leaf_function && n_regs == 0
3053      && size == 0 && current_function_outgoing_args_size == 0)
3054    {
3055      actual_fsize = apparent_fsize = 0;
3056    }
3057  else
3058    {
3059      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
3060         The stack bias (if any) is taken out to undo its effects.  */
3061      apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
3062      apparent_fsize += n_regs * 4;
3063      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3064    }
3065
3066  /* Make sure nothing can clobber our register windows.
3067     If a SAVE must be done, or there is a stack-local variable,
3068     the register window area must be allocated.
3069     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
3070  if (leaf_function == 0 || size > 0)
3071    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);
3072
3073  return SPARC_STACK_ALIGN (actual_fsize);
3074}
3075
3076/* Build a (32 bit) big number in a register.  */
3077/* ??? We may be able to use the set macro here too.  */
3078
3079static void
3080build_big_number (file, num, reg)
3081     FILE *file;
3082     int num;
3083     const char *reg;
3084{
3085  if (num >= 0 || ! TARGET_ARCH64)
3086    {
3087      fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3088      if ((num & 0x3ff) != 0)
3089	fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3090    }
3091  else /* num < 0 && TARGET_ARCH64 */
3092    {
3093      /* Sethi does not sign extend, so we must use a little trickery
3094	 to use it for negative numbers.  Invert the constant before
3095	 loading it in, then use xor immediate to invert the loaded bits
3096	 (along with the upper 32 bits) to the desired constant.  This
3097	 works because the sethi and immediate fields overlap.  */
3098      int asize = num;
3099      int inv = ~asize;
3100      int low = -0x400 + (asize & 0x3FF);
3101
3102      fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3103	       inv, reg, reg, low, reg);
3104    }
3105}
3106
3107/* Output code for the function prologue.  */
3108
3109void
3110output_function_prologue (file, size, leaf_function)
3111     FILE *file;
3112     int size;
3113     int leaf_function;
3114{
3115  /* Need to use actual_fsize, since we are also allocating
3116     space for our callee (and our own register save area).  */
3117  actual_fsize = compute_frame_size (size, leaf_function);
3118
3119  if (leaf_function)
3120    {
3121      frame_base_name = "%sp";
3122      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
3123    }
3124  else
3125    {
3126      frame_base_name = "%fp";
3127      frame_base_offset = SPARC_STACK_BIAS;
3128    }
3129
3130  /* This is only for the human reader.  */
3131  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
3132
3133  if (actual_fsize == 0)
3134    /* do nothing.  */ ;
3135  else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
3136    {
3137      if (actual_fsize <= 4096)
3138	fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
3139      else if (actual_fsize <= 8192)
3140	{
3141	  fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
3142	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3143	}
3144      else
3145	{
3146	  build_big_number (file, -actual_fsize, "%g1");
3147	  fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
3148	}
3149    }
3150  else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
3151    {
3152      /* We assume the environment will properly handle or otherwise avoid
3153	 trouble associated with an interrupt occurring after the `save' or
3154	 trap occurring during it.  */
3155      fprintf (file, "\tsave\n");
3156
3157      if (actual_fsize <= 4096)
3158	fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
3159      else if (actual_fsize <= 8192)
3160	{
3161	  fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
3162	  fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
3163	}
3164      else
3165	{
3166	  build_big_number (file, -actual_fsize, "%g1");
3167	  fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
3168	}
3169    }
3170  else /* leaf function */
3171    {
3172      if (actual_fsize <= 4096)
3173	fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
3174      else if (actual_fsize <= 8192)
3175	{
3176	  fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
3177	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3178	}
3179      else
3180	{
3181	  build_big_number (file, -actual_fsize, "%g1");
3182	  fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
3183	}
3184    }
3185
3186  if (dwarf2out_do_frame () && actual_fsize)
3187    {
3188      char *label = dwarf2out_cfi_label ();
3189
3190      /* The canonical frame address refers to the top of the frame.  */
3191      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
3192				 : FRAME_POINTER_REGNUM),
3193			 frame_base_offset);
3194
3195      if (! leaf_function)
3196	{
3197	  /* Note the register window save.  This tells the unwinder that
3198	     it needs to restore the window registers from the previous
3199	     frame's window save area at 0(cfa).  */
3200	  dwarf2out_window_save (label);
3201
3202	  /* The return address (-8) is now in %i7.  */
3203	  dwarf2out_return_reg (label, 31);
3204	}
3205    }
3206
3207  /* If doing anything with PIC, do it now.  */
3208  if (! flag_pic)
3209    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
3210
3211  /* Call saved registers are saved just above the outgoing argument area.  */
3212  if (num_gfregs)
3213    {
3214      int offset, real_offset, n_regs;
3215      const char *base;
3216
3217      real_offset = -apparent_fsize;
3218      offset = -apparent_fsize + frame_base_offset;
3219      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
3220	{
3221	  /* ??? This might be optimized a little as %g1 might already have a
3222	     value close enough that a single add insn will do.  */
3223	  /* ??? Although, all of this is probably only a temporary fix
3224	     because if %g1 can hold a function result, then
3225	     output_function_epilogue will lose (the result will get
3226	     clobbered).  */
3227	  build_big_number (file, offset, "%g1");
3228	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3229	  base = "%g1";
3230	  offset = 0;
3231	}
3232      else
3233	{
3234	  base = frame_base_name;
3235	}
3236
3237      n_regs = 0;
3238      if (TARGET_EPILOGUE && ! leaf_function)
3239	/* ??? Originally saved regs 0-15 here.  */
3240	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3241      else if (leaf_function)
3242	/* ??? Originally saved regs 0-31 here.  */
3243	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3244      if (TARGET_EPILOGUE)
3245	save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
3246		   real_offset);
3247    }
3248
3249  leaf_label = 0;
3250  if (leaf_function && actual_fsize != 0)
3251    {
3252      /* warning ("leaf procedure with frame size %d", actual_fsize); */
3253      if (! TARGET_EPILOGUE)
3254	leaf_label = gen_label_rtx ();
3255    }
3256}
3257
3258/* Output code for the function epilogue.  */
3259
3260void
3261output_function_epilogue (file, size, leaf_function)
3262     FILE *file;
3263     int size ATTRIBUTE_UNUSED;
3264     int leaf_function;
3265{
3266  const char *ret;
3267
3268  if (leaf_label)
3269    {
3270      emit_label_after (leaf_label, get_last_insn ());
3271      final_scan_insn (get_last_insn (), file, 0, 0, 1);
3272    }
3273
3274#ifdef FUNCTION_BLOCK_PROFILER_EXIT
3275  else if (profile_block_flag == 2)
3276    {
3277      FUNCTION_BLOCK_PROFILER_EXIT(file);
3278    }
3279#endif
3280
3281  else if (current_function_epilogue_delay_list == 0)
3282    {
3283      /* If code does not drop into the epilogue, we need
3284	 do nothing except output pending case vectors.  */
3285      rtx insn = get_last_insn ();
3286      if (GET_CODE (insn) == NOTE)
3287      insn = prev_nonnote_insn (insn);
3288      if (insn && GET_CODE (insn) == BARRIER)
3289      goto output_vectors;
3290    }
3291
3292  /* Restore any call saved registers.  */
3293  if (num_gfregs)
3294    {
3295      int offset, n_regs;
3296      const char *base;
3297
3298      offset = -apparent_fsize + frame_base_offset;
3299      if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
3300	{
3301	  build_big_number (file, offset, "%g1");
3302	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3303	  base = "%g1";
3304	  offset = 0;
3305	}
3306      else
3307	{
3308	  base = frame_base_name;
3309	}
3310
3311      n_regs = 0;
3312      if (TARGET_EPILOGUE && ! leaf_function)
3313	/* ??? Originally saved regs 0-15 here.  */
3314	n_regs = restore_regs (file, 0, 8, base, offset, 0);
3315      else if (leaf_function)
3316	/* ??? Originally saved regs 0-31 here.  */
3317	n_regs = restore_regs (file, 0, 8, base, offset, 0);
3318      if (TARGET_EPILOGUE)
3319	restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
3320    }
3321
3322  /* Work out how to skip the caller's unimp instruction if required.  */
3323  if (leaf_function)
3324    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
3325  else
3326    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");
3327
3328  if (TARGET_EPILOGUE || leaf_label)
3329    {
3330      int old_target_epilogue = TARGET_EPILOGUE;
3331      target_flags &= ~old_target_epilogue;
3332
3333      if (! leaf_function)
3334	{
3335	  /* If we wound up with things in our delay slot, flush them here.  */
3336	  if (current_function_epilogue_delay_list)
3337	    {
3338	      rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
3339					       get_last_insn ());
3340	      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
3341					gen_rtvec (2,
3342						   PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
3343						   PATTERN (insn)));
3344	      final_scan_insn (insn, file, 1, 0, 1);
3345	    }
3346	  else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
3347	    fputs ("\treturn\t%i7+8\n\tnop\n", file);
3348	  else
3349	    fprintf (file, "\t%s\n\trestore\n", ret);
3350	}
3351      /* All of the following cases are for leaf functions.  */
3352      else if (current_function_epilogue_delay_list)
3353	{
3354	  /* eligible_for_epilogue_delay_slot ensures that if this is a
3355	     leaf function, then we will only have insn in the delay slot
3356	     if the frame size is zero, thus no adjust for the stack is
3357	     needed here.  */
3358	  if (actual_fsize != 0)
3359	    abort ();
3360	  fprintf (file, "\t%s\n", ret);
3361	  final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
3362			   file, 1, 0, 1);
3363	}
3364      /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
3365	 avoid generating confusing assembly language output.  */
3366      else if (actual_fsize == 0)
3367	fprintf (file, "\t%s\n\tnop\n", ret);
3368      else if (actual_fsize <= 4096)
3369	fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
3370      else if (actual_fsize <= 8192)
3371	fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
3372		 ret, actual_fsize - 4096);
3373      else if ((actual_fsize & 0x3ff) == 0)
3374	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
3375		 actual_fsize, ret);
3376      else
3377	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
3378		 actual_fsize, actual_fsize, ret);
3379      target_flags |= old_target_epilogue;
3380    }
3381
3382 output_vectors:
3383  sparc_output_deferred_case_vectors ();
3384}
3385
3386/* Functions for handling argument passing.
3387
3388   For v8 the first six args are normally in registers and the rest are
3389   pushed.  Any arg that starts within the first 6 words is at least
3390   partially passed in a register unless its data type forbids.
3391
3392   For v9, the argument registers are laid out as an array of 16 elements
3393   and arguments are added sequentially.  The first 6 int args and up to the
3394   first 16 fp args (depending on size) are passed in regs.
3395
3396   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
3397   ----    -----   --------   -----   ------------------   ------   -----------
3398    15   [SP+248]              %f31       %f30,%f31         %d30
3399    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
3400    13   [SP+232]              %f27       %f26,%f27         %d26
3401    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
3402    11   [SP+216]              %f23       %f22,%f23         %d22
3403    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
3404     9   [SP+200]              %f19       %f18,%f19         %d18
3405     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
3406     7   [SP+184]              %f15       %f14,%f15         %d14
3407     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
3408     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
3409     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
3410     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
3411     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
3412     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
3413     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
3414
3415   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
3416
3417   Integral arguments are always passed as 64 bit quantities appropriately
3418   extended.
3419
3420   Passing of floating point values is handled as follows.
3421   If a prototype is in scope:
     If the value is in a named argument (i.e. the function is not a stdarg
     function, or the value is not part of the `...') then the value is
     passed in the appropriate fp reg.
3425     If the value is part of the `...' and is passed in one of the first 6
3426     slots then the value is passed in the appropriate int reg.
3427     If the value is part of the `...' and is not passed in one of the first 6
3428     slots then the value is passed in memory.
3429   If a prototype is not in scope:
3430     If the value is one of the first 6 arguments the value is passed in the
3431     appropriate integer reg and the appropriate fp reg.
3432     If the value is not one of the first 6 arguments the value is passed in
3433     the appropriate fp reg and in memory.
3434   */
3435
/* Maximum number of int regs for args.  */
#define SPARC_INT_ARG_MAX 6
/* Maximum number of fp regs for args.  */
#define SPARC_FP_ARG_MAX 16

/* Number of words needed to hold SIZE bytes, rounding up to a whole
   number of UNITS_PER_WORD-sized words.  */
#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3442
3443/* Handle the INIT_CUMULATIVE_ARGS macro.
3444   Initialize a variable CUM of type CUMULATIVE_ARGS
3445   for a call to a function whose data type is FNTYPE.
3446   For a library call, FNTYPE is 0.  */
3447
3448void
3449init_cumulative_args (cum, fntype, libname, indirect)
3450     CUMULATIVE_ARGS *cum;
3451     tree fntype;
3452     tree libname ATTRIBUTE_UNUSED;
3453     int indirect ATTRIBUTE_UNUSED;
3454{
3455  cum->words = 0;
3456  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3457  cum->libcall_p = fntype == 0;
3458}
3459
3460/* Compute the slot number to pass an argument in.
3461   Returns the slot number or -1 if passing on the stack.
3462
3463   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3464    the preceding args and about the function being called.
3465   MODE is the argument's machine mode.
3466   TYPE is the data type of the argument (as a tree).
3467    This is null for libcalls where that information may
3468    not be available.
3469   NAMED is nonzero if this argument is a named parameter
3470    (otherwise it is an extra parameter matching an ellipsis).
3471   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3472   *PREGNO records the register number to use if scalar type.
3473   *PPADDING records the amount of padding needed in words.  */
3474
3475static int
3476function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
3477     const CUMULATIVE_ARGS *cum;
3478     enum machine_mode mode;
3479     tree type;
3480     int named;
3481     int incoming_p;
3482     int *pregno;
3483     int *ppadding;
3484{
3485  int regbase = (incoming_p
3486		 ? SPARC_INCOMING_INT_ARG_FIRST
3487		 : SPARC_OUTGOING_INT_ARG_FIRST);
3488  int slotno = cum->words;
3489  int regno;
3490
3491  *ppadding = 0;
3492
3493  if (type != 0 && TREE_ADDRESSABLE (type))
3494    return -1;
3495  if (TARGET_ARCH32
3496      && type != 0 && mode == BLKmode
3497      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
3498    return -1;
3499
3500  switch (mode)
3501    {
3502    case VOIDmode :
3503      /* MODE is VOIDmode when generating the actual call.
3504	 See emit_call_1.  */
3505      return -1;
3506
3507    case QImode : case CQImode :
3508    case HImode : case CHImode :
3509    case SImode : case CSImode :
3510    case DImode : case CDImode :
3511      if (slotno >= SPARC_INT_ARG_MAX)
3512	return -1;
3513      regno = regbase + slotno;
3514      break;
3515
3516    case SFmode : case SCmode :
3517    case DFmode : case DCmode :
3518    case TFmode : case TCmode :
3519      if (TARGET_ARCH32)
3520	{
3521	  if (slotno >= SPARC_INT_ARG_MAX)
3522	    return -1;
3523	  regno = regbase + slotno;
3524	}
3525      else
3526	{
3527	  if ((mode == TFmode || mode == TCmode)
3528	      && (slotno & 1) != 0)
3529	    slotno++, *ppadding = 1;
3530	  if (TARGET_FPU && named)
3531	    {
3532	      if (slotno >= SPARC_FP_ARG_MAX)
3533		return -1;
3534	      regno = SPARC_FP_ARG_FIRST + slotno * 2;
3535	      if (mode == SFmode)
3536		regno++;
3537	    }
3538	  else
3539	    {
3540	      if (slotno >= SPARC_INT_ARG_MAX)
3541		return -1;
3542	      regno = regbase + slotno;
3543	    }
3544	}
3545      break;
3546
3547    case BLKmode :
3548      /* For sparc64, objects requiring 16 byte alignment get it.  */
3549      if (TARGET_ARCH64)
3550	{
3551	  if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
3552	    slotno++, *ppadding = 1;
3553	}
3554
3555      if (TARGET_ARCH32
3556	  || (type && TREE_CODE (type) == UNION_TYPE))
3557	{
3558	  if (slotno >= SPARC_INT_ARG_MAX)
3559	    return -1;
3560	  regno = regbase + slotno;
3561	}
3562      else
3563	{
3564	  tree field;
3565	  int intregs_p = 0, fpregs_p = 0;
3566	  /* The ABI obviously doesn't specify how packed
3567	     structures are passed.  These are defined to be passed
3568	     in int regs if possible, otherwise memory.  */
3569	  int packed_p = 0;
3570
3571	  /* First see what kinds of registers we need.  */
3572	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3573	    {
3574	      if (TREE_CODE (field) == FIELD_DECL)
3575		{
3576		  if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3577		      && TARGET_FPU)
3578		    fpregs_p = 1;
3579		  else
3580		    intregs_p = 1;
3581		  if (DECL_PACKED (field))
3582		    packed_p = 1;
3583		}
3584	    }
3585	  if (packed_p || !named)
3586	    fpregs_p = 0, intregs_p = 1;
3587
3588	  /* If all arg slots are filled, then must pass on stack.  */
3589	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
3590	    return -1;
3591	  /* If there are only int args and all int arg slots are filled,
3592	     then must pass on stack.  */
3593	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
3594	    return -1;
3595	  /* Note that even if all int arg slots are filled, fp members may
3596	     still be passed in regs if such regs are available.
3597	     *PREGNO isn't set because there may be more than one, it's up
3598	     to the caller to compute them.  */
3599	  return slotno;
3600	}
3601      break;
3602
3603    default :
3604      abort ();
3605    }
3606
3607  *pregno = regno;
3608  return slotno;
3609}
3610
/* Handle recursive register counting for structure field layout.  */

/* State shared by the function_arg_record_value helpers while a record
   argument is processed:
     RET        the PARALLEL being filled in (NULL_RTX while registers
                are only being counted);
     SLOTNO     argument slot at which the record starts;
     NAMED      nonzero if the argument is a named parameter;
     REGBASE    first integer argument register number to use;
     NREGS      number of registers counted or filled in so far;
     INTOFFSET  bit offset at which the pending run of fields destined
                for integer registers began, or -1 if none is pending.  */
struct function_arg_record_value_parms
{
  rtx ret;
  int slotno, named, regbase;
  int nregs, intoffset;
};

static void function_arg_record_value_3
	PROTO((int, struct function_arg_record_value_parms *));
static void function_arg_record_value_2
	PROTO((tree, int, struct function_arg_record_value_parms *));
static rtx function_arg_record_value
	PROTO((tree, enum machine_mode, int, int, int));
3626
3627static void
3628function_arg_record_value_1 (type, startbitpos, parms)
3629     tree type;
3630     int startbitpos;
3631     struct function_arg_record_value_parms *parms;
3632{
3633  tree field;
3634
3635  /* The ABI obviously doesn't specify how packed structures are
3636     passed.  These are defined to be passed in int regs if possible,
3637     otherwise memory.  */
3638  int packed_p = 0;
3639
3640  /* We need to compute how many registers are needed so we can
3641     allocate the PARALLEL but before we can do that we need to know
3642     whether there are any packed fields.  If there are, int regs are
3643     used regardless of whether there are fp values present.  */
3644  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3645    {
3646      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3647	{
3648	  packed_p = 1;
3649	  break;
3650	}
3651    }
3652
3653  /* Compute how many registers we need.  */
3654  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3655    {
3656      if (TREE_CODE (field) == FIELD_DECL)
3657	{
3658	  int bitpos = startbitpos;
3659	  if (DECL_FIELD_BITPOS (field))
3660	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3661	  /* ??? FIXME: else assume zero offset.  */
3662
3663	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3664	    {
3665	      function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
3666	    }
3667	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3668	           && TARGET_FPU
3669	           && ! packed_p
3670	           && parms->named)
3671	    {
3672	      if (parms->intoffset != -1)
3673		{
3674		  int intslots, this_slotno;
3675
3676		  intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
3677		    / BITS_PER_WORD;
3678		  this_slotno = parms->slotno + parms->intoffset
3679		    / BITS_PER_WORD;
3680
3681		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3682		  intslots = MAX (intslots, 0);
3683		  parms->nregs += intslots;
3684		  parms->intoffset = -1;
3685		}
3686
3687	      /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
3688		 If it wasn't true we wouldn't be here.  */
3689	      parms->nregs += 1;
3690	    }
3691	  else
3692	    {
3693	      if (parms->intoffset == -1)
3694		parms->intoffset = bitpos;
3695	    }
3696	}
3697    }
3698}
3699
3700/* Handle recursive structure field register assignment.  */
3701
3702static void
3703function_arg_record_value_3 (bitpos, parms)
3704     int bitpos;
3705     struct function_arg_record_value_parms *parms;
3706{
3707  enum machine_mode mode;
3708  int regno, this_slotno, intslots, intoffset;
3709  rtx reg;
3710
3711  if (parms->intoffset == -1)
3712    return;
3713  intoffset = parms->intoffset;
3714  parms->intoffset = -1;
3715
3716  intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
3717  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
3718
3719  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3720  if (intslots <= 0)
3721    return;
3722
3723  /* If this is the trailing part of a word, only load that much into
3724     the register.  Otherwise load the whole register.  Note that in
3725     the latter case we may pick up unwanted bits.  It's not a problem
3726     at the moment but may wish to revisit.  */
3727
3728  if (intoffset % BITS_PER_WORD != 0)
3729    {
3730      mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
3731			    MODE_INT, 0);
3732    }
3733  else
3734    mode = word_mode;
3735
3736  intoffset /= BITS_PER_UNIT;
3737  do
3738    {
3739      regno = parms->regbase + this_slotno;
3740      reg = gen_rtx_REG (mode, regno);
3741      XVECEXP (parms->ret, 0, parms->nregs)
3742	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
3743
3744      this_slotno += 1;
3745      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
3746      parms->nregs += 1;
3747      intslots -= 1;
3748    }
3749  while (intslots > 0);
3750}
3751
3752static void
3753function_arg_record_value_2 (type, startbitpos, parms)
3754     tree type;
3755     int startbitpos;
3756     struct function_arg_record_value_parms *parms;
3757{
3758  tree field;
3759  int packed_p = 0;
3760
3761  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3762    {
3763      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3764	{
3765	  packed_p = 1;
3766	  break;
3767	}
3768    }
3769
3770  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3771    {
3772      if (TREE_CODE (field) == FIELD_DECL)
3773	{
3774	  int bitpos = startbitpos;
3775	  if (DECL_FIELD_BITPOS (field))
3776	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3777	  /* ??? FIXME: else assume zero offset.  */
3778
3779	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3780	    {
3781	      function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
3782	    }
3783	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3784	           && TARGET_FPU
3785	           && ! packed_p
3786	           && parms->named)
3787	    {
3788	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
3789	      rtx reg;
3790
3791	      function_arg_record_value_3 (bitpos, parms);
3792
3793	      reg = gen_rtx_REG (DECL_MODE (field),
3794			         (SPARC_FP_ARG_FIRST + this_slotno * 2
3795			          + (DECL_MODE (field) == SFmode
3796				     && (bitpos & 32) != 0)));
3797	      XVECEXP (parms->ret, 0, parms->nregs)
3798		= gen_rtx_EXPR_LIST (VOIDmode, reg,
3799			   GEN_INT (bitpos / BITS_PER_UNIT));
3800	      parms->nregs += 1;
3801	    }
3802	  else
3803	    {
3804	      if (parms->intoffset == -1)
3805		parms->intoffset = bitpos;
3806	    }
3807	}
3808    }
3809}
3810
3811static rtx
3812function_arg_record_value (type, mode, slotno, named, regbase)
3813     tree type;
3814     enum machine_mode mode;
3815     int slotno, named, regbase;
3816{
3817  HOST_WIDE_INT typesize = int_size_in_bytes (type);
3818  struct function_arg_record_value_parms parms;
3819  int nregs;
3820
3821  parms.ret = NULL_RTX;
3822  parms.slotno = slotno;
3823  parms.named = named;
3824  parms.regbase = regbase;
3825
3826  /* Compute how many registers we need.  */
3827  parms.nregs = 0;
3828  parms.intoffset = 0;
3829  function_arg_record_value_1 (type, 0, &parms);
3830
3831  if (parms.intoffset != -1)
3832    {
3833      int intslots, this_slotno;
3834
3835      intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
3836	/ BITS_PER_WORD;
3837      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
3838
3839      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3840      intslots = MAX (intslots, 0);
3841
3842      parms.nregs += intslots;
3843    }
3844  nregs = parms.nregs;
3845
3846  /* Allocate the vector and handle some annoying special cases.  */
3847  if (nregs == 0)
3848    {
3849      /* ??? Empty structure has no value?  Duh?  */
3850      if (typesize <= 0)
3851	{
3852	  /* Though there's nothing really to store, return a word register
3853	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
3854	     leads to breakage due to the fact that there are zero bytes to
3855	     load.  */
3856	  return gen_rtx_REG (mode, regbase);
3857	}
3858      else
3859	{
3860	  /* ??? C++ has structures with no fields, and yet a size.  Give up
3861	     for now and pass everything back in integer registers.  */
3862	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3863	}
3864      if (nregs + slotno > SPARC_INT_ARG_MAX)
3865	nregs = SPARC_INT_ARG_MAX - slotno;
3866    }
3867  if (nregs == 0)
3868    abort ();
3869
3870  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3871
3872  /* Fill in the entries.  */
3873  parms.nregs = 0;
3874  parms.intoffset = 0;
3875  function_arg_record_value_2 (type, 0, &parms);
3876  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
3877
3878  if (parms.nregs != nregs)
3879    abort ();
3880
3881  return parms.ret;
3882}
3883
3884/* Handle the FUNCTION_ARG macro.
3885   Determine where to put an argument to a function.
3886   Value is zero to push the argument on the stack,
3887   or a hard register in which to store the argument.
3888
3889   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3890    the preceding args and about the function being called.
3891   MODE is the argument's machine mode.
3892   TYPE is the data type of the argument (as a tree).
3893    This is null for libcalls where that information may
3894    not be available.
3895   NAMED is nonzero if this argument is a named parameter
3896    (otherwise it is an extra parameter matching an ellipsis).
3897   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
3898
3899rtx
3900function_arg (cum, mode, type, named, incoming_p)
3901     const CUMULATIVE_ARGS *cum;
3902     enum machine_mode mode;
3903     tree type;
3904     int named;
3905     int incoming_p;
3906{
3907  int regbase = (incoming_p
3908		 ? SPARC_INCOMING_INT_ARG_FIRST
3909		 : SPARC_OUTGOING_INT_ARG_FIRST);
3910  int slotno, regno, padding;
3911  rtx reg;
3912
3913  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
3914				&regno, &padding);
3915
3916  if (slotno == -1)
3917    return 0;
3918
3919  if (TARGET_ARCH32)
3920    {
3921      reg = gen_rtx_REG (mode, regno);
3922      return reg;
3923    }
3924
3925  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
3926     but also have the slot allocated for them.
3927     If no prototype is in scope fp values in register slots get passed
3928     in two places, either fp regs and int regs or fp regs and memory.  */
3929  if ((GET_MODE_CLASS (mode) == MODE_FLOAT
3930       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3931      && SPARC_FP_REG_P (regno))
3932    {
3933      reg = gen_rtx_REG (mode, regno);
3934      if (cum->prototype_p || cum->libcall_p)
3935	{
3936	  /* "* 2" because fp reg numbers are recorded in 4 byte
3937	     quantities.  */
3938#if 0
3939	  /* ??? This will cause the value to be passed in the fp reg and
3940	     in the stack.  When a prototype exists we want to pass the
3941	     value in the reg but reserve space on the stack.  That's an
3942	     optimization, and is deferred [for a bit].  */
3943	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
3944	    return gen_rtx_PARALLEL (mode,
3945			    gen_rtvec (2,
3946				       gen_rtx_EXPR_LIST (VOIDmode,
3947						NULL_RTX, const0_rtx),
3948				       gen_rtx_EXPR_LIST (VOIDmode,
3949						reg, const0_rtx)));
3950	  else
3951#else
3952	  /* ??? It seems that passing back a register even when past
3953	     the area declared by REG_PARM_STACK_SPACE will allocate
3954	     space appropriately, and will not copy the data onto the
3955	     stack, exactly as we desire.
3956
3957	     This is due to locate_and_pad_parm being called in
3958	     expand_call whenever reg_parm_stack_space > 0, which
3959	     while benefical to our example here, would seem to be
3960	     in error from what had been intended.  Ho hum...  -- r~ */
3961#endif
3962	    return reg;
3963	}
3964      else
3965	{
3966	  rtx v0, v1;
3967
3968	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
3969	    {
3970	      int intreg;
3971
3972	      /* On incoming, we don't need to know that the value
3973		 is passed in %f0 and %i0, and it confuses other parts
3974		 causing needless spillage even on the simplest cases.  */
3975	      if (incoming_p)
3976		return reg;
3977
3978	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
3979			+ (regno - SPARC_FP_ARG_FIRST) / 2);
3980
3981	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3982	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
3983				      const0_rtx);
3984	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3985	    }
3986	  else
3987	    {
3988	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
3989	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
3990	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
3991	    }
3992	}
3993    }
3994  else if (type && TREE_CODE (type) == RECORD_TYPE)
3995    {
3996      /* Structures up to 16 bytes in size are passed in arg slots on the
3997	 stack and are promoted to registers where possible.  */
3998
3999      if (int_size_in_bytes (type) > 16)
4000	abort (); /* shouldn't get here */
4001
4002      return function_arg_record_value (type, mode, slotno, named, regbase);
4003    }
4004  else if (type && TREE_CODE (type) == UNION_TYPE)
4005    {
4006      enum machine_mode mode;
4007      int bytes = int_size_in_bytes (type);
4008
4009      if (bytes > 16)
4010	abort ();
4011
4012      mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4013      reg = gen_rtx_REG (mode, regno);
4014    }
4015  else
4016    {
4017      /* Scalar or complex int.  */
4018      reg = gen_rtx_REG (mode, regno);
4019    }
4020
4021  return reg;
4022}
4023
4024/* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
4025   For an arg passed partly in registers and partly in memory,
4026   this is the number of registers used.
4027   For args passed entirely in registers or entirely in memory, zero.
4028
4029   Any arg that starts in the first 6 regs but won't entirely fit in them
4030   needs partial registers on v8.  On v9, structures with integer
4031   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4032   values that begin in the last fp reg [where "last fp reg" varies with the
4033   mode] will be split between that reg and memory.  */
4034
4035int
4036function_arg_partial_nregs (cum, mode, type, named)
4037     const CUMULATIVE_ARGS *cum;
4038     enum machine_mode mode;
4039     tree type;
4040     int named;
4041{
4042  int slotno, regno, padding;
4043
4044  /* We pass 0 for incoming_p here, it doesn't matter.  */
4045  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4046
4047  if (slotno == -1)
4048    return 0;
4049
4050  if (TARGET_ARCH32)
4051    {
4052      if ((slotno + (mode == BLKmode
4053		     ? ROUND_ADVANCE (int_size_in_bytes (type))
4054		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
4055	  > NPARM_REGS (SImode))
4056	return NPARM_REGS (SImode) - slotno;
4057      return 0;
4058    }
4059  else
4060    {
4061      if (type && AGGREGATE_TYPE_P (type))
4062	{
4063	  int size = int_size_in_bytes (type);
4064	  int align = TYPE_ALIGN (type);
4065
4066	  if (align == 16)
4067	    slotno += slotno & 1;
4068	  if (size > 8 && size <= 16
4069	      && slotno == SPARC_INT_ARG_MAX - 1)
4070	    return 1;
4071	}
4072      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
4073	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4074		   && ! TARGET_FPU))
4075	{
4076	  if (GET_MODE_ALIGNMENT (mode) == 128)
4077	    {
4078	      slotno += slotno & 1;
4079	      if (slotno == SPARC_INT_ARG_MAX - 2)
4080		return 1;
4081	    }
4082	  else
4083	    {
4084	      if (slotno == SPARC_INT_ARG_MAX - 1)
4085		return 1;
4086	    }
4087	}
4088      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4089	{
4090	  if (GET_MODE_ALIGNMENT (mode) == 128)
4091	    slotno += slotno & 1;
4092	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
4093	      > SPARC_FP_ARG_MAX)
4094	    return 1;
4095	}
4096      return 0;
4097    }
4098}
4099
4100/* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4101   !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4102   quad-precision floats by invisible reference.
4103   v9: Aggregates greater than 16 bytes are passed by reference.
4104   For Pascal, also pass arrays by reference.  */
4105
4106int
4107function_arg_pass_by_reference (cum, mode, type, named)
4108     const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4109     enum machine_mode mode;
4110     tree type;
4111     int named ATTRIBUTE_UNUSED;
4112{
4113  if (TARGET_ARCH32)
4114    {
4115      return ((type && AGGREGATE_TYPE_P (type))
4116	      || mode == TFmode || mode == TCmode);
4117    }
4118  else
4119    {
4120      return ((type && TREE_CODE (type) == ARRAY_TYPE)
4121	      /* Consider complex values as aggregates, so care for TCmode. */
4122	      || GET_MODE_SIZE (mode) > 16
4123	      || (type && AGGREGATE_TYPE_P (type)
4124		  && int_size_in_bytes (type) > 16));
4125    }
4126}
4127
4128/* Handle the FUNCTION_ARG_ADVANCE macro.
4129   Update the data in CUM to advance over an argument
4130   of mode MODE and data type TYPE.
4131   TYPE is null for libcalls where that information may not be available.  */
4132
4133void
4134function_arg_advance (cum, mode, type, named)
4135     CUMULATIVE_ARGS *cum;
4136     enum machine_mode mode;
4137     tree type;
4138     int named;
4139{
4140  int slotno, regno, padding;
4141
4142  /* We pass 0 for incoming_p here, it doesn't matter.  */
4143  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4144
4145  /* If register required leading padding, add it.  */
4146  if (slotno != -1)
4147    cum->words += padding;
4148
4149  if (TARGET_ARCH32)
4150    {
4151      cum->words += (mode != BLKmode
4152		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4153		     : ROUND_ADVANCE (int_size_in_bytes (type)));
4154    }
4155  else
4156    {
4157      if (type && AGGREGATE_TYPE_P (type))
4158	{
4159	  int size = int_size_in_bytes (type);
4160
4161	  if (size <= 8)
4162	    ++cum->words;
4163	  else if (size <= 16)
4164	    cum->words += 2;
4165	  else /* passed by reference */
4166	    ++cum->words;
4167	}
4168      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4169	{
4170	  cum->words += 2;
4171	}
4172      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4173	{
4174	  cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4175	}
4176      else
4177	{
4178	  cum->words += (mode != BLKmode
4179			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4180			 : ROUND_ADVANCE (int_size_in_bytes (type)));
4181	}
4182    }
4183}
4184
4185/* Handle the FUNCTION_ARG_PADDING macro.
4186   For the 64 bit ABI structs are always stored left shifted in their
4187   argument slot.  */
4188
4189enum direction
4190function_arg_padding (mode, type)
4191     enum machine_mode mode;
4192     tree type;
4193{
4194  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4195    return upward;
4196
4197  /* This is the default definition.  */
4198  return (! BYTES_BIG_ENDIAN
4199	  ? upward
4200	  : ((mode == BLKmode
4201	      ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4202		 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4203	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4204	     ? downward : upward));
4205}
4206
4207/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
4208   For v9, function return values are subject to the same rules as arguments,
4209   except that up to 32-bytes may be returned in registers.  */
4210
4211rtx
4212function_value (type, mode, incoming_p)
4213     tree type;
4214     enum machine_mode mode;
4215     int incoming_p;
4216{
4217  int regno;
4218  int regbase = (incoming_p
4219		 ? SPARC_OUTGOING_INT_ARG_FIRST
4220		 : SPARC_INCOMING_INT_ARG_FIRST);
4221
4222  if (TARGET_ARCH64 && type)
4223    {
4224      if (TREE_CODE (type) == RECORD_TYPE)
4225	{
4226	  /* Structures up to 32 bytes in size are passed in registers,
4227	     promoted to fp registers where possible.  */
4228
4229	  if (int_size_in_bytes (type) > 32)
4230	    abort (); /* shouldn't get here */
4231
4232	  return function_arg_record_value (type, mode, 0, 1, regbase);
4233	}
4234      else if (TREE_CODE (type) == UNION_TYPE)
4235	{
4236	  int bytes = int_size_in_bytes (type);
4237
4238	  if (bytes > 32)
4239	    abort ();
4240
4241	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
4242	}
4243    }
4244
4245  if (TARGET_ARCH64
4246      && GET_MODE_CLASS (mode) == MODE_INT
4247      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
4248      && type && TREE_CODE (type) != UNION_TYPE)
4249    mode = DImode;
4250
4251  if (incoming_p)
4252    regno = BASE_RETURN_VALUE_REG (mode);
4253  else
4254    regno = BASE_OUTGOING_VALUE_REG (mode);
4255
4256  return gen_rtx_REG (mode, regno);
4257}
4258
4259/* Do what is necessary for `va_start'.  The argument is ignored.
4260
4261   We look at the current function to determine if stdarg or varargs
4262   is used and return the address of the first unnamed parameter.  */
4263
4264rtx
4265sparc_builtin_saveregs (arglist)
4266     tree arglist ATTRIBUTE_UNUSED;
4267{
4268  int first_reg = current_function_args_info.words;
4269  rtx address;
4270  int regno;
4271
4272  for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
4273    emit_move_insn (gen_rtx_MEM (word_mode,
4274			     gen_rtx_PLUS (Pmode,
4275				      frame_pointer_rtx,
4276				      GEN_INT (STACK_POINTER_OFFSET
4277					       + UNITS_PER_WORD * regno))),
4278		    gen_rtx_REG (word_mode,
4279			     BASE_INCOMING_ARG_REG (word_mode) + regno));
4280
4281  address = gen_rtx_PLUS (Pmode,
4282		     frame_pointer_rtx,
4283		     GEN_INT (STACK_POINTER_OFFSET
4284			      + UNITS_PER_WORD * first_reg));
4285
4286  if (current_function_check_memory_usage
4287      && first_reg < NPARM_REGS (word_mode))
4288    emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4289		       address, ptr_mode,
4290		       GEN_INT (UNITS_PER_WORD
4291			 	* (NPARM_REGS (word_mode) - first_reg)),
4292		       TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
4293		       TYPE_MODE (integer_type_node));
4294
4295  return address;
4296}
4297
4298/* Return the string to output a conditional branch to LABEL, which is
4299   the operand number of the label.  OP is the conditional expression.
4300   XEXP (OP, 0) is assumed to be a condition code register (integer or
4301   floating point) and its mode specifies what kind of comparison we made.
4302
4303   REVERSED is non-zero if we should reverse the sense of the comparison.
4304
4305   ANNUL is non-zero if we should generate an annulling branch.
4306
4307   NOOP is non-zero if we have to follow this branch by a noop.
4308
4309   INSN, if set, is the insn.  */
4310
4311char *
4312output_cbranch (op, label, reversed, annul, noop, insn)
4313     rtx op;
4314     int label;
4315     int reversed, annul, noop;
4316     rtx insn;
4317{
4318  static char string[32];
4319  enum rtx_code code = GET_CODE (op);
4320  rtx cc_reg = XEXP (op, 0);
4321  enum machine_mode mode = GET_MODE (cc_reg);
4322  static char v8_labelno[] = "%lX";
4323  static char v9_icc_labelno[] = "%%icc, %lX";
4324  static char v9_xcc_labelno[] = "%%xcc, %lX";
4325  static char v9_fcc_labelno[] = "%%fccX, %lY";
4326  char *labelno;
4327  int labeloff, spaces = 8;
4328
4329  /* ??? !v9: FP branches cannot be preceded by another floating point insn.
4330     Because there is currently no concept of pre-delay slots, we can fix
4331     this only by always emitting a nop before a floating point branch.  */
4332
4333  if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
4334    strcpy (string, "nop\n\t");
4335  else
4336    string[0] = '\0';
4337
4338  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
4339  if (reversed
4340      && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
4341    code = reverse_condition (code), reversed = 0;
4342
4343  /* Start by writing the branch condition.  */
4344  switch (code)
4345    {
4346    case NE:
4347      if (mode == CCFPmode || mode == CCFPEmode)
4348	{
4349	  strcat (string, "fbne");
4350	  spaces -= 4;
4351	}
4352      else
4353	{
4354	  strcpy (string, "bne");
4355	  spaces -= 3;
4356	}
4357      break;
4358
4359    case EQ:
4360      if (mode == CCFPmode || mode == CCFPEmode)
4361	{
4362	  strcat (string, "fbe");
4363	  spaces -= 3;
4364	}
4365      else
4366	{
4367	  strcpy (string, "be");
4368	  spaces -= 2;
4369	}
4370      break;
4371
4372    case GE:
4373      if (mode == CCFPmode || mode == CCFPEmode)
4374	{
4375	  if (reversed)
4376	    strcat (string, "fbul");
4377	  else
4378	    strcat (string, "fbge");
4379	  spaces -= 4;
4380	}
4381      else if (mode == CC_NOOVmode)
4382	{
4383	  strcpy (string, "bpos");
4384	  spaces -= 4;
4385	}
4386      else
4387	{
4388	  strcpy (string, "bge");
4389	  spaces -= 3;
4390	}
4391      break;
4392
4393    case GT:
4394      if (mode == CCFPmode || mode == CCFPEmode)
4395	{
4396	  if (reversed)
4397	    {
4398	      strcat (string, "fbule");
4399	      spaces -= 5;
4400	    }
4401	  else
4402	    {
4403	      strcat (string, "fbg");
4404	      spaces -= 3;
4405	    }
4406	}
4407      else
4408	{
4409	  strcpy (string, "bg");
4410	  spaces -= 2;
4411	}
4412      break;
4413
4414    case LE:
4415      if (mode == CCFPmode || mode == CCFPEmode)
4416	{
4417	  if (reversed)
4418	    strcat (string, "fbug");
4419	  else
4420	    strcat (string, "fble");
4421	  spaces -= 4;
4422	}
4423      else
4424	{
4425	  strcpy (string, "ble");
4426	  spaces -= 3;
4427	}
4428      break;
4429
4430    case LT:
4431      if (mode == CCFPmode || mode == CCFPEmode)
4432	{
4433	  if (reversed)
4434	    {
4435	      strcat (string, "fbuge");
4436	      spaces -= 5;
4437	    }
4438	  else
4439	    {
4440	      strcat (string, "fbl");
4441	      spaces -= 3;
4442	    }
4443	}
4444      else if (mode == CC_NOOVmode)
4445	{
4446	  strcpy (string, "bneg");
4447	  spaces -= 4;
4448	}
4449      else
4450	{
4451	  strcpy (string, "bl");
4452	  spaces -= 2;
4453	}
4454      break;
4455
4456    case GEU:
4457      strcpy (string, "bgeu");
4458      spaces -= 4;
4459      break;
4460
4461    case GTU:
4462      strcpy (string, "bgu");
4463      spaces -= 3;
4464      break;
4465
4466    case LEU:
4467      strcpy (string, "bleu");
4468      spaces -= 4;
4469      break;
4470
4471    case LTU:
4472      strcpy (string, "blu");
4473      spaces -= 3;
4474      break;
4475
4476    default:
4477      abort ();
4478    }
4479
4480  /* Now add the annulling, the label, and a possible noop.  */
4481  if (annul)
4482    {
4483      strcat (string, ",a");
4484      spaces -= 2;
4485    }
4486
4487  if (! TARGET_V9)
4488    {
4489      labeloff = 2;
4490      labelno = v8_labelno;
4491    }
4492  else
4493    {
4494      rtx note;
4495
4496      if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4497	{
4498	  strcat (string,
4499		  INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4500	  spaces -= 3;
4501	}
4502
4503      labeloff = 9;
4504      if (mode == CCFPmode || mode == CCFPEmode)
4505	{
4506	  labeloff = 10;
4507	  labelno = v9_fcc_labelno;
4508	  /* Set the char indicating the number of the fcc reg to use.  */
4509	  labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
4510	}
4511      else if (mode == CCXmode || mode == CCX_NOOVmode)
4512	labelno = v9_xcc_labelno;
4513      else
4514	labelno = v9_icc_labelno;
4515    }
4516  /* Set the char indicating the number of the operand containing the
4517     label_ref.  */
4518  labelno[labeloff] = label + '0';
4519  if (spaces > 0)
4520    strcat (string, "\t");
4521  else
4522    strcat (string, " ");
4523  strcat (string, labelno);
4524
4525  if (noop)
4526    strcat (string, "\n\tnop");
4527
4528  return string;
4529}
4530
4531/* Return the string to output a conditional branch to LABEL, testing
4532   register REG.  LABEL is the operand number of the label; REG is the
4533   operand number of the reg.  OP is the conditional expression.  The mode
4534   of REG says what kind of comparison we made.
4535
4536   REVERSED is non-zero if we should reverse the sense of the comparison.
4537
4538   ANNUL is non-zero if we should generate an annulling branch.
4539
4540   NOOP is non-zero if we have to follow this branch by a noop.  */
4541
4542char *
4543output_v9branch (op, reg, label, reversed, annul, noop, insn)
4544     rtx op;
4545     int reg, label;
4546     int reversed, annul, noop;
4547     rtx insn;
4548{
4549  static char string[20];
4550  enum rtx_code code = GET_CODE (op);
4551  enum machine_mode mode = GET_MODE (XEXP (op, 0));
4552  static char labelno[] = "%X, %lX";
4553  rtx note;
4554  int spaces = 8;
4555
4556  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
4557  if (reversed)
4558    code = reverse_condition (code), reversed = 0;
4559
4560  /* Only 64 bit versions of these instructions exist.  */
4561  if (mode != DImode)
4562    abort ();
4563
4564  /* Start by writing the branch condition.  */
4565
4566  switch (code)
4567    {
4568    case NE:
4569      strcpy (string, "brnz");
4570      spaces -= 4;
4571      break;
4572
4573    case EQ:
4574      strcpy (string, "brz");
4575      spaces -= 3;
4576      break;
4577
4578    case GE:
4579      strcpy (string, "brgez");
4580      spaces -= 5;
4581      break;
4582
4583    case LT:
4584      strcpy (string, "brlz");
4585      spaces -= 4;
4586      break;
4587
4588    case LE:
4589      strcpy (string, "brlez");
4590      spaces -= 5;
4591      break;
4592
4593    case GT:
4594      strcpy (string, "brgz");
4595      spaces -= 4;
4596      break;
4597
4598    default:
4599      abort ();
4600    }
4601
4602  /* Now add the annulling, reg, label, and nop.  */
4603  if (annul)
4604    {
4605      strcat (string, ",a");
4606      spaces -= 2;
4607    }
4608
4609  if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
4610    {
4611      strcat (string,
4612	      INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
4613      spaces -= 3;
4614    }
4615
4616  labelno[1] = reg + '0';
4617  labelno[6] = label + '0';
4618  if (spaces > 0)
4619    strcat (string, "\t");
4620  else
4621    strcat (string, " ");
4622  strcat (string, labelno);
4623
4624  if (noop)
4625    strcat (string, "\n\tnop");
4626
4627  return string;
4628}
4629
4630/* Renumber registers in delay slot.  Replace registers instead of
4631   renumbering because they may be shared.
4632
4633   This does not handle instructions other than move.  */
4634
4635static void
4636epilogue_renumber (where)
4637     rtx *where;
4638{
4639  rtx x = *where;
4640  enum rtx_code code = GET_CODE (x);
4641
4642  switch (code)
4643    {
4644    case MEM:
4645      *where = x = copy_rtx (x);
4646      epilogue_renumber (&XEXP (x, 0));
4647      return;
4648
4649    case REG:
4650      {
4651	int regno = REGNO (x);
4652	if (regno > 8 && regno < 24)
4653	  abort ();
4654	if (regno >= 24 && regno < 32)
4655	  *where = gen_rtx_REG (GET_MODE (x), regno - 16);
4656	return;
4657      }
4658    case CONST_INT:
4659    case CONST_DOUBLE:
4660    case CONST:
4661    case SYMBOL_REF:
4662    case LABEL_REF:
4663      return;
4664
4665    case IOR:
4666    case AND:
4667    case XOR:
4668    case PLUS:
4669    case MINUS:
4670      epilogue_renumber (&XEXP (x, 1));
4671    case NEG:
4672    case NOT:
4673      epilogue_renumber (&XEXP (x, 0));
4674      return;
4675
4676    default:
4677      debug_rtx (*where);
4678      abort ();
4679    }
4680}
4681
4682/* Output assembler code to return from a function.  */
4683
4684const char *
4685output_return (operands)
4686     rtx *operands;
4687{
4688  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
4689
4690  if (leaf_label)
4691    {
4692      operands[0] = leaf_label;
4693      return "b%* %l0%(";
4694    }
4695  else if (current_function_uses_only_leaf_regs)
4696    {
4697      /* No delay slot in a leaf function.  */
4698      if (delay)
4699	abort ();
4700
4701      /* If we didn't allocate a frame pointer for the current function,
4702	 the stack pointer might have been adjusted.  Output code to
4703	 restore it now.  */
4704
4705      operands[0] = GEN_INT (actual_fsize);
4706
4707      /* Use sub of negated value in first two cases instead of add to
4708	 allow actual_fsize == 4096.  */
4709
4710      if (actual_fsize <= 4096)
4711	{
4712	  if (SKIP_CALLERS_UNIMP_P)
4713	    return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4714	  else
4715	    return "retl\n\tsub\t%%sp, -%0, %%sp";
4716	}
4717      else if (actual_fsize <= 8192)
4718	{
4719	  operands[0] = GEN_INT (actual_fsize - 4096);
4720	  if (SKIP_CALLERS_UNIMP_P)
4721	    return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
4722	  else
4723	    return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
4724	}
4725      else if (SKIP_CALLERS_UNIMP_P)
4726	{
4727	  if ((actual_fsize & 0x3ff) != 0)
4728	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4729	  else
4730	    return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
4731	}
4732      else
4733	{
4734	  if ((actual_fsize & 0x3ff) != 0)
4735	    return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4736	  else
4737	    return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
4738	}
4739    }
4740  else if (TARGET_V9)
4741    {
4742      if (delay)
4743	{
4744	  epilogue_renumber (&SET_DEST (PATTERN (delay)));
4745	  epilogue_renumber (&SET_SRC (PATTERN (delay)));
4746	}
4747      if (SKIP_CALLERS_UNIMP_P)
4748	return "return\t%%i7+12%#";
4749      else
4750	return "return\t%%i7+8%#";
4751    }
4752  else
4753    {
4754      if (delay)
4755	abort ();
4756      if (SKIP_CALLERS_UNIMP_P)
4757	return "jmp\t%%i7+12\n\trestore";
4758      else
4759	return "ret\n\trestore";
4760    }
4761}
4762
4763/* Leaf functions and non-leaf functions have different needs.  */
4764
4765static int
4766reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
4767
4768static int
4769reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
4770
4771static int *reg_alloc_orders[] = {
4772  reg_leaf_alloc_order,
4773  reg_nonleaf_alloc_order};
4774
4775void
4776order_regs_for_local_alloc ()
4777{
4778  static int last_order_nonleaf = 1;
4779
4780  if (regs_ever_live[15] != last_order_nonleaf)
4781    {
4782      last_order_nonleaf = !last_order_nonleaf;
4783      bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
4784	     (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
4785    }
4786}
4787
4788/* Return 1 if REG and MEM are legitimate enough to allow the various
4789   mem<-->reg splits to be run.  */
4790
4791int
4792sparc_splitdi_legitimate (reg, mem)
4793     rtx reg;
4794     rtx mem;
4795{
4796  /* Punt if we are here by mistake.  */
4797  if (! reload_completed)
4798    abort ();
4799
4800  /* We must have an offsettable memory reference.  */
4801  if (! offsettable_memref_p (mem))
4802    return 0;
4803
4804  /* If we have legitimate args for ldd/std, we do not want
4805     the split to happen.  */
4806  if ((REGNO (reg) % 2) == 0
4807      && mem_min_alignment (mem, 8))
4808    return 0;
4809
4810  /* Success.  */
4811  return 1;
4812}
4813
4814/* Return 1 if x and y are some kind of REG and they refer to
4815   different hard registers.  This test is guarenteed to be
4816   run after reload.  */
4817
4818int
4819sparc_absnegfloat_split_legitimate (x, y)
4820     rtx x, y;
4821{
4822  if (GET_CODE (x) == SUBREG)
4823    x = alter_subreg (x);
4824  if (GET_CODE (x) != REG)
4825    return 0;
4826  if (GET_CODE (y) == SUBREG)
4827    y = alter_subreg (y);
4828  if (GET_CODE (y) != REG)
4829    return 0;
4830  if (REGNO (x) == REGNO (y))
4831    return 0;
4832  return 1;
4833}
4834
4835/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
4836   This makes them candidates for using ldd and std insns.
4837
4838   Note reg1 and reg2 *must* be hard registers.  */
4839
4840int
4841registers_ok_for_ldd_peep (reg1, reg2)
4842     rtx reg1, reg2;
4843{
4844  /* We might have been passed a SUBREG.  */
4845  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
4846    return 0;
4847
4848  if (REGNO (reg1) % 2 != 0)
4849    return 0;
4850
4851  /* Integer ldd is deprecated in SPARC V9 */
4852  if (TARGET_V9 && REGNO (reg1) < 32)
4853    return 0;
4854
4855  return (REGNO (reg1) == REGNO (reg2) - 1);
4856}
4857
4858/* Return 1 if addr1 and addr2 are suitable for use in an ldd or
4859   std insn.
4860
4861   This can only happen when addr1 and addr2 are consecutive memory
4862   locations (addr1 + 4 == addr2).  addr1 must also be aligned on a
4863   64 bit boundary (addr1 % 8 == 0).
4864
4865   We know %sp and %fp are kept aligned on a 64 bit boundary.  Other
4866   registers are assumed to *never* be properly aligned and are
4867   rejected.
4868
4869   Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
4870   need only check that the offset for addr1 % 8 == 0.  */
4871
4872int
4873addrs_ok_for_ldd_peep (addr1, addr2)
4874      rtx addr1, addr2;
4875{
4876  int reg1, offset1;
4877
4878  /* Extract a register number and offset (if used) from the first addr.  */
4879  if (GET_CODE (addr1) == PLUS)
4880    {
4881      /* If not a REG, return zero.  */
4882      if (GET_CODE (XEXP (addr1, 0)) != REG)
4883	return 0;
4884      else
4885	{
4886          reg1 = REGNO (XEXP (addr1, 0));
4887	  /* The offset must be constant!  */
4888	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
4889            return 0;
4890          offset1 = INTVAL (XEXP (addr1, 1));
4891	}
4892    }
4893  else if (GET_CODE (addr1) != REG)
4894    return 0;
4895  else
4896    {
4897      reg1 = REGNO (addr1);
4898      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
4899      offset1 = 0;
4900    }
4901
4902  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
4903  if (GET_CODE (addr2) != PLUS)
4904    return 0;
4905
4906  if (GET_CODE (XEXP (addr2, 0)) != REG
4907      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
4908    return 0;
4909
4910  /* Only %fp and %sp are allowed.  Additionally both addresses must
4911     use the same register.  */
4912  if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
4913    return 0;
4914
4915  if (reg1 != REGNO (XEXP (addr2, 0)))
4916    return 0;
4917
4918  /* The first offset must be evenly divisible by 8 to ensure the
4919     address is 64 bit aligned.  */
4920  if (offset1 % 8 != 0)
4921    return 0;
4922
4923  /* The offset for the second addr must be 4 more than the first addr.  */
4924  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
4925    return 0;
4926
4927  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
4928     instructions.  */
4929  return 1;
4930}
4931
4932/* Return 1 if reg is a pseudo, or is the first register in
4933   a hard register pair.  This makes it a candidate for use in
4934   ldd and std insns.  */
4935
4936int
4937register_ok_for_ldd (reg)
4938     rtx reg;
4939{
4940  /* We might have been passed a SUBREG.  */
4941  if (GET_CODE (reg) != REG)
4942    return 0;
4943
4944  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
4945    return (REGNO (reg) % 2 == 0);
4946  else
4947    return 1;
4948}
4949
4950/* Print operand X (an rtx) in assembler syntax to file FILE.
4951   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4952   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4953
4954void
4955print_operand (file, x, code)
4956     FILE *file;
4957     rtx x;
4958     int code;
4959{
4960  switch (code)
4961    {
4962    case '#':
4963      /* Output a 'nop' if there's nothing for the delay slot.  */
4964      if (dbr_sequence_length () == 0)
4965	fputs ("\n\t nop", file);
4966      return;
4967    case '*':
4968      /* Output an annul flag if there's nothing for the delay slot and we
4969	 are optimizing.  This is always used with '(' below.  */
4970      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
4971	 this is a dbx bug.  So, we only do this when optimizing.  */
4972      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
4973	 Always emit a nop in case the next instruction is a branch.  */
4974      if (dbr_sequence_length () == 0
4975	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
4976	fputs (",a", file);
4977      return;
4978    case '(':
4979      /* Output a 'nop' if there's nothing for the delay slot and we are
4980	 not optimizing.  This is always used with '*' above.  */
4981      if (dbr_sequence_length () == 0
4982	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
4983	fputs ("\n\t nop", file);
4984      return;
4985    case '_':
4986      /* Output the Embedded Medium/Anywhere code model base register.  */
4987      fputs (EMBMEDANY_BASE_REG, file);
4988      return;
4989    case '@':
4990      /* Print out what we are using as the frame pointer.  This might
4991	 be %fp, or might be %sp+offset.  */
4992      /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
4993      fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
4994      return;
4995    case 'Y':
4996      /* Adjust the operand to take into account a RESTORE operation.  */
4997      if (GET_CODE (x) == CONST_INT)
4998	break;
4999      else if (GET_CODE (x) != REG)
5000	output_operand_lossage ("Invalid %%Y operand");
5001      else if (REGNO (x) < 8)
5002	fputs (reg_names[REGNO (x)], file);
5003      else if (REGNO (x) >= 24 && REGNO (x) < 32)
5004	fputs (reg_names[REGNO (x)-16], file);
5005      else
5006	output_operand_lossage ("Invalid %%Y operand");
5007      return;
5008    case 'L':
5009      /* Print out the low order register name of a register pair.  */
5010      if (WORDS_BIG_ENDIAN)
5011	fputs (reg_names[REGNO (x)+1], file);
5012      else
5013	fputs (reg_names[REGNO (x)], file);
5014      return;
5015    case 'H':
5016      /* Print out the high order register name of a register pair.  */
5017      if (WORDS_BIG_ENDIAN)
5018	fputs (reg_names[REGNO (x)], file);
5019      else
5020	fputs (reg_names[REGNO (x)+1], file);
5021      return;
5022    case 'R':
5023      /* Print out the second register name of a register pair or quad.
5024	 I.e., R (%o0) => %o1.  */
5025      fputs (reg_names[REGNO (x)+1], file);
5026      return;
5027    case 'S':
5028      /* Print out the third register name of a register quad.
5029	 I.e., S (%o0) => %o2.  */
5030      fputs (reg_names[REGNO (x)+2], file);
5031      return;
5032    case 'T':
5033      /* Print out the fourth register name of a register quad.
5034	 I.e., T (%o0) => %o3.  */
5035      fputs (reg_names[REGNO (x)+3], file);
5036      return;
5037    case 'x':
5038      /* Print a condition code register.  */
5039      if (REGNO (x) == SPARC_ICC_REG)
5040	{
5041	  /* We don't handle CC[X]_NOOVmode because they're not supposed
5042	     to occur here.  */
5043	  if (GET_MODE (x) == CCmode)
5044	    fputs ("%icc", file);
5045	  else if (GET_MODE (x) == CCXmode)
5046	    fputs ("%xcc", file);
5047	  else
5048	    abort ();
5049	}
5050      else
5051	/* %fccN register */
5052	fputs (reg_names[REGNO (x)], file);
5053      return;
5054    case 'm':
5055      /* Print the operand's address only.  */
5056      output_address (XEXP (x, 0));
5057      return;
5058    case 'r':
5059      /* In this case we need a register.  Use %g0 if the
5060	 operand is const0_rtx.  */
5061      if (x == const0_rtx
5062	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5063	{
5064	  fputs ("%g0", file);
5065	  return;
5066	}
5067      else
5068	break;
5069
5070    case 'A':
5071      switch (GET_CODE (x))
5072	{
5073	case IOR: fputs ("or", file); break;
5074	case AND: fputs ("and", file); break;
5075	case XOR: fputs ("xor", file); break;
5076	default: output_operand_lossage ("Invalid %%A operand");
5077	}
5078      return;
5079
5080    case 'B':
5081      switch (GET_CODE (x))
5082	{
5083	case IOR: fputs ("orn", file); break;
5084	case AND: fputs ("andn", file); break;
5085	case XOR: fputs ("xnor", file); break;
5086	default: output_operand_lossage ("Invalid %%B operand");
5087	}
5088      return;
5089
5090      /* These are used by the conditional move instructions.  */
5091    case 'c' :
5092    case 'C':
5093      {
5094	enum rtx_code rc = (code == 'c'
5095			    ? reverse_condition (GET_CODE (x))
5096			    : GET_CODE (x));
5097	switch (rc)
5098	  {
5099	  case NE: fputs ("ne", file); break;
5100	  case EQ: fputs ("e", file); break;
5101	  case GE: fputs ("ge", file); break;
5102	  case GT: fputs ("g", file); break;
5103	  case LE: fputs ("le", file); break;
5104	  case LT: fputs ("l", file); break;
5105	  case GEU: fputs ("geu", file); break;
5106	  case GTU: fputs ("gu", file); break;
5107	  case LEU: fputs ("leu", file); break;
5108	  case LTU: fputs ("lu", file); break;
5109	  default: output_operand_lossage (code == 'c'
5110					   ? "Invalid %%c operand"
5111					   : "Invalid %%C operand");
5112	  }
5113	return;
5114      }
5115
5116      /* These are used by the movr instruction pattern.  */
5117    case 'd':
5118    case 'D':
5119      {
5120	enum rtx_code rc = (code == 'd'
5121			    ? reverse_condition (GET_CODE (x))
5122			    : GET_CODE (x));
5123	switch (rc)
5124	  {
5125	  case NE: fputs ("ne", file); break;
5126	  case EQ: fputs ("e", file); break;
5127	  case GE: fputs ("gez", file); break;
5128	  case LT: fputs ("lz", file); break;
5129	  case LE: fputs ("lez", file); break;
5130	  case GT: fputs ("gz", file); break;
5131	  default: output_operand_lossage (code == 'd'
5132					   ? "Invalid %%d operand"
5133					   : "Invalid %%D operand");
5134	  }
5135	return;
5136      }
5137
5138    case 'b':
5139      {
5140	/* Print a sign-extended character.  */
5141	int i = INTVAL (x) & 0xff;
5142	if (i & 0x80)
5143	  i |= 0xffffff00;
5144	fprintf (file, "%d", i);
5145	return;
5146      }
5147
5148    case 'f':
5149      /* Operand must be a MEM; write its address.  */
5150      if (GET_CODE (x) != MEM)
5151	output_operand_lossage ("Invalid %%f operand");
5152      output_address (XEXP (x, 0));
5153      return;
5154
5155    case 0:
5156      /* Do nothing special.  */
5157      break;
5158
5159    default:
5160      /* Undocumented flag.  */
5161      output_operand_lossage ("invalid operand output code");
5162    }
5163
5164  if (GET_CODE (x) == REG)
5165    fputs (reg_names[REGNO (x)], file);
5166  else if (GET_CODE (x) == MEM)
5167    {
5168      fputc ('[', file);
5169	/* Poor Sun assembler doesn't understand absolute addressing.  */
5170      if (CONSTANT_P (XEXP (x, 0))
5171	  && ! TARGET_LIVE_G0)
5172	fputs ("%g0+", file);
5173      output_address (XEXP (x, 0));
5174      fputc (']', file);
5175    }
5176  else if (GET_CODE (x) == HIGH)
5177    {
5178      fputs ("%hi(", file);
5179      output_addr_const (file, XEXP (x, 0));
5180      fputc (')', file);
5181    }
5182  else if (GET_CODE (x) == LO_SUM)
5183    {
5184      print_operand (file, XEXP (x, 0), 0);
5185      if (TARGET_CM_MEDMID)
5186	fputs ("+%l44(", file);
5187      else
5188	fputs ("+%lo(", file);
5189      output_addr_const (file, XEXP (x, 1));
5190      fputc (')', file);
5191    }
5192  else if (GET_CODE (x) == CONST_DOUBLE
5193	   && (GET_MODE (x) == VOIDmode
5194	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
5195    {
5196      if (CONST_DOUBLE_HIGH (x) == 0)
5197	fprintf (file, "%u", CONST_DOUBLE_LOW (x));
5198      else if (CONST_DOUBLE_HIGH (x) == -1
5199	       && CONST_DOUBLE_LOW (x) < 0)
5200	fprintf (file, "%d", CONST_DOUBLE_LOW (x));
5201      else
5202	output_operand_lossage ("long long constant not a valid immediate operand");
5203    }
5204  else if (GET_CODE (x) == CONST_DOUBLE)
5205    output_operand_lossage ("floating point constant not a valid immediate operand");
5206  else { output_addr_const (file, x); }
5207}
5208
5209/* This function outputs assembler code for VALUE to FILE, where VALUE is
5210   a 64 bit (DImode) value.  */
5211
5212/* ??? If there is a 64 bit counterpart to .word that the assembler
5213   understands, then using that would simply this code greatly.  */
5214/* ??? We only output .xword's for symbols and only then in environments
5215   where the assembler can handle them.  */
5216
5217void
5218output_double_int (file, value)
5219     FILE *file;
5220     rtx value;
5221{
5222  if (GET_CODE (value) == CONST_INT)
5223    {
5224      /* ??? This has endianness issues.  */
5225#if HOST_BITS_PER_WIDE_INT == 64
5226      HOST_WIDE_INT xword = INTVAL (value);
5227      HOST_WIDE_INT high, low;
5228
5229      high = (xword >> 32) & 0xffffffff;
5230      low  = xword & 0xffffffff;
5231      ASM_OUTPUT_INT (file, GEN_INT (high));
5232      ASM_OUTPUT_INT (file, GEN_INT (low));
5233#else
5234      if (INTVAL (value) < 0)
5235	ASM_OUTPUT_INT (file, constm1_rtx);
5236      else
5237	ASM_OUTPUT_INT (file, const0_rtx);
5238      ASM_OUTPUT_INT (file, value);
5239#endif
5240    }
5241  else if (GET_CODE (value) == CONST_DOUBLE)
5242    {
5243      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
5244      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
5245    }
5246  else if (GET_CODE (value) == SYMBOL_REF
5247	   || GET_CODE (value) == CONST
5248	   || GET_CODE (value) == PLUS
5249	   || (TARGET_ARCH64 &&
5250	       (GET_CODE (value) == LABEL_REF
5251		|| GET_CODE (value) == CODE_LABEL
5252		|| GET_CODE (value) == MINUS)))
5253    {
5254      if (! TARGET_V9)
5255	{
5256	  ASM_OUTPUT_INT (file, const0_rtx);
5257	  ASM_OUTPUT_INT (file, value);
5258	}
5259      else
5260	{
5261	  fprintf (file, "\t%s\t", ASM_LONGLONG);
5262	  output_addr_const (file, value);
5263	  fprintf (file, "\n");
5264	}
5265    }
5266  else
5267    abort ();
5268}
5269
5270/* Return the value of a code used in the .proc pseudo-op that says
5271   what kind of result this function returns.  For non-C types, we pick
5272   the closest C type.  */
5273
/* Default widths, in bits, of the standard C types.  Each one is supplied
   only when the target configuration has not already defined it.  */

/* Integer types.  */
#if !defined (CHAR_TYPE_SIZE)
#define CHAR_TYPE_SIZE BITS_PER_UNIT
#endif

#if !defined (SHORT_TYPE_SIZE)
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#if !defined (INT_TYPE_SIZE)
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (LONG_TYPE_SIZE)
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (LONG_LONG_TYPE_SIZE)
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

/* Floating point types.  */
#if !defined (FLOAT_TYPE_SIZE)
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (DOUBLE_TYPE_SIZE)
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#if !defined (LONG_DOUBLE_TYPE_SIZE)
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
5305
5306unsigned long
5307sparc_type_code (type)
5308     register tree type;
5309{
5310  register unsigned long qualifiers = 0;
5311  register unsigned shift;
5312
5313  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
5314     setting more, since some assemblers will give an error for this.  Also,
5315     we must be careful to avoid shifts of 32 bits or more to avoid getting
5316     unpredictable results.  */
5317
5318  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
5319    {
5320      switch (TREE_CODE (type))
5321	{
5322	case ERROR_MARK:
5323	  return qualifiers;
5324
5325	case ARRAY_TYPE:
5326	  qualifiers |= (3 << shift);
5327	  break;
5328
5329	case FUNCTION_TYPE:
5330	case METHOD_TYPE:
5331	  qualifiers |= (2 << shift);
5332	  break;
5333
5334	case POINTER_TYPE:
5335	case REFERENCE_TYPE:
5336	case OFFSET_TYPE:
5337	  qualifiers |= (1 << shift);
5338	  break;
5339
5340	case RECORD_TYPE:
5341	  return (qualifiers | 8);
5342
5343	case UNION_TYPE:
5344	case QUAL_UNION_TYPE:
5345	  return (qualifiers | 9);
5346
5347	case ENUMERAL_TYPE:
5348	  return (qualifiers | 10);
5349
5350	case VOID_TYPE:
5351	  return (qualifiers | 16);
5352
5353	case INTEGER_TYPE:
5354	  /* If this is a range type, consider it to be the underlying
5355	     type.  */
5356	  if (TREE_TYPE (type) != 0)
5357	    break;
5358
5359	  /* Carefully distinguish all the standard types of C,
5360	     without messing up if the language is not C.  We do this by
5361	     testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
5362	     look at both the names and the above fields, but that's redundant.
5363	     Any type whose size is between two C types will be considered
5364	     to be the wider of the two types.  Also, we do not have a
5365	     special code to use for "long long", so anything wider than
5366	     long is treated the same.  Note that we can't distinguish
5367	     between "int" and "long" in this code if they are the same
5368	     size, but that's fine, since neither can the assembler.  */
5369
5370	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
5371	    return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
5372
5373	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
5374	    return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
5375
5376	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
5377	    return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
5378
5379	  else
5380	    return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
5381
5382	case REAL_TYPE:
5383	  /* If this is a range type, consider it to be the underlying
5384	     type.  */
5385	  if (TREE_TYPE (type) != 0)
5386	    break;
5387
5388	  /* Carefully distinguish all the standard types of C,
5389	     without messing up if the language is not C.  */
5390
5391	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
5392	    return (qualifiers | 6);
5393
5394	  else
5395	    return (qualifiers | 7);
5396
5397	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
5398	  /* ??? We need to distinguish between double and float complex types,
5399	     but I don't know how yet because I can't reach this code from
5400	     existing front-ends.  */
5401	  return (qualifiers | 7);	/* Who knows? */
5402
5403	case CHAR_TYPE:		/* GNU Pascal CHAR type.  Not used in C.  */
5404	case BOOLEAN_TYPE:	/* GNU Fortran BOOLEAN type.  */
5405	case FILE_TYPE:		/* GNU Pascal FILE type.  */
5406	case SET_TYPE:		/* GNU Pascal SET type.  */
5407	case LANG_TYPE:		/* ? */
5408	  return qualifiers;
5409
5410	default:
5411	  abort ();		/* Not a type! */
5412        }
5413    }
5414
5415  return qualifiers;
5416}
5417
5418/* Nested function support.  */
5419
5420/* Emit RTL insns to initialize the variable parts of a trampoline.
5421   FNADDR is an RTX for the address of the function's pure code.
5422   CXT is an RTX for the static chain value for the function.
5423
5424   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
5425   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
5426   (to store insns).  This is a bit excessive.  Perhaps a different
5427   mechanism would be better here.
5428
5429   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
5430
5431void
5432sparc_initialize_trampoline (tramp, fnaddr, cxt)
5433     rtx tramp, fnaddr, cxt;
5434{
5435  /* SPARC 32 bit trampoline:
5436
5437 	sethi	%hi(fn), %g1
5438 	sethi	%hi(static), %g2
5439 	jmp	%g1+%lo(fn)
5440 	or	%g2, %lo(static), %g2
5441
5442    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
5443    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
5444   */
5445#ifdef TRANSFER_FROM_TRAMPOLINE
5446  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5447                     0, VOIDmode, 1, tramp, Pmode);
5448#endif
5449
5450  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
5451		  expand_binop (SImode, ior_optab,
5452				expand_shift (RSHIFT_EXPR, SImode, fnaddr,
5453					      size_int (10), 0, 1),
5454				GEN_INT (0x03000000),
5455				NULL_RTX, 1, OPTAB_DIRECT));
5456
5457  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5458		  expand_binop (SImode, ior_optab,
5459				expand_shift (RSHIFT_EXPR, SImode, cxt,
5460					      size_int (10), 0, 1),
5461				GEN_INT (0x05000000),
5462				NULL_RTX, 1, OPTAB_DIRECT));
5463
5464  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5465		  expand_binop (SImode, ior_optab,
5466				expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
5467				GEN_INT (0x81c06000),
5468				NULL_RTX, 1, OPTAB_DIRECT));
5469
5470  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5471		  expand_binop (SImode, ior_optab,
5472				expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
5473				GEN_INT (0x8410a000),
5474				NULL_RTX, 1, OPTAB_DIRECT));
5475
5476  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
5477  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
5478     aligned on a 16 byte boundary so one flush clears it all.  */
5479  if (sparc_cpu != PROCESSOR_ULTRASPARC)
5480    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
5481						     plus_constant (tramp, 8)))));
5482}
5483
5484/* The 64 bit version is simpler because it makes more sense to load the
5485   values as "immediate" data out of the trampoline.  It's also easier since
5486   we can read the PC without clobbering a register.  */
5487
5488void
5489sparc64_initialize_trampoline (tramp, fnaddr, cxt)
5490     rtx tramp, fnaddr, cxt;
5491{
5492#ifdef TRANSFER_FROM_TRAMPOLINE
5493  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
5494                     0, VOIDmode, 1, tramp, Pmode);
5495#endif
5496
5497  /*
5498	rd	%pc, %g1
5499	ldx	[%g1+24], %g5
5500	jmp	%g5
5501	ldx	[%g1+16], %g5
5502	+16 bytes data
5503   */
5504
5505  emit_move_insn (gen_rtx_MEM (SImode, tramp),
5506		  GEN_INT (0x83414000));
5507  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
5508		  GEN_INT (0xca586018));
5509  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
5510		  GEN_INT (0x81c14000));
5511  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
5512		  GEN_INT (0xca586010));
5513  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
5514  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
5515  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
5516
5517  if (sparc_cpu != PROCESSOR_ULTRASPARC)
5518    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
5519}
5520
5521/* Subroutines to support a flat (single) register window calling
5522   convention.  */
5523
5524/* Single-register window sparc stack frames look like:
5525
5526             Before call		        After call
5527        +-----------------------+	+-----------------------+
5528   high |		        |	|			|
5529   mem  |  caller's temps.    	|       |  caller's temps.    	|
5530	|       		|       |       	        |
5531        +-----------------------+	+-----------------------+
5532 	|       		|	|		        |
5533        |  arguments on stack.  |	|  arguments on stack.  |
5534	|       		|      	|			|
5535        +-----------------------+FP+92->+-----------------------+
5536 	|  6 words to save     	|	|  6 words to save	|
5537	|  arguments passed	|	|  arguments passed	|
5538	|  in registers, even	|	|  in registers, even	|
5539       	|  if not passed.       |      	|  if not passed.	|
5540 SP+68->+-----------------------+FP+68->+-----------------------+
5541        | 1 word struct addr	|      	| 1 word struct addr	|
5542        +-----------------------+FP+64->+-----------------------+
5543        |			|	|			|
5544        | 16 word reg save area	|	| 16 word reg save area |
5545       	|                       |      	|			|
5546    SP->+-----------------------+   FP->+-----------------------+
5547				        | 4 word area for	|
5548				       	| fp/alu reg moves	|
5549				 FP-16->+-----------------------+
5550				        |			|
5551				        |  local variables	|
5552				        |			|
5553				        +-----------------------+
5554				        |		        |
5555                                        |  fp register save     |
5556				        |			|
5557				        +-----------------------+
5558				        |		        |
5559                                        |  gp register save     |
5560                                        |       		|
5561				        +-----------------------+
5562				        |			|
5563                                        |  alloca allocations   |
5564        			        |			|
5565				        +-----------------------+
5566				        |			|
5567                                        |  arguments on stack   |
5568        			       	|		        |
5569				 SP+92->+-----------------------+
5570                                        |  6 words to save      |
5571				        |  arguments passed     |
5572                                        |  in registers, even   |
5573   low                                 	|  if not passed.       |
5574   memory        		 SP+68->+-----------------------+
5575				       	| 1 word struct addr	|
5576				 SP+64->+-----------------------+
5577				        |			|
				        | 16 word reg save area |
5579				       	|			|
5580				    SP->+-----------------------+  */
5581
/* Description of the current function's frame: sizes, register save
   masks and offsets.  Filled in by sparc_flat_compute_frame_size.  */

struct sparc_frame_info
{
  /* # bytes that the entire frame takes up.  */
  unsigned long total_size;

  /* # bytes that variables take up.  */
  unsigned long var_size;

  /* # bytes that outgoing arguments take up.  */
  unsigned long args_size;

  /* # bytes of extra gunk.  */
  unsigned long extra_size;

  /* # bytes needed to store gp regs.  */
  unsigned int gp_reg_size;

  /* # bytes needed to store fp regs.  */
  unsigned int fp_reg_size;

  /* Mask of saved gp registers.  */
  unsigned long gmask;

  /* Mask of saved fp registers.  */
  unsigned long fmask;

  /* Offset from new sp to store regs.  */
  unsigned long reg_offset;

  /* Nonzero if frame size already calculated.  */
  int initialized;
};

/* Frame information for the function being compiled, as calculated by
   sparc_flat_compute_frame_size.  */
struct sparc_frame_info current_frame_info;

/* All-zero structure used to initialize current_frame_info.  */
struct sparc_frame_info zero_frame_info;
5604
/* Register holding the return address, and the gmask bits that stand
   for the frame pointer and the return address register.  */

#define RETURN_ADDR_REGNUM 15
#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))

/* Tell prologue and epilogue if register REGNO should be saved / restored.
   That is the case when the register is live and not call-used, when it
   is the frame pointer and a frame pointer is needed, or when it is the
   return address register and that register is live.

   REGNO is parenthesized at every use so that an expression argument
   (for example `r | 1') is compared as a whole.  It is still evaluated
   more than once, so it must not have side effects.  */

#define MUST_SAVE_REGISTER(regno) \
 ((regs_ever_live[(regno)] && !call_used_regs[(regno)])                 \
  || ((regno) == FRAME_POINTER_REGNUM && frame_pointer_needed)          \
  || ((regno) == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
5615
5616/* Return the bytes needed to compute the frame pointer from the current
5617   stack pointer.  */
5618
5619unsigned long
5620sparc_flat_compute_frame_size (size)
5621     int size;			/* # of var. bytes allocated.  */
5622{
5623  int regno;
5624  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
5625  unsigned long var_size;	/* # bytes that variables take up.  */
5626  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
5627  unsigned long extra_size;	/* # extra bytes.  */
5628  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
5629  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
5630  unsigned long gmask;		/* Mask of saved gp registers.  */
5631  unsigned long fmask;		/* Mask of saved fp registers.  */
5632  unsigned long reg_offset;	/* Offset to register save area.  */
5633  int           need_aligned_p;	/* 1 if need the save area 8 byte aligned.  */
5634
5635  /* This is the size of the 16 word reg save area, 1 word struct addr
5636     area, and 4 word fp/alu register copy area.  */
5637  extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
5638  var_size = size;
5639  gp_reg_size = 0;
5640  fp_reg_size = 0;
5641  gmask = 0;
5642  fmask = 0;
5643  reg_offset = 0;
5644  need_aligned_p = 0;
5645
5646  args_size = 0;
5647  if (!leaf_function_p ())
5648    {
5649      /* Also include the size needed for the 6 parameter registers.  */
5650      args_size = current_function_outgoing_args_size + 24;
5651    }
5652  total_size = var_size + args_size;
5653
5654  /* Calculate space needed for gp registers.  */
5655  for (regno = 1; regno <= 31; regno++)
5656    {
5657      if (MUST_SAVE_REGISTER (regno))
5658	{
5659	  /* If we need to save two regs in a row, ensure there's room to bump
5660	     up the address to align it to a doubleword boundary.  */
5661	  if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
5662	    {
5663	      if (gp_reg_size % 8 != 0)
5664		gp_reg_size += 4;
5665	      gp_reg_size += 2 * UNITS_PER_WORD;
5666	      gmask |= 3 << regno;
5667	      regno++;
5668	      need_aligned_p = 1;
5669	    }
5670	  else
5671	    {
5672	      gp_reg_size += UNITS_PER_WORD;
5673	      gmask |= 1 << regno;
5674	    }
5675	}
5676    }
5677
5678  /* Calculate space needed for fp registers.  */
5679  for (regno = 32; regno <= 63; regno++)
5680    {
5681      if (regs_ever_live[regno] && !call_used_regs[regno])
5682	{
5683	  fp_reg_size += UNITS_PER_WORD;
5684	  fmask |= 1 << (regno - 32);
5685	}
5686    }
5687
5688  if (gmask || fmask)
5689    {
5690      int n;
5691      reg_offset = FIRST_PARM_OFFSET(0) + args_size;
5692      /* Ensure save area is 8 byte aligned if we need it.  */
5693      n = reg_offset % 8;
5694      if (need_aligned_p && n != 0)
5695	{
5696	  total_size += 8 - n;
5697	  reg_offset += 8 - n;
5698	}
5699      total_size += gp_reg_size + fp_reg_size;
5700    }
5701
5702  /* If we must allocate a stack frame at all, we must also allocate
5703     room for register window spillage, so as to be binary compatible
5704     with libraries and operating systems that do not use -mflat.  */
5705  if (total_size > 0)
5706    total_size += extra_size;
5707  else
5708    extra_size = 0;
5709
5710  total_size = SPARC_STACK_ALIGN (total_size);
5711
5712  /* Save other computed information.  */
5713  current_frame_info.total_size  = total_size;
5714  current_frame_info.var_size    = var_size;
5715  current_frame_info.args_size   = args_size;
5716  current_frame_info.extra_size  = extra_size;
5717  current_frame_info.gp_reg_size = gp_reg_size;
5718  current_frame_info.fp_reg_size = fp_reg_size;
5719  current_frame_info.gmask	 = gmask;
5720  current_frame_info.fmask	 = fmask;
5721  current_frame_info.reg_offset	 = reg_offset;
5722  current_frame_info.initialized = reload_completed;
5723
5724  /* Ok, we're done.  */
5725  return total_size;
5726}
5727
5728/* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
5729   OFFSET.
5730
5731   BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
5732   appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
5733   [BASE_REG+OFFSET] will always be a valid address.
5734
5735   WORD_OP is either "st" for save, "ld" for restore.
5736   DOUBLEWORD_OP is either "std" for save, "ldd" for restore.  */
5737
5738void
5739sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
5740			 doubleword_op, base_offset)
5741     FILE *file;
5742     char *base_reg;
5743     unsigned int offset;
5744     unsigned long gmask;
5745     unsigned long fmask;
5746     char *word_op;
5747     char *doubleword_op;
5748     unsigned long base_offset;
5749{
5750  int regno;
5751
5752  if (gmask == 0 && fmask == 0)
5753    return;
5754
5755  /* Save registers starting from high to low.  We've already saved the
5756     previous frame pointer and previous return address for the debugger's
5757     sake.  The debugger allows us to not need a nop in the epilog if at least
5758     one register is reloaded in addition to return address.  */
5759
5760  if (gmask)
5761    {
5762      for (regno = 1; regno <= 31; regno++)
5763	{
5764	  if ((gmask & (1L << regno)) != 0)
5765	    {
5766	      if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
5767		{
5768		  /* We can save two registers in a row.  If we're not at a
5769		     double word boundary, move to one.
5770		     sparc_flat_compute_frame_size ensures there's room to do
5771		     this.  */
5772		  if (offset % 8 != 0)
5773		    offset += UNITS_PER_WORD;
5774
5775		  if (word_op[0] == 's')
5776		    {
5777		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
5778			       doubleword_op, reg_names[regno],
5779			       base_reg, offset);
5780		      if (dwarf2out_do_frame ())
5781			{
5782			  char *l = dwarf2out_cfi_label ();
5783			  dwarf2out_reg_save (l, regno, offset + base_offset);
5784			  dwarf2out_reg_save
5785			    (l, regno+1, offset+base_offset + UNITS_PER_WORD);
5786			}
5787		    }
5788		  else
5789		    fprintf (file, "\t%s\t[%s+%d], %s\n",
5790			     doubleword_op, base_reg, offset,
5791			     reg_names[regno]);
5792
5793		  offset += 2 * UNITS_PER_WORD;
5794		  regno++;
5795		}
5796	      else
5797		{
5798		  if (word_op[0] == 's')
5799		    {
5800		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
5801			       word_op, reg_names[regno],
5802			       base_reg, offset);
5803		      if (dwarf2out_do_frame ())
5804			dwarf2out_reg_save ("", regno, offset + base_offset);
5805		    }
5806		  else
5807		    fprintf (file, "\t%s\t[%s+%d], %s\n",
5808			     word_op, base_reg, offset, reg_names[regno]);
5809
5810		  offset += UNITS_PER_WORD;
5811		}
5812	    }
5813	}
5814    }
5815
5816  if (fmask)
5817    {
5818      for (regno = 32; regno <= 63; regno++)
5819	{
5820	  if ((fmask & (1L << (regno - 32))) != 0)
5821	    {
5822	      if (word_op[0] == 's')
5823		{
5824		  fprintf (file, "\t%s\t%s, [%s+%d]\n",
5825			   word_op, reg_names[regno],
5826			   base_reg, offset);
5827		  if (dwarf2out_do_frame ())
5828		    dwarf2out_reg_save ("", regno, offset + base_offset);
5829		}
5830	      else
5831		fprintf (file, "\t%s\t[%s+%d], %s\n",
5832			 word_op, base_reg, offset, reg_names[regno]);
5833
5834	      offset += UNITS_PER_WORD;
5835	    }
5836	}
5837    }
5838}
5839
5840/* Set up the stack and frame (if desired) for the function.  */
5841
5842void
5843sparc_flat_output_function_prologue (file, size)
5844     FILE *file;
5845     int size;
5846{
5847  char *sp_str = reg_names[STACK_POINTER_REGNUM];
5848  unsigned long gmask = current_frame_info.gmask;
5849
5850  /* This is only for the human reader.  */
5851  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
5852  fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
5853	   ASM_COMMENT_START,
5854	   current_frame_info.var_size,
5855	   current_frame_info.gp_reg_size / 4,
5856	   current_frame_info.fp_reg_size / 4,
5857	   current_function_outgoing_args_size,
5858	   current_frame_info.extra_size);
5859
5860  size = SPARC_STACK_ALIGN (size);
5861  size = (! current_frame_info.initialized
5862	  ? sparc_flat_compute_frame_size (size)
5863	  : current_frame_info.total_size);
5864
5865  /* These cases shouldn't happen.  Catch them now.  */
5866  if (size == 0 && (gmask || current_frame_info.fmask))
5867    abort ();
5868
5869  /* Allocate our stack frame by decrementing %sp.
5870     At present, the only algorithm gdb can use to determine if this is a
5871     flat frame is if we always set %i7 if we set %sp.  This can be optimized
5872     in the future by putting in some sort of debugging information that says
5873     this is a `flat' function.  However, there is still the case of debugging
5874     code without such debugging information (including cases where most fns
5875     have such info, but there is one that doesn't).  So, always do this now
5876     so we don't get a lot of code out there that gdb can't handle.
5877     If the frame pointer isn't needn't then that's ok - gdb won't be able to
5878     distinguish us from a non-flat function but there won't (and shouldn't)
5879     be any differences anyway.  The return pc is saved (if necessary) right
5880     after %i7 so gdb won't have to look too far to find it.  */
5881  if (size > 0)
5882    {
5883      unsigned int reg_offset = current_frame_info.reg_offset;
5884      char *fp_str = reg_names[FRAME_POINTER_REGNUM];
5885      const char *t1_str = "%g1";
5886
5887      /* Things get a little tricky if local variables take up more than ~4096
5888	 bytes and outgoing arguments take up more than ~4096 bytes.  When that
5889	 happens, the register save area can't be accessed from either end of
5890	 the frame.  Handle this by decrementing %sp to the start of the gp
5891	 register save area, save the regs, update %i7, and then set %sp to its
5892	 final value.  Given that we only have one scratch register to play
5893	 with it is the cheapest solution, and it helps gdb out as it won't
5894	 slow down recognition of flat functions.
5895	 Don't change the order of insns emitted here without checking with
5896	 the gdb folk first.  */
5897
5898      /* Is the entire register save area offsettable from %sp?  */
5899      if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
5900	{
5901	  if (size <= 4096)
5902	    {
5903	      fprintf (file, "\tadd\t%s, %d, %s\n",
5904		       sp_str, -size, sp_str);
5905	      if (gmask & FRAME_POINTER_MASK)
5906		{
5907		  fprintf (file, "\tst\t%s, [%s+%d]\n",
5908			   fp_str, sp_str, reg_offset);
5909		  fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5910			   sp_str, -size, fp_str, ASM_COMMENT_START);
5911		  reg_offset += 4;
5912		}
5913	    }
5914	  else
5915	    {
5916	      fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5917		       size, t1_str, sp_str, t1_str, sp_str);
5918	      if (gmask & FRAME_POINTER_MASK)
5919		{
5920		  fprintf (file, "\tst\t%s, [%s+%d]\n",
5921			   fp_str, sp_str, reg_offset);
5922		  fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5923			   sp_str, t1_str, fp_str, ASM_COMMENT_START);
5924		  reg_offset += 4;
5925		}
5926	    }
5927	  if (dwarf2out_do_frame ())
5928	    {
5929	      char *l = dwarf2out_cfi_label ();
5930	      if (gmask & FRAME_POINTER_MASK)
5931		{
5932		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5933				      reg_offset - 4 - size);
5934		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5935		}
5936	      else
5937		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
5938	    }
5939	  if (gmask & RETURN_ADDR_MASK)
5940	    {
5941	      fprintf (file, "\tst\t%s, [%s+%d]\n",
5942		       reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
5943	      if (dwarf2out_do_frame ())
5944		dwarf2out_return_save ("", reg_offset - size);
5945	      reg_offset += 4;
5946	    }
5947	  sparc_flat_save_restore (file, sp_str, reg_offset,
5948				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
5949				   current_frame_info.fmask,
5950				   "st", "std", -size);
5951	}
5952      else
5953	{
5954	  /* Subtract %sp in two steps, but make sure there is always a
5955	     64 byte register save area, and %sp is properly aligned.  */
5956	  /* Amount to decrement %sp by, the first time.  */
5957	  unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
5958	  /* Offset to register save area from %sp.  */
5959	  unsigned int offset = size1 - (size - reg_offset);
5960
5961	  if (size1 <= 4096)
5962	    {
5963	      fprintf (file, "\tadd\t%s, %d, %s\n",
5964		       sp_str, -size1, sp_str);
5965	      if (gmask & FRAME_POINTER_MASK)
5966		{
5967		  fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5968			   fp_str, sp_str, offset, sp_str, -size1, fp_str,
5969			   ASM_COMMENT_START);
5970		  offset += 4;
5971		}
5972	    }
5973	  else
5974	    {
5975	      fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5976		       size1, t1_str, sp_str, t1_str, sp_str);
5977	      if (gmask & FRAME_POINTER_MASK)
5978		{
5979		  fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5980			   fp_str, sp_str, offset, sp_str, t1_str, fp_str,
5981			   ASM_COMMENT_START);
5982		  offset += 4;
5983		}
5984	    }
5985	  if (dwarf2out_do_frame ())
5986	    {
5987	      char *l = dwarf2out_cfi_label ();
5988	      if (gmask & FRAME_POINTER_MASK)
5989		{
5990		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5991				      offset - 4 - size1);
5992		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5993		}
5994	      else
5995		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
5996	    }
5997	  if (gmask & RETURN_ADDR_MASK)
5998	    {
5999	      fprintf (file, "\tst\t%s, [%s+%d]\n",
6000		       reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
6001	      if (dwarf2out_do_frame ())
6002		/* offset - size1 == reg_offset - size
6003		   if reg_offset were updated above like offset.  */
6004		dwarf2out_return_save ("", offset - size1);
6005	      offset += 4;
6006	    }
6007	  sparc_flat_save_restore (file, sp_str, offset,
6008				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6009				   current_frame_info.fmask,
6010				   "st", "std", -size1);
6011	  fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
6012		   size - size1, t1_str, sp_str, t1_str, sp_str);
6013	  if (dwarf2out_do_frame ())
6014	    if (! (gmask & FRAME_POINTER_MASK))
6015	      dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
6016	}
6017    }
6018
6019  fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
6020}
6021
6022/* Do any necessary cleanup after a function to restore stack, frame,
6023   and regs. */
6024
6025void
6026sparc_flat_output_function_epilogue (file, size)
6027     FILE *file;
6028     int size;
6029{
6030  rtx epilogue_delay = current_function_epilogue_delay_list;
6031  int noepilogue = FALSE;
6032
6033  /* This is only for the human reader.  */
6034  fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);
6035
6036  /* The epilogue does not depend on any registers, but the stack
6037     registers, so we assume that if we have 1 pending nop, it can be
6038     ignored, and 2 it must be filled (2 nops occur for integer
6039     multiply and divide).  */
6040
6041  size = SPARC_STACK_ALIGN (size);
6042  size = (!current_frame_info.initialized
6043	   ? sparc_flat_compute_frame_size (size)
6044	   : current_frame_info.total_size);
6045
6046  if (size == 0 && epilogue_delay == 0)
6047    {
6048      rtx insn = get_last_insn ();
6049
6050      /* If the last insn was a BARRIER, we don't have to write any code
6051	 because a jump (aka return) was put there.  */
6052      if (GET_CODE (insn) == NOTE)
6053	insn = prev_nonnote_insn (insn);
6054      if (insn && GET_CODE (insn) == BARRIER)
6055	noepilogue = TRUE;
6056    }
6057
6058  if (!noepilogue)
6059    {
6060      unsigned int reg_offset = current_frame_info.reg_offset;
6061      unsigned int size1;
6062      char *sp_str = reg_names[STACK_POINTER_REGNUM];
6063      char *fp_str = reg_names[FRAME_POINTER_REGNUM];
6064      const char *t1_str = "%g1";
6065
6066      /* In the reload sequence, we don't need to fill the load delay
6067	 slots for most of the loads, also see if we can fill the final
6068	 delay slot if not otherwise filled by the reload sequence.  */
6069
6070      if (size > 4095)
6071	fprintf (file, "\tset\t%d, %s\n", size, t1_str);
6072
6073      if (frame_pointer_needed)
6074	{
6075	  if (size > 4095)
6076	    fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
6077		     fp_str, t1_str, sp_str, ASM_COMMENT_START);
6078	  else
6079	    fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
6080		     fp_str, size, sp_str, ASM_COMMENT_START);
6081	}
6082
6083      /* Is the entire register save area offsettable from %sp?  */
6084      if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
6085	{
6086	  size1 = 0;
6087	}
6088      else
6089	{
6090	  /* Restore %sp in two steps, but make sure there is always a
6091	     64 byte register save area, and %sp is properly aligned.  */
6092	  /* Amount to increment %sp by, the first time.  */
6093	  size1 = ((reg_offset - 64 - 16) + 15) & -16;
6094	  /* Offset to register save area from %sp.  */
6095	  reg_offset = size1 - reg_offset;
6096
6097	  fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
6098		   size1, t1_str, sp_str, t1_str, sp_str);
6099	}
6100
6101      /* We must restore the frame pointer and return address reg first
6102	 because they are treated specially by the prologue output code.  */
6103      if (current_frame_info.gmask & FRAME_POINTER_MASK)
6104	{
6105	  fprintf (file, "\tld\t[%s+%d], %s\n",
6106		   sp_str, reg_offset, fp_str);
6107	  reg_offset += 4;
6108	}
6109      if (current_frame_info.gmask & RETURN_ADDR_MASK)
6110	{
6111	  fprintf (file, "\tld\t[%s+%d], %s\n",
6112		   sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
6113	  reg_offset += 4;
6114	}
6115
6116      /* Restore any remaining saved registers.  */
6117      sparc_flat_save_restore (file, sp_str, reg_offset,
6118			       current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6119			       current_frame_info.fmask,
6120			       "ld", "ldd", 0);
6121
6122      /* If we had to increment %sp in two steps, record it so the second
6123	 restoration in the epilogue finishes up.  */
6124      if (size1 > 0)
6125	{
6126	  size -= size1;
6127	  if (size > 4095)
6128	    fprintf (file, "\tset\t%d, %s\n",
6129		     size, t1_str);
6130	}
6131
6132      if (current_function_returns_struct)
6133	fprintf (file, "\tjmp\t%%o7+12\n");
6134      else
6135	fprintf (file, "\tretl\n");
6136
6137      /* If the only register saved is the return address, we need a
6138	 nop, unless we have an instruction to put into it.  Otherwise
6139	 we don't since reloading multiple registers doesn't reference
6140	 the register being loaded.  */
6141
6142      if (epilogue_delay)
6143	{
6144	  if (size)
6145	    abort ();
6146	  final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
6147	}
6148
6149      else if (size > 4095)
6150	fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
6151
6152      else if (size > 0)
6153	fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);
6154
6155      else
6156	fprintf (file, "\tnop\n");
6157    }
6158
6159  /* Reset state info for each function.  */
6160  current_frame_info = zero_frame_info;
6161
6162  sparc_output_deferred_case_vectors ();
6163}
6164
6165/* Define the number of delay slots needed for the function epilogue.
6166
6167   On the sparc, we need a slot if either no stack has been allocated,
6168   or the only register saved is the return register.  */
6169
6170int
6171sparc_flat_epilogue_delay_slots ()
6172{
6173  if (!current_frame_info.initialized)
6174    (void) sparc_flat_compute_frame_size (get_frame_size ());
6175
6176  if (current_frame_info.total_size == 0)
6177    return 1;
6178
6179  return 0;
6180}
6181
6182/* Return true is TRIAL is a valid insn for the epilogue delay slot.
6183   Any single length instruction which doesn't reference the stack or frame
6184   pointer is OK.  */
6185
6186int
6187sparc_flat_eligible_for_epilogue_delay (trial, slot)
6188     rtx trial;
6189     int slot ATTRIBUTE_UNUSED;
6190{
6191  rtx pat = PATTERN (trial);
6192
6193  if (get_attr_length (trial) != 1)
6194    return 0;
6195
6196  /* If %g0 is live, there are lots of things we can't handle.
6197     Rather than trying to find them all now, let's punt and only
6198     optimize things as necessary.  */
6199  if (TARGET_LIVE_G0)
6200    return 0;
6201
6202  if (! reg_mentioned_p (stack_pointer_rtx, pat)
6203      && ! reg_mentioned_p (frame_pointer_rtx, pat))
6204    return 1;
6205
6206  return 0;
6207}
6208
6209/* Adjust the cost of a scheduling dependency.  Return the new cost of
6210   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
6211
6212static int
6213supersparc_adjust_cost (insn, link, dep_insn, cost)
6214     rtx insn;
6215     rtx link;
6216     rtx dep_insn;
6217     int cost;
6218{
6219  enum attr_type insn_type;
6220
6221  if (! recog_memoized (insn))
6222    return 0;
6223
6224  insn_type = get_attr_type (insn);
6225
6226  if (REG_NOTE_KIND (link) == 0)
6227    {
6228      /* Data dependency; DEP_INSN writes a register that INSN reads some
6229	 cycles later.  */
6230
6231      /* if a load, then the dependence must be on the memory address;
6232	 add an extra "cycle".  Note that the cost could be two cycles
6233	 if the reg was written late in an instruction group; we ca not tell
6234	 here.  */
6235      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6236	return cost + 3;
6237
6238      /* Get the delay only if the address of the store is the dependence.  */
6239      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6240	{
6241	  rtx pat = PATTERN(insn);
6242	  rtx dep_pat = PATTERN (dep_insn);
6243
6244	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6245	    return cost;  /* This should not happen!  */
6246
6247	  /* The dependency between the two instructions was on the data that
6248	     is being stored.  Assume that this implies that the address of the
6249	     store is not dependent.  */
6250	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6251	    return cost;
6252
6253	  return cost + 3;  /* An approximation.  */
6254	}
6255
6256      /* A shift instruction cannot receive its data from an instruction
6257	 in the same cycle; add a one cycle penalty.  */
6258      if (insn_type == TYPE_SHIFT)
6259	return cost + 3;   /* Split before cascade into shift.  */
6260    }
6261  else
6262    {
6263      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6264	 INSN writes some cycles later.  */
6265
6266      /* These are only significant for the fpu unit; writing a fp reg before
6267         the fpu has finished with it stalls the processor.  */
6268
6269      /* Reusing an integer register causes no problems.  */
6270      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6271	return 0;
6272    }
6273
6274  return cost;
6275}
6276
6277static int
6278hypersparc_adjust_cost (insn, link, dep_insn, cost)
6279     rtx insn;
6280     rtx link;
6281     rtx dep_insn;
6282     int cost;
6283{
6284  enum attr_type insn_type, dep_type;
6285  rtx pat = PATTERN(insn);
6286  rtx dep_pat = PATTERN (dep_insn);
6287
6288  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6289    return cost;
6290
6291  insn_type = get_attr_type (insn);
6292  dep_type = get_attr_type (dep_insn);
6293
6294  switch (REG_NOTE_KIND (link))
6295    {
6296    case 0:
6297      /* Data dependency; DEP_INSN writes a register that INSN reads some
6298	 cycles later.  */
6299
6300      switch (insn_type)
6301	{
6302	case TYPE_STORE:
6303	case TYPE_FPSTORE:
6304	  /* Get the delay iff the address of the store is the dependence. */
6305	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6306	    return cost;
6307
6308	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6309	    return cost;
6310	  return cost + 3;
6311
6312	case TYPE_LOAD:
6313	case TYPE_SLOAD:
6314	case TYPE_FPLOAD:
6315	  /* If a load, then the dependence must be on the memory address.  If
6316	     the addresses aren't equal, then it might be a false dependency */
6317	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6318	    {
6319	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6320		  || GET_CODE (SET_DEST (dep_pat)) != MEM
6321		  || GET_CODE (SET_SRC (pat)) != MEM
6322		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
6323				    XEXP (SET_SRC (pat), 0)))
6324		return cost + 2;
6325
6326	      return cost + 8;
6327	    }
6328	  break;
6329
6330	case TYPE_BRANCH:
6331	  /* Compare to branch latency is 0.  There is no benefit from
6332	     separating compare and branch.  */
6333	  if (dep_type == TYPE_COMPARE)
6334	    return 0;
6335	  /* Floating point compare to branch latency is less than
6336	     compare to conditional move.  */
6337	  if (dep_type == TYPE_FPCMP)
6338	    return cost - 1;
6339	  break;
6340	default:
6341	  break;
6342	}
6343	break;
6344
6345    case REG_DEP_ANTI:
6346      /* Anti-dependencies only penalize the fpu unit. */
6347      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6348        return 0;
6349      break;
6350
6351    default:
6352      break;
6353    }
6354
6355  return cost;
6356}
6357
6358static int
6359ultrasparc_adjust_cost (insn, link, dep_insn, cost)
6360     rtx insn;
6361     rtx link;
6362     rtx dep_insn;
6363     int cost;
6364{
6365  enum attr_type insn_type, dep_type;
6366  rtx pat = PATTERN(insn);
6367  rtx dep_pat = PATTERN (dep_insn);
6368
6369  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6370    return cost;
6371
6372  insn_type = get_attr_type (insn);
6373  dep_type = get_attr_type (dep_insn);
6374
6375  /* Nothing issues in parallel with integer multiplies, so
6376     mark as zero cost since the scheduler can not do anything
6377     about it.  */
6378  if (insn_type == TYPE_IMUL)
6379    return 0;
6380
6381#define SLOW_FP(dep_type) \
6382(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
6383
6384  switch (REG_NOTE_KIND (link))
6385    {
6386    case 0:
6387      /* Data dependency; DEP_INSN writes a register that INSN reads some
6388	 cycles later.  */
6389
6390      if (dep_type == TYPE_CMOVE)
6391	{
6392	  /* Instructions that read the result of conditional moves cannot
6393	     be in the same group or the following group.  */
6394	  return cost + 1;
6395	}
6396
6397      switch (insn_type)
6398	{
6399	  /* UltraSPARC can dual issue a store and an instruction setting
6400	     the value stored, except for divide and square root.  */
6401	case TYPE_FPSTORE:
6402	  if (! SLOW_FP (dep_type))
6403	    return 0;
6404	  return cost;
6405
6406	case TYPE_STORE:
6407	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6408	    return cost;
6409
6410	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6411	    /* The dependency between the two instructions is on the data
6412	       that is being stored.  Assume that the address of the store
6413	       is not also dependent.  */
6414	    return 0;
6415	  return cost;
6416
6417	case TYPE_LOAD:
6418	case TYPE_SLOAD:
6419	case TYPE_FPLOAD:
6420	  /* A load does not return data until at least 11 cycles after
6421	     a store to the same location.  3 cycles are accounted for
6422	     in the load latency; add the other 8 here.  */
6423	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6424	    {
6425	      /* If the addresses are not equal this may be a false
6426		 dependency because pointer aliasing could not be
6427		 determined.  Add only 2 cycles in that case.  2 is
6428		 an arbitrary compromise between 8, which would cause
6429		 the scheduler to generate worse code elsewhere to
6430		 compensate for a dependency which might not really
6431		 exist, and 0.  */
6432	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6433		  || GET_CODE (SET_SRC (pat)) != MEM
6434		  || GET_CODE (SET_DEST (dep_pat)) != MEM
6435		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
6436				    XEXP (SET_DEST (dep_pat), 0)))
6437		return cost + 2;
6438
6439	      return cost + 8;
6440	    }
6441	  return cost;
6442
6443	case TYPE_BRANCH:
6444	  /* Compare to branch latency is 0.  There is no benefit from
6445	     separating compare and branch.  */
6446	  if (dep_type == TYPE_COMPARE)
6447	    return 0;
6448	  /* Floating point compare to branch latency is less than
6449	     compare to conditional move.  */
6450	  if (dep_type == TYPE_FPCMP)
6451	    return cost - 1;
6452	  return cost;
6453
6454	case TYPE_FPCMOVE:
6455	  /* FMOVR class instructions can not issue in the same cycle
6456	     or the cycle after an instruction which writes any
6457	     integer register.  Model this as cost 2 for dependent
6458	     instructions.  */
6459	  if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
6460	       || dep_type == TYPE_BINARY)
6461	      && cost < 2)
6462	    return 2;
6463	  /* Otherwise check as for integer conditional moves. */
6464
6465	case TYPE_CMOVE:
6466	  /* Conditional moves involving integer registers wait until
6467	     3 cycles after loads return data.  The interlock applies
6468	     to all loads, not just dependent loads, but that is hard
6469	     to model.  */
6470	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
6471	    return cost + 3;
6472	  return cost;
6473
6474	default:
6475	  break;
6476	}
6477      break;
6478
6479    case REG_DEP_ANTI:
6480      /* Divide and square root lock destination registers for full latency. */
6481      if (! SLOW_FP (dep_type))
6482	return 0;
6483      break;
6484
6485    case REG_DEP_OUTPUT:
6486      /* IEU and FPU instruction that have the same destination
6487	 register cannot be grouped together.  */
6488      return cost + 1;
6489
6490    default:
6491      break;
6492    }
6493
6494  /* Other costs not accounted for:
6495     - Single precision floating point loads lock the other half of
6496       the even/odd register pair.
6497     - Several hazards associated with ldd/std are ignored because these
6498       instructions are rarely generated for V9.
6499     - The floating point pipeline can not have both a single and double
6500       precision operation active at the same time.  Format conversions
6501       and graphics instructions are given honorary double precision status.
6502     - call and jmpl are always the first instruction in a group.  */
6503
6504  return cost;
6505
6506#undef SLOW_FP
6507}
6508
6509int
6510sparc_adjust_cost(insn, link, dep, cost)
6511     rtx insn;
6512     rtx link;
6513     rtx dep;
6514     int cost;
6515{
6516  switch (sparc_cpu)
6517    {
6518    case PROCESSOR_SUPERSPARC:
6519      cost = supersparc_adjust_cost (insn, link, dep, cost);
6520      break;
6521    case PROCESSOR_HYPERSPARC:
6522    case PROCESSOR_SPARCLITE86X:
6523      cost = hypersparc_adjust_cost (insn, link, dep, cost);
6524      break;
6525    case PROCESSOR_ULTRASPARC:
6526      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
6527      break;
6528    default:
6529      break;
6530    }
6531  return cost;
6532}
6533
/* This describes the state of the UltraSPARC pipeline during
   instruction scheduling.  */

/* Single-bit masks: TMASK is applied to insn type attribute values
   (e.g. TMASK (TYPE_SHIFT)) and UMASK to enum ultra_code values
   (e.g. UMASK (IEUN)).  The argument is used as a shift count on an
   unsigned 1.  */
#define TMASK(__x)	((unsigned)1 << ((int)(__x)))
#define UMASK(__x)	((unsigned)1 << ((int)(__x)))

/* Pipeline class of an insn.  NUM_ULTRA_CODES is the number of classes
   and sizes the arrays indexed by this enum.  */
enum ultra_code { NONE=0, /* no insn at all				*/
		  IEU0,   /* shifts and conditional moves		*/
		  IEU1,   /* condition code setting insns, calls+jumps	*/
		  IEUN,   /* all other single cycle ieu insns		*/
		  LSU,    /* loads and stores				*/
		  CTI,    /* branches					*/
		  FPM,    /* FPU pipeline 1, multiplies and divides	*/
		  FPA,    /* FPU pipeline 2, all other operations	*/
		  SINGLE, /* single issue instructions			*/
		  NUM_ULTRA_CODES };

/* Printable names, indexed by enum ultra_code and listed in the same
   order as the enumerators above; used in scheduler dump output.  */
static const char *ultra_code_names[NUM_ULTRA_CODES] = {
  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
  "FPM", "FPA", "SINGLE" };

/* One dispatch group of up to four insns.  The per-slot arrays
   GROUP, CODES and COMMIT are indexed by the same slot number.  */
struct ultrasparc_pipeline_state {
  /* The insns in this group.  */
  rtx group[4];

  /* The code for each insn.  */
  enum ultra_code codes[4];

  /* Which insns in this group have been committed by the
     scheduler.  This is how we determine how many more
     can issue this cycle.  */
  char commit[4];

  /* How many insns in this group.  */
  char group_size;

  /* Mask of free slots still in this group.  Bit N set means slot N
     is free; a fresh group starts out as 0xf.  */
  char free_slot_mask;

  /* The slotter uses the following to determine what other
     insn types can still make their way into this group.
     CONTENTS is indexed by enum ultra_code and set to 1 when an insn
     of that class is placed in the group; NUM_IEU_INSNS counts the
     IEU0, IEU1 and IEUN insns in the group.  */
  char contents [NUM_ULTRA_CODES];
  char num_ieu_insns;
};

/* History of dispatch groups.  ULTRA_CUR_HIST indexes the group being
   filled; ULTRA_CYCLES_ELAPSED is zeroed by ultrasparc_sched_init and
   incremented each time ultra_flush_pipeline starts a new group.  */
#define ULTRA_NUM_HIST	8
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
static int ultra_cur_hist;
static int ultra_cycles_elapsed;

/* The group currently being filled.  */
#define ultra_pipe	(ultra_pipe_hist[ultra_cur_hist])
6585
6586/* Given TYPE_MASK compute the ultra_code it has.  */
6587static enum ultra_code
6588ultra_code_from_mask (type_mask)
6589     int type_mask;
6590{
6591  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6592    return IEU0;
6593  else if (type_mask & (TMASK (TYPE_COMPARE) |
6594			TMASK (TYPE_CALL) |
6595			TMASK (TYPE_UNCOND_BRANCH)))
6596    return IEU1;
6597  else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6598			TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6599    return IEUN;
6600  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6601			TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6602			TMASK (TYPE_FPSTORE)))
6603    return LSU;
6604  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6605			TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
6606    return FPM;
6607  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6608			TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6609    return FPA;
6610  else if (type_mask & TMASK (TYPE_BRANCH))
6611    return CTI;
6612
6613  return SINGLE;
6614}
6615
6616/* Check INSN (a conditional move) and make sure that it's
6617   results are available at this cycle.  Return 1 if the
6618   results are in fact ready.  */
6619static int
6620ultra_cmove_results_ready_p (insn)
6621     rtx insn;
6622{
6623  struct ultrasparc_pipeline_state *up;
6624  int entry, slot;
6625
6626  /* If this got dispatched in the previous
6627     group, the results are not ready.  */
6628  entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6629  up = &ultra_pipe_hist[entry];
6630  slot = 4;
6631  while (--slot >= 0)
6632    if (up->group[slot] == insn)
6633      return 0;
6634
6635  return 1;
6636}
6637
6638/* Walk backwards in pipeline history looking for FPU
6639   operations which use a mode different than FPMODE and
6640   will create a stall if an insn using FPMODE were to be
6641   dispatched this cycle.  */
6642static int
6643ultra_fpmode_conflict_exists (fpmode)
6644     enum machine_mode fpmode;
6645{
6646  int hist_ent;
6647  int hist_lim;
6648
6649  hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6650  if (ultra_cycles_elapsed < 4)
6651    hist_lim = ultra_cycles_elapsed;
6652  else
6653    hist_lim = 4;
6654  while (hist_lim > 0)
6655    {
6656      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6657      int slot = 4;
6658
6659      while (--slot >= 0)
6660	{
6661	  rtx insn = up->group[slot];
6662	  enum machine_mode this_mode;
6663	  rtx pat;
6664
6665	  if (! insn
6666	      || GET_CODE (insn) != INSN
6667	      || (pat = PATTERN (insn)) == 0
6668	      || GET_CODE (pat) != SET)
6669	    continue;
6670
6671	  this_mode = GET_MODE (SET_DEST (pat));
6672	  if ((this_mode != SFmode
6673	       && this_mode != DFmode)
6674	      || this_mode == fpmode)
6675	    continue;
6676
6677	  /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
6678	     we will get a stall.  Loads and stores are independant
6679	     of these rules.  */
6680	  if (GET_CODE (SET_SRC (pat)) != ABS
6681	      && GET_CODE (SET_SRC (pat)) != NEG
6682	      && ((TMASK (get_attr_type (insn)) &
6683		   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
6684		    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
6685                    TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
6686	    return 1;
6687	}
6688      hist_lim--;
6689      hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
6690    }
6691
6692  /* No conflicts, safe to dispatch.  */
6693  return 0;
6694}
6695
6696/* Find an instruction in LIST which has one of the
6697   type attributes enumerated in TYPE_MASK.  START
6698   says where to begin the search.
6699
6700   NOTE: This scheme depends upon the fact that we
6701         have less than 32 distinct type attributes.  */
6702
6703static int ultra_types_avail;
6704
6705static rtx *
6706ultra_find_type (type_mask, list, start)
6707     int type_mask;
6708     rtx *list;
6709     int start;
6710{
6711  int i;
6712
6713  /* Short circuit if no such insn exists in the ready
6714     at the moment.  */
6715  if ((type_mask & ultra_types_avail) == 0)
6716    return 0;
6717
6718  for (i = start; i >= 0; i--)
6719    {
6720      rtx insn = list[i];
6721
6722      if (recog_memoized (insn) >= 0
6723	  && (TMASK(get_attr_type (insn)) & type_mask))
6724	{
6725	  enum machine_mode fpmode = SFmode;
6726	  rtx pat = 0;
6727	  int slot;
6728	  int check_depend = 0;
6729	  int check_fpmode_conflict = 0;
6730
6731	  if (GET_CODE (insn) == INSN
6732	      && (pat = PATTERN(insn)) != 0
6733	      && GET_CODE (pat) == SET
6734	      && !(type_mask & (TMASK (TYPE_STORE) |
6735				TMASK (TYPE_FPSTORE))))
6736	    {
6737	      check_depend = 1;
6738	      if (GET_MODE (SET_DEST (pat)) == SFmode
6739		  || GET_MODE (SET_DEST (pat)) == DFmode)
6740		{
6741		  fpmode = GET_MODE (SET_DEST (pat));
6742		  check_fpmode_conflict = 1;
6743		}
6744	    }
6745
6746	  slot = 4;
6747	  while(--slot >= 0)
6748	    {
6749	      rtx slot_insn = ultra_pipe.group[slot];
6750	      rtx slot_pat;
6751
6752	      /* Already issued, bad dependency, or FPU
6753		 mode conflict.  */
6754	      if (slot_insn != 0
6755		  && (slot_pat = PATTERN (slot_insn)) != 0
6756		  && ((insn == slot_insn)
6757		      || (check_depend == 1
6758			  && GET_CODE (slot_insn) == INSN
6759			  && GET_CODE (slot_pat) == SET
6760			  && ((GET_CODE (SET_DEST (slot_pat)) == REG
6761			       && GET_CODE (SET_SRC (pat)) == REG
6762			       && REGNO (SET_DEST (slot_pat)) ==
6763			            REGNO (SET_SRC (pat)))
6764			      || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
6765				  && GET_CODE (SET_SRC (pat)) == SUBREG
6766				  && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
6767				       REGNO (SUBREG_REG (SET_SRC (pat)))
6768				  && SUBREG_WORD (SET_DEST (slot_pat)) ==
6769				       SUBREG_WORD (SET_SRC (pat)))))
6770		      || (check_fpmode_conflict == 1
6771			  && GET_CODE (slot_insn) == INSN
6772			  && GET_CODE (slot_pat) == SET
6773			  && (GET_MODE (SET_DEST (slot_pat)) == SFmode
6774			      || GET_MODE (SET_DEST (slot_pat)) == DFmode)
6775			  && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
6776		goto next;
6777	    }
6778
6779	  /* Check for peculiar result availability and dispatch
6780	     interference situations.  */
6781	  if (pat != 0
6782	      && ultra_cycles_elapsed > 0)
6783	    {
6784	      rtx link;
6785
6786	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6787		{
6788		  rtx link_insn = XEXP (link, 0);
6789		  if (GET_CODE (link_insn) == INSN
6790		      && recog_memoized (link_insn) >= 0
6791		      && (TMASK (get_attr_type (link_insn)) &
6792			  (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
6793		      && ! ultra_cmove_results_ready_p (link_insn))
6794		    goto next;
6795		}
6796
6797	      if (check_fpmode_conflict
6798		  && ultra_fpmode_conflict_exists (fpmode))
6799		goto next;
6800	    }
6801
6802	  return &list[i];
6803	}
6804    next:
6805      ;
6806    }
6807  return 0;
6808}
6809
6810static void
6811ultra_build_types_avail (ready, n_ready)
6812  rtx *ready;
6813  int n_ready;
6814{
6815  int i = n_ready - 1;
6816
6817  ultra_types_avail = 0;
6818  while(i >= 0)
6819    {
6820      rtx insn = ready[i];
6821
6822      if (recog_memoized (insn) >= 0)
6823	ultra_types_avail |= TMASK (get_attr_type (insn));
6824
6825      i -= 1;
6826    }
6827}
6828
6829/* Place insn pointed to my IP into the pipeline.
6830   Make element THIS of READY be that insn if it
6831   is not already.  TYPE indicates the pipeline class
6832   this insn falls into.  */
6833static void
6834ultra_schedule_insn (ip, ready, this, type)
6835     rtx *ip;
6836     rtx *ready;
6837     int this;
6838     enum ultra_code type;
6839{
6840  int pipe_slot;
6841  char mask = ultra_pipe.free_slot_mask;
6842
6843  /* Obtain free slot.  */
6844  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
6845    if ((mask & (1 << pipe_slot)) != 0)
6846      break;
6847  if (pipe_slot == 4)
6848    abort ();
6849
6850  /* In it goes, and it hasn't been committed yet.  */
6851  ultra_pipe.group[pipe_slot] = *ip;
6852  ultra_pipe.codes[pipe_slot] = type;
6853  ultra_pipe.contents[type] = 1;
6854  if (UMASK (type) &
6855      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6856    ultra_pipe.num_ieu_insns += 1;
6857
6858  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
6859  ultra_pipe.group_size += 1;
6860  ultra_pipe.commit[pipe_slot] = 0;
6861
6862  /* Update ready list.  */
6863  if (ip != &ready[this])
6864    {
6865      rtx temp = *ip;
6866
6867      *ip = ready[this];
6868      ready[this] = temp;
6869    }
6870}
6871
6872/* Advance to the next pipeline group.  */
6873static void
6874ultra_flush_pipeline ()
6875{
6876  ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
6877  ultra_cycles_elapsed += 1;
6878  bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
6879  ultra_pipe.free_slot_mask = 0xf;
6880}
6881
6882static int ultra_reorder_called_this_block;
6883
6884/* Init our data structures for this current block.  */
6885void
6886ultrasparc_sched_init (dump, sched_verbose)
6887     FILE *dump ATTRIBUTE_UNUSED;
6888     int sched_verbose ATTRIBUTE_UNUSED;
6889{
6890  bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
6891  ultra_cur_hist = 0;
6892  ultra_cycles_elapsed = 0;
6893  ultra_reorder_called_this_block = 0;
6894  ultra_pipe.free_slot_mask = 0xf;
6895}
6896
6897/* INSN has been scheduled, update pipeline commit state
6898   and return how many instructions are still to be
6899   scheduled in this group.  */
6900int
6901ultrasparc_variable_issue (insn)
6902     rtx insn;
6903{
6904  struct ultrasparc_pipeline_state *up = &ultra_pipe;
6905  int i, left_to_fire;
6906
6907  left_to_fire = 0;
6908  for (i = 0; i < 4; i++)
6909    {
6910      if (up->group[i] == 0)
6911	continue;
6912
6913      if (up->group[i] == insn)
6914	{
6915	  up->commit[i] = 1;
6916	}
6917      else if (! up->commit[i])
6918	left_to_fire++;
6919    }
6920
6921  return left_to_fire;
6922}
6923
6924/* In actual_hazard_this_instance, we may have yanked some
6925   instructions from the ready list due to conflict cost
6926   adjustments.  If so, and such an insn was in our pipeline
6927   group, remove it and update state.  */
6928static void
6929ultra_rescan_pipeline_state (ready, n_ready)
6930     rtx *ready;
6931     int n_ready;
6932{
6933  struct ultrasparc_pipeline_state *up = &ultra_pipe;
6934  int i;
6935
6936  for (i = 0; i < 4; i++)
6937    {
6938      rtx insn = up->group[i];
6939      int j;
6940
6941      if (! insn)
6942	continue;
6943
6944      /* If it has been committed, then it was removed from
6945	 the ready list because it was actually scheduled,
6946	 and that is not the case we are searching for here.  */
6947      if (up->commit[i] != 0)
6948	continue;
6949
6950      for (j = n_ready - 1; j >= 0; j--)
6951	if (ready[j] == insn)
6952	  break;
6953
6954      /* If we didn't find it, toss it.  */
6955      if (j < 0)
6956	{
6957	  enum ultra_code ucode = up->codes[i];
6958
6959	  up->group[i] = 0;
6960	  up->codes[i] = NONE;
6961	  up->contents[ucode] = 0;
6962	  if (UMASK (ucode) &
6963	      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6964	    up->num_ieu_insns -= 1;
6965
6966	  up->free_slot_mask |= (1 << i);
6967	  up->group_size -= 1;
6968	  up->commit[i] = 0;
6969	}
6970    }
6971}
6972
6973void
6974ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
6975     FILE *dump;
6976     int sched_verbose;
6977     rtx *ready;
6978     int n_ready;
6979{
6980  struct ultrasparc_pipeline_state *up = &ultra_pipe;
6981  int i, this_insn;
6982
6983  /* We get called once unnecessarily per block of insns
6984     scheduled.  */
6985  if (ultra_reorder_called_this_block == 0)
6986    {
6987      ultra_reorder_called_this_block = 1;
6988      return;
6989    }
6990
6991  if (sched_verbose)
6992    {
6993      int n;
6994
6995      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
6996      for (n = n_ready - 1; n >= 0; n--)
6997	{
6998	  rtx insn = ready[n];
6999	  enum ultra_code ucode;
7000
7001	  if (recog_memoized (insn) < 0)
7002	    continue;
7003	  ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
7004	  if (n != 0)
7005	    fprintf (dump, "%s(%d) ",
7006		     ultra_code_names[ucode],
7007		     INSN_UID (insn));
7008	  else
7009	    fprintf (dump, "%s(%d)",
7010		     ultra_code_names[ucode],
7011		     INSN_UID (insn));
7012	}
7013      fprintf (dump, "]\n");
7014    }
7015
7016  this_insn = n_ready - 1;
7017
7018  /* Skip over junk we don't understand.  */
7019  while ((this_insn >= 0)
7020	 && recog_memoized (ready[this_insn]) < 0)
7021    this_insn--;
7022
7023  ultra_build_types_avail (ready, this_insn + 1);
7024
7025  while (this_insn >= 0) {
7026    int old_group_size = up->group_size;
7027
7028    if (up->group_size != 0)
7029      {
7030	int num_committed;
7031
7032	num_committed = (up->commit[0] + up->commit[1] +
7033			 up->commit[2] + up->commit[3]);
7034	/* If nothing has been commited from our group, or all of
7035	   them have.  Clear out the (current cycle's) pipeline
7036	   state and start afresh.  */
7037	if (num_committed == 0
7038	    || num_committed == up->group_size)
7039	  {
7040	    ultra_flush_pipeline ();
7041	    up = &ultra_pipe;
7042	    old_group_size = 0;
7043	  }
7044	else
7045	  {
7046	    /* OK, some ready list insns got requeued and thus removed
7047	       from the ready list.  Account for this fact.  */
7048	    ultra_rescan_pipeline_state (ready, n_ready);
7049
7050	    /* Something "changed", make this look like a newly
7051	       formed group so the code at the end of the loop
7052	       knows that progress was in fact made.  */
7053	    if (up->group_size != old_group_size)
7054	      old_group_size = 0;
7055	  }
7056      }
7057
7058    if (up->group_size == 0)
7059      {
7060	/* If the pipeline is (still) empty and we have any single
7061	   group insns, get them out now as this is a good time.  */
7062	rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
7063				    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
7064				    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
7065				   ready, this_insn);
7066	if (ip)
7067	  {
7068	    ultra_schedule_insn (ip, ready, this_insn, SINGLE);
7069	    break;
7070	  }
7071
7072	/* If we are not in the process of emptying out the pipe, try to
7073	   obtain an instruction which must be the first in it's group.  */
7074	ip = ultra_find_type ((TMASK (TYPE_CALL) |
7075			       TMASK (TYPE_CALL_NO_DELAY_SLOT) |
7076			       TMASK (TYPE_UNCOND_BRANCH)),
7077			      ready, this_insn);
7078	if (ip)
7079	  {
7080	    ultra_schedule_insn (ip, ready, this_insn, IEU1);
7081	    this_insn--;
7082	  }
7083	else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
7084					 TMASK (TYPE_FPDIVD) |
7085					 TMASK (TYPE_FPSQRT)),
7086					ready, this_insn)) != 0)
7087	  {
7088	    ultra_schedule_insn (ip, ready, this_insn, FPM);
7089	    this_insn--;
7090	  }
7091      }
7092
7093    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
7094       operation.  We can't do more IEU operations if the first 3 slots are
7095       all full or we have dispatched two IEU insns already.  */
7096    if ((up->free_slot_mask & 0x7) != 0
7097	&& up->num_ieu_insns < 2
7098	&& up->contents[IEU0] == 0
7099	&& up->contents[IEUN] == 0)
7100      {
7101	rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
7102	if (ip)
7103	  {
7104	    ultra_schedule_insn (ip, ready, this_insn, IEU0);
7105	    this_insn--;
7106	  }
7107      }
7108
7109    /* If we can, try to find an IEU1 specific or an unnamed
7110       IEU instruction.  */
7111    if ((up->free_slot_mask & 0x7) != 0
7112	&& up->num_ieu_insns < 2)
7113      {
7114	rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7115				    TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
7116				    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
7117				   ready, this_insn);
7118	if (ip)
7119	  {
7120	    rtx insn = *ip;
7121
7122	    ultra_schedule_insn (ip, ready, this_insn,
7123				 (!up->contents[IEU1]
7124				  && get_attr_type (insn) == TYPE_COMPARE)
7125				 ? IEU1 : IEUN);
7126	    this_insn--;
7127	  }
7128      }
7129
7130    /* If only one IEU insn has been found, try to find another unnamed
7131       IEU operation or an IEU1 specific one.  */
7132    if ((up->free_slot_mask & 0x7) != 0
7133	&& up->num_ieu_insns < 2)
7134      {
7135	rtx *ip;
7136	int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7137		     TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
7138
7139	if (!up->contents[IEU1])
7140	  tmask |= TMASK (TYPE_COMPARE);
7141	ip = ultra_find_type (tmask, ready, this_insn);
7142	if (ip)
7143	  {
7144	    rtx insn = *ip;
7145
7146	    ultra_schedule_insn (ip, ready, this_insn,
7147				 (!up->contents[IEU1]
7148				  && get_attr_type (insn) == TYPE_COMPARE)
7149				 ? IEU1 : IEUN);
7150	    this_insn--;
7151	  }
7152      }
7153
7154    /* Try for a load or store, but such an insn can only be issued
7155       if it is within' one of the first 3 slots.  */
7156    if ((up->free_slot_mask & 0x7) != 0
7157        && up->contents[LSU] == 0)
7158      {
7159	rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
7160				   TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
7161				   TMASK (TYPE_FPSTORE)), ready, this_insn);
7162	if (ip)
7163	  {
7164	    ultra_schedule_insn (ip, ready, this_insn, LSU);
7165	    this_insn--;
7166	  }
7167      }
7168
7169    /* Now find FPU operations, first FPM class.  But not divisions or
7170       square-roots because those will break the group up.  Unlike all
7171       the previous types, these can go in any slot.  */
7172    if (up->free_slot_mask != 0
7173	&& up->contents[FPM] == 0)
7174      {
7175	rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
7176	if (ip)
7177	  {
7178	    ultra_schedule_insn (ip, ready, this_insn, FPM);
7179	    this_insn--;
7180	  }
7181      }
7182
7183    /* Continue on with FPA class if we have not filled the group already.  */
7184    if (up->free_slot_mask != 0
7185	&& up->contents[FPA] == 0)
7186      {
7187	rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
7188				    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
7189				   ready, this_insn);
7190	if (ip)
7191	  {
7192	    ultra_schedule_insn (ip, ready, this_insn, FPA);
7193	    this_insn--;
7194	  }
7195      }
7196
7197    /* Finally, maybe stick a branch in here.  */
7198    if (up->free_slot_mask != 0
7199	&& up->contents[CTI] == 0)
7200      {
7201	rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
7202
7203	/* Try to slip in a branch only if it is one of the
7204	   next 2 in the ready list.  */
7205	if (ip && ((&ready[this_insn] - ip) < 2))
7206	  {
7207	    ultra_schedule_insn (ip, ready, this_insn, CTI);
7208	    this_insn--;
7209	  }
7210      }
7211
7212    up->group_size = 0;
7213    for (i = 0; i < 4; i++)
7214      if ((up->free_slot_mask & (1 << i)) == 0)
7215	up->group_size++;
7216
7217    /* See if we made any progress...  */
7218    if (old_group_size != up->group_size)
7219      break;
7220
7221    /* Clean out the (current cycle's) pipeline state
7222       and try once more.  If we placed no instructions
7223       into the pipeline at all, it means a real hard
7224       conflict exists with some earlier issued instruction
7225       so we must advance to the next cycle to clear it up.  */
7226    if (up->group_size == 0)
7227      {
7228	ultra_flush_pipeline ();
7229	up = &ultra_pipe;
7230      }
7231    else
7232      {
7233	bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
7234	ultra_pipe.free_slot_mask = 0xf;
7235      }
7236  }
7237
7238  if (sched_verbose)
7239    {
7240      int n, gsize;
7241
7242      fprintf (dump, ";;\tUltraSPARC Launched   [");
7243      gsize = up->group_size;
7244      for (n = 0; n < 4; n++)
7245	{
7246	  rtx insn = up->group[n];
7247
7248	  if (! insn)
7249	    continue;
7250
7251	  gsize -= 1;
7252	  if (gsize != 0)
7253	    fprintf (dump, "%s(%d) ",
7254		     ultra_code_names[up->codes[n]],
7255		     INSN_UID (insn));
7256	  else
7257	    fprintf (dump, "%s(%d)",
7258		     ultra_code_names[up->codes[n]],
7259		     INSN_UID (insn));
7260	}
7261      fprintf (dump, "]\n");
7262    }
7263}
7264
7265int
7266sparc_issue_rate ()
7267{
7268  switch (sparc_cpu)
7269    {
7270    default:
7271      return 1;
7272    case PROCESSOR_V9:
7273      /* Assume V9 processors are capable of at least dual-issue.  */
7274      return 2;
7275    case PROCESSOR_SUPERSPARC:
7276      return 3;
7277    case PROCESSOR_HYPERSPARC:
7278    case PROCESSOR_SPARCLITE86X:
7279      return 2;
7280    case PROCESSOR_ULTRASPARC:
7281      return 4;
7282    }
7283}
7284
7285static int
7286set_extends(x, insn)
7287     rtx x, insn;
7288{
7289  register rtx pat = PATTERN (insn);
7290
7291  switch (GET_CODE (SET_SRC (pat)))
7292    {
7293      /* Load and some shift instructions zero extend. */
7294    case MEM:
7295    case ZERO_EXTEND:
7296      /* sethi clears the high bits */
7297    case HIGH:
7298      /* LO_SUM is used with sethi.  sethi cleared the high
7299	 bits and the values used with lo_sum are positive */
7300    case LO_SUM:
7301      /* Store flag stores 0 or 1 */
7302    case LT: case LTU:
7303    case GT: case GTU:
7304    case LE: case LEU:
7305    case GE: case GEU:
7306    case EQ:
7307    case NE:
7308      return 1;
7309    case AND:
7310      {
7311	rtx op1 = XEXP (SET_SRC (pat), 1);
7312	if (GET_CODE (op1) == CONST_INT)
7313	  return INTVAL (op1) >= 0;
7314	if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7315	    && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7316	  return 1;
7317	if (GET_CODE (op1) == REG
7318	    && sparc_check_64 ((op1), insn) == 1)
7319	  return 1;
7320      }
7321    case ASHIFT:
7322    case LSHIFTRT:
7323      return GET_MODE (SET_SRC (pat)) == SImode;
7324      /* Positive integers leave the high bits zero. */
7325    case CONST_DOUBLE:
7326      return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7327    case CONST_INT:
7328      return ! (INTVAL (x) & 0x80000000);
7329    case ASHIFTRT:
7330    case SIGN_EXTEND:
7331      return - (GET_MODE (SET_SRC (pat)) == SImode);
7332    default:
7333      return 0;
7334    }
7335}
7336
7337/* We _ought_ to have only one kind per function, but... */
7338static rtx sparc_addr_diff_list;
7339static rtx sparc_addr_list;
7340
7341void
7342sparc_defer_case_vector (lab, vec, diff)
7343     rtx lab, vec;
7344     int diff;
7345{
7346  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7347  if (diff)
7348    sparc_addr_diff_list
7349      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7350  else
7351    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7352}
7353
7354static void
7355sparc_output_addr_vec (vec)
7356     rtx vec;
7357{
7358  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7359  int idx, vlen = XVECLEN (body, 0);
7360
7361#ifdef ASM_OUTPUT_ADDR_VEC_START
7362  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7363#endif
7364
7365#ifdef ASM_OUTPUT_CASE_LABEL
7366  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7367			 NEXT_INSN (lab));
7368#else
7369  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7370#endif
7371
7372  for (idx = 0; idx < vlen; idx++)
7373    {
7374      ASM_OUTPUT_ADDR_VEC_ELT
7375	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7376    }
7377
7378#ifdef ASM_OUTPUT_ADDR_VEC_END
7379  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7380#endif
7381}
7382
7383static void
7384sparc_output_addr_diff_vec (vec)
7385     rtx vec;
7386{
7387  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7388  rtx base = XEXP (XEXP (body, 0), 0);
7389  int idx, vlen = XVECLEN (body, 1);
7390
7391#ifdef ASM_OUTPUT_ADDR_VEC_START
7392  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7393#endif
7394
7395#ifdef ASM_OUTPUT_CASE_LABEL
7396  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7397			 NEXT_INSN (lab));
7398#else
7399  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7400#endif
7401
7402  for (idx = 0; idx < vlen; idx++)
7403    {
7404      ASM_OUTPUT_ADDR_DIFF_ELT
7405        (asm_out_file,
7406         body,
7407         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7408         CODE_LABEL_NUMBER (base));
7409    }
7410
7411#ifdef ASM_OUTPUT_ADDR_VEC_END
7412  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7413#endif
7414}
7415
7416static void
7417sparc_output_deferred_case_vectors ()
7418{
7419  rtx t;
7420  int align;
7421
7422  if (sparc_addr_list == NULL_RTX
7423      && sparc_addr_diff_list == NULL_RTX)
7424    return;
7425
7426  /* Align to cache line in the function's code section.  */
7427  function_section (current_function_decl);
7428
7429  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7430  if (align > 0)
7431    ASM_OUTPUT_ALIGN (asm_out_file, align);
7432
7433  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7434    sparc_output_addr_vec (XEXP (t, 0));
7435  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7436    sparc_output_addr_diff_vec (XEXP (t, 0));
7437
7438  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7439}
7440
7441/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7442   unknown.  Return 1 if the high bits are zero, -1 if the register is
7443   sign extended.  */
7444int
7445sparc_check_64 (x, insn)
7446     rtx x, insn;
7447{
7448  /* If a register is set only once it is safe to ignore insns this
7449     code does not know how to handle.  The loop will either recognize
7450     the single set and return the correct value or fail to recognize
7451     it and return 0.  */
7452  int set_once = 0;
7453
7454  if (GET_CODE (x) == REG
7455      && flag_expensive_optimizations
7456      && REG_N_SETS (REGNO (x)) == 1)
7457    set_once = 1;
7458
7459  if (insn == 0)
7460    {
7461      if (set_once)
7462	insn = get_last_insn_anywhere ();
7463      else
7464	return 0;
7465    }
7466
7467  while ((insn = PREV_INSN (insn)))
7468    {
7469      switch (GET_CODE (insn))
7470	{
7471	case JUMP_INSN:
7472	case NOTE:
7473	  break;
7474	case CODE_LABEL:
7475	case CALL_INSN:
7476	default:
7477	  if (! set_once)
7478	    return 0;
7479	  break;
7480	case INSN:
7481	  {
7482	    rtx pat = PATTERN (insn);
7483	    if (GET_CODE (pat) != SET)
7484	      return 0;
7485	    if (rtx_equal_p (x, SET_DEST (pat)))
7486	      return set_extends (x, insn);
7487	    if (reg_overlap_mentioned_p (SET_DEST (pat), x))
7488	      return 0;
7489	  }
7490	}
7491    }
7492  return 0;
7493}
7494
7495char *
7496sparc_v8plus_shift (operands, insn, opcode)
7497     rtx *operands;
7498     rtx insn;
7499     char *opcode;
7500{
7501  static char asm_code[60];
7502
7503  if (GET_CODE (operands[3]) == SCRATCH)
7504    operands[3] = operands[0];
7505  if (GET_CODE (operands[1]) == CONST_INT)
7506    {
7507      output_asm_insn ("mov %1,%3", operands);
7508    }
7509  else
7510    {
7511      output_asm_insn ("sllx %H1,32,%3", operands);
7512      if (sparc_check_64 (operands[1], insn) <= 0)
7513	output_asm_insn ("srl %L1,0,%L1", operands);
7514      output_asm_insn ("or %L1,%3,%3", operands);
7515    }
7516
7517  strcpy(asm_code, opcode);
7518  if (which_alternative != 2)
7519    return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
7520  else
7521    return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
7522}
7523
7524
7525/* Return 1 if DEST and SRC reference only global and in registers. */
7526
7527int
7528sparc_return_peephole_ok (dest, src)
7529     rtx dest, src;
7530{
7531  if (! TARGET_V9)
7532    return 0;
7533  if (current_function_uses_only_leaf_regs)
7534    return 0;
7535  if (GET_CODE (src) != CONST_INT
7536      && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
7537    return 0;
7538  return IN_OR_GLOBAL_P (dest);
7539}
7540
7541/* Output assembler code to FILE to increment profiler label # LABELNO
7542   for profiling a function entry.
7543
7544   32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
7545   during profiling so we need to save/restore it around the call to mcount.
7546   We're guaranteed that a save has just been done, and we use the space
7547   allocated for intreg/fpreg value passing.  */
7548
7549void
7550sparc_function_profiler (file, labelno)
7551     FILE *file;
7552     int labelno;
7553{
7554  char buf[32];
7555  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7556
7557  if (! TARGET_ARCH64)
7558    fputs ("\tst\t%g2,[%fp-4]\n", file);
7559
7560  fputs ("\tsethi\t%hi(", file);
7561  assemble_name (file, buf);
7562  fputs ("),%o0\n", file);
7563
7564  fputs ("\tcall\t", file);
7565  assemble_name (file, MCOUNT_FUNCTION);
7566  putc ('\n', file);
7567
7568  fputs ("\t or\t%o0,%lo(", file);
7569  assemble_name (file, buf);
7570  fputs ("),%o0\n", file);
7571
7572  if (! TARGET_ARCH64)
7573    fputs ("\tld\t[%fp-4],%g2\n", file);
7574}
7575
7576
7577/* The following macro shall output assembler code to FILE
7578   to initialize basic-block profiling.
7579
7580   If profile_block_flag == 2
7581
7582	Output code to call the subroutine `__bb_init_trace_func'
7583	and pass two parameters to it. The first parameter is
7584	the address of a block allocated in the object module.
7585	The second parameter is the number of the first basic block
7586	of the function.
7587
7588	The name of the block is a local symbol made with this statement:
7589
7590	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7591
7592	Of course, since you are writing the definition of
7593	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7594	can take a short cut in the definition of this macro and use the
7595	name that you know will result.
7596
7597	The number of the first basic block of the function is
7598	passed to the macro in BLOCK_OR_LABEL.
7599
7600	If described in a virtual assembler language the code to be
7601	output looks like:
7602
7603		parameter1 <- LPBX0
7604		parameter2 <- BLOCK_OR_LABEL
7605		call __bb_init_trace_func
7606
7607    else if profile_block_flag != 0
7608
7609	Output code to call the subroutine `__bb_init_func'
7610	and pass one single parameter to it, which is the same
7611	as the first parameter to `__bb_init_trace_func'.
7612
7613	The first word of this parameter is a flag which will be nonzero if
7614	the object module has already been initialized.  So test this word
7615	first, and do not call `__bb_init_func' if the flag is nonzero.
7616	Note: When profile_block_flag == 2 the test need not be done
7617	but `__bb_init_trace_func' *must* be called.
7618
7619	BLOCK_OR_LABEL may be used to generate a label number as a
7620	branch destination in case `__bb_init_func' will not be called.
7621
7622	If described in a virtual assembler language the code to be
7623	output looks like:
7624
7625		cmp (LPBX0),0
7626		jne local_label
7627		parameter1 <- LPBX0
7628		call __bb_init_func
7629	    local_label:
7630
7631*/
7632
7633void
7634sparc_function_block_profiler(file, block_or_label)
7635     FILE *file;
7636     int block_or_label;
7637{
7638  char LPBX[32];
7639  ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7640
7641  if (profile_block_flag == 2)
7642    {
7643      fputs ("\tsethi\t%hi(", file);
7644      assemble_name (file, LPBX);
7645      fputs ("),%o0\n", file);
7646
7647      fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);
7648
7649      fputs ("\tor\t%o0,%lo(", file);
7650      assemble_name (file, LPBX);
7651      fputs ("),%o0\n", file);
7652
7653      fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);
7654
7655      fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
7656    }
7657  else if (profile_block_flag != 0)
7658    {
7659      char LPBY[32];
7660      ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);
7661
7662      fputs ("\tsethi\t%hi(", file);
7663      assemble_name (file, LPBX);
7664      fputs ("),%o0\n", file);
7665
7666      fputs ("\tld\t[%lo(", file);
7667      assemble_name (file, LPBX);
7668      fputs (")+%o0],%o1\n", file);
7669
7670      fputs ("\ttst\t%o1\n", file);
7671
7672      if (TARGET_V9)
7673	{
7674	  fputs ("\tbne,pn\t%icc,", file);
7675	  assemble_name (file, LPBY);
7676	  putc ('\n', file);
7677	}
7678      else
7679	{
7680	  fputs ("\tbne\t", file);
7681	  assemble_name (file, LPBY);
7682	  putc ('\n', file);
7683	}
7684
7685      fputs ("\t or\t%o0,%lo(", file);
7686      assemble_name (file, LPBX);
7687      fputs ("),%o0\n", file);
7688
7689      fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);
7690
7691      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
7692    }
7693}
7694
7695/* The following macro shall output assembler code to FILE
7696   to increment a counter associated with basic block number BLOCKNO.
7697
7698   If profile_block_flag == 2
7699
7700	Output code to initialize the global structure `__bb' and
7701	call the function `__bb_trace_func' which will increment the
7702	counter.
7703
7704	`__bb' consists of two words. In the first word the number
7705	of the basic block has to be stored. In the second word
7706	the address of a block allocated in the object module
7707	has to be stored.
7708
7709	The basic block number is given by BLOCKNO.
7710
7711	The address of the block is given by the label created with
7712
7713	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7714
7715	by FUNCTION_BLOCK_PROFILER.
7716
7717	Of course, since you are writing the definition of
7718	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7719	can take a short cut in the definition of this macro and use the
7720	name that you know will result.
7721
7722	If described in a virtual assembler language the code to be
7723	output looks like:
7724
7725		move BLOCKNO -> (__bb)
7726		move LPBX0 -> (__bb+4)
7727		call __bb_trace_func
7728
7729	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register. To guarantee
	this, you must output code to save and restore registers
7732	either in this macro or in the macros MACHINE_STATE_SAVE
7733	and MACHINE_STATE_RESTORE. The last two macros will be
7734	used in the function `__bb_trace_func', so you must make
7735	sure that the function prologue does not change any
7736	register prior to saving it with MACHINE_STATE_SAVE.
7737
7738   else if profile_block_flag != 0
7739
7740	Output code to increment the counter directly.
7741	Basic blocks are numbered separately from zero within each
7742	compiled object module. The count associated with block number
7743	BLOCKNO is at index BLOCKNO in an array of words; the name of
7744	this array is a local symbol made with this statement:
7745
7746	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
7747
7748	Of course, since you are writing the definition of
7749	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7750	can take a short cut in the definition of this macro and use the
7751	name that you know will result.
7752
7753	If described in a virtual assembler language, the code to be
7754	output looks like:
7755
7756		inc (LPBX2+4*BLOCKNO)
7757
7758*/
7759
7760void
7761sparc_block_profiler(file, blockno)
7762     FILE *file;
7763     int blockno;
7764{
7765  char LPBX[32];
7766
7767  if (profile_block_flag == 2)
7768    {
7769      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7770
7771      fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
7772      fprintf (file, "\tsethi\t%%hi(%d),%%g2\n", blockno);
7773      fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
7774      fprintf (file, "\tor\t%%g2,%%lo(%d),%%g2\n", blockno);
7775
7776      fputs ("\tst\t%g2,[%g1]\n", file);
7777
7778      fputs ("\tsethi\t%hi(", file);
7779      assemble_name (file, LPBX);
7780      fputs ("),%g2\n", file);
7781
7782      fputs ("\tor\t%o2,%lo(", file);
7783      assemble_name (file, LPBX);
7784      fputs ("),%g2\n", file);
7785
7786      fputs ("\tst\t%g2,[%g1+4]\n", file);
7787      fputs ("\tmov\t%o7,%g2\n", file);
7788
7789      fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);
7790
7791      fputs ("\tmov\t%g2,%o7\n", file);
7792    }
7793  else if (profile_block_flag != 0)
7794    {
7795      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);
7796
7797      fputs ("\tsethi\t%hi(", file);
7798      assemble_name (file, LPBX);
7799      fprintf (file, "+%d),%%g1\n", blockno*4);
7800
7801      fputs ("\tld\t[%g1+%lo(", file);
7802      assemble_name (file, LPBX);
7803      fprintf (file, "+%d)],%%g2\n", blockno*4);
7804
7805      fputs ("\tadd\t%g2,1,%g2\n", file);
7806
7807      fputs ("\tst\t%g2,[%g1+%lo(", file);
7808      assemble_name (file, LPBX);
7809      fprintf (file, "+%d)]\n", blockno*4);
7810    }
7811}
7812
7813/* The following macro shall output assembler code to FILE
7814   to indicate a return from function during basic-block profiling.
7815
7816   If profile_block_flag == 2:
7817
7818	Output assembler code to call function `__bb_trace_ret'.
7819
7820	Note that function `__bb_trace_ret' must not change the
	machine state, especially the flag register. To guarantee
	this, you must output code to save and restore registers
7823	either in this macro or in the macros MACHINE_STATE_SAVE_RET
7824	and MACHINE_STATE_RESTORE_RET. The last two macros will be
7825	used in the function `__bb_trace_ret', so you must make
7826	sure that the function prologue does not change any
7827	register prior to saving it with MACHINE_STATE_SAVE_RET.
7828
7829   else if profile_block_flag != 0:
7830
7831	The macro will not be used, so it need not distinguish
7832	these cases.
7833*/
7834
7835void
7836sparc_function_block_profiler_exit(file)
7837     FILE *file;
7838{
7839  if (profile_block_flag == 2)
7840    fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
7841  else
7842    abort ();
7843}
7844