sparc.c revision 70635
1/* Subroutines for insn-output.c for Sun SPARC.
2   Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
3   Contributed by Michael Tiemann (tiemann@cygnus.com)
4   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5   at Cygnus Support.
6
7This file is part of GNU CC.
8
9GNU CC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GNU CC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GNU CC; see the file COPYING.  If not, write to
21the Free Software Foundation, 59 Temple Place - Suite 330,
22Boston, MA 02111-1307, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "tree.h"
27#include "rtl.h"
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
36#include "flags.h"
37#include "expr.h"
38#include "recog.h"
39#include "toplev.h"
40
/* 1 if the caller has placed an "unimp" insn immediately after the call.
   This is used in v8 code when calling a function that returns a structure.
   v9 doesn't have this.  Be careful to have this test be the same as that
   used on the call.
   The size tests correspond to a non-empty, constant-size struct return;
   that is when the caller emits the trailing unimp.  */

#define SKIP_CALLERS_UNIMP_P  \
(!TARGET_ARCH64 && current_function_returns_struct			\
 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))	\
     == INTEGER_CST))
51
/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used in insn
   scheduling (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static int apparent_fsize;
static int actual_fsize;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  Set by the compare expanders, read back
   when the corresponding branch/set pattern is expanded.  */

rtx sparc_compare_op0, sparc_compare_op1;

/* We may need an epilogue if we spill too many registers.
   If this is non-zero, then we branch here for the epilogue.  */
static rtx leaf_label;
71
#ifdef LEAF_REGISTERS

/* Vector to say how input registers are mapped to output
   registers.  FRAME_POINTER_REGNUM cannot be remapped by
   this function to eliminate it.  You must use -fomit-frame-pointer
   to get that.
   Indexed by hard register number; -1 entries are registers that are
   not remapped in leaf functions (presumably unusable there — the
   local and most in/out registers) — NOTE(review): confirm against
   LEAF_REGISTERS in the target header.  Registers 32 up (the FP regs
   and beyond) map to themselves.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};

#endif
95
/* Name of where we pretend to think the frame pointer points.
   Normally, this is "%fp", but if we are in a leaf procedure,
   this is "%sp+something".  We record "something" separately as it may be
   too big for reg+constant addressing.  */

static const char *frame_base_name;
static int frame_base_offset;
103
104static rtx pic_setup_code	PROTO((void));
105static void sparc_init_modes	PROTO((void));
106static int save_regs		PROTO((FILE *, int, int, const char *,
107				       int, int, int));
108static int restore_regs		PROTO((FILE *, int, int, const char *, int, int));
109static void build_big_number	PROTO((FILE *, int, const char *));
110static int function_arg_slotno	PROTO((const CUMULATIVE_ARGS *,
111				       enum machine_mode, tree, int, int,
112				       int *, int *));
113
114static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
115static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
116static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));
117
118static void sparc_output_addr_vec PROTO((rtx));
119static void sparc_output_addr_diff_vec PROTO((rtx));
120static void sparc_output_deferred_case_vectors PROTO((void));
121
122
123#ifdef DWARF2_DEBUGGING_INFO
124extern char *dwarf2out_cfi_label ();
125#endif
126
/* Option handling.  */

/* Code model option as passed by user.  */
const char *sparc_cmodel_string;
/* Parsed value.  */
enum cmodel sparc_cmodel;

/* Record alignment options as passed by user.  Raw strings; validated
   and parsed by sparc_override_options.  */
const char *sparc_align_loops_string;
const char *sparc_align_jumps_string;
const char *sparc_align_funcs_string;

/* Parsed values, as a power of two.  */
int sparc_align_loops;
int sparc_align_jumps;
int sparc_align_funcs;

/* Switch strings for -mcpu=/-mtune=; the set_tune_p/set_arch_p flags
   say which of the two settings each switch controls.  */
struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;
155
156/* Validate and override various options, and do some machine dependent
157   initialization.  */
158
159void
160sparc_override_options ()
161{
162  static struct code_model {
163    const char *name;
164    int value;
165  } cmodels[] = {
166    { "32", CM_32 },
167    { "medlow", CM_MEDLOW },
168    { "medmid", CM_MEDMID },
169    { "medany", CM_MEDANY },
170    { "embmedany", CM_EMBMEDANY },
171    { 0, 0 }
172  };
173  struct code_model *cmodel;
174  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
175  static struct cpu_default {
176    int cpu;
177    const char *name;
178  } cpu_default[] = {
179    /* There must be one entry here for each TARGET_CPU value.  */
180    { TARGET_CPU_sparc, "cypress" },
181    { TARGET_CPU_sparclet, "tsc701" },
182    { TARGET_CPU_sparclite, "f930" },
183    { TARGET_CPU_v8, "v8" },
184    { TARGET_CPU_hypersparc, "hypersparc" },
185    { TARGET_CPU_sparclite86x, "sparclite86x" },
186    { TARGET_CPU_supersparc, "supersparc" },
187    { TARGET_CPU_v9, "v9" },
188    { TARGET_CPU_ultrasparc, "ultrasparc" },
189    { 0, 0 }
190  };
191  struct cpu_default *def;
192  /* Table of values for -m{cpu,tune}=.  */
193  static struct cpu_table {
194    const char *name;
195    enum processor_type processor;
196    int disable;
197    int enable;
198  } cpu_table[] = {
199    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
200    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
201    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
202    /* TI TMS390Z55 supersparc */
203    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
204    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
205    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
206       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
207    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
208    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
209    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
210    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, MASK_V8 },
211    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
212    /* TEMIC sparclet */
213    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
214    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
215    /* TI ultrasparc */
216    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
217    { 0, 0, 0, 0 }
218  };
219  struct cpu_table *cpu;
220  struct sparc_cpu_select *sel;
221  int fpu;
222
223#ifndef SPARC_BI_ARCH
224  /* Check for unsupported architecture size.  */
225  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
226    {
227      error ("%s is not supported by this configuration",
228	     DEFAULT_ARCH32_P ? "-m64" : "-m32");
229    }
230#endif
231
232  /* At the moment we don't allow different pointer size and architecture */
233  if (! TARGET_64BIT != ! TARGET_PTR64)
234    {
235      error ("-mptr%d not allowed on -m%d",
236      	     TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
237      if (TARGET_64BIT)
238    	target_flags |= MASK_PTR64;
239      else
240        target_flags &= ~MASK_PTR64;
241    }
242
243  /* Code model selection.  */
244  sparc_cmodel = SPARC_DEFAULT_CMODEL;
245
246#ifdef SPARC_BI_ARCH
247  if (TARGET_ARCH32)
248    sparc_cmodel = CM_32;
249#endif
250
251  if (sparc_cmodel_string != NULL)
252    {
253      if (TARGET_ARCH64)
254	{
255	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
256	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
257	      break;
258	  if (cmodel->name == NULL)
259	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
260	  else
261	    sparc_cmodel = cmodel->value;
262	}
263      else
264	error ("-mcmodel= is not supported on 32 bit systems");
265    }
266
267  fpu = TARGET_FPU; /* save current -mfpu status */
268
269  /* Set the default CPU.  */
270  for (def = &cpu_default[0]; def->name; ++def)
271    if (def->cpu == TARGET_CPU_DEFAULT)
272      break;
273  if (! def->name)
274    abort ();
275  sparc_select[0].string = def->name;
276
277  for (sel = &sparc_select[0]; sel->name; ++sel)
278    {
279      if (sel->string)
280	{
281	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
282	    if (! strcmp (sel->string, cpu->name))
283	      {
284		if (sel->set_tune_p)
285		  sparc_cpu = cpu->processor;
286
287		if (sel->set_arch_p)
288		  {
289		    target_flags &= ~cpu->disable;
290		    target_flags |= cpu->enable;
291		  }
292		break;
293	      }
294
295	  if (! cpu->name)
296	    error ("bad value (%s) for %s switch", sel->string, sel->name);
297	}
298    }
299
300  /* If -mfpu or -mno-fpu was explicitly used, don't override with
301     the processor default.  */
302  if (TARGET_FPU_SET)
303    target_flags = (target_flags & ~MASK_FPU) | fpu;
304
305  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
306  if (TARGET_V9 && TARGET_ARCH32)
307    target_flags |= MASK_DEPRECATED_V8_INSNS;
308
309  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
310  if (! TARGET_V9 || TARGET_ARCH64)
311    target_flags &= ~MASK_V8PLUS;
312
313  /* Don't use stack biasing in 32 bit mode.  */
314  if (TARGET_ARCH32)
315    target_flags &= ~MASK_STACK_BIAS;
316
317  /* Don't allow -mvis if FPU is disabled.  */
318  if (! TARGET_FPU)
319    target_flags &= ~MASK_VIS;
320
321  /* Validate -malign-loops= value, or provide default.  */
322  if (sparc_align_loops_string)
323    {
324      sparc_align_loops = exact_log2 (atoi (sparc_align_loops_string));
325      if (sparc_align_loops < 2 || sparc_align_loops > 7)
326	fatal ("-malign-loops=%s is not between 4 and 128 or is not a power of two",
327	       sparc_align_loops_string);
328    }
329  else
330    {
331      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
332	 its 0.  This sounds a bit kludgey.  */
333      sparc_align_loops = 0;
334    }
335
336  /* Validate -malign-jumps= value, or provide default.  */
337  if (sparc_align_jumps_string)
338    {
339      sparc_align_jumps = exact_log2 (atoi (sparc_align_jumps_string));
340      if (sparc_align_jumps < 2 || sparc_align_loops > 7)
341	fatal ("-malign-jumps=%s is not between 4 and 128 or is not a power of two",
342	       sparc_align_jumps_string);
343    }
344  else
345    {
346      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
347	 its 0.  This sounds a bit kludgey.  */
348      sparc_align_jumps = 0;
349    }
350
351  /* Validate -malign-functions= value, or provide default. */
352  if (sparc_align_funcs_string)
353    {
354      sparc_align_funcs = exact_log2 (atoi (sparc_align_funcs_string));
355      if (sparc_align_funcs < 2 || sparc_align_loops > 7)
356	fatal ("-malign-functions=%s is not between 4 and 128 or is not a power of two",
357	       sparc_align_funcs_string);
358    }
359  else
360    sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;
361
362  /* Validate PCC_STRUCT_RETURN.  */
363  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
364    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
365
366  /* Do various machine dependent initializations.  */
367  sparc_init_modes ();
368
369  if ((profile_flag || profile_block_flag)
370      && sparc_cmodel != CM_MEDLOW)
371    {
372      error ("profiling does not support code models other than medlow");
373    }
374}
375
376/* Miscellaneous utilities.  */
377
378/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
379   or branch on register contents instructions.  */
380
381int
382v9_regcmp_p (code)
383     enum rtx_code code;
384{
385  return (code == EQ || code == NE || code == GE || code == LT
386	  || code == LE || code == GT);
387}
388
389
390/* Operand constraints.  */
391
392/* Return non-zero only if OP is a register of mode MODE,
393   or const0_rtx.  Don't allow const0_rtx if TARGET_LIVE_G0 because
394   %g0 may contain anything.  */
395
396int
397reg_or_0_operand (op, mode)
398     rtx op;
399     enum machine_mode mode;
400{
401  if (register_operand (op, mode))
402    return 1;
403  if (TARGET_LIVE_G0)
404    return 0;
405  if (op == const0_rtx)
406    return 1;
407  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
408      && CONST_DOUBLE_HIGH (op) == 0
409      && CONST_DOUBLE_LOW (op) == 0)
410    return 1;
411  if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
412      && GET_CODE (op) == CONST_DOUBLE
413      && fp_zero_operand (op))
414    return 1;
415  return 0;
416}
417
418/* Nonzero if OP is a floating point value with value 0.0.  */
419
420int
421fp_zero_operand (op)
422     rtx op;
423{
424  REAL_VALUE_TYPE r;
425
426  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
427  return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
428}
429
430/* Nonzero if OP is an integer register.  */
431
432int
433intreg_operand (op, mode)
434     rtx op;
435     enum machine_mode mode ATTRIBUTE_UNUSED;
436{
437  return (register_operand (op, SImode)
438	  || (TARGET_ARCH64 && register_operand (op, DImode)));
439}
440
441/* Nonzero if OP is a floating point condition code register.  */
442
int
fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* This can happen when recog is called from combine.  Op may be a MEM.
     Fail instead of calling abort in this case.  */
  if (GET_CODE (op) != REG)
    return 0;

  /* With an explicit mode, the register's mode must match; with
     VOIDmode, only the FP condition code modes are acceptable.  */
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (mode == VOIDmode
      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
    return 0;

#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
  if (reg_renumber == 0)
    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
  return REGNO_OK_FOR_CCFP_P (REGNO (op));
#else
  /* Unsigned-subtract trick: true exactly for the four registers
     %fcc0 .. %fcc3 starting at SPARC_FIRST_V9_FCC_REG.  */
  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
#endif
}
467
468/* Nonzero if OP is an integer or floating point condition code register.  */
469
470int
471icc_or_fcc_reg_operand (op, mode)
472     rtx op;
473     enum machine_mode mode;
474{
475  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
476    {
477      if (mode != VOIDmode && mode != GET_MODE (op))
478	return 0;
479      if (mode == VOIDmode
480	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
481	return 0;
482      return 1;
483    }
484
485  return fcc_reg_operand (op, mode);
486}
487
488/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
489int
490restore_operand (op, mode)
491     rtx op;
492     enum machine_mode mode;
493{
494  return (GET_CODE (op) == REG && GET_MODE (op) == mode
495	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
496}
497
498/* Call insn on SPARC can take a PC-relative constant address, or any regular
499   memory address.  */
500
501int
502call_operand (op, mode)
503     rtx op;
504     enum machine_mode mode;
505{
506  if (GET_CODE (op) != MEM)
507    abort ();
508  op = XEXP (op, 0);
509  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
510}
511
512int
513call_operand_address (op, mode)
514     rtx op;
515     enum machine_mode mode;
516{
517  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
518}
519
520/* Returns 1 if OP is either a symbol reference or a sum of a symbol
521   reference and a constant.  */
522
523int
524symbolic_operand (op, mode)
525     register rtx op;
526     enum machine_mode mode;
527{
528  switch (GET_CODE (op))
529    {
530    case SYMBOL_REF:
531    case LABEL_REF:
532      return 1;
533
534    case CONST:
535      op = XEXP (op, 0);
536      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
537	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
538	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
539
540      /* ??? This clause seems to be irrelevant.  */
541    case CONST_DOUBLE:
542      return GET_MODE (op) == mode;
543
544    default:
545      return 0;
546    }
547}
548
549/* Return truth value of statement that OP is a symbolic memory
550   operand of mode MODE.  */
551
552int
553symbolic_memory_operand (op, mode)
554     rtx op;
555     enum machine_mode mode ATTRIBUTE_UNUSED;
556{
557  if (GET_CODE (op) == SUBREG)
558    op = SUBREG_REG (op);
559  if (GET_CODE (op) != MEM)
560    return 0;
561  op = XEXP (op, 0);
562  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
563	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
564}
565
566/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */
567
568int
569label_ref_operand (op, mode)
570     rtx op;
571     enum machine_mode mode;
572{
573  if (GET_CODE (op) != LABEL_REF)
574    return 0;
575  if (GET_MODE (op) != mode)
576    return 0;
577  return 1;
578}
579
580/* Return 1 if the operand is an argument used in generating pic references
581   in either the medium/low or medium/anywhere code models of sparc64.  */
582
583int
584sp64_medium_pic_operand (op, mode)
585     rtx op;
586     enum machine_mode mode ATTRIBUTE_UNUSED;
587{
588  /* Check for (const (minus (symbol_ref:GOT)
589                             (const (minus (label) (pc))))).  */
590  if (GET_CODE (op) != CONST)
591    return 0;
592  op = XEXP (op, 0);
593  if (GET_CODE (op) != MINUS)
594    return 0;
595  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
596    return 0;
597  /* ??? Ensure symbol is GOT.  */
598  if (GET_CODE (XEXP (op, 1)) != CONST)
599    return 0;
600  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
601    return 0;
602  return 1;
603}
604
605/* Return 1 if the operand is a data segment reference.  This includes
606   the readonly data segment, or in other words anything but the text segment.
607   This is needed in the medium/anywhere code model on v9.  These values
608   are accessed with EMBMEDANY_BASE_REG.  */
609
610int
611data_segment_operand (op, mode)
612     rtx op;
613     enum machine_mode mode ATTRIBUTE_UNUSED;
614{
615  switch (GET_CODE (op))
616    {
617    case SYMBOL_REF :
618      return ! SYMBOL_REF_FLAG (op);
619    case PLUS :
620      /* Assume canonical format of symbol + constant.
621	 Fall through.  */
622    case CONST :
623      return data_segment_operand (XEXP (op, 0));
624    default :
625      return 0;
626    }
627}
628
629/* Return 1 if the operand is a text segment reference.
630   This is needed in the medium/anywhere code model on v9.  */
631
632int
633text_segment_operand (op, mode)
634     rtx op;
635     enum machine_mode mode ATTRIBUTE_UNUSED;
636{
637  switch (GET_CODE (op))
638    {
639    case LABEL_REF :
640      return 1;
641    case SYMBOL_REF :
642      return SYMBOL_REF_FLAG (op);
643    case PLUS :
644      /* Assume canonical format of symbol + constant.
645	 Fall through.  */
646    case CONST :
647      return text_segment_operand (XEXP (op, 0));
648    default :
649      return 0;
650    }
651}
652
653/* Return 1 if the operand is either a register or a memory operand that is
654   not symbolic.  */
655
656int
657reg_or_nonsymb_mem_operand (op, mode)
658    register rtx op;
659    enum machine_mode mode;
660{
661  if (register_operand (op, mode))
662    return 1;
663
664  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
665    return 1;
666
667  return 0;
668}
669
670int
671splittable_symbolic_memory_operand (op, mode)
672     rtx op;
673     enum machine_mode mode ATTRIBUTE_UNUSED;
674{
675  if (GET_CODE (op) != MEM)
676    return 0;
677  if (! symbolic_operand (XEXP (op, 0), Pmode))
678    return 0;
679  return 1;
680}
681
682int
683splittable_immediate_memory_operand (op, mode)
684     rtx op;
685     enum machine_mode mode ATTRIBUTE_UNUSED;
686{
687  if (GET_CODE (op) != MEM)
688    return 0;
689  if (! immediate_operand (XEXP (op, 0), Pmode))
690    return 0;
691  return 1;
692}
693
694/* Return truth value of whether OP is EQ or NE.  */
695
696int
697eq_or_neq (op, mode)
698     rtx op;
699     enum machine_mode mode ATTRIBUTE_UNUSED;
700{
701  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
702}
703
704/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
705   or LTU for non-floating-point.  We handle those specially.  */
706
707int
708normal_comp_operator (op, mode)
709     rtx op;
710     enum machine_mode mode ATTRIBUTE_UNUSED;
711{
712  enum rtx_code code = GET_CODE (op);
713
714  if (GET_RTX_CLASS (code) != '<')
715    return 0;
716
717  if (GET_MODE (XEXP (op, 0)) == CCFPmode
718      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
719    return 1;
720
721  return (code != NE && code != EQ && code != GEU && code != LTU);
722}
723
724/* Return 1 if this is a comparison operator.  This allows the use of
725   MATCH_OPERATOR to recognize all the branch insns.  */
726
727int
728noov_compare_op (op, mode)
729    register rtx op;
730    enum machine_mode mode ATTRIBUTE_UNUSED;
731{
732  enum rtx_code code = GET_CODE (op);
733
734  if (GET_RTX_CLASS (code) != '<')
735    return 0;
736
737  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
738    /* These are the only branches which work with CC_NOOVmode.  */
739    return (code == EQ || code == NE || code == GE || code == LT);
740  return 1;
741}
742
743/* Nonzero if OP is a comparison operator suitable for use in v9
744   conditional move or branch on register contents instructions.  */
745
746int
747v9_regcmp_op (op, mode)
748     register rtx op;
749     enum machine_mode mode ATTRIBUTE_UNUSED;
750{
751  enum rtx_code code = GET_CODE (op);
752
753  if (GET_RTX_CLASS (code) != '<')
754    return 0;
755
756  return v9_regcmp_p (code);
757}
758
759/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
760
761int
762extend_op (op, mode)
763     rtx op;
764     enum machine_mode mode ATTRIBUTE_UNUSED;
765{
766  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
767}
768
769/* Return nonzero if OP is an operator of mode MODE which can set
770   the condition codes explicitly.  We do not include PLUS and MINUS
771   because these require CC_NOOVmode, which we handle explicitly.  */
772
773int
774cc_arithop (op, mode)
775     rtx op;
776     enum machine_mode mode ATTRIBUTE_UNUSED;
777{
778  if (GET_CODE (op) == AND
779      || GET_CODE (op) == IOR
780      || GET_CODE (op) == XOR)
781    return 1;
782
783  return 0;
784}
785
786/* Return nonzero if OP is an operator of mode MODE which can bitwise
787   complement its second operand and set the condition codes explicitly.  */
788
789int
790cc_arithopn (op, mode)
791     rtx op;
792     enum machine_mode mode ATTRIBUTE_UNUSED;
793{
794  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
795     and (xor ... (not ...)) to (not (xor ...)).   */
796  return (GET_CODE (op) == AND
797	  || GET_CODE (op) == IOR);
798}
799
800/* Return true if OP is a register, or is a CONST_INT that can fit in a
801   signed 13 bit immediate field.  This is an acceptable SImode operand for
802   most 3 address instructions.  */
803
804int
805arith_operand (op, mode)
806     rtx op;
807     enum machine_mode mode;
808{
809  int val;
810  if (register_operand (op, mode))
811    return 1;
812  if (GET_CODE (op) != CONST_INT)
813    return 0;
814  val = INTVAL (op) & 0xffffffff;
815  return SPARC_SIMM13_P (val);
816}
817
818/* Return true if OP is a constant 4096  */
819
820int
821arith_4096_operand (op, mode)
822     rtx op;
823     enum machine_mode mode ATTRIBUTE_UNUSED;
824{
825  int val;
826  if (GET_CODE (op) != CONST_INT)
827    return 0;
828  val = INTVAL (op) & 0xffffffff;
829  return val == 4096;
830}
831
832/* Return true if OP is suitable as second operand for add/sub */
833
834int
835arith_add_operand (op, mode)
836     rtx op;
837     enum machine_mode mode;
838{
839  return arith_operand (op, mode) || arith_4096_operand (op, mode);
840}
841
842/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
843   immediate field of OR and XOR instructions.  Used for 64-bit
844   constant formation patterns.  */
int
const64_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && SPARC_SIMM13_P (INTVAL (op)))
#if HOST_BITS_PER_WIDE_INT != 64
	  /* On hosts with a 32 bit HOST_WIDE_INT, a 64 bit constant is
	     a CONST_DOUBLE; accept it only when the high word is the
	     sign extension of the low word.  */
	  || (GET_CODE (op) == CONST_DOUBLE
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
	      && (CONST_DOUBLE_HIGH (op) ==
		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
		   (HOST_WIDE_INT)0xffffffff : 0)))
#endif
	  );
}
861
862/* The same, but only for sethi instructions.  */
/* The same, but only for sethi instructions.  The constant must have
   some bits in the sethi-visible range (above the low 10 bits).  */
int
const64_high_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) & 0xfffffc00) != 0
	   && SPARC_SETHI_P (INTVAL (op))
#if HOST_BITS_PER_WIDE_INT != 64
	   /* Must be positive on non-64bit host else the
	      optimizer is fooled into thinking that sethi
	      sign extends, even though it does not.  */
	   && INTVAL (op) >= 0
#endif
	   )
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
}
883
884/* Return true if OP is a register, or is a CONST_INT that can fit in a
885   signed 11 bit immediate field.  This is an acceptable SImode operand for
886   the movcc instructions.  */
887
888int
889arith11_operand (op, mode)
890     rtx op;
891     enum machine_mode mode;
892{
893  return (register_operand (op, mode)
894	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
895}
896
897/* Return true if OP is a register, or is a CONST_INT that can fit in a
898   signed 10 bit immediate field.  This is an acceptable SImode operand for
899   the movrcc instructions.  */
900
901int
902arith10_operand (op, mode)
903     rtx op;
904     enum machine_mode mode;
905{
906  return (register_operand (op, mode)
907	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
908}
909
910/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
911   immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
912   immediate field.
913   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
914   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
915   for most 3 address instructions.  */
916
int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  /* 32 bit: both halves must independently fit in 13 signed
	     bits (the range test is the usual add-bias trick).  */
	  || (! TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
	  /* 64 bit: the value as a whole must fit in 13 signed bits,
	     i.e. the high word must be the sign extension of the low
	     word's bit 12.  */
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
936
937/* Return true if OP is a constant 4096 for DImode on ARCH64 */
938
939int
940arith_double_4096_operand (op, mode)
941     rtx op;
942     enum machine_mode mode ATTRIBUTE_UNUSED;
943{
944  return (TARGET_ARCH64 &&
945  	  ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
946  	   (GET_CODE (op) == CONST_DOUBLE &&
947  	    CONST_DOUBLE_LOW (op) == 4096 &&
948  	    CONST_DOUBLE_HIGH (op) == 0)));
949}
950
951/* Return true if OP is suitable as second operand for add/sub in DImode */
952
953int
954arith_double_add_operand (op, mode)
955     rtx op;
956     enum machine_mode mode;
957{
958  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
959}
960
961/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
962   can fit in an 11 bit immediate field.  This is an acceptable DImode
963   operand for the movcc instructions.  */
964/* ??? Replace with arith11_operand?  */
965
int
arith11_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  /* CONST_DOUBLE in the signed 11 bit range: the low word
	     must pass the biased range test and the high word must be
	     the sign extension of the low word's bit 10.  */
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
	  /* CONST_INT in the signed 11 bit range.  */
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
}
983
984/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
985   can fit in an 10 bit immediate field.  This is an acceptable DImode
986   operand for the movrcc instructions.  */
987/* ??? Replace with arith10_operand?  */
988
989int
990arith10_double_operand (op, mode)
991     rtx op;
992     enum machine_mode mode;
993{
994  return (register_operand (op, mode)
995	  || (GET_CODE (op) == CONST_DOUBLE
996	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
997	      && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
998	      && ((CONST_DOUBLE_HIGH (op) == -1
999		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
1000		  || (CONST_DOUBLE_HIGH (op) == 0
1001		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
1002	  || (GET_CODE (op) == CONST_INT
1003	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
1004	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
1005}
1006
1007/* Return truth value of whether OP is a integer which fits the
1008   range constraining immediate operands in most three-address insns,
1009   which have a 13 bit immediate field.  */
1010
1011int
1012small_int (op, mode)
1013     rtx op;
1014     enum machine_mode mode ATTRIBUTE_UNUSED;
1015{
1016  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
1017}
1018
1019int
1020small_int_or_double (op, mode)
1021     rtx op;
1022     enum machine_mode mode ATTRIBUTE_UNUSED;
1023{
1024  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
1025	  || (GET_CODE (op) == CONST_DOUBLE
1026	      && CONST_DOUBLE_HIGH (op) == 0
1027	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
1028}
1029
/* Recognize operand values for the umul instruction.  That instruction sign
   extends immediate values just like all other sparc instructions, but
   interprets the extended result as an unsigned number.  */

int
uns_small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  /* Accept 0 .. 0xfff directly, plus 0xfffff000 .. 0xffffffff: the
     latter are the values whose 13-bit sign extension, reinterpreted
     as an unsigned 32-bit number, reproduces the operand.  */
  return (GET_CODE (op) == CONST_INT
	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
	      || (INTVAL (op) >= 0xFFFFF000
                  && INTVAL (op) < 0x100000000)));
#else
  /* With a 32-bit host wide int the large values arrive as
     CONST_DOUBLEs with a zero high word.  The unsigned subtraction
     deliberately wraps, so the comparison accepts exactly the range
     0xfffff000 .. 0xffffffff.  */
  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}
1052
1053int
1054uns_arith_operand (op, mode)
1055     rtx op;
1056     enum machine_mode mode;
1057{
1058  return register_operand (op, mode) || uns_small_int (op, mode);
1059}
1060
1061/* Return truth value of statement that OP is a call-clobbered register.  */
1062int
1063clobbered_register (op, mode)
1064     rtx op;
1065     enum machine_mode mode ATTRIBUTE_UNUSED;
1066{
1067  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
1068}
1069
1070/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns.  */
1071
1072int
1073zero_operand (op, mode)
1074     rtx op;
1075     enum machine_mode mode ATTRIBUTE_UNUSED;
1076{
1077  return op == const0_rtx;
1078}
1079
/* Return 1 if OP is a valid operand for the source of a move insn.  */

int
input_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* If both modes are non-void they must be the same.  */
  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
    return 0;

  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
    return 1;

  /* Allow any one instruction integer constant, and all CONST_INT
     variants when we are working in DImode and !arch64.  */
  /* A sethi-loadable constant is only one insn on arch64 when it is
     non-negative or the mode is SImode, since sethi zero-fills and the
     value would otherwise need sign extension.  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && ((GET_CODE (op) == CONST_INT
	   && ((SPARC_SETHI_P (INTVAL (op))
		&& (! TARGET_ARCH64
		    || (INTVAL (op) >= 0)
		    || mode == SImode))
	       || SPARC_SIMM13_P (INTVAL (op))
	       || (mode == DImode
		   && ! TARGET_ARCH64)))
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && ((CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
		  ||
#if HOST_BITS_PER_WIDE_INT == 64
		  (CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
#else
		  /* On a 32-bit host, a simm13 CONST_DOUBLE must have a
		     properly sign-extended high word.  */
		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
			&& CONST_DOUBLE_HIGH (op) == 0)
		       || (CONST_DOUBLE_HIGH (op) == -1)))
#endif
		  ))))
    return 1;

  /* If !arch64 and this is a DImode const, allow it so that
     the splits can be generated.  */
  if (! TARGET_ARCH64
      && mode == DImode
      && GET_CODE (op) == CONST_DOUBLE)
    return 1;

  if (register_operand (op, mode))
    return 1;

  /* If this is a SUBREG, look inside so that we handle
     paradoxical ones.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  /* Check for valid MEM forms.  */
  if (GET_CODE (op) == MEM)
    {
      rtx inside = XEXP (op, 0);

      if (GET_CODE (inside) == LO_SUM)
	{
	  /* We can't allow these because all of the splits
	     (eventually as they trickle down into DFmode
	     splits) require offsettable memory references.  */
	  if (! TARGET_V9
	      && GET_MODE (op) == TFmode)
	    return 0;

	  return (register_operand (XEXP (inside, 0), Pmode)
		  && CONSTANT_P (XEXP (inside, 1)));
	}
      return memory_address_p (mode, inside);
    }

  return 0;
}
1160
1161
/* Load the 32-bit constant or symbol OP1 into register OP0 as a
   two-insn sethi/or (or HIGH/LO_SUM) sequence.
   We know it can't be done in one insn when we get here,
   the movsi expander guarantees this.  */
void
sparc_emit_set_const32 (op0, op1)
     rtx op0;
     rtx op1;
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  /* One-insn constants must have been handled by the expander.  */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT value = INTVAL (op1);

      if (SPARC_SETHI_P (value)
	  || SPARC_SIMM13_P (value))
	abort ();
    }

  /* Full 2-insn decomposition is needed.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      /* On arch64 with a 32-bit host wide int, a constant with bit 31
	 set must be built as a CONST_DOUBLE with a zero high word so it
	 is not mistakenly sign-extended to 64 bits.  */
      if (TARGET_ARCH64
	  && HOST_BITS_PER_WIDE_INT != 64
	  && (INTVAL (op1) & 0x80000000) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  temp,
				  gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
							INTVAL (op1) & 0xfffffc00, 0)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  temp,
				  GEN_INT (INTVAL (op1) & 0xfffffc00)));
	}
      /* OR in the low 10 bits that sethi cannot supply.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode,
					   temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      temp,
			      gen_rtx_HIGH (mode,
					    op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_LO_SUM (mode,
					      temp,
					      op1)));

    }
}
1228
1229
/* Sparc-v9 code-model support. */
/* Load the symbolic address OP1 into 64-bit register OP0, using TEMP1
   as a scratch register where the sequence needs one.  The insn
   sequence emitted depends on the code model (-mcmodel).  */
void
sparc_emit_set_symbolic_const64 (op0, op1, temp1)
     rtx op0;
     rtx op1;
     rtx temp1;
{
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp
	 or	%temp, %lo(symbol), %reg  */
      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      /* OP0 doubles as an intermediate here; only the shift needs the
	 separate scratch TEMP1.  */
      emit_insn (gen_seth44 (op0, op1));
      emit_insn (gen_setm44 (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp1,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp1, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 or	%temp2, %lo(symbol), %temp4
	 sllx	%temp3, 32, %temp5
	 or	%temp4, %temp5, %reg  */

      /* Getting this right wrt. reloading is really tricky.
	 We _MUST_ have a separate temporary at this point,
	 so we barf immediately instead of generating
	 incorrect code.  */
      if (temp1 == op0)
	abort ();

      emit_insn (gen_sethh (op0, op1));
      emit_insn (gen_setlm (temp1, op1));
      emit_insn (gen_sethm (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_PLUS (DImode, op0, temp1)));
      emit_insn (gen_setlo (op0, op0, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			or	%temp1, %lo(symbol), %temp2
			add	%temp2, EMBMEDANY_BASE_REG, %reg

	 Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			or	%temp2, %lo(symbol), %temp4
			sllx	%temp3, 32, %temp5
			or	%temp4, %temp5, %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (op0, temp1));
	  emit_insn (gen_embmedany_losum (op0, op0, op1));
	}
      else
	{
	  /* Getting this right wrt. reloading is really tricky.
	     We _MUST_ have a separate temporary at this point,
	     so we barf immediately instead of generating
	     incorrect code.  */
	  if (temp1 == op0)
	    abort ();

	  emit_insn (gen_embmedany_textuhi (op0, op1));
	  emit_insn (gen_embmedany_texthi  (temp1, op1));
	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, op0, temp1)));
	  emit_insn (gen_embmedany_textlo  (op0, op0, op1));
	}
      break;

    default:
      abort();
    }
}
1355
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));

/* GEN_HIGHINT64 keeps only the 22 bits a sethi can load (bits 10-31);
   GEN_INT64 produces the full constant.  On a host whose wide int is
   only 32 bits, both build a CONST_DOUBLE pair (GEN_INT64 manually
   sign-extending into the high word) so the 64-bit target value is
   represented exactly.  */
#if HOST_BITS_PER_WIDE_INT == 64
#define GEN_HIGHINT64(__x)		GEN_INT ((__x) & 0xfffffc00)
#define GEN_INT64(__x)			GEN_INT (__x)
#else
#define GEN_HIGHINT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xfffffc00, 0)
#define GEN_INT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xffffffff, \
			      ((__x) & 0x80000000 \
			       ? 0xffffffff : 0))
#endif
1377
1378/* The optimizer is not to assume anything about exactly
1379   which bits are set for a HIGH, they are unspecified.
1380   Unfortunately this leads to many missed optimizations
1381   during CSE.  We mask out the non-HIGH bits, and matches
1382   a plain movdi, to alleviate this problem.  */
1383static void
1384sparc_emit_set_safe_HIGH64 (dest, val)
1385     rtx dest;
1386     HOST_WIDE_INT val;
1387{
1388  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
1389}
1390
1391static rtx
1392gen_safe_SET64 (dest, val)
1393     rtx dest;
1394     HOST_WIDE_INT val;
1395{
1396  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
1397}
1398
1399static rtx
1400gen_safe_OR64 (src, val)
1401     rtx src;
1402     HOST_WIDE_INT val;
1403{
1404  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
1405}
1406
1407static rtx
1408gen_safe_XOR64 (src, val)
1409     rtx src;
1410     HOST_WIDE_INT val;
1411{
1412  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
1413}
1414
/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));

/* Load into OP0, using TEMP as scratch, the 64-bit value whose low
   32 bits are LOW_BITS.  IS_NEG == 0 gives the zero-extension of
   LOW_BITS (sethi + or); IS_NEG != 0 gives the value whose upper
   32 bits are all ones (sethi of the complement, then xor or one's
   complement) -- the caller only uses that case when the high half
   is 0xffffffff.  */
static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT low_bits;
  int is_neg;
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negated case, work with the complemented low word so the
     sethi produces the right bits before the final xor.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  sparc_emit_set_safe_HIGH64 (temp, high_bits);
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | (low_bits & 0x3ff)))));
	}
    }
}
1464
static void sparc_emit_set_const64_quick2
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT, int));

/* Load into OP0, using TEMP as scratch, the 64-bit value
   (HIGH_BITS << SHIFT_COUNT) | LOW_IMMEDIATE: build HIGH_BITS with
   sethi/or (or a single move when sethi cannot contribute), shift it
   into place, then OR in LOW_IMMEDIATE if nonzero.  LOW_IMMEDIATE is
   presumably a simm13 -- callers check SPARC_SIMM13_P or pass 0.  */
static void
sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
  rtx op0;
  rtx temp;
  unsigned HOST_WIDE_INT high_bits;
  unsigned HOST_WIDE_INT low_immediate;
  int shift_count;
{
  rtx temp2 = op0;

  /* Build HIGH_BITS; TEMP2 ends up naming whichever register holds
     the finished value (TEMP when no OR was needed).  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place. */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
1505
static void sparc_emit_set_const64_longway
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
/* Load HIGH_BITS:LOW_BITS (the upper and lower 32 bits of the
   constant) into OP0, using TEMP as scratch.  */
static void
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_bits;
{
  rtx sub_temp;

  /* No new pseudos may be created during reload.  */
  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  /* Build the upper 32 bits in SUB_TEMP: sethi plus an optional OR of
     the low 10 bits, or a plain move when sethi cannot contribute.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      /* Shift the upper half into place, build the lower half
	 independently with sethi/or, then combine with an add.  Fresh
	 pseudos for every step let CSE reuse the pieces.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      /* Inject the lower 32 bits as three chunks (12 + 12 + 8 bits),
	 shifting and OR'ing in place; shifts for all-zero chunks are
	 merged into the next shift so they cost nothing.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew... */
    }
}
1612
/* Analyze a 64-bit constant for certain properties. */
static void analyze_64bit_constant
	PROTO((unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT,
	       int *, int *, int *));

/* Examine the constant HIGH_BITS:LOW_BITS (32 bits each) and report
   through the pointer arguments:
     *HBSP   - bit number (0..63) of the highest set bit,
     *LBSP   - bit number of the lowest set bit,
     *ABBASP - nonzero iff every bit between the two is also set.
   Aborts on an all-zero constant, since that case must have been
   emitted as a single instruction already.  */
static void
analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int *hbsp, *lbsp, *abbasp;
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  /* First pass: scan LOW_BITS upward for the lowest set bit while
     simultaneously scanning HIGH_BITS downward for the highest.  */
  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  /* Second pass: whatever was not found must lie in the other word.  */
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  if (lowest_bit_set == -1
      || highest_bit_set == -1)
    abort ();
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      /* NOTE(review): "1 << i" with i up to 31 overflows a 32-bit
	 signed int; presumably benign on supported hosts, but an
	 unsigned constant would be cleaner -- TODO confirm.  */
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
1682
1683static int const64_is_2insns
1684	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
1685
1686static int
1687const64_is_2insns (high_bits, low_bits)
1688     unsigned HOST_WIDE_INT high_bits, low_bits;
1689{
1690  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1691
1692  if (high_bits == 0
1693      || high_bits == 0xffffffff)
1694    return 1;
1695
1696  analyze_64bit_constant (high_bits, low_bits,
1697			  &highest_bit_set, &lowest_bit_set,
1698			  &all_bits_between_are_set);
1699
1700  if ((highest_bit_set == 63
1701       || lowest_bit_set == 0)
1702      && all_bits_between_are_set != 0)
1703    return 1;
1704
1705  if ((highest_bit_set - lowest_bit_set) < 21)
1706    return 1;
1707
1708  return 0;
1709}
1710
1711static unsigned HOST_WIDE_INT create_simple_focus_bits
1712	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1713	       int, int));
1714
1715static unsigned HOST_WIDE_INT
1716create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
1717     unsigned HOST_WIDE_INT high_bits, low_bits;
1718     int lowest_bit_set, shift;
1719{
1720  HOST_WIDE_INT hi, lo;
1721
1722  if (lowest_bit_set < 32)
1723    {
1724      lo = (low_bits >> lowest_bit_set) << shift;
1725      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1726    }
1727  else
1728    {
1729      lo = 0;
1730      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1731    }
1732  if (hi & lo)
1733    abort ();
1734  return (hi | lo);
1735}
1736
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  */
void
sparc_emit_set_const64 (op0, op1)
     rtx op0;
     rtx op1;
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp;

  /* Sanity check that we know what we are working with.  */
  if (! TARGET_ARCH64
      || GET_CODE (op0) != REG
      || (REGNO (op0) >= SPARC_FIRST_FP_REG
	  && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
    abort ();

  /* No new pseudos may be created during reload; reuse OP0.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (DImode);

  /* Symbolic addresses go through the code-model-specific emitter.  */
  if (GET_CODE (op1) != CONST_DOUBLE
      && GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  /* Split the constant into two 32-bit halves, independent of how the
     host represents it.  */
  if (GET_CODE (op1) == CONST_DOUBLE)
    {
#if HOST_BITS_PER_WIDE_INT == 64
      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
      low_bits  = CONST_DOUBLE_LOW (op1) & 0xffffffff;
#else
      high_bits = CONST_DOUBLE_HIGH (op1);
      low_bits = CONST_DOUBLE_LOW (op1);
#endif
    }
  else
    {
#if HOST_BITS_PER_WIDE_INT == 64
      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
      low_bits = (INTVAL (op1) & 0xffffffff);
#else
      high_bits = ((INTVAL (op1) < 0) ?
		   0xffffffff :
		   0x00000000);
      low_bits = INTVAL (op1);
#endif
    }

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      /* Case 3: extract the small constant; otherwise THE_CONST
	 stays -1 and only the shift distinguishes cases 1 and 2.  */
      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	shift = -(63 - highest_bit_set);

      if (! SPARC_SIMM13_P (the_const))
	abort ();

      /* Positive shift: left shift into place; negative: the bits
	 start at bit 0, so shift right from the all-ones pattern.  */
      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode,
						temp,
						GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode,
						  temp,
						  GEN_INT (-shift))));
      else
	abort ();
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      if (! SPARC_SETHI_P (focus_bits))
	 abort ();

      sparc_emit_set_safe_HIGH64 (temp, focus_bits);

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      else
	abort ();
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  /* The complement is a sign-extended 32-bit value, so it may
	     be loadable with a plain move or sethi.  */
	  int fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
	}
      else
	{
	  /* Recurse to load the (2-insn) complement, with the low 10
	     bits cleared since they are supplied by the final xor.  */
	  rtx negated_const;
#if HOST_BITS_PER_WIDE_INT == 64
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
#else
	  negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
						(~low_bits) & 0xfffffc00,
						(~high_bits) & 0xffffffff);
#endif
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *	sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      if (highest_bit_set < 32
	  || lowest_bit_set >= 32)
	abort ();

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *	or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P(low_bits)
      && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition. */
#if 0
  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
	  high_bits, low_bits, ~high_bits, ~low_bits);
#endif
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
2009
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the cc reg in the proper mode.  */

rtx
gen_compare_reg (code, x, y)
     enum rtx_code code;
     rtx x, y;
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg;

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos). If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  /* Not seen before: claim the next fcc reg round-robin and
	     remember the operands for future reuse.  */
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
			  gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2072
/* This function is used for v9 only.
   CODE is the code for an Scc's comparison.
   OPERANDS[0] is the target of the Scc insn.
   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
   been generated yet).

   This function is needed to turn

	   (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
	   (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction. We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   We refer to the global sparc compare operands sparc_compare_op0 and
   sparc_compare_op1.

   Returns 1 if the insns were emitted, 0 if the caller must expand the
   operation some other way.  */

int
gen_v9_scc (compare_code, operands)
     enum rtx_code compare_code;
     register rtx *operands;
{
  rtx temp, op0, op1;

  /* On 32-bit targets, punt on DImode operands/results; the code below
     cannot handle them there.  */
  if (! TARGET_ARCH64
      && (GET_MODE (sparc_compare_op0) == DImode
	  || GET_MODE (operands[0]) == DImode))
    return 0;

  /* Handle the case where operands[0] == sparc_compare_op0.
     We "early clobber" the result.  */
  if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
    {
      op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
      emit_move_insn (op0, sparc_compare_op0);
    }
  else
    op0 = sparc_compare_op0;
  /* For consistency in the following.  */
  op1 = sparc_compare_op1;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
      && op1 == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      /* Special case for op0 != 0.  This can be done with one instruction if
	 operands[0] == sparc_compare_op0.  We don't assume they are equal
	 now though.  */

      if (compare_code == NE
	  && GET_MODE (operands[0]) == DImode
	  && GET_MODE (op0) == DImode)
	{
	  /* Copy op0 into the result, then conditionally replace it with 1
	     when op0 is non-zero.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
	  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       operands[0])));
	  return 1;
	}

      /* General movr case: clear the result, widen op0 to DImode if
	 necessary, then conditionally set the result to 1.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   operands[0])));
      return 1;
    }
  else
    {
      /* Fall back to comparing into a cc reg, then clearing the result and
	 conditionally moving 1 into it on the cc condition.  */
      operands[1] = gen_compare_reg (compare_code, op0, op1);

      switch (GET_MODE (operands[1]))
	{
	  case CCmode :
	  case CCXmode :
	  case CCFPEmode :
	  case CCFPmode :
	    break;
	  default :
	    abort ();
	}
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (operands[1]),
						   operands[1], const0_rtx),
				    const1_rtx, operands[0])));
      return 1;
    }
}
2184
2185/* Emit a conditional jump insn for the v9 architecture using comparison code
2186   CODE and jump target LABEL.
2187   This function exists to take advantage of the v9 brxx insns.  */
2188
2189void
2190emit_v9_brxx_insn (code, op0, label)
2191     enum rtx_code code;
2192     rtx op0, label;
2193{
2194  emit_jump_insn (gen_rtx_SET (VOIDmode,
2195			   pc_rtx,
2196			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2197				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2198						    op0, const0_rtx),
2199				    gen_rtx_LABEL_REF (VOIDmode, label),
2200				    pc_rtx)));
2201}
2202
2203/* Return nonzero if a return peephole merging return with
2204   setting of output register is ok.  */
2205int
2206leaf_return_peephole_ok ()
2207{
2208  return (actual_fsize == 0);
2209}
2210
/* Return nonzero if TRIAL can go into the function epilogue's
   delay slot.  SLOT is the slot we are trying to fill.  */

int
eligible_for_epilogue_delay (trial, slot)
     rtx trial;
     int slot;
{
  rtx pat, src;

  /* There is only one epilogue delay slot.  */
  if (slot >= 1)
    return 0;

  /* Only a plain single-SET insn can be considered.  */
  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  /* The candidate must assemble to exactly one instruction.  */
  if (get_attr_length (trial) != 1)
    return 0;

  /* If %g0 is live, there are lots of things we can't handle.
     Rather than trying to find them all now, let's punt and only
     optimize things as necessary.  */
  if (TARGET_LIVE_G0)
    return 0;

  /* In the case of a true leaf function, anything can go into the delay slot.
     A delay slot only exists however if the frame size is zero, otherwise
     we will put an insn to adjust the stack after the return.  */
  if (current_function_uses_only_leaf_regs)
    {
      if (leaf_return_peephole_ok ())
	return ((get_attr_in_uncond_branch_delay (trial)
		 == IN_BRANCH_DELAY_TRUE));
      return 0;
    }

  /* If only trivial `restore' insns work, nothing can go in the
     delay slot.  */
  else if (TARGET_BROKEN_SAVERESTORE)
    return 0;

  pat = PATTERN (trial);

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination
     must be one of hard regs 24..31.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) >= 32
      || REGNO (SET_DEST (pat)) < 24)
    return 0;

  /* The set of insns matched here must agree precisely with the set of
     patterns paired with a RETURN in sparc.md.  */

  src = SET_SRC (pat);

  /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64.  */
  if (arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* This matches "*return_di".  */
  else if (arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* This matches "*return_sf_no_fpu".  */
  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
	   && register_operand (src, SFmode))
    return 1;

  /* This matches "*return_addsi".  */
  else if (GET_CODE (src) == PLUS
	   && arith_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode)
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 1), SImode)))
    return 1;

  /* This matches "*return_adddi".  */
  else if (GET_CODE (src) == PLUS
	   && arith_double_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode)
	   && (register_operand (XEXP (src, 0), DImode)
	       || register_operand (XEXP (src, 1), DImode)))
    return 1;

  return 0;
}
2302
/* Helper for eligible_for_return_delay: return 1 if the rtl expression X
   references only `in' and global registers (and constants), 0 otherwise.
   Recurses over the operands of the supported rtx codes.  */
static int
check_return_regs (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
      return IN_OR_GLOBAL_P (x);

    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
    return 1;

    case SET:
    case IOR:
    case AND:
    case XOR:
    case PLUS:
    case MINUS:
      /* Binary codes: check operand 1 here, then fall through to the
	 unary cases below to check operand 0.  */
      if (check_return_regs (XEXP (x, 1)) == 0)
  return 0;
      /* fall through */
    case NOT:
    case NEG:
    case MEM:
      return check_return_regs (XEXP (x, 0));

    default:
      return 0;
    }

}
2337
2338/* Return 1 if TRIAL references only in and global registers.  */
2339int
2340eligible_for_return_delay (trial)
2341     rtx trial;
2342{
2343  if (GET_CODE (PATTERN (trial)) != SET)
2344    return 0;
2345
2346  return check_return_regs (PATTERN (trial));
2347}
2348
2349int
2350short_branch (uid1, uid2)
2351     int uid1, uid2;
2352{
2353  unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
2354  if (delta + 1024 < 2048)
2355    return 1;
2356  /* warning ("long branch, distance %d", delta); */
2357  return 0;
2358}
2359
2360/* Return non-zero if REG is not used after INSN.
2361   We assume REG is a reload reg, and therefore does
2362   not live past labels or calls or jumps.  */
2363int
2364reg_unused_after (reg, insn)
2365     rtx reg;
2366     rtx insn;
2367{
2368  enum rtx_code code, prev_code = UNKNOWN;
2369
2370  while ((insn = NEXT_INSN (insn)))
2371    {
2372      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2373	return 1;
2374
2375      code = GET_CODE (insn);
2376      if (GET_CODE (insn) == CODE_LABEL)
2377	return 1;
2378
2379      if (GET_RTX_CLASS (code) == 'i')
2380	{
2381	  rtx set = single_set (insn);
2382	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2383	  if (set && in_src)
2384	    return 0;
2385	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2386	    return 1;
2387	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2388	    return 0;
2389	}
2390      prev_code = code;
2391    }
2392  return 1;
2393}
2394
/* The table we use to reference PIC data: the _GLOBAL_OFFSET_TABLE_
   symbol (set up in finalize_pic).  */
static rtx global_offset_table;

/* The function we use to get at it.  */
static rtx get_pc_symbol;
/* Assembler name of that helper.  The first byte stays 0 until the
   helper has been emitted; finalize_pic tests this to emit it once.  */
static char get_pc_symbol_name[256];
2401
/* Ensure that we are not using patterns that are not OK with PIC.
   I is the index of the operand (in recog_operand) to check.
   Aborts on a disallowed operand; otherwise returns 1.  */

int
check_pic (i)
     int i;
{
  switch (flag_pic)
    {
    case 1:
      /* With -fpic, a bare SYMBOL_REF is never legitimate, and a CONST
	 is only allowed in the special (minus GOT-symbol const) form.  */
      if (GET_CODE (recog_operand[i]) == SYMBOL_REF
	  || (GET_CODE (recog_operand[i]) == CONST
	      && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
		    && (XEXP (XEXP (recog_operand[i], 0), 0)
			== global_offset_table)
		    && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
			== CONST))))
	abort ();
      /* fall through */
    case 2:
    default:
      return 1;
    }
}
2424
2425/* Return true if X is an address which needs a temporary register when
2426   reloaded while generating PIC code.  */
2427
2428int
2429pic_address_needs_scratch (x)
2430     rtx x;
2431{
2432  if (GET_CODE (x) == LABEL_REF)
2433    return 1;
2434
2435  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2436  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2437      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2438      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2439      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2440    return 1;
2441
2442  return 0;
2443}
2444
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if non zero, otherwise we allocate register(s) as
   necessary.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx reg;
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx pic_ref, address;
      rtx insn;

      /* We may not allocate a fresh register during or after reload.  */
      if (reg == 0)
	{
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = ((reload_in_progress || reload_completed)
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (Pmode == SImode)
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	}
      else
	address = orig;

      /* Fetch the symbol's address through the GOT:
	 reg = *(pic_offset_table + address).  */
      pic_ref = gen_rtx_MEM (Pmode,
			 gen_rtx_PLUS (Pmode,
				  pic_offset_table_rtx, address));
      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      insn = emit_move_insn (reg, pic_ref);
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
				  REG_NOTES (insn));
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already of the form (GOT-pointer + something): nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  if (reload_in_progress || reload_completed)
	    abort ();
	  else
	    reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both halves of (const (plus base offset)); reuse REG
	 for the offset only when the base did not already land in it.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else
	abort ();

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant_for_output (base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    abort ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? Why do we do this?  */
    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
       the register is live instead, in case it is eliminated.  */
    current_function_uses_pic_offset_table = 1;

  return orig;
}
2553
/* Return the RTX for insns to set the PIC register.  */

static rtx
pic_setup_code ()
{
  rtx seq;

  /* Wrap the get_pc insn (which loads pic_offset_table_rtx using the
     global_offset_table and get_pc_symbol) in a SEQUENCE so the caller
     can insert it wherever it is needed.  */
  start_sequence ();
  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
			 get_pc_symbol));
  seq = gen_sequence ();
  end_sequence ();

  return seq;
}
2569
/* Emit special PIC prologues and epilogues.  */

void
finalize_pic ()
{
  /* Labels to get the PC in the prologue of this function.  */
  int orig_flag_pic = flag_pic;
  rtx insn;

  /* Nothing to do unless this function actually needs the PIC reg.  */
  if (current_function_uses_pic_offset_table == 0)
    return;

  if (! flag_pic)
    abort ();

  /* If we haven't emitted the special get_pc helper function, do so now.  */
  if (get_pc_symbol_name[0] == 0)
    {
      int align;

      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
      text_section ();

      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
      if (align > 0)
	ASM_OUTPUT_ALIGN (asm_out_file, align);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
    }

  /* Initialize every time through, since we can't easily
     know this to be permanent.  */
  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
  /* Temporarily clear flag_pic while the setup code is generated and
     inserted; it is restored below.  */
  flag_pic = 0;

  emit_insn_after (pic_setup_code (), get_insns ());

  /* Insert the code in each nonlocal goto receiver.
     If you make changes here or to the nonlocal_goto_receiver
     pattern, make sure the unspec_volatile numbers still
     match.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	&& XINT (PATTERN (insn), 1) == 5)
      emit_insn_after (pic_setup_code (), insn);

  flag_pic = orig_flag_pic;

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
}
2625
/* Return 1 if MEM is a memory reference whose address is known to be
   aligned to at least a DESIRED-byte boundary (DESIRED is a power of
   two, e.g. 8).  */

int
mem_min_alignment (mem, desired)
     rtx mem;
     int desired;
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Decompose the address into an optional base REG and constant
     offset; either may remain NULL_RTX/unset if the form is unusual.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != FRAME_POINTER_REGNUM
	  && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.  */
	  if (((regno_pointer_align != NULL
		&& REGNO_POINTER_ALIGN (regno) >= desired)
	       || reload_completed)
	      && ((INTVAL (offset) & (desired - 1)) == 0))
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer based address: remove the stack bias
	     before testing the offset's alignment.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
2699
2700
/* Vectors to keep interesting information about registers where it can easily
   be got.  We use to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  */

enum sparc_mode_class {
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.
   NOTE(review): `(1 << DF_MODE)' here lacks the `(int)' cast the sibling
   macros use -- harmless, but inconsistent.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

#define DF_MODES64 DF_MODES

/* Modes for double-float only quantities.  */
#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))

/* Modes for double-float and larger quantities.
   (TF_ONLY_MODES is defined just below; this forward reference is fine
   because macro bodies are only expanded at the point of use.)  */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
2752
/* Value is 1 if register/mode pair is acceptable on sparc.
   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).  */

/* This points to either the 32 bit or the 64 bit version.  */
int *hard_regno_mode_classes;

static int hard_32bit_mode_classes[] = {
  /* Int regs 0-31.  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

static int hard_64bit_mode_classes[] = {
  /* Int regs 0-31.  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

/* Map from machine mode to sparc_mode_class bit; filled in by
   sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Map from hard regno to register class; filled in by sparc_init_modes.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
2815
/* Fill in sparc_mode_class (mode -> mode-class bit), select the
   hard_regno_mode_classes table for the target, and fill in
   sparc_regno_reg_class.  */
static void
sparc_init_modes ()
{
  int i;

  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  /* Classify integer modes purely by size.  */
	  if (GET_MODE_SIZE (i) <= 4)
	    sparc_mode_class[i] = 1 << (int) S_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) D_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) T_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  /* Classify float modes purely by size.  */
	  if (GET_MODE_SIZE (i) <= 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_CC:
	default:
	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
	     we must explicitly check for them here.  */
	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
	  else if (i == (int) CCmode || i == (int) CC_NOOVmode
		   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
	    sparc_mode_class[i] = 1 << (int) CC_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	}
    }

  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
	sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32)
	sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
	sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
	sparc_regno_reg_class[i] = FPCC_REGS;
      else
	sparc_regno_reg_class[i] = NO_REGS;
    }
}
2889
/* Save non call used registers from LOW to HIGH at BASE+OFFSET.
   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
   v9 int regs as it simplifies the code.  Emits assembly to FILE;
   REAL_OFFSET is used for the dwarf2 frame notes.  Returns the updated
   N_REGS; every save advances it by 2 (one 8-byte slot).  */

static int
save_regs (file, low, high, base, offset, n_regs, real_offset)
     FILE *file;
     int low, high;
     const char *base;
     int offset;
     int n_regs;
     int real_offset;
{
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      /* 64-bit int regs: one stx (8-byte store) per live register.  */
      for (i = low; i < high; i++)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      fprintf (file, "\tstx\t%s, [%s+%d]\n",
		       reg_names[i], base, offset + 4 * n_regs);
	      if (dwarf2out_do_frame ())
		dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
	      n_regs += 2;
	    }
	}
    }
  else
    {
      /* Registers walked in aligned pairs: use std when both regs of a
	 pair are live, a single st otherwise.  */
      for (i = low; i < high; i += 2)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  fprintf (file, "\tstd\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    {
		      char *l = dwarf2out_cfi_label ();
		      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
		      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
		    }
		  n_regs += 2;
		}
	      else
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i], base, offset + 4 * n_regs);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
		  n_regs += 2;
		}
	    }
	  else
	    {
	      /* Only the odd half of the pair is live; it goes in the
		 second word of the slot.  */
	      if (regs_ever_live[i+1] && ! call_used_regs[i+1])
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   reg_names[i+1], base, offset + 4 * n_regs + 4);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
		  n_regs += 2;
		}
	    }
	}
    }
  return n_regs;
}
2961
2962/* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
2963
2964   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2965   v9 int regs as it simplifies the code.  */
2966
2967static int
2968restore_regs (file, low, high, base, offset, n_regs)
2969     FILE *file;
2970     int low, high;
2971     const char *base;
2972     int offset;
2973     int n_regs;
2974{
2975  int i;
2976
2977  if (TARGET_ARCH64 && high <= 32)
2978    {
2979      for (i = low; i < high; i++)
2980	{
2981	  if (regs_ever_live[i] && ! call_used_regs[i])
2982	    fprintf (file, "\tldx\t[%s+%d], %s\n",
2983	      base, offset + 4 * n_regs, reg_names[i]),
2984	    n_regs += 2;
2985	}
2986    }
2987  else
2988    {
2989      for (i = low; i < high; i += 2)
2990	{
2991	  if (regs_ever_live[i] && ! call_used_regs[i])
2992	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2993	      fprintf (file, "\tldd\t[%s+%d], %s\n",
2994		       base, offset + 4 * n_regs, reg_names[i]),
2995	      n_regs += 2;
2996	    else
2997	      fprintf (file, "\tld\t[%s+%d],%s\n",
2998		       base, offset + 4 * n_regs, reg_names[i]),
2999	      n_regs += 2;
3000	  else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
3001	    fprintf (file, "\tld\t[%s+%d],%s\n",
3002		     base, offset + 4 * n_regs + 4, reg_names[i+1]),
3003	    n_regs += 2;
3004	}
3005    }
3006  return n_regs;
3007}
3008
/* Static variables we want to share between prologue and epilogue.  */

/* Number of live general or floating point registers needed to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE.
   Set by compute_frame_size.  */
static int num_gfregs;
3014
/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by output_function_prologue().
   SIZE is the size of the function's local variables; LEAF_FUNCTION is
   nonzero for a leaf function.  Also sets apparent_fsize, actual_fsize
   and num_gfregs as side effects; returns the aligned frame size.  */

int
compute_frame_size (size, leaf_function)
     int size;
     int leaf_function;
{
  int n_regs = 0, i;
  int outgoing_args_size = (current_function_outgoing_args_size
			    + REG_PARM_STACK_SPACE (current_function_decl));

  if (TARGET_EPILOGUE)
    {
      /* N_REGS is the number of 4-byte regs saved thus far.  This applies
	 even to v9 int regs to be consistent with save_regs/restore_regs.  */

      if (TARGET_ARCH64)
	{
	  for (i = 0; i < 8; i++)
	    if (regs_ever_live[i] && ! call_used_regs[i])
	      n_regs += 2;
	}
      else
	{
	  for (i = 0; i < 8; i += 2)
	    if ((regs_ever_live[i] && ! call_used_regs[i])
		|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	      n_regs += 2;
	}

      /* Count the live float regs in pairs (regs 32..63, and up to 95
	 on v9).  */
      for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
	if ((regs_ever_live[i] && ! call_used_regs[i])
	    || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
	  n_regs += 2;
    }

  /* Set up values for use in `function_epilogue'.  */
  num_gfregs = n_regs;

  if (leaf_function && n_regs == 0
      && size == 0 && current_function_outgoing_args_size == 0)
    {
      /* Trivial leaf function: no frame needed at all.  */
      actual_fsize = apparent_fsize = 0;
    }
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
         The stack bias (if any) is taken out to undo its effects.  */
      apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
      apparent_fsize += n_regs * 4;
      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
    }

  /* Make sure nothing can clobber our register windows.
     If a SAVE must be done, or there is a stack-local variable,
     the register window area must be allocated.
     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
  if (leaf_function == 0 || size > 0)
    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);

  return SPARC_STACK_ALIGN (actual_fsize);
}
3078
3079/* Build a (32 bit) big number in a register.  */
3080/* ??? We may be able to use the set macro here too.  */
3081
3082static void
3083build_big_number (file, num, reg)
3084     FILE *file;
3085     int num;
3086     const char *reg;
3087{
3088  if (num >= 0 || ! TARGET_ARCH64)
3089    {
3090      fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
3091      if ((num & 0x3ff) != 0)
3092	fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
3093    }
3094  else /* num < 0 && TARGET_ARCH64 */
3095    {
3096      /* Sethi does not sign extend, so we must use a little trickery
3097	 to use it for negative numbers.  Invert the constant before
3098	 loading it in, then use xor immediate to invert the loaded bits
3099	 (along with the upper 32 bits) to the desired constant.  This
3100	 works because the sethi and immediate fields overlap.  */
3101      int asize = num;
3102      int inv = ~asize;
3103      int low = -0x400 + (asize & 0x3FF);
3104
3105      fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
3106	       inv, reg, reg, low, reg);
3107    }
3108}
3109
3110/* Output code for the function prologue.  */
3111
3112void
3113output_function_prologue (file, size, leaf_function)
3114     FILE *file;
3115     int size;
3116     int leaf_function;
3117{
3118  /* Need to use actual_fsize, since we are also allocating
3119     space for our callee (and our own register save area).  */
3120  actual_fsize = compute_frame_size (size, leaf_function);
3121
3122  if (leaf_function)
3123    {
3124      frame_base_name = "%sp";
3125      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
3126    }
3127  else
3128    {
3129      frame_base_name = "%fp";
3130      frame_base_offset = SPARC_STACK_BIAS;
3131    }
3132
3133  /* This is only for the human reader.  */
3134  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
3135
3136  if (actual_fsize == 0)
3137    /* do nothing.  */ ;
3138  else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
3139    {
3140      if (actual_fsize <= 4096)
3141	fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
3142      else if (actual_fsize <= 8192)
3143	{
3144	  fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
3145	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3146	}
3147      else
3148	{
3149	  build_big_number (file, -actual_fsize, "%g1");
3150	  fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
3151	}
3152    }
3153  else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
3154    {
3155      /* We assume the environment will properly handle or otherwise avoid
3156	 trouble associated with an interrupt occurring after the `save' or
3157	 trap occurring during it.  */
3158      fprintf (file, "\tsave\n");
3159
3160      if (actual_fsize <= 4096)
3161	fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
3162      else if (actual_fsize <= 8192)
3163	{
3164	  fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
3165	  fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
3166	}
3167      else
3168	{
3169	  build_big_number (file, -actual_fsize, "%g1");
3170	  fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
3171	}
3172    }
3173  else /* leaf function */
3174    {
3175      if (actual_fsize <= 4096)
3176	fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
3177      else if (actual_fsize <= 8192)
3178	{
3179	  fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
3180	  fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
3181	}
3182      else
3183	{
3184	  build_big_number (file, -actual_fsize, "%g1");
3185	  fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
3186	}
3187    }
3188
3189  if (dwarf2out_do_frame () && actual_fsize)
3190    {
3191      char *label = dwarf2out_cfi_label ();
3192
3193      /* The canonical frame address refers to the top of the frame.  */
3194      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
3195				 : FRAME_POINTER_REGNUM),
3196			 frame_base_offset);
3197
3198      if (! leaf_function)
3199	{
3200	  /* Note the register window save.  This tells the unwinder that
3201	     it needs to restore the window registers from the previous
3202	     frame's window save area at 0(cfa).  */
3203	  dwarf2out_window_save (label);
3204
3205	  /* The return address (-8) is now in %i7.  */
3206	  dwarf2out_return_reg (label, 31);
3207	}
3208    }
3209
3210  /* If doing anything with PIC, do it now.  */
3211  if (! flag_pic)
3212    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
3213
3214  /* Call saved registers are saved just above the outgoing argument area.  */
3215  if (num_gfregs)
3216    {
3217      int offset, real_offset, n_regs;
3218      const char *base;
3219
3220      real_offset = -apparent_fsize;
3221      offset = -apparent_fsize + frame_base_offset;
3222      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
3223	{
3224	  /* ??? This might be optimized a little as %g1 might already have a
3225	     value close enough that a single add insn will do.  */
3226	  /* ??? Although, all of this is probably only a temporary fix
3227	     because if %g1 can hold a function result, then
3228	     output_function_epilogue will lose (the result will get
3229	     clobbered).  */
3230	  build_big_number (file, offset, "%g1");
3231	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
3232	  base = "%g1";
3233	  offset = 0;
3234	}
3235      else
3236	{
3237	  base = frame_base_name;
3238	}
3239
3240      n_regs = 0;
3241      if (TARGET_EPILOGUE && ! leaf_function)
3242	/* ??? Originally saved regs 0-15 here.  */
3243	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3244      else if (leaf_function)
3245	/* ??? Originally saved regs 0-31 here.  */
3246	n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
3247      if (TARGET_EPILOGUE)
3248	save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
3249		   real_offset);
3250    }
3251
3252  leaf_label = 0;
3253  if (leaf_function && actual_fsize != 0)
3254    {
3255      /* warning ("leaf procedure with frame size %d", actual_fsize); */
3256      if (! TARGET_EPILOGUE)
3257	leaf_label = gen_label_rtx ();
3258    }
3259}
3260
3261/* Output code for the function epilogue.  */
3262
void
output_function_epilogue (file, size, leaf_function)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
     int leaf_function;
{
  const char *ret;

  /* If the prologue reserved a label for the leaf epilogue, emit it at
     the end of the insn stream and output it now.  */
  if (leaf_label)
    {
      emit_label_after (leaf_label, get_last_insn ());
      final_scan_insn (get_last_insn (), file, 0, 0, 1);
    }

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  else if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT(file);
    }
#endif

  else if (current_function_epilogue_delay_list == 0)
    {
      /* If code does not drop into the epilogue, we need
	 do nothing except output pending case vectors.
	 (Note: the two flush-left statements below are the bodies of
	 the ifs directly above them, despite the indentation.)  */
      rtx insn = get_last_insn ();
      if (GET_CODE (insn) == NOTE)
      insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
      goto output_vectors;
    }

  /* Restore any call saved registers.  */
  if (num_gfregs)
    {
      int offset, n_regs;
      const char *base;

      offset = -apparent_fsize + frame_base_offset;
      /* 8 bytes of headroom keep the double-word (ldd) accesses within
	 the 13-bit signed displacement range.  */
      if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
	{
	  /* Out of simm13 range: materialize the base address in %g1.  */
	  build_big_number (file, offset, "%g1");
	  fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
	  base = "%g1";
	  offset = 0;
	}
      else
	{
	  base = frame_base_name;
	}

      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
	/* ??? Originally saved regs 0-15 here.  */
	n_regs = restore_regs (file, 0, 8, base, offset, 0);
      else if (leaf_function)
	/* ??? Originally saved regs 0-31 here.  */
	n_regs = restore_regs (file, 0, 8, base, offset, 0);
      if (TARGET_EPILOGUE)
	restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
    }

  /* Work out how to skip the caller's unimp instruction if required.  */
  if (leaf_function)
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
  else
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");

  if (TARGET_EPILOGUE || leaf_label)
    {
      /* The TARGET_EPILOGUE bit is masked out of target_flags while the
	 return sequence is emitted, and restored at the bottom of this
	 block.  */
      int old_target_epilogue = TARGET_EPILOGUE;
      target_flags &= ~old_target_epilogue;

      if (! leaf_function)
	{
	  /* If we wound up with things in our delay slot, flush them here.  */
	  if (current_function_epilogue_delay_list)
	    {
	      /* Wrap the delay-slot insn and the RETURN in one PARALLEL
		 and let final emit them together.  */
	      rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
					       get_last_insn ());
	      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
					gen_rtvec (2,
						   PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
						   PATTERN (insn)));
	      final_scan_insn (insn, file, 1, 0, 1);
	    }
	  else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
	    fputs ("\treturn\t%i7+8\n\tnop\n", file);
	  else
	    fprintf (file, "\t%s\n\trestore\n", ret);
	}
      /* All of the following cases are for leaf functions.  */
      else if (current_function_epilogue_delay_list)
	{
	  /* eligible_for_epilogue_delay_slot ensures that if this is a
	     leaf function, then we will only have insn in the delay slot
	     if the frame size is zero, thus no adjust for the stack is
	     needed here.  */
	  if (actual_fsize != 0)
	    abort ();
	  fprintf (file, "\t%s\n", ret);
	  final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
			   file, 1, 0, 1);
	}
      /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
	 avoid generating confusing assembly language output.  */
      else if (actual_fsize == 0)
	fprintf (file, "\t%s\n\tnop\n", ret);
      /* Otherwise the stack deallocation fills the return's delay slot;
	 the same simm13 size thresholds as the prologue apply.  */
      else if (actual_fsize <= 4096)
	fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
      else if (actual_fsize <= 8192)
	fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
		 ret, actual_fsize - 4096);
      /* Low 10 bits zero: a single sethi builds the whole amount.  */
      else if ((actual_fsize & 0x3ff) == 0)
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, ret);
      else
	fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
		 actual_fsize, actual_fsize, ret);
      target_flags |= old_target_epilogue;
    }

 output_vectors:
  sparc_output_deferred_case_vectors ();
}
3388
3389/* Functions for handling argument passing.
3390
3391   For v8 the first six args are normally in registers and the rest are
3392   pushed.  Any arg that starts within the first 6 words is at least
3393   partially passed in a register unless its data type forbids.
3394
3395   For v9, the argument registers are laid out as an array of 16 elements
3396   and arguments are added sequentially.  The first 6 int args and up to the
3397   first 16 fp args (depending on size) are passed in regs.
3398
3399   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
3400   ----    -----   --------   -----   ------------------   ------   -----------
3401    15   [SP+248]              %f31       %f30,%f31         %d30
3402    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
3403    13   [SP+232]              %f27       %f26,%f27         %d26
3404    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
3405    11   [SP+216]              %f23       %f22,%f23         %d22
3406    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
3407     9   [SP+200]              %f19       %f18,%f19         %d18
3408     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
3409     7   [SP+184]              %f15       %f14,%f15         %d14
3410     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
3411     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
3412     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
3413     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
3414     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
3415     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
3416     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
3417
3418   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
3419
3420   Integral arguments are always passed as 64 bit quantities appropriately
3421   extended.
3422
3423   Passing of floating point values is handled as follows.
3424   If a prototype is in scope:
3425     If the value is in a named argument (i.e. not a stdarg function or a
3426     value not part of the `...') then the value is passed in the appropriate
3427     fp reg.
3428     If the value is part of the `...' and is passed in one of the first 6
3429     slots then the value is passed in the appropriate int reg.
3430     If the value is part of the `...' and is not passed in one of the first 6
3431     slots then the value is passed in memory.
3432   If a prototype is not in scope:
3433     If the value is one of the first 6 arguments the value is passed in the
3434     appropriate integer reg and the appropriate fp reg.
3435     If the value is not one of the first 6 arguments the value is passed in
3436     the appropriate fp reg and in memory.
3437   */
3438
3439/* Maximum number of int regs for args.  */
3440#define SPARC_INT_ARG_MAX 6
3441/* Maximum number of fp regs for args.  */
3442#define SPARC_FP_ARG_MAX 16
3443
3444#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3445
3446/* Handle the INIT_CUMULATIVE_ARGS macro.
3447   Initialize a variable CUM of type CUMULATIVE_ARGS
3448   for a call to a function whose data type is FNTYPE.
3449   For a library call, FNTYPE is 0.  */
3450
3451void
3452init_cumulative_args (cum, fntype, libname, indirect)
3453     CUMULATIVE_ARGS *cum;
3454     tree fntype;
3455     tree libname ATTRIBUTE_UNUSED;
3456     int indirect ATTRIBUTE_UNUSED;
3457{
3458  cum->words = 0;
3459  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3460  cum->libcall_p = fntype == 0;
3461}
3462
3463/* Compute the slot number to pass an argument in.
3464   Returns the slot number or -1 if passing on the stack.
3465
3466   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3467    the preceding args and about the function being called.
3468   MODE is the argument's machine mode.
3469   TYPE is the data type of the argument (as a tree).
3470    This is null for libcalls where that information may
3471    not be available.
3472   NAMED is nonzero if this argument is a named parameter
3473    (otherwise it is an extra parameter matching an ellipsis).
3474   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3475   *PREGNO records the register number to use if scalar type.
3476   *PPADDING records the amount of padding needed in words.  */
3477
static int
function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming_p;
     int *pregno;
     int *ppadding;
{
  /* The first int arg reg differs between the caller's view (outgoing
     args) and the callee's view (incoming args).  */
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  int regno;

  *ppadding = 0;

  /* Addressable arguments must live in memory.  */
  if (type != 0 && TREE_ADDRESSABLE (type))
    return -1;
  /* On v8, an under-aligned BLKmode aggregate goes on the stack.  */
  if (TARGET_ARCH32
      && type != 0 && mode == BLKmode
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  switch (mode)
    {
    case VOIDmode :
      /* MODE is VOIDmode when generating the actual call.
	 See emit_call_1.  */
      return -1;

    /* Integral and complex-integral modes use an int reg, if one is
       still free.  */
    case QImode : case CQImode :
    case HImode : case CHImode :
    case SImode : case CSImode :
    case DImode : case CDImode :
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;
      regno = regbase + slotno;
      break;

    case SFmode : case SCmode :
    case DFmode : case DCmode :
    case TFmode : case TCmode :
      if (TARGET_ARCH32)
	{
	  /* v8 passes fp values in int regs.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else
	{
	  /* v9: a quad-width value must start on an even slot; the
	     skipped slot is reported as padding.  */
	  if ((mode == TFmode || mode == TCmode)
	      && (slotno & 1) != 0)
	    slotno++, *ppadding = 1;
	  if (TARGET_FPU && named)
	    {
	      if (slotno >= SPARC_FP_ARG_MAX)
		return -1;
	      /* "* 2" because fp reg numbers count 4-byte quantities;
		 an SFmode value takes the second reg of the slot's
		 pair.  */
	      regno = SPARC_FP_ARG_FIRST + slotno * 2;
	      if (mode == SFmode)
		regno++;
	    }
	  else
	    {
	      /* No FPU, or an unnamed (`...') argument: use an int
		 reg.  */
	      if (slotno >= SPARC_INT_ARG_MAX)
		return -1;
	      regno = regbase + slotno;
	    }
	}
      break;

    case BLKmode :
      /* For sparc64, objects requiring 16 byte alignment get it.  */
      if (TARGET_ARCH64)
	{
	  if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
	    slotno++, *ppadding = 1;
	}

      if (TARGET_ARCH32
	  || (type && TREE_CODE (type) == UNION_TYPE))
	{
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else
	{
	  tree field;
	  int intregs_p = 0, fpregs_p = 0;
	  /* The ABI obviously doesn't specify how packed
	     structures are passed.  These are defined to be passed
	     in int regs if possible, otherwise memory.  */
	  int packed_p = 0;

	  /* First see what kinds of registers we need.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
		      && TARGET_FPU)
		    fpregs_p = 1;
		  else
		    intregs_p = 1;
		  if (DECL_PACKED (field))
		    packed_p = 1;
		}
	    }
	  /* Packed or unnamed records always go through int regs.  */
	  if (packed_p || !named)
	    fpregs_p = 0, intregs_p = 1;

	  /* If all arg slots are filled, then must pass on stack.  */
	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
	    return -1;
	  /* If there are only int args and all int arg slots are filled,
	     then must pass on stack.  */
	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  /* Note that even if all int arg slots are filled, fp members may
	     still be passed in regs if such regs are available.
	     *PREGNO isn't set because there may be more than one, it's up
	     to the caller to compute them.  */
	  return slotno;
	}
      break;

    default :
      abort ();
    }

  *pregno = regno;
  return slotno;
}
3613
3614/* Handle recursive register counting for structure field layout.  */
3615
struct function_arg_record_value_parms
{
  rtx ret;			/* PARALLEL being filled in; NULL_RTX during the counting pass.  */
  int slotno, named, regbase;	/* Arg slot of the record, its namedness, and first int regno.  */
  int nregs, intoffset;		/* Registers assigned so far; bit offset where the pending run
				   of integer data starts, or -1 when no run is pending.  */
};
3622
3623static void function_arg_record_value_3
3624	PROTO((int, struct function_arg_record_value_parms *));
3625static void function_arg_record_value_2
3626	PROTO((tree, int, struct function_arg_record_value_parms *));
3627static rtx function_arg_record_value
3628	PROTO((tree, enum machine_mode, int, int, int));
3629
/* A subroutine of function_arg_record_value.  First pass: walk TYPE
   recursively, starting at bit STARTBITPOS, and count in PARMS->nregs
   how many registers the record will need.  */

static void
function_arg_record_value_1 (type, startbitpos, parms)
     tree type;
     int startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;

  /* The ABI obviously doesn't specify how packed structures are
     passed.  These are defined to be passed in int regs if possible,
     otherwise memory.  */
  int packed_p = 0;

  /* We need to compute how many registers are needed so we can
     allocate the PARALLEL but before we can do that we need to know
     whether there are any packed fields.  If there are, int regs are
     used regardless of whether there are fp values present.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  /* Compute how many registers we need.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  /* Absolute bit position of this field within the record.  */
	  int bitpos = startbitpos;
	  if (DECL_FIELD_BITPOS (field))
	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    {
	      /* Nested record: recurse with the field's absolute
		 position.  */
	      function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
	    }
	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
	           && TARGET_FPU
	           && ! packed_p
	           && parms->named)
	    {
	      /* A float field headed for an fp reg.  First close out any
		 pending run of integer data.  */
	      if (parms->intoffset != -1)
		{
		  int intslots, this_slotno;

		  intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
		    / BITS_PER_WORD;
		  this_slotno = parms->slotno + parms->intoffset
		    / BITS_PER_WORD;

		  /* Clamp to the int arg slots still available.  */
		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
		  intslots = MAX (intslots, 0);
		  parms->nregs += intslots;
		  parms->intoffset = -1;
		}

	      /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
		 If it wasn't true we wouldn't be here.  */
	      parms->nregs += 1;
	    }
	  else
	    {
	      /* Integer data: remember where the run started; it is
		 counted when the next fp field (or the end of the
		 record) is reached.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
3702
3703/* Handle recursive structure field register assignment.  */
3704
/* A subroutine of function_arg_record_value.  Assign the pending run of
   integer data (from PARMS->intoffset up to BITPOS) to integer
   registers, appending one EXPR_LIST entry per word to PARMS->ret.  */

static void
function_arg_record_value_3 (bitpos, parms)
     int bitpos;
     struct function_arg_record_value_parms *parms;
{
  enum machine_mode mode;
  int regno, this_slotno, intslots, intoffset;
  rtx reg;

  /* Nothing pending?  */
  if (parms->intoffset == -1)
    return;
  intoffset = parms->intoffset;
  parms->intoffset = -1;

  intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;

  /* Clamp to the int arg slots still available.  */
  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
  if (intslots <= 0)
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    {
      mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
			    MODE_INT, 0);
    }
  else
    mode = word_mode;

  /* From here on INTOFFSET is a byte offset.  */
  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = parms->regbase + this_slotno;
      reg = gen_rtx_REG (mode, regno);
      XVECEXP (parms->ret, 0, parms->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      /* Advance to the next word boundary.  */
      this_slotno += 1;
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      parms->nregs += 1;
      intslots -= 1;
    }
  while (intslots > 0);
}
3754
/* A subroutine of function_arg_record_value.  Second pass: walk TYPE
   again, starting at bit STARTBITPOS, and fill in the register
   assignments in PARMS->ret.  Mirrors function_arg_record_value_1.  */

static void
function_arg_record_value_2 (type, startbitpos, parms)
     tree type;
     int startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;
  int packed_p = 0;

  /* Packed fields force int regs; see function_arg_record_value_1.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  /* Absolute bit position of this field within the record.  */
	  int bitpos = startbitpos;
	  if (DECL_FIELD_BITPOS (field))
	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    {
	      /* Nested record: recurse.  */
	      function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
	    }
	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
	           && TARGET_FPU
	           && ! packed_p
	           && parms->named)
	    {
	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
	      rtx reg;

	      /* Flush any pending run of integer data first.  */
	      function_arg_record_value_3 (bitpos, parms);

	      /* "* 2" because fp reg numbers count 4-byte quantities;
		 an SFmode field in the second half of a word takes the
		 odd reg of the pair.  */
	      reg = gen_rtx_REG (DECL_MODE (field),
			         (SPARC_FP_ARG_FIRST + this_slotno * 2
			          + (DECL_MODE (field) == SFmode
				     && (bitpos & 32) != 0)));
	      XVECEXP (parms->ret, 0, parms->nregs)
		= gen_rtx_EXPR_LIST (VOIDmode, reg,
			   GEN_INT (bitpos / BITS_PER_UNIT));
	      parms->nregs += 1;
	    }
	  else
	    {
	      /* Integer data: record the start of the run; it is
		 emitted by function_arg_record_value_3.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
3813
/* Build the PARALLEL describing how record TYPE (mode MODE, occupying
   arg slot SLOTNO, named if NAMED, int regs starting at REGBASE) is
   passed: count the registers needed, allocate the vector, then fill
   it in with a second walk over the type.  */

static rtx
function_arg_record_value (type, mode, slotno, named, regbase)
     tree type;
     enum machine_mode mode;
     int slotno, named, regbase;
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  struct function_arg_record_value_parms parms;
  int nregs;

  parms.ret = NULL_RTX;
  parms.slotno = slotno;
  parms.named = named;
  parms.regbase = regbase;

  /* Compute how many registers we need.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_1 (type, 0, &parms);

  /* Count a trailing run of integer data that _1 left pending.  */
  if (parms.intoffset != -1)
    {
      int intslots, this_slotno;

      intslots = (typesize*BITS_PER_UNIT - parms.intoffset + BITS_PER_WORD - 1)
	/ BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;

      /* Clamp to the int arg slots still available.  */
      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
      intslots = MAX (intslots, 0);

      parms.nregs += intslots;
    }
  nregs = parms.nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}
      else
	{
	  /* ??? C++ has structures with no fields, and yet a size.  Give up
	     for now and pass everything back in integer registers.  */
	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	}
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }
  if (nregs == 0)
    abort ();

  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));

  /* Fill in the entries.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_2 (type, 0, &parms);
  /* Flush any integer run remaining past the last fp field.  */
  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);

  /* The counting and filling passes must agree.  */
  if (parms.nregs != nregs)
    abort ();

  return parms.ret;
}
3886
3887/* Handle the FUNCTION_ARG macro.
3888   Determine where to put an argument to a function.
3889   Value is zero to push the argument on the stack,
3890   or a hard register in which to store the argument.
3891
3892   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3893    the preceding args and about the function being called.
3894   MODE is the argument's machine mode.
3895   TYPE is the data type of the argument (as a tree).
3896    This is null for libcalls where that information may
3897    not be available.
3898   NAMED is nonzero if this argument is a named parameter
3899    (otherwise it is an extra parameter matching an ellipsis).
3900   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
3901
rtx
function_arg (cum, mode, type, named, incoming_p)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming_p;
{
  /* First int arg reg as seen by callee (incoming) vs. caller
     (outgoing).  */
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  rtx reg;

  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
				&regno, &padding);

  /* Slot -1 means the argument is passed on the stack.  */
  if (slotno == -1)
    return 0;

  /* v8: function_arg_slotno already chose the int reg.  */
  if (TARGET_ARCH32)
    {
      reg = gen_rtx_REG (mode, regno);
      return reg;
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  if ((GET_MODE_CLASS (mode) == MODE_FLOAT
       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
      && SPARC_FP_REG_P (regno))
    {
      reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	{
	  /* "* 2" because fp reg numbers are recorded in 4 byte
	     quantities.  */
#if 0
	  /* ??? This will cause the value to be passed in the fp reg and
	     in the stack.  When a prototype exists we want to pass the
	     value in the reg but reserve space on the stack.  That's an
	     optimization, and is deferred [for a bit].  */
	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
	    return gen_rtx_PARALLEL (mode,
			    gen_rtvec (2,
				       gen_rtx_EXPR_LIST (VOIDmode,
						NULL_RTX, const0_rtx),
				       gen_rtx_EXPR_LIST (VOIDmode,
						reg, const0_rtx)));
	  else
#else
	  /* ??? It seems that passing back a register even when past
	     the area declared by REG_PARM_STACK_SPACE will allocate
	     space appropriately, and will not copy the data onto the
	     stack, exactly as we desire.

	     This is due to locate_and_pad_parm being called in
	     expand_call whenever reg_parm_stack_space > 0, which
	     while beneficial to our example here, would seem to be
	     in error from what had been intended.  Ho hum...  -- r~ */
#endif
	    return reg;
	}
      else
	{
	  /* No prototype: the value may need to live in two places.  */
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming_p)
		return reg;

	      /* The int reg matching this fp arg slot ("/2" because fp
		 regnos count 4-byte quantities).  */
	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      /* Pass in both the fp reg and the matching int reg.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* Past the int reg slots: fp reg plus memory (the NULL_RTX
		 entry stands for the memory copy).  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }
  else if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      /* Structures up to 16 bytes in size are passed in arg slots on the
	 stack and are promoted to registers where possible.  */

      if (int_size_in_bytes (type) > 16)
	abort (); /* shouldn't get here */

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      /* NOTE(review): this local MODE deliberately shadows the parameter;
	 the gen_rtx_REG below uses the int mode recomputed from the
	 union's size, not the incoming mode.  */
      enum machine_mode mode;
      int bytes = int_size_in_bytes (type);

      if (bytes > 16)
	abort ();

      mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
      reg = gen_rtx_REG (mode, regno);
    }
  else
    {
      /* Scalar or complex int.  */
      reg = gen_rtx_REG (mode, regno);
    }

  return reg;
}
4026
4027/* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
4028   For an arg passed partly in registers and partly in memory,
4029   this is the number of registers used.
4030   For args passed entirely in registers or entirely in memory, zero.
4031
4032   Any arg that starts in the first 6 regs but won't entirely fit in them
4033   needs partial registers on v8.  On v9, structures with integer
4034   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
4035   values that begin in the last fp reg [where "last fp reg" varies with the
4036   mode] will be split between that reg and memory.  */
4037
int
function_arg_partial_nregs (cum, mode, type, named)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int slotno, regno, padding;

  /* We pass 0 for incoming_p here, it doesn't matter.  */
  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);

  /* Not passed in registers at all, so no partial registers either.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* v8: if the argument's rounded size in words extends past the
	 last integer argument register, the remainder goes on the
	 stack; report how many register slots were used.  */
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > NPARM_REGS (SImode))
	return NPARM_REGS (SImode) - slotno;
      return 0;
    }
  else
    {
      if (type && AGGREGATE_TYPE_P (type))
	{
	  /* v9: a 9..16 byte aggregate occupies two slots; when it
	     starts in the last int reg slot, one word is in a register
	     and one in memory.  */
	  int size = int_size_in_bytes (type);
	  int align = TYPE_ALIGN (type);

	  /* 16 byte aligned aggregates start on an even slot.  */
	  if (align == 16)
	    slotno += slotno & 1;
	  if (size > 8 && size <= 16
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return 1;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! TARGET_FPU))
	{
	  /* Complex ints (and complex floats without an FPU) occupy int
	     reg slots and may straddle the register/memory boundary.  */
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    {
	      /* 128 bit aligned modes start on an even slot.
		 NOTE(review): returning 1 for slotno == MAX-2 assumes
		 such a value spills exactly one word past the int regs;
		 confirm against function_arg_slotno's slot assignment.  */
	      slotno += slotno & 1;
	      if (slotno == SPARC_INT_ARG_MAX - 2)
		return 1;
	    }
	  else
	    {
	      if (slotno == SPARC_INT_ARG_MAX - 1)
		return 1;
	    }
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  /* Complex fp values that begin in the last fp reg (which
	     varies with the mode) are split between that reg and
	     memory.  */
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    slotno += slotno & 1;
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return 1;
	}
      return 0;
    }
}
4102
4103/* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
4104   !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
4105   quad-precision floats by invisible reference.
4106   v9: Aggregates greater than 16 bytes are passed by reference.
4107   For Pascal, also pass arrays by reference.  */
4108
4109int
4110function_arg_pass_by_reference (cum, mode, type, named)
4111     const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
4112     enum machine_mode mode;
4113     tree type;
4114     int named ATTRIBUTE_UNUSED;
4115{
4116  if (TARGET_ARCH32)
4117    {
4118      return ((type && AGGREGATE_TYPE_P (type))
4119	      || mode == TFmode || mode == TCmode);
4120    }
4121  else
4122    {
4123      return ((type && TREE_CODE (type) == ARRAY_TYPE)
4124	      /* Consider complex values as aggregates, so care for TCmode. */
4125	      || GET_MODE_SIZE (mode) > 16
4126	      || (type && AGGREGATE_TYPE_P (type)
4127		  && int_size_in_bytes (type) > 16));
4128    }
4129}
4130
4131/* Handle the FUNCTION_ARG_ADVANCE macro.
4132   Update the data in CUM to advance over an argument
4133   of mode MODE and data type TYPE.
4134   TYPE is null for libcalls where that information may not be available.  */
4135
4136void
4137function_arg_advance (cum, mode, type, named)
4138     CUMULATIVE_ARGS *cum;
4139     enum machine_mode mode;
4140     tree type;
4141     int named;
4142{
4143  int slotno, regno, padding;
4144
4145  /* We pass 0 for incoming_p here, it doesn't matter.  */
4146  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
4147
4148  /* If register required leading padding, add it.  */
4149  if (slotno != -1)
4150    cum->words += padding;
4151
4152  if (TARGET_ARCH32)
4153    {
4154      cum->words += (mode != BLKmode
4155		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4156		     : ROUND_ADVANCE (int_size_in_bytes (type)));
4157    }
4158  else
4159    {
4160      if (type && AGGREGATE_TYPE_P (type))
4161	{
4162	  int size = int_size_in_bytes (type);
4163
4164	  if (size <= 8)
4165	    ++cum->words;
4166	  else if (size <= 16)
4167	    cum->words += 2;
4168	  else /* passed by reference */
4169	    ++cum->words;
4170	}
4171      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
4172	{
4173	  cum->words += 2;
4174	}
4175      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4176	{
4177	  cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
4178	}
4179      else
4180	{
4181	  cum->words += (mode != BLKmode
4182			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
4183			 : ROUND_ADVANCE (int_size_in_bytes (type)));
4184	}
4185    }
4186}
4187
4188/* Handle the FUNCTION_ARG_PADDING macro.
4189   For the 64 bit ABI structs are always stored left shifted in their
4190   argument slot.  */
4191
4192enum direction
4193function_arg_padding (mode, type)
4194     enum machine_mode mode;
4195     tree type;
4196{
4197  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
4198    return upward;
4199
4200  /* This is the default definition.  */
4201  return (! BYTES_BIG_ENDIAN
4202	  ? upward
4203	  : ((mode == BLKmode
4204	      ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
4205		 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
4206	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
4207	     ? downward : upward));
4208}
4209
4210/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
4211   For v9, function return values are subject to the same rules as arguments,
4212   except that up to 32-bytes may be returned in registers.  */
4213
rtx
function_value (type, mode, incoming_p)
     tree type;
     enum machine_mode mode;
     int incoming_p;
{
  int regno;
  /* Note the incoming/outgoing sense is the reverse of that used for
     arguments: because of the register windows, a value coming back
     *in* to the caller appears in the caller's outgoing argument
     registers, while the callee deposits its result in its incoming
     argument registers.  */
  int regbase = (incoming_p
		 ? SPARC_OUTGOING_INT_ARG_FIRST
		 : SPARC_INCOMING_INT_ARG_FIRST);

  if (TARGET_ARCH64 && type)
    {
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* Structures up to 32 bytes in size are passed in registers,
	     promoted to fp registers where possible.  */

	  if (int_size_in_bytes (type) > 32)
	    abort (); /* shouldn't get here */

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  int bytes = int_size_in_bytes (type);

	  if (bytes > 32)
	    abort ();

	  /* Unions are returned in an integer mode of the same size.  */
	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  /* v9: sub-word integer values (other than the unions handled above)
     are returned promoted to a full 64 bit register.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
      && type && TREE_CODE (type) != UNION_TYPE)
    mode = DImode;

  if (incoming_p)
    regno = BASE_RETURN_VALUE_REG (mode);
  else
    regno = BASE_OUTGOING_VALUE_REG (mode);

  return gen_rtx_REG (mode, regno);
}
4261
4262/* Do what is necessary for `va_start'.  The argument is ignored.
4263
4264   We look at the current function to determine if stdarg or varargs
4265   is used and return the address of the first unnamed parameter.  */
4266
rtx
sparc_builtin_saveregs (arglist)
     tree arglist ATTRIBUTE_UNUSED;
{
  /* Index of the first int arg register not consumed by named args.  */
  int first_reg = current_function_args_info.words;
  rtx address;
  int regno;

  /* Dump each as-yet-unused incoming integer argument register into
     its reserved slot in the stack save area.  */
  for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
			     gen_rtx_PLUS (Pmode,
				      frame_pointer_rtx,
				      GEN_INT (STACK_POINTER_OFFSET
					       + UNITS_PER_WORD * regno))),
		    gen_rtx_REG (word_mode,
			     BASE_INCOMING_ARG_REG (word_mode) + regno));

  /* Address of the first anonymous (unnamed) argument's save slot --
     this is what va_start wants.  */
  address = gen_rtx_PLUS (Pmode,
		     frame_pointer_rtx,
		     GEN_INT (STACK_POINTER_OFFSET
			      + UNITS_PER_WORD * first_reg));

  /* Under -fcheck-memory-usage, tell the checker the bytes we just
     stored are readable and writable.  */
  if (current_function_check_memory_usage
      && first_reg < NPARM_REGS (word_mode))
    emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
		       address, ptr_mode,
		       GEN_INT (UNITS_PER_WORD
			 	* (NPARM_REGS (word_mode) - first_reg)),
		       TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
		       TYPE_MODE (integer_type_node));

  return address;
}
4300
4301/* Return the string to output a conditional branch to LABEL, which is
4302   the operand number of the label.  OP is the conditional expression.
4303   XEXP (OP, 0) is assumed to be a condition code register (integer or
4304   floating point) and its mode specifies what kind of comparison we made.
4305
4306   REVERSED is non-zero if we should reverse the sense of the comparison.
4307
4308   ANNUL is non-zero if we should generate an annulling branch.
4309
4310   NOOP is non-zero if we have to follow this branch by a noop.
4311
4312   INSN, if set, is the insn.  */
4313
char *
output_cbranch (op, label, reversed, annul, noop, insn)
     rtx op;
     int label;
     int reversed, annul, noop;
     rtx insn;
{
  static char string[32];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  /* Writable operand templates: the 'X'/'Y' placeholders are patched in
     place below with operand numbers (and the fcc reg number), so these
     must be arrays, not string constants.  */
  static char v8_labelno[] = "%lX";
  static char v9_icc_labelno[] = "%%icc, %lX";
  static char v9_xcc_labelno[] = "%%xcc, %lX";
  static char v9_fcc_labelno[] = "%%fccX, %lY";
  char *labelno;
  /* SPACES tracks how much of an 8 column opcode field remains, to
     choose between a tab and a space before the operands.  */
  int labeloff, spaces = 8;

  /* ??? !v9: FP branches cannot be preceded by another floating point insn.
     Because there is currently no concept of pre-delay slots, we can fix
     this only by always emitting a nop before a floating point branch.  */

  if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
    strcpy (string, "nop\n\t");
  else
    string[0] = '\0';

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed
      && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
    code = reverse_condition (code), reversed = 0;

  /* Start by writing the branch condition.  REVERSED is still set here
     only for FP comparisons other than EQ/NE; those use the unordered
     variants (fbul, fbule, ...) instead of a reversed code.  */
  switch (code)
    {
    case NE:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  strcat (string, "fbne");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "bne");
	  spaces -= 3;
	}
      break;

    case EQ:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  strcat (string, "fbe");
	  spaces -= 3;
	}
      else
	{
	  strcpy (string, "be");
	  spaces -= 2;
	}
      break;

    case GE:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    strcat (string, "fbul");
	  else
	    strcat (string, "fbge");
	  spaces -= 4;
	}
      else if (mode == CC_NOOVmode)
	{
	  strcpy (string, "bpos");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "bge");
	  spaces -= 3;
	}
      break;

    case GT:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    {
	      strcat (string, "fbule");
	      spaces -= 5;
	    }
	  else
	    {
	      strcat (string, "fbg");
	      spaces -= 3;
	    }
	}
      else
	{
	  strcpy (string, "bg");
	  spaces -= 2;
	}
      break;

    case LE:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    strcat (string, "fbug");
	  else
	    strcat (string, "fble");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "ble");
	  spaces -= 3;
	}
      break;

    case LT:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    {
	      strcat (string, "fbuge");
	      spaces -= 5;
	    }
	  else
	    {
	      strcat (string, "fbl");
	      spaces -= 3;
	    }
	}
      else if (mode == CC_NOOVmode)
	{
	  strcpy (string, "bneg");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "bl");
	  spaces -= 2;
	}
      break;

    case GEU:
      strcpy (string, "bgeu");
      spaces -= 4;
      break;

    case GTU:
      strcpy (string, "bgu");
      spaces -= 3;
      break;

    case LEU:
      strcpy (string, "bleu");
      spaces -= 4;
      break;

    case LTU:
      strcpy (string, "blu");
      spaces -= 3;
      break;

    default:
      abort ();
    }

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul)
    {
      strcat (string, ",a");
      spaces -= 2;
    }

  if (! TARGET_V9)
    {
      labeloff = 2;
      labelno = v8_labelno;
    }
  else
    {
      rtx note;

      /* Emit a branch prediction hint (,pt / ,pn) when the insn
	 carries a REG_BR_PRED note.  */
      if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
	{
	  strcat (string,
		  INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
	  spaces -= 3;
	}

      labeloff = 9;
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  labeloff = 10;
	  labelno = v9_fcc_labelno;
	  /* Set the char indicating the number of the fcc reg to use.  */
	  labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	labelno = v9_xcc_labelno;
      else
	labelno = v9_icc_labelno;
    }
  /* Set the char indicating the number of the operand containing the
     label_ref.  */
  labelno[labeloff] = label + '0';
  if (spaces > 0)
    strcat (string, "\t");
  else
    strcat (string, " ");
  strcat (string, labelno);

  if (noop)
    strcat (string, "\n\tnop");

  return string;
}
4533
4534/* Return the string to output a conditional branch to LABEL, testing
4535   register REG.  LABEL is the operand number of the label; REG is the
4536   operand number of the reg.  OP is the conditional expression.  The mode
4537   of REG says what kind of comparison we made.
4538
4539   REVERSED is non-zero if we should reverse the sense of the comparison.
4540
4541   ANNUL is non-zero if we should generate an annulling branch.
4542
4543   NOOP is non-zero if we have to follow this branch by a noop.  */
4544
char *
output_v9branch (op, reg, label, reversed, annul, noop, insn)
     rtx op;
     int reg, label;
     int reversed, annul, noop;
     rtx insn;
{
  static char string[20];
  enum rtx_code code = GET_CODE (op);
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  /* Writable template: the 'X' placeholders are overwritten below with
     the reg and label operand numbers.  */
  static char labelno[] = "%X, %lX";
  rtx note;
  /* SPACES tracks how much of an 8 column opcode field remains, to
     choose between a tab and a space before the operands.  */
  int spaces = 8;

  /* These are integer comparisons against zero, so reversing the sense
     of the test is always just reversing the code.  */
  if (reversed)
    code = reverse_condition (code), reversed = 0;

  /* Only 64 bit versions of these instructions exist.  */
  if (mode != DImode)
    abort ();

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      spaces -= 4;
      break;

    case EQ:
      strcpy (string, "brz");
      spaces -= 3;
      break;

    case GE:
      strcpy (string, "brgez");
      spaces -= 5;
      break;

    case LT:
      strcpy (string, "brlz");
      spaces -= 4;
      break;

    case LE:
      strcpy (string, "brlez");
      spaces -= 5;
      break;

    case GT:
      strcpy (string, "brgz");
      spaces -= 4;
      break;

    default:
      abort ();
    }

  /* Now add the annulling, reg, label, and nop.  */
  if (annul)
    {
      strcat (string, ",a");
      spaces -= 2;
    }

  /* Emit a branch prediction hint (,pt / ,pn) when the insn carries a
     REG_BR_PRED note.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
    {
      strcat (string,
	      INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
      spaces -= 3;
    }

  /* Patch the operand numbers into the template.  */
  labelno[1] = reg + '0';
  labelno[6] = label + '0';
  if (spaces > 0)
    strcat (string, "\t");
  else
    strcat (string, " ");
  strcat (string, labelno);

  if (noop)
    strcat (string, "\n\tnop");

  return string;
}
4632
4633/* Renumber registers in delay slot.  Replace registers instead of
4634   renumbering because they may be shared.
4635
4636   This does not handle instructions other than move.  */
4637
static void
epilogue_renumber (where)
     rtx *where;
{
  rtx x = *where;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case MEM:
      /* MEMs may be shared; copy before renumbering the address.  */
      *where = x = copy_rtx (x);
      epilogue_renumber (&XEXP (x, 0));
      return;

    case REG:
      {
	int regno = REGNO (x);
	/* Registers 9..23 must never appear in an epilogue delay slot;
	   only registers 0..8 pass through unchanged.  */
	if (regno > 8 && regno < 24)
	  abort ();
	/* Map 24..31 (%i0-%i7) down to 8..15 (%o0-%o7): after the
	   register window is restored, those values live in the
	   corresponding %o registers.  */
	if (regno >= 24 && regno < 32)
	  *where = gen_rtx_REG (GET_MODE (x), regno - 16);
	return;
      }
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* Constants contain no registers; nothing to do.  */
      return;

    case IOR:
    case AND:
    case XOR:
    case PLUS:
    case MINUS:
      epilogue_renumber (&XEXP (x, 1));
      /* Fall through to renumber the first operand as well.  */
    case NEG:
    case NOT:
      epilogue_renumber (&XEXP (x, 0));
      return;

    default:
      debug_rtx (*where);
      abort ();
    }
}
4684
4685/* Output assembler code to return from a function.  */
4686
const char *
output_return (operands)
     rtx *operands;
{
  /* The insn (if any) scheduled into this return's delay slot.  */
  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;

  if (leaf_label)
    {
      /* Epilogue code is shared at LEAF_LABEL; just branch to it.  */
      operands[0] = leaf_label;
      return "b%* %l0%(";
    }
  else if (current_function_uses_only_leaf_regs)
    {
      /* No delay slot in a leaf function.  */
      if (delay)
	abort ();

      /* If we didn't allocate a frame pointer for the current function,
	 the stack pointer might have been adjusted.  Output code to
	 restore it now.  */

      operands[0] = GEN_INT (actual_fsize);

      /* Use sub of negated value in first two cases instead of add to
	 allow actual_fsize == 4096.  */

      /* The jmp %o7+12 variants skip the "unimp" insn the caller places
	 after the call; see SKIP_CALLERS_UNIMP_P.  */
      if (actual_fsize <= 4096)
	{
	  if (SKIP_CALLERS_UNIMP_P)
	    return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
	  else
	    return "retl\n\tsub\t%%sp, -%0, %%sp";
	}
      else if (actual_fsize <= 8192)
	{
	  operands[0] = GEN_INT (actual_fsize - 4096);
	  if (SKIP_CALLERS_UNIMP_P)
	    return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
	  else
	    return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
	}
      else if (SKIP_CALLERS_UNIMP_P)
	{
	  /* Frame too big for an immediate; build the size in %g1.  */
	  if ((actual_fsize & 0x3ff) != 0)
	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
	  else
	    return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
	}
      else
	{
	  if ((actual_fsize & 0x3ff) != 0)
	    return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
	  else
	    return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
	}
    }
  else if (TARGET_V9)
    {
      if (delay)
	{
	  /* The v9 return insn restores the register window, so %i
	     registers referenced in the delay slot must be renamed to
	     the corresponding %o registers (see epilogue_renumber).  */
	  epilogue_renumber (&SET_DEST (PATTERN (delay)));
	  epilogue_renumber (&SET_SRC (PATTERN (delay)));
	}
      if (SKIP_CALLERS_UNIMP_P)
	return "return\t%%i7+12%#";
      else
	return "return\t%%i7+8%#";
    }
  else
    {
      if (delay)
	abort ();
      if (SKIP_CALLERS_UNIMP_P)
	return "jmp\t%%i7+12\n\trestore";
      else
	return "ret\n\trestore";
    }
}
4765
4766/* Leaf functions and non-leaf functions have different needs.  */
4767
/* Allocation order used for leaf functions.  */
static int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Default allocation order used for non-leaf functions.  */
static int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by "function is non-leaf"; see order_regs_for_local_alloc.  */
static int *reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
4777
4778void
4779order_regs_for_local_alloc ()
4780{
4781  static int last_order_nonleaf = 1;
4782
4783  if (regs_ever_live[15] != last_order_nonleaf)
4784    {
4785      last_order_nonleaf = !last_order_nonleaf;
4786      bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
4787	     (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
4788    }
4789}
4790
4791/* Return 1 if REG and MEM are legitimate enough to allow the various
4792   mem<-->reg splits to be run.  */
4793
4794int
4795sparc_splitdi_legitimate (reg, mem)
4796     rtx reg;
4797     rtx mem;
4798{
4799  /* Punt if we are here by mistake.  */
4800  if (! reload_completed)
4801    abort ();
4802
4803  /* We must have an offsettable memory reference.  */
4804  if (! offsettable_memref_p (mem))
4805    return 0;
4806
4807  /* If we have legitimate args for ldd/std, we do not want
4808     the split to happen.  */
4809  if ((REGNO (reg) % 2) == 0
4810      && mem_min_alignment (mem, 8))
4811    return 0;
4812
4813  /* Success.  */
4814  return 1;
4815}
4816
4817/* Return 1 if x and y are some kind of REG and they refer to
   different hard registers.  This test is guaranteed to be
4819   run after reload.  */
4820
4821int
4822sparc_absnegfloat_split_legitimate (x, y)
4823     rtx x, y;
4824{
4825  if (GET_CODE (x) == SUBREG)
4826    x = alter_subreg (x);
4827  if (GET_CODE (x) != REG)
4828    return 0;
4829  if (GET_CODE (y) == SUBREG)
4830    y = alter_subreg (y);
4831  if (GET_CODE (y) != REG)
4832    return 0;
4833  if (REGNO (x) == REGNO (y))
4834    return 0;
4835  return 1;
4836}
4837
4838/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
4839   This makes them candidates for using ldd and std insns.
4840
4841   Note reg1 and reg2 *must* be hard registers.  */
4842
4843int
4844registers_ok_for_ldd_peep (reg1, reg2)
4845     rtx reg1, reg2;
4846{
4847  /* We might have been passed a SUBREG.  */
4848  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
4849    return 0;
4850
4851  if (REGNO (reg1) % 2 != 0)
4852    return 0;
4853
4854  /* Integer ldd is deprecated in SPARC V9 */
4855  if (TARGET_V9 && REGNO (reg1) < 32)
4856    return 0;
4857
4858  return (REGNO (reg1) == REGNO (reg2) - 1);
4859}
4860
4861/* Return 1 if addr1 and addr2 are suitable for use in an ldd or
4862   std insn.
4863
4864   This can only happen when addr1 and addr2 are consecutive memory
4865   locations (addr1 + 4 == addr2).  addr1 must also be aligned on a
4866   64 bit boundary (addr1 % 8 == 0).
4867
4868   We know %sp and %fp are kept aligned on a 64 bit boundary.  Other
4869   registers are assumed to *never* be properly aligned and are
4870   rejected.
4871
4872   Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
4873   need only check that the offset for addr1 % 8 == 0.  */
4874
4875int
4876addrs_ok_for_ldd_peep (addr1, addr2)
4877      rtx addr1, addr2;
4878{
4879  int reg1, offset1;
4880
4881  /* Extract a register number and offset (if used) from the first addr.  */
4882  if (GET_CODE (addr1) == PLUS)
4883    {
4884      /* If not a REG, return zero.  */
4885      if (GET_CODE (XEXP (addr1, 0)) != REG)
4886	return 0;
4887      else
4888	{
4889          reg1 = REGNO (XEXP (addr1, 0));
4890	  /* The offset must be constant!  */
4891	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
4892            return 0;
4893          offset1 = INTVAL (XEXP (addr1, 1));
4894	}
4895    }
4896  else if (GET_CODE (addr1) != REG)
4897    return 0;
4898  else
4899    {
4900      reg1 = REGNO (addr1);
4901      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
4902      offset1 = 0;
4903    }
4904
4905  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
4906  if (GET_CODE (addr2) != PLUS)
4907    return 0;
4908
4909  if (GET_CODE (XEXP (addr2, 0)) != REG
4910      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
4911    return 0;
4912
4913  /* Only %fp and %sp are allowed.  Additionally both addresses must
4914     use the same register.  */
4915  if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
4916    return 0;
4917
4918  if (reg1 != REGNO (XEXP (addr2, 0)))
4919    return 0;
4920
4921  /* The first offset must be evenly divisible by 8 to ensure the
4922     address is 64 bit aligned.  */
4923  if (offset1 % 8 != 0)
4924    return 0;
4925
4926  /* The offset for the second addr must be 4 more than the first addr.  */
4927  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
4928    return 0;
4929
4930  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
4931     instructions.  */
4932  return 1;
4933}
4934
4935/* Return 1 if reg is a pseudo, or is the first register in
4936   a hard register pair.  This makes it a candidate for use in
4937   ldd and std insns.  */
4938
4939int
4940register_ok_for_ldd (reg)
4941     rtx reg;
4942{
4943  /* We might have been passed a SUBREG.  */
4944  if (GET_CODE (reg) != REG)
4945    return 0;
4946
4947  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
4948    return (REGNO (reg) % 2 == 0);
4949  else
4950    return 1;
4951}
4952
4953/* Print operand X (an rtx) in assembler syntax to file FILE.
4954   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4955   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4956
4957void
4958print_operand (file, x, code)
4959     FILE *file;
4960     rtx x;
4961     int code;
4962{
4963  switch (code)
4964    {
4965    case '#':
4966      /* Output a 'nop' if there's nothing for the delay slot.  */
4967      if (dbr_sequence_length () == 0)
4968	fputs ("\n\t nop", file);
4969      return;
4970    case '*':
4971      /* Output an annul flag if there's nothing for the delay slot and we
4972	 are optimizing.  This is always used with '(' below.  */
4973      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
4974	 this is a dbx bug.  So, we only do this when optimizing.  */
4975      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
4976	 Always emit a nop in case the next instruction is a branch.  */
4977      if (dbr_sequence_length () == 0
4978	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
4979	fputs (",a", file);
4980      return;
4981    case '(':
4982      /* Output a 'nop' if there's nothing for the delay slot and we are
4983	 not optimizing.  This is always used with '*' above.  */
4984      if (dbr_sequence_length () == 0
4985	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
4986	fputs ("\n\t nop", file);
4987      return;
4988    case '_':
4989      /* Output the Embedded Medium/Anywhere code model base register.  */
4990      fputs (EMBMEDANY_BASE_REG, file);
4991      return;
4992    case '@':
4993      /* Print out what we are using as the frame pointer.  This might
4994	 be %fp, or might be %sp+offset.  */
4995      /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
4996      fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
4997      return;
4998    case 'Y':
4999      /* Adjust the operand to take into account a RESTORE operation.  */
5000      if (GET_CODE (x) == CONST_INT)
5001	break;
5002      else if (GET_CODE (x) != REG)
5003	output_operand_lossage ("Invalid %%Y operand");
5004      else if (REGNO (x) < 8)
5005	fputs (reg_names[REGNO (x)], file);
5006      else if (REGNO (x) >= 24 && REGNO (x) < 32)
5007	fputs (reg_names[REGNO (x)-16], file);
5008      else
5009	output_operand_lossage ("Invalid %%Y operand");
5010      return;
5011    case 'L':
5012      /* Print out the low order register name of a register pair.  */
5013      if (WORDS_BIG_ENDIAN)
5014	fputs (reg_names[REGNO (x)+1], file);
5015      else
5016	fputs (reg_names[REGNO (x)], file);
5017      return;
5018    case 'H':
5019      /* Print out the high order register name of a register pair.  */
5020      if (WORDS_BIG_ENDIAN)
5021	fputs (reg_names[REGNO (x)], file);
5022      else
5023	fputs (reg_names[REGNO (x)+1], file);
5024      return;
5025    case 'R':
5026      /* Print out the second register name of a register pair or quad.
5027	 I.e., R (%o0) => %o1.  */
5028      fputs (reg_names[REGNO (x)+1], file);
5029      return;
5030    case 'S':
5031      /* Print out the third register name of a register quad.
5032	 I.e., S (%o0) => %o2.  */
5033      fputs (reg_names[REGNO (x)+2], file);
5034      return;
5035    case 'T':
5036      /* Print out the fourth register name of a register quad.
5037	 I.e., T (%o0) => %o3.  */
5038      fputs (reg_names[REGNO (x)+3], file);
5039      return;
5040    case 'x':
5041      /* Print a condition code register.  */
5042      if (REGNO (x) == SPARC_ICC_REG)
5043	{
5044	  /* We don't handle CC[X]_NOOVmode because they're not supposed
5045	     to occur here.  */
5046	  if (GET_MODE (x) == CCmode)
5047	    fputs ("%icc", file);
5048	  else if (GET_MODE (x) == CCXmode)
5049	    fputs ("%xcc", file);
5050	  else
5051	    abort ();
5052	}
5053      else
5054	/* %fccN register */
5055	fputs (reg_names[REGNO (x)], file);
5056      return;
5057    case 'm':
5058      /* Print the operand's address only.  */
5059      output_address (XEXP (x, 0));
5060      return;
5061    case 'r':
5062      /* In this case we need a register.  Use %g0 if the
5063	 operand is const0_rtx.  */
5064      if (x == const0_rtx
5065	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5066	{
5067	  fputs ("%g0", file);
5068	  return;
5069	}
5070      else
5071	break;
5072
5073    case 'A':
5074      switch (GET_CODE (x))
5075	{
5076	case IOR: fputs ("or", file); break;
5077	case AND: fputs ("and", file); break;
5078	case XOR: fputs ("xor", file); break;
5079	default: output_operand_lossage ("Invalid %%A operand");
5080	}
5081      return;
5082
5083    case 'B':
5084      switch (GET_CODE (x))
5085	{
5086	case IOR: fputs ("orn", file); break;
5087	case AND: fputs ("andn", file); break;
5088	case XOR: fputs ("xnor", file); break;
5089	default: output_operand_lossage ("Invalid %%B operand");
5090	}
5091      return;
5092
5093      /* These are used by the conditional move instructions.  */
5094    case 'c' :
5095    case 'C':
5096      {
5097	enum rtx_code rc = (code == 'c'
5098			    ? reverse_condition (GET_CODE (x))
5099			    : GET_CODE (x));
5100	switch (rc)
5101	  {
5102	  case NE: fputs ("ne", file); break;
5103	  case EQ: fputs ("e", file); break;
5104	  case GE: fputs ("ge", file); break;
5105	  case GT: fputs ("g", file); break;
5106	  case LE: fputs ("le", file); break;
5107	  case LT: fputs ("l", file); break;
5108	  case GEU: fputs ("geu", file); break;
5109	  case GTU: fputs ("gu", file); break;
5110	  case LEU: fputs ("leu", file); break;
5111	  case LTU: fputs ("lu", file); break;
5112	  default: output_operand_lossage (code == 'c'
5113					   ? "Invalid %%c operand"
5114					   : "Invalid %%C operand");
5115	  }
5116	return;
5117      }
5118
5119      /* These are used by the movr instruction pattern.  */
5120    case 'd':
5121    case 'D':
5122      {
5123	enum rtx_code rc = (code == 'd'
5124			    ? reverse_condition (GET_CODE (x))
5125			    : GET_CODE (x));
5126	switch (rc)
5127	  {
5128	  case NE: fputs ("ne", file); break;
5129	  case EQ: fputs ("e", file); break;
5130	  case GE: fputs ("gez", file); break;
5131	  case LT: fputs ("lz", file); break;
5132	  case LE: fputs ("lez", file); break;
5133	  case GT: fputs ("gz", file); break;
5134	  default: output_operand_lossage (code == 'd'
5135					   ? "Invalid %%d operand"
5136					   : "Invalid %%D operand");
5137	  }
5138	return;
5139      }
5140
5141    case 'b':
5142      {
5143	/* Print a sign-extended character.  */
5144	int i = INTVAL (x) & 0xff;
5145	if (i & 0x80)
5146	  i |= 0xffffff00;
5147	fprintf (file, "%d", i);
5148	return;
5149      }
5150
5151    case 'f':
5152      /* Operand must be a MEM; write its address.  */
5153      if (GET_CODE (x) != MEM)
5154	output_operand_lossage ("Invalid %%f operand");
5155      output_address (XEXP (x, 0));
5156      return;
5157
5158    case 0:
5159      /* Do nothing special.  */
5160      break;
5161
5162    default:
5163      /* Undocumented flag.  */
5164      output_operand_lossage ("invalid operand output code");
5165    }
5166
5167  if (GET_CODE (x) == REG)
5168    fputs (reg_names[REGNO (x)], file);
5169  else if (GET_CODE (x) == MEM)
5170    {
5171      fputc ('[', file);
5172	/* Poor Sun assembler doesn't understand absolute addressing.  */
5173      if (CONSTANT_P (XEXP (x, 0))
5174	  && ! TARGET_LIVE_G0)
5175	fputs ("%g0+", file);
5176      output_address (XEXP (x, 0));
5177      fputc (']', file);
5178    }
5179  else if (GET_CODE (x) == HIGH)
5180    {
5181      fputs ("%hi(", file);
5182      output_addr_const (file, XEXP (x, 0));
5183      fputc (')', file);
5184    }
5185  else if (GET_CODE (x) == LO_SUM)
5186    {
5187      print_operand (file, XEXP (x, 0), 0);
5188      if (TARGET_CM_MEDMID)
5189	fputs ("+%l44(", file);
5190      else
5191	fputs ("+%lo(", file);
5192      output_addr_const (file, XEXP (x, 1));
5193      fputc (')', file);
5194    }
5195  else if (GET_CODE (x) == CONST_DOUBLE
5196	   && (GET_MODE (x) == VOIDmode
5197	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
5198    {
5199      if (CONST_DOUBLE_HIGH (x) == 0)
5200	fprintf (file, "%u", CONST_DOUBLE_LOW (x));
5201      else if (CONST_DOUBLE_HIGH (x) == -1
5202	       && CONST_DOUBLE_LOW (x) < 0)
5203	fprintf (file, "%d", CONST_DOUBLE_LOW (x));
5204      else
5205	output_operand_lossage ("long long constant not a valid immediate operand");
5206    }
5207  else if (GET_CODE (x) == CONST_DOUBLE)
5208    output_operand_lossage ("floating point constant not a valid immediate operand");
5209  else { output_addr_const (file, x); }
5210}
5211
5212/* This function outputs assembler code for VALUE to FILE, where VALUE is
5213   a 64 bit (DImode) value.  */
5214
/* ??? If there is a 64 bit counterpart to .word that the assembler
   understands, then using that would simplify this code greatly.  */
5217/* ??? We only output .xword's for symbols and only then in environments
5218   where the assembler can handle them.  */
5219
5220void
5221output_double_int (file, value)
5222     FILE *file;
5223     rtx value;
5224{
5225  if (GET_CODE (value) == CONST_INT)
5226    {
5227      /* ??? This has endianness issues.  */
5228#if HOST_BITS_PER_WIDE_INT == 64
5229      HOST_WIDE_INT xword = INTVAL (value);
5230      HOST_WIDE_INT high, low;
5231
5232      high = (xword >> 32) & 0xffffffff;
5233      low  = xword & 0xffffffff;
5234      ASM_OUTPUT_INT (file, GEN_INT (high));
5235      ASM_OUTPUT_INT (file, GEN_INT (low));
5236#else
5237      if (INTVAL (value) < 0)
5238	ASM_OUTPUT_INT (file, constm1_rtx);
5239      else
5240	ASM_OUTPUT_INT (file, const0_rtx);
5241      ASM_OUTPUT_INT (file, value);
5242#endif
5243    }
5244  else if (GET_CODE (value) == CONST_DOUBLE)
5245    {
5246      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
5247      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
5248    }
5249  else if (GET_CODE (value) == SYMBOL_REF
5250	   || GET_CODE (value) == CONST
5251	   || GET_CODE (value) == PLUS
5252	   || (TARGET_ARCH64 &&
5253	       (GET_CODE (value) == LABEL_REF
5254		|| GET_CODE (value) == CODE_LABEL
5255		|| GET_CODE (value) == MINUS)))
5256    {
5257      if (! TARGET_V9)
5258	{
5259	  ASM_OUTPUT_INT (file, const0_rtx);
5260	  ASM_OUTPUT_INT (file, value);
5261	}
5262      else
5263	{
5264	  fprintf (file, "\t%s\t", ASM_LONGLONG);
5265	  output_addr_const (file, value);
5266	  fprintf (file, "\n");
5267	}
5268    }
5269  else
5270    abort ();
5271}
5272
5273/* Return the value of a code used in the .proc pseudo-op that says
5274   what kind of result this function returns.  For non-C types, we pick
5275   the closest C type.  */
5276
5277#ifndef CHAR_TYPE_SIZE
5278#define CHAR_TYPE_SIZE BITS_PER_UNIT
5279#endif
5280
5281#ifndef SHORT_TYPE_SIZE
5282#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
5283#endif
5284
5285#ifndef INT_TYPE_SIZE
5286#define INT_TYPE_SIZE BITS_PER_WORD
5287#endif
5288
5289#ifndef LONG_TYPE_SIZE
5290#define LONG_TYPE_SIZE BITS_PER_WORD
5291#endif
5292
5293#ifndef LONG_LONG_TYPE_SIZE
5294#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
5295#endif
5296
5297#ifndef FLOAT_TYPE_SIZE
5298#define FLOAT_TYPE_SIZE BITS_PER_WORD
5299#endif
5300
5301#ifndef DOUBLE_TYPE_SIZE
5302#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5303#endif
5304
5305#ifndef LONG_DOUBLE_TYPE_SIZE
5306#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
5307#endif
5308
5309unsigned long
5310sparc_type_code (type)
5311     register tree type;
5312{
5313  register unsigned long qualifiers = 0;
5314  register unsigned shift;
5315
5316  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
5317     setting more, since some assemblers will give an error for this.  Also,
5318     we must be careful to avoid shifts of 32 bits or more to avoid getting
5319     unpredictable results.  */
5320
5321  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
5322    {
5323      switch (TREE_CODE (type))
5324	{
5325	case ERROR_MARK:
5326	  return qualifiers;
5327
5328	case ARRAY_TYPE:
5329	  qualifiers |= (3 << shift);
5330	  break;
5331
5332	case FUNCTION_TYPE:
5333	case METHOD_TYPE:
5334	  qualifiers |= (2 << shift);
5335	  break;
5336
5337	case POINTER_TYPE:
5338	case REFERENCE_TYPE:
5339	case OFFSET_TYPE:
5340	  qualifiers |= (1 << shift);
5341	  break;
5342
5343	case RECORD_TYPE:
5344	  return (qualifiers | 8);
5345
5346	case UNION_TYPE:
5347	case QUAL_UNION_TYPE:
5348	  return (qualifiers | 9);
5349
5350	case ENUMERAL_TYPE:
5351	  return (qualifiers | 10);
5352
5353	case VOID_TYPE:
5354	  return (qualifiers | 16);
5355
5356	case INTEGER_TYPE:
5357	  /* If this is a range type, consider it to be the underlying
5358	     type.  */
5359	  if (TREE_TYPE (type) != 0)
5360	    break;
5361
5362	  /* Carefully distinguish all the standard types of C,
5363	     without messing up if the language is not C.  We do this by
5364	     testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
5365	     look at both the names and the above fields, but that's redundant.
5366	     Any type whose size is between two C types will be considered
5367	     to be the wider of the two types.  Also, we do not have a
5368	     special code to use for "long long", so anything wider than
5369	     long is treated the same.  Note that we can't distinguish
5370	     between "int" and "long" in this code if they are the same
5371	     size, but that's fine, since neither can the assembler.  */
5372
5373	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
5374	    return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
5375
5376	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
5377	    return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
5378
5379	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
5380	    return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
5381
5382	  else
5383	    return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
5384
5385	case REAL_TYPE:
5386	  /* If this is a range type, consider it to be the underlying
5387	     type.  */
5388	  if (TREE_TYPE (type) != 0)
5389	    break;
5390
5391	  /* Carefully distinguish all the standard types of C,
5392	     without messing up if the language is not C.  */
5393
5394	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
5395	    return (qualifiers | 6);
5396
5397	  else
5398	    return (qualifiers | 7);
5399
5400	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
5401	  /* ??? We need to distinguish between double and float complex types,
5402	     but I don't know how yet because I can't reach this code from
5403	     existing front-ends.  */
5404	  return (qualifiers | 7);	/* Who knows? */
5405
5406	case CHAR_TYPE:		/* GNU Pascal CHAR type.  Not used in C.  */
5407	case BOOLEAN_TYPE:	/* GNU Fortran BOOLEAN type.  */
5408	case FILE_TYPE:		/* GNU Pascal FILE type.  */
5409	case SET_TYPE:		/* GNU Pascal SET type.  */
5410	case LANG_TYPE:		/* ? */
5411	  return qualifiers;
5412
5413	default:
5414	  abort ();		/* Not a type! */
5415        }
5416    }
5417
5418  return qualifiers;
5419}
5420
5421/* Nested function support.  */
5422
5423/* Emit RTL insns to initialize the variable parts of a trampoline.
5424   FNADDR is an RTX for the address of the function's pure code.
5425   CXT is an RTX for the static chain value for the function.
5426
5427   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
5428   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
5429   (to store insns).  This is a bit excessive.  Perhaps a different
5430   mechanism would be better here.
5431
5432   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
5433
void
sparc_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  /* SPARC 32 bit trampoline:

 	sethi	%hi(fn), %g1
 	sethi	%hi(static), %g2
 	jmp	%g1+%lo(fn)
 	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */
#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
                     0, VOIDmode, 1, tramp, Pmode);
#endif

  /* Word 0: sethi %hi(fn), %g1.  0x03000000 is SETHI with rd=%g1; the
     low 22 bits of the opcode are filled with the high 22 bits of
     FNADDR (FNADDR >> 10).  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
		  expand_binop (SImode, ior_optab,
				expand_shift (RSHIFT_EXPR, SImode, fnaddr,
					      size_int (10), 0, 1),
				GEN_INT (0x03000000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: sethi %hi(static), %g2.  0x05000000 is SETHI with rd=%g2,
     merged with the high 22 bits of CXT.  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  expand_binop (SImode, ior_optab,
				expand_shift (RSHIFT_EXPR, SImode, cxt,
					      size_int (10), 0, 1),
				GEN_INT (0x05000000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: jmp %g1+%lo(fn).  0x81c06000 is JMPL %g1+imm, %g0; the low
     10 bits of FNADDR form the immediate.  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  expand_binop (SImode, ior_optab,
				expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
				GEN_INT (0x81c06000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3 (delay slot): or %g2, %lo(static), %g2.  0x8410a000 is
     OR %g2, imm, %g2; the low 10 bits of CXT form the immediate.  */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  expand_binop (SImode, ior_optab,
				expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
				GEN_INT (0x8410a000),
				NULL_RTX, 1, OPTAB_DIRECT));

  /* Synchronize the data and instruction caches over the freshly
     written code.  */
  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC)
    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
						     plus_constant (tramp, 8)))));
}
5486
5487/* The 64 bit version is simpler because it makes more sense to load the
5488   values as "immediate" data out of the trampoline.  It's also easier since
5489   we can read the PC without clobbering a register.  */
5490
void
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
                     0, VOIDmode, 1, tramp, Pmode);
#endif

  /*
	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four instruction words are fixed opcodes (see the sequence
     above); the function address and static chain are stored as data
     after them and loaded PC-relative at run time.  */
  emit_move_insn (gen_rtx_MEM (SImode, tramp),
		  GEN_INT (0x83414000));	/* rd %pc, %g1 */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  GEN_INT (0xca586018));	/* ldx [%g1+24], %g5 */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  GEN_INT (0x81c14000));	/* jmp %g5 */
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  GEN_INT (0xca586010));	/* ldx [%g1+16], %g5 (delay slot) */
  /* Data words: static chain at +16, function address at +24.  */
  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
  /* Synchronize the data and instruction caches over the trampoline.  */
  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));

  if (sparc_cpu != PROCESSOR_ULTRASPARC)
    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
5523
5524/* Subroutines to support a flat (single) register window calling
5525   convention.  */
5526
5527/* Single-register window sparc stack frames look like:
5528
5529             Before call		        After call
5530        +-----------------------+	+-----------------------+
5531   high |		        |	|			|
5532   mem  |  caller's temps.    	|       |  caller's temps.    	|
5533	|       		|       |       	        |
5534        +-----------------------+	+-----------------------+
5535 	|       		|	|		        |
5536        |  arguments on stack.  |	|  arguments on stack.  |
5537	|       		|      	|			|
5538        +-----------------------+FP+92->+-----------------------+
5539 	|  6 words to save     	|	|  6 words to save	|
5540	|  arguments passed	|	|  arguments passed	|
5541	|  in registers, even	|	|  in registers, even	|
5542       	|  if not passed.       |      	|  if not passed.	|
5543 SP+68->+-----------------------+FP+68->+-----------------------+
5544        | 1 word struct addr	|      	| 1 word struct addr	|
5545        +-----------------------+FP+64->+-----------------------+
5546        |			|	|			|
5547        | 16 word reg save area	|	| 16 word reg save area |
5548       	|                       |      	|			|
5549    SP->+-----------------------+   FP->+-----------------------+
5550				        | 4 word area for	|
5551				       	| fp/alu reg moves	|
5552				 FP-16->+-----------------------+
5553				        |			|
5554				        |  local variables	|
5555				        |			|
5556				        +-----------------------+
5557				        |		        |
5558                                        |  fp register save     |
5559				        |			|
5560				        +-----------------------+
5561				        |		        |
5562                                        |  gp register save     |
5563                                        |       		|
5564				        +-----------------------+
5565				        |			|
5566                                        |  alloca allocations   |
5567        			        |			|
5568				        +-----------------------+
5569				        |			|
5570                                        |  arguments on stack   |
5571        			       	|		        |
5572				 SP+92->+-----------------------+
5573                                        |  6 words to save      |
5574				        |  arguments passed     |
5575                                        |  in registers, even   |
5576   low                                 	|  if not passed.       |
5577   memory        		 SP+68->+-----------------------+
5578				       	| 1 word struct addr	|
5579				 SP+64->+-----------------------+
5580				        |			|
				        | 16 word reg save area |
5582				       	|			|
5583				    SP->+-----------------------+  */
5584
5585/* Structure to be filled in by sparc_flat_compute_frame_size with register
5586   save masks, and offsets for the current function.  */
5587
struct sparc_frame_info
{
  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # bytes of extra gunk (reg save area,
				   struct return addr, fp/alu copy area).  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers
				   (bit N set => gp reg N is saved).  */
  unsigned long fmask;		/* Mask of saved fp registers
				   (bit N set => fp reg 32+N is saved).  */
  unsigned long reg_offset;	/* Offset from new sp to store regs.  */
  int		initialized;	/* Nonzero if frame size already calculated.  */
};
5601
5602/* Current frame information calculated by sparc_flat_compute_frame_size.  */
5603struct sparc_frame_info current_frame_info;
5604
5605/* Zero structure to initialize current_frame_info.  */
5606struct sparc_frame_info zero_frame_info;
5607
5608/* Tell prologue and epilogue if register REGNO should be saved / restored.  */
5609
5610#define RETURN_ADDR_REGNUM 15
5611#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
5612#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
5613
5614#define MUST_SAVE_REGISTER(regno) \
5615 ((regs_ever_live[regno] && !call_used_regs[regno])		\
5616  || (regno == FRAME_POINTER_REGNUM && frame_pointer_needed)	\
5617  || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
5618
5619/* Return the bytes needed to compute the frame pointer from the current
5620   stack pointer.  */
5621
unsigned long
sparc_flat_compute_frame_size (size)
     int size;			/* # of var. bytes allocated.  */
{
  int regno;
  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # extra bytes.  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers.  */
  unsigned long fmask;		/* Mask of saved fp registers.  */
  unsigned long reg_offset;	/* Offset to register save area.  */
  int           need_aligned_p;	/* 1 if need the save area 8 byte aligned.  */

  /* This is the size of the 16 word reg save area, 1 word struct addr
     area, and 4 word fp/alu register copy area.  */
  extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
  var_size = size;
  gp_reg_size = 0;
  fp_reg_size = 0;
  gmask = 0;
  fmask = 0;
  reg_offset = 0;
  need_aligned_p = 0;

  args_size = 0;
  if (!leaf_function_p ())
    {
      /* Also include the size needed for the 6 parameter registers.
	 (6 registers * 4 bytes = 24.)  */
      args_size = current_function_outgoing_args_size + 24;
    }
  total_size = var_size + args_size;

  /* Calculate space needed for gp registers.  */
  for (regno = 1; regno <= 31; regno++)
    {
      if (MUST_SAVE_REGISTER (regno))
	{
	  /* If we need to save two regs in a row, ensure there's room to bump
	     up the address to align it to a doubleword boundary.  An
	     even/odd pair is saved with a single std/ldd, which requires
	     8 byte alignment of the slot.  */
	  if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
	    {
	      if (gp_reg_size % 8 != 0)
		gp_reg_size += 4;
	      gp_reg_size += 2 * UNITS_PER_WORD;
	      gmask |= 3 << regno;
	      regno++;
	      need_aligned_p = 1;
	    }
	  else
	    {
	      gp_reg_size += UNITS_PER_WORD;
	      gmask |= 1 << regno;
	    }
	}
    }

  /* Calculate space needed for fp registers.  */
  for (regno = 32; regno <= 63; regno++)
    {
      if (regs_ever_live[regno] && !call_used_regs[regno])
	{
	  fp_reg_size += UNITS_PER_WORD;
	  fmask |= 1 << (regno - 32);
	}
    }

  if (gmask || fmask)
    {
      int n;
      /* The register save area sits just past the outgoing argument
	 area.  */
      reg_offset = FIRST_PARM_OFFSET(0) + args_size;
      /* Ensure save area is 8 byte aligned if we need it.  */
      n = reg_offset % 8;
      if (need_aligned_p && n != 0)
	{
	  total_size += 8 - n;
	  reg_offset += 8 - n;
	}
      total_size += gp_reg_size + fp_reg_size;
    }

  /* If we must allocate a stack frame at all, we must also allocate
     room for register window spillage, so as to be binary compatible
     with libraries and operating systems that do not use -mflat.  */
  if (total_size > 0)
    total_size += extra_size;
  else
    extra_size = 0;

  total_size = SPARC_STACK_ALIGN (total_size);

  /* Save other computed information.  Callers consult current_frame_info
     (via the prologue/epilogue emitters) rather than recomputing.  */
  current_frame_info.total_size  = total_size;
  current_frame_info.var_size    = var_size;
  current_frame_info.args_size   = args_size;
  current_frame_info.extra_size  = extra_size;
  current_frame_info.gp_reg_size = gp_reg_size;
  current_frame_info.fp_reg_size = fp_reg_size;
  current_frame_info.gmask	 = gmask;
  current_frame_info.fmask	 = fmask;
  current_frame_info.reg_offset	 = reg_offset;
  current_frame_info.initialized = reload_completed;

  /* Ok, we're done.  */
  return total_size;
}
5730
5731/* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
5732   OFFSET.
5733
5734   BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
5735   appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
5736   [BASE_REG+OFFSET] will always be a valid address.
5737
5738   WORD_OP is either "st" for save, "ld" for restore.
5739   DOUBLEWORD_OP is either "std" for save, "ldd" for restore.  */
5740
void
sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
			 doubleword_op, base_offset)
     FILE *file;
     char *base_reg;
     unsigned int offset;
     unsigned long gmask;
     unsigned long fmask;
     char *word_op;
     char *doubleword_op;
     unsigned long base_offset;
{
  int regno;

  if (gmask == 0 && fmask == 0)
    return;

  /* Save registers starting from high to low.  We've already saved the
     previous frame pointer and previous return address for the debugger's
     sake.  The debugger allows us to not need a nop in the epilog if at least
     one register is reloaded in addition to return address.  */

  if (gmask)
    {
      for (regno = 1; regno <= 31; regno++)
	{
	  if ((gmask & (1L << regno)) != 0)
	    {
	      /* An even register with its odd successor also in the mask
		 can be handled as a doubleword pair.  */
	      if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
		{
		  /* We can save two registers in a row.  If we're not at a
		     double word boundary, move to one.
		     sparc_flat_compute_frame_size ensures there's room to do
		     this.  */
		  if (offset % 8 != 0)
		    offset += UNITS_PER_WORD;

		  /* word_op[0] == 's' distinguishes save ("st"/"std",
		     register is the source) from restore ("ld"/"ldd",
		     register is the destination).  */
		  if (word_op[0] == 's')
		    {
		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
			       doubleword_op, reg_names[regno],
			       base_reg, offset);
		      if (dwarf2out_do_frame ())
			{
			  char *l = dwarf2out_cfi_label ();
			  dwarf2out_reg_save (l, regno, offset + base_offset);
			  dwarf2out_reg_save
			    (l, regno+1, offset+base_offset + UNITS_PER_WORD);
			}
		    }
		  else
		    fprintf (file, "\t%s\t[%s+%d], %s\n",
			     doubleword_op, base_reg, offset,
			     reg_names[regno]);

		  offset += 2 * UNITS_PER_WORD;
		  regno++;	/* Skip the odd register just handled.  */
		}
	      else
		{
		  if (word_op[0] == 's')
		    {
		      fprintf (file, "\t%s\t%s, [%s+%d]\n",
			       word_op, reg_names[regno],
			       base_reg, offset);
		      if (dwarf2out_do_frame ())
			dwarf2out_reg_save ("", regno, offset + base_offset);
		    }
		  else
		    fprintf (file, "\t%s\t[%s+%d], %s\n",
			     word_op, base_reg, offset, reg_names[regno]);

		  offset += UNITS_PER_WORD;
		}
	    }
	}
    }

  if (fmask)
    {
      /* Fp registers are always saved/restored one word at a time; bit N
	 of FMASK corresponds to hard register 32+N.  */
      for (regno = 32; regno <= 63; regno++)
	{
	  if ((fmask & (1L << (regno - 32))) != 0)
	    {
	      if (word_op[0] == 's')
		{
		  fprintf (file, "\t%s\t%s, [%s+%d]\n",
			   word_op, reg_names[regno],
			   base_reg, offset);
		  if (dwarf2out_do_frame ())
		    dwarf2out_reg_save ("", regno, offset + base_offset);
		}
	      else
		fprintf (file, "\t%s\t[%s+%d], %s\n",
			 word_op, base_reg, offset, reg_names[regno]);

	      offset += UNITS_PER_WORD;
	    }
	}
    }
}
5842
5843/* Set up the stack and frame (if desired) for the function.  */
5844
void
sparc_flat_output_function_prologue (file, size)
     FILE *file;
     int size;
{
  char *sp_str = reg_names[STACK_POINTER_REGNUM];
  unsigned long gmask = current_frame_info.gmask;

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
  fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
	   ASM_COMMENT_START,
	   current_frame_info.var_size,
	   current_frame_info.gp_reg_size / 4,
	   current_frame_info.fp_reg_size / 4,
	   current_function_outgoing_args_size,
	   current_frame_info.extra_size);

  /* Compute (or fetch the cached) total frame size.  */
  size = SPARC_STACK_ALIGN (size);
  size = (! current_frame_info.initialized
	  ? sparc_flat_compute_frame_size (size)
	  : current_frame_info.total_size);

  /* These cases shouldn't happen.  Catch them now.  */
  if (size == 0 && (gmask || current_frame_info.fmask))
    abort ();

  /* Allocate our stack frame by decrementing %sp.
     At present, the only algorithm gdb can use to determine if this is a
     flat frame is if we always set %i7 if we set %sp.  This can be optimized
     in the future by putting in some sort of debugging information that says
     this is a `flat' function.  However, there is still the case of debugging
     code without such debugging information (including cases where most fns
     have such info, but there is one that doesn't).  So, always do this now
     so we don't get a lot of code out there that gdb can't handle.
     If the frame pointer isn't needed then that's ok - gdb won't be able to
     distinguish us from a non-flat function but there won't (and shouldn't)
     be any differences anyway.  The return pc is saved (if necessary) right
     after %i7 so gdb won't have to look too far to find it.  */
  if (size > 0)
    {
      unsigned int reg_offset = current_frame_info.reg_offset;
      char *fp_str = reg_names[FRAME_POINTER_REGNUM];
      const char *t1_str = "%g1";

      /* Things get a little tricky if local variables take up more than ~4096
	 bytes and outgoing arguments take up more than ~4096 bytes.  When that
	 happens, the register save area can't be accessed from either end of
	 the frame.  Handle this by decrementing %sp to the start of the gp
	 register save area, save the regs, update %i7, and then set %sp to its
	 final value.  Given that we only have one scratch register to play
	 with it is the cheapest solution, and it helps gdb out as it won't
	 slow down recognition of flat functions.
	 Don't change the order of insns emitted here without checking with
	 the gdb folk first.  */

      /* Is the entire register save area offsettable from %sp?
	 (SPARC load/store immediates are 13 bit signed, hence the 4096
	 limits below.)  */
      if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
	{
	  if (size <= 4096)
	    {
	      /* Frame fits in a single-immediate add.  */
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, -size, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   sp_str, -size, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  else
	    {
	      /* Frame too big for an immediate; build the size in the
		 scratch register first.  */
	      fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
		       size, t1_str, sp_str, t1_str, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n",
			   fp_str, sp_str, reg_offset);
		  fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   sp_str, t1_str, fp_str, ASM_COMMENT_START);
		  reg_offset += 4;
		}
	    }
	  /* Emit dwarf2 call frame information for the debugger.  */
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
				      reg_offset - 4 - size);
		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
	      if (dwarf2out_do_frame ())
		dwarf2out_return_save ("", reg_offset - size);
	      reg_offset += 4;
	    }
	  /* Save the remaining registers (fp and return address were
	     handled specially above).  */
	  sparc_flat_save_restore (file, sp_str, reg_offset,
				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size);
	}
      else
	{
	  /* Subtract %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to decrement %sp by, the first time.  */
	  unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  unsigned int offset = size1 - (size - reg_offset);

	  if (size1 <= 4096)
	    {
	      fprintf (file, "\tadd\t%s, %d, %s\n",
		       sp_str, -size1, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, offset, sp_str, -size1, fp_str,
			   ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  else
	    {
	      fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
		       size1, t1_str, sp_str, t1_str, sp_str);
	      if (gmask & FRAME_POINTER_MASK)
		{
		  fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
			   fp_str, sp_str, offset, sp_str, t1_str, fp_str,
			   ASM_COMMENT_START);
		  offset += 4;
		}
	    }
	  if (dwarf2out_do_frame ())
	    {
	      char *l = dwarf2out_cfi_label ();
	      if (gmask & FRAME_POINTER_MASK)
		{
		  dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
				      offset - 4 - size1);
		  dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
		}
	      else
		dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
	    }
	  if (gmask & RETURN_ADDR_MASK)
	    {
	      fprintf (file, "\tst\t%s, [%s+%d]\n",
		       reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
	      if (dwarf2out_do_frame ())
		/* offset - size1 == reg_offset - size
		   if reg_offset were updated above like offset.  */
		dwarf2out_return_save ("", offset - size1);
	      offset += 4;
	    }
	  sparc_flat_save_restore (file, sp_str, offset,
				   gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
				   current_frame_info.fmask,
				   "st", "std", -size1);
	  /* Second step: drop %sp the rest of the way to its final
	     value.  */
	  fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
		   size - size1, t1_str, sp_str, t1_str, sp_str);
	  if (dwarf2out_do_frame ())
	    if (! (gmask & FRAME_POINTER_MASK))
	      dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
	}
    }

  fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
}
6024
6025/* Do any necessary cleanup after a function to restore stack, frame,
6026   and regs. */
6027
void
sparc_flat_output_function_epilogue (file, size)
     FILE *file;
     int size;
{
  rtx epilogue_delay = current_function_epilogue_delay_list;
  int noepilogue = FALSE;

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);

  /* The epilogue does not depend on any registers, but the stack
     registers, so we assume that if we have 1 pending nop, it can be
     ignored, and 2 it must be filled (2 nops occur for integer
     multiply and divide).  */

  /* Recompute the frame size if the prologue never cached it
     (e.g. no prologue was emitted); otherwise reuse the cached total.  */
  size = SPARC_STACK_ALIGN (size);
  size = (!current_frame_info.initialized
	   ? sparc_flat_compute_frame_size (size)
	   : current_frame_info.total_size);

  if (size == 0 && epilogue_delay == 0)
    {
      rtx insn = get_last_insn ();

      /* If the last insn was a BARRIER, we don't have to write any code
	 because a jump (aka return) was put there.  */
      if (GET_CODE (insn) == NOTE)
	insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
	noepilogue = TRUE;
    }

  if (!noepilogue)
    {
      unsigned int reg_offset = current_frame_info.reg_offset;
      unsigned int size1;
      char *sp_str = reg_names[STACK_POINTER_REGNUM];
      char *fp_str = reg_names[FRAME_POINTER_REGNUM];
      const char *t1_str = "%g1";

      /* In the reload sequence, we don't need to fill the load delay
	 slots for most of the loads, also see if we can fill the final
	 delay slot if not otherwise filled by the reload sequence.  */

      /* Frame sizes over 4095 don't fit in a 13-bit signed immediate;
	 materialize the size in %g1 first.  */
      if (size > 4095)
	fprintf (file, "\tset\t%d, %s\n", size, t1_str);

      if (frame_pointer_needed)
	{
	  if (size > 4095)
	    fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
		     fp_str, t1_str, sp_str, ASM_COMMENT_START);
	  else
	    fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
		     fp_str, size, sp_str, ASM_COMMENT_START);
	}

      /* Is the entire register save area offsettable from %sp?  */
      if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
	{
	  size1 = 0;
	}
      else
	{
	  /* Restore %sp in two steps, but make sure there is always a
	     64 byte register save area, and %sp is properly aligned.  */
	  /* Amount to increment %sp by, the first time.  */
	  size1 = ((reg_offset - 64 - 16) + 15) & -16;
	  /* Offset to register save area from %sp.  */
	  reg_offset = size1 - reg_offset;

	  fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
		   size1, t1_str, sp_str, t1_str, sp_str);
	}

      /* We must restore the frame pointer and return address reg first
	 because they are treated specially by the prologue output code.  */
      if (current_frame_info.gmask & FRAME_POINTER_MASK)
	{
	  fprintf (file, "\tld\t[%s+%d], %s\n",
		   sp_str, reg_offset, fp_str);
	  reg_offset += 4;
	}
      if (current_frame_info.gmask & RETURN_ADDR_MASK)
	{
	  fprintf (file, "\tld\t[%s+%d], %s\n",
		   sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
	  reg_offset += 4;
	}

      /* Restore any remaining saved registers.  */
      sparc_flat_save_restore (file, sp_str, reg_offset,
			       current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
			       current_frame_info.fmask,
			       "ld", "ldd", 0);

      /* If we had to increment %sp in two steps, record it so the second
	 restoration in the epilogue finishes up.  */
      if (size1 > 0)
	{
	  size -= size1;
	  /* The remainder may again exceed the 13-bit immediate range;
	     if so stage it through %g1 for the final add below.  */
	  if (size > 4095)
	    fprintf (file, "\tset\t%d, %s\n",
		     size, t1_str);
	}

      /* v8 callers of a function returning a structure place an unimp
	 insn right after the call; return past it with %o7+12.  */
      if (current_function_returns_struct)
	fprintf (file, "\tjmp\t%%o7+12\n");
      else
	fprintf (file, "\tretl\n");

      /* If the only register saved is the return address, we need a
	 nop, unless we have an instruction to put into it.  Otherwise
	 we don't since reloading multiple registers doesn't reference
	 the register being loaded.  */

      /* Fill the return's delay slot: either a scheduled insn, the
	 remaining %sp adjustment, or a plain nop.  */
      if (epilogue_delay)
	{
	  if (size)
	    abort ();
	  final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
	}

      else if (size > 4095)
	fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);

      else if (size > 0)
	fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);

      else
	fprintf (file, "\tnop\n");
    }

  /* Reset state info for each function.  */
  current_frame_info = zero_frame_info;

  sparc_output_deferred_case_vectors ();
}
6167
6168/* Define the number of delay slots needed for the function epilogue.
6169
6170   On the sparc, we need a slot if either no stack has been allocated,
6171   or the only register saved is the return register.  */
6172
6173int
6174sparc_flat_epilogue_delay_slots ()
6175{
6176  if (!current_frame_info.initialized)
6177    (void) sparc_flat_compute_frame_size (get_frame_size ());
6178
6179  if (current_frame_info.total_size == 0)
6180    return 1;
6181
6182  return 0;
6183}
6184
6185/* Return true is TRIAL is a valid insn for the epilogue delay slot.
6186   Any single length instruction which doesn't reference the stack or frame
6187   pointer is OK.  */
6188
6189int
6190sparc_flat_eligible_for_epilogue_delay (trial, slot)
6191     rtx trial;
6192     int slot ATTRIBUTE_UNUSED;
6193{
6194  rtx pat = PATTERN (trial);
6195
6196  if (get_attr_length (trial) != 1)
6197    return 0;
6198
6199  /* If %g0 is live, there are lots of things we can't handle.
6200     Rather than trying to find them all now, let's punt and only
6201     optimize things as necessary.  */
6202  if (TARGET_LIVE_G0)
6203    return 0;
6204
6205  if (! reg_mentioned_p (stack_pointer_rtx, pat)
6206      && ! reg_mentioned_p (frame_pointer_rtx, pat))
6207    return 1;
6208
6209  return 0;
6210}
6211
6212/* Adjust the cost of a scheduling dependency.  Return the new cost of
6213   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
6214
6215static int
6216supersparc_adjust_cost (insn, link, dep_insn, cost)
6217     rtx insn;
6218     rtx link;
6219     rtx dep_insn;
6220     int cost;
6221{
6222  enum attr_type insn_type;
6223
6224  if (! recog_memoized (insn))
6225    return 0;
6226
6227  insn_type = get_attr_type (insn);
6228
6229  if (REG_NOTE_KIND (link) == 0)
6230    {
6231      /* Data dependency; DEP_INSN writes a register that INSN reads some
6232	 cycles later.  */
6233
6234      /* if a load, then the dependence must be on the memory address;
6235	 add an extra "cycle".  Note that the cost could be two cycles
6236	 if the reg was written late in an instruction group; we ca not tell
6237	 here.  */
6238      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6239	return cost + 3;
6240
6241      /* Get the delay only if the address of the store is the dependence.  */
6242      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6243	{
6244	  rtx pat = PATTERN(insn);
6245	  rtx dep_pat = PATTERN (dep_insn);
6246
6247	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6248	    return cost;  /* This should not happen!  */
6249
6250	  /* The dependency between the two instructions was on the data that
6251	     is being stored.  Assume that this implies that the address of the
6252	     store is not dependent.  */
6253	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6254	    return cost;
6255
6256	  return cost + 3;  /* An approximation.  */
6257	}
6258
6259      /* A shift instruction cannot receive its data from an instruction
6260	 in the same cycle; add a one cycle penalty.  */
6261      if (insn_type == TYPE_SHIFT)
6262	return cost + 3;   /* Split before cascade into shift.  */
6263    }
6264  else
6265    {
6266      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6267	 INSN writes some cycles later.  */
6268
6269      /* These are only significant for the fpu unit; writing a fp reg before
6270         the fpu has finished with it stalls the processor.  */
6271
6272      /* Reusing an integer register causes no problems.  */
6273      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6274	return 0;
6275    }
6276
6277  return cost;
6278}
6279
6280static int
6281hypersparc_adjust_cost (insn, link, dep_insn, cost)
6282     rtx insn;
6283     rtx link;
6284     rtx dep_insn;
6285     int cost;
6286{
6287  enum attr_type insn_type, dep_type;
6288  rtx pat = PATTERN(insn);
6289  rtx dep_pat = PATTERN (dep_insn);
6290
6291  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6292    return cost;
6293
6294  insn_type = get_attr_type (insn);
6295  dep_type = get_attr_type (dep_insn);
6296
6297  switch (REG_NOTE_KIND (link))
6298    {
6299    case 0:
6300      /* Data dependency; DEP_INSN writes a register that INSN reads some
6301	 cycles later.  */
6302
6303      switch (insn_type)
6304	{
6305	case TYPE_STORE:
6306	case TYPE_FPSTORE:
6307	  /* Get the delay iff the address of the store is the dependence. */
6308	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6309	    return cost;
6310
6311	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6312	    return cost;
6313	  return cost + 3;
6314
6315	case TYPE_LOAD:
6316	case TYPE_SLOAD:
6317	case TYPE_FPLOAD:
6318	  /* If a load, then the dependence must be on the memory address.  If
6319	     the addresses aren't equal, then it might be a false dependency */
6320	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6321	    {
6322	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6323		  || GET_CODE (SET_DEST (dep_pat)) != MEM
6324		  || GET_CODE (SET_SRC (pat)) != MEM
6325		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
6326				    XEXP (SET_SRC (pat), 0)))
6327		return cost + 2;
6328
6329	      return cost + 8;
6330	    }
6331	  break;
6332
6333	case TYPE_BRANCH:
6334	  /* Compare to branch latency is 0.  There is no benefit from
6335	     separating compare and branch.  */
6336	  if (dep_type == TYPE_COMPARE)
6337	    return 0;
6338	  /* Floating point compare to branch latency is less than
6339	     compare to conditional move.  */
6340	  if (dep_type == TYPE_FPCMP)
6341	    return cost - 1;
6342	  break;
6343	default:
6344	  break;
6345	}
6346	break;
6347
6348    case REG_DEP_ANTI:
6349      /* Anti-dependencies only penalize the fpu unit. */
6350      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6351        return 0;
6352      break;
6353
6354    default:
6355      break;
6356    }
6357
6358  return cost;
6359}
6360
static int
ultrasparc_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link;
     rtx dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave the cost unchanged unless both insns are recognized.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  /* Nothing issues in parallel with integer multiplies, so
     mark as zero cost since the scheduler can not do anything
     about it.  */
  if (insn_type == TYPE_IMUL)
    return 0;

#define SLOW_FP(dep_type) \
(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      if (dep_type == TYPE_CMOVE)
	{
	  /* Instructions that read the result of conditional moves cannot
	     be in the same group or the following group.  */
	  return cost + 1;
	}

      switch (insn_type)
	{
	  /* UltraSPARC can dual issue a store and an instruction setting
	     the value stored, except for divide and square root.  */
	case TYPE_FPSTORE:
	  if (! SLOW_FP (dep_type))
	    return 0;
	  return cost;

	case TYPE_STORE:
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    /* The dependency between the two instructions is on the data
	       that is being stored.  Assume that the address of the store
	       is not also dependent.  */
	    return 0;
	  return cost;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* A load does not return data until at least 11 cycles after
	     a store to the same location.  3 cycles are accounted for
	     in the load latency; add the other 8 here.  */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      /* If the addresses are not equal this may be a false
		 dependency because pointer aliasing could not be
		 determined.  Add only 2 cycles in that case.  2 is
		 an arbitrary compromise between 8, which would cause
		 the scheduler to generate worse code elsewhere to
		 compensate for a dependency which might not really
		 exist, and 0.  */
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
				    XEXP (SET_DEST (dep_pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  return cost;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  return cost;

	case TYPE_FPCMOVE:
	  /* FMOVR class instructions can not issue in the same cycle
	     or the cycle after an instruction which writes any
	     integer register.  Model this as cost 2 for dependent
	     instructions.  */
	  if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
	       || dep_type == TYPE_BINARY)
	      && cost < 2)
	    return 2;
	  /* Otherwise check as for integer conditional moves. */
	  /* Fall through to TYPE_CMOVE.  */

	case TYPE_CMOVE:
	  /* Conditional moves involving integer registers wait until
	     3 cycles after loads return data.  The interlock applies
	     to all loads, not just dependent loads, but that is hard
	     to model.  */
	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
	    return cost + 3;
	  return cost;

	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Divide and square root lock destination registers for full latency. */
      if (! SLOW_FP (dep_type))
	return 0;
      break;

    case REG_DEP_OUTPUT:
      /* IEU and FPU instruction that have the same destination
	 register cannot be grouped together.  */
      return cost + 1;

    default:
      break;
    }

  /* Other costs not accounted for:
     - Single precision floating point loads lock the other half of
       the even/odd register pair.
     - Several hazards associated with ldd/std are ignored because these
       instructions are rarely generated for V9.
     - The floating point pipeline can not have both a single and double
       precision operation active at the same time.  Format conversions
       and graphics instructions are given honorary double precision status.
     - call and jmpl are always the first instruction in a group.  */

  return cost;

#undef SLOW_FP
}
6511
6512int
6513sparc_adjust_cost(insn, link, dep, cost)
6514     rtx insn;
6515     rtx link;
6516     rtx dep;
6517     int cost;
6518{
6519  switch (sparc_cpu)
6520    {
6521    case PROCESSOR_SUPERSPARC:
6522      cost = supersparc_adjust_cost (insn, link, dep, cost);
6523      break;
6524    case PROCESSOR_HYPERSPARC:
6525    case PROCESSOR_SPARCLITE86X:
6526      cost = hypersparc_adjust_cost (insn, link, dep, cost);
6527      break;
6528    case PROCESSOR_ULTRASPARC:
6529      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
6530      break;
6531    default:
6532      break;
6533    }
6534  return cost;
6535}
6536
6537/* This describes the state of the UltraSPARC pipeline during
6538   instruction scheduling.  */
6539
/* Map an insn `type' attribute (TMASK) or an ultra_code (UMASK) to a
   single bit.  The two expansions are identical; the distinct names
   only document which enumeration the operand comes from.  */
#define TMASK(__x)	((unsigned)1 << ((int)(__x)))
#define UMASK(__x)	((unsigned)1 << ((int)(__x)))
6542
/* Dispatch classification of an insn: which UltraSPARC issue resource
   it consumes.  NB: ultra_code_names below must be kept in sync with
   this enumeration's order.  */
enum ultra_code { NONE=0, /* no insn at all				*/
		  IEU0,   /* shifts and conditional moves		*/
		  IEU1,   /* condition code setting insns, calls+jumps	*/
		  IEUN,   /* all other single cycle ieu insns		*/
		  LSU,    /* loads and stores				*/
		  CTI,    /* branches					*/
		  FPM,    /* FPU pipeline 1, multiplies and divides	*/
		  FPA,    /* FPU pipeline 2, all other operations	*/
		  SINGLE, /* single issue instructions			*/
		  NUM_ULTRA_CODES };

/* Printable names, indexed by ultra_code; used for scheduler dumps.  */
static const char *ultra_code_names[NUM_ULTRA_CODES] = {
  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
  "FPM", "FPA", "SINGLE" };
6557
/* State of one UltraSPARC dispatch group (at most 4 insns wide).  */
struct ultrasparc_pipeline_state {
  /* The insns in this group.  */
  rtx group[4];

  /* The code for each insn.  */
  enum ultra_code codes[4];

  /* Which insns in this group have been committed by the
     scheduler.  This is how we determine how many more
     can issue this cycle.  */
  char commit[4];

  /* How many insns in this group.  */
  char group_size;

  /* Mask of free slots still in this group.  Bit N set means
     group[N] is unoccupied.  */
  char free_slot_mask;

  /* The slotter uses the following to determine what other
     insn types can still make their way into this group.  */
  char contents [NUM_ULTRA_CODES];	/* 1 if an insn of that code is present */
  char num_ieu_insns;			/* count of IEU0/IEU1/IEUN members */
};
6581
/* Size of the pipeline-group history ring.  NOTE(review): all of the
   wrap-around arithmetic below is done modulo ULTRA_NUM_HIST - 1, so
   only entries 0 .. ULTRA_NUM_HIST - 2 are ever addressed.  */
#define ULTRA_NUM_HIST	8
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
static int ultra_cur_hist;	/* index of the current group in the ring */
static int ultra_cycles_elapsed; /* groups completed since sched_init */

/* The pipeline state of the group currently being filled.  */
#define ultra_pipe	(ultra_pipe_hist[ultra_cur_hist])
6588
6589/* Given TYPE_MASK compute the ultra_code it has.  */
6590static enum ultra_code
6591ultra_code_from_mask (type_mask)
6592     int type_mask;
6593{
6594  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6595    return IEU0;
6596  else if (type_mask & (TMASK (TYPE_COMPARE) |
6597			TMASK (TYPE_CALL) |
6598			TMASK (TYPE_UNCOND_BRANCH)))
6599    return IEU1;
6600  else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6601			TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6602    return IEUN;
6603  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6604			TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6605			TMASK (TYPE_FPSTORE)))
6606    return LSU;
6607  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6608			TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
6609    return FPM;
6610  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6611			TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6612    return FPA;
6613  else if (type_mask & TMASK (TYPE_BRANCH))
6614    return CTI;
6615
6616  return SINGLE;
6617}
6618
6619/* Check INSN (a conditional move) and make sure that it's
6620   results are available at this cycle.  Return 1 if the
6621   results are in fact ready.  */
6622static int
6623ultra_cmove_results_ready_p (insn)
6624     rtx insn;
6625{
6626  struct ultrasparc_pipeline_state *up;
6627  int entry, slot;
6628
6629  /* If this got dispatched in the previous
6630     group, the results are not ready.  */
6631  entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6632  up = &ultra_pipe_hist[entry];
6633  slot = 4;
6634  while (--slot >= 0)
6635    if (up->group[slot] == insn)
6636      return 0;
6637
6638  return 1;
6639}
6640
6641/* Walk backwards in pipeline history looking for FPU
6642   operations which use a mode different than FPMODE and
6643   will create a stall if an insn using FPMODE were to be
6644   dispatched this cycle.  */
6645static int
6646ultra_fpmode_conflict_exists (fpmode)
6647     enum machine_mode fpmode;
6648{
6649  int hist_ent;
6650  int hist_lim;
6651
6652  hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6653  if (ultra_cycles_elapsed < 4)
6654    hist_lim = ultra_cycles_elapsed;
6655  else
6656    hist_lim = 4;
6657  while (hist_lim > 0)
6658    {
6659      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6660      int slot = 4;
6661
6662      while (--slot >= 0)
6663	{
6664	  rtx insn = up->group[slot];
6665	  enum machine_mode this_mode;
6666	  rtx pat;
6667
6668	  if (! insn
6669	      || GET_CODE (insn) != INSN
6670	      || (pat = PATTERN (insn)) == 0
6671	      || GET_CODE (pat) != SET)
6672	    continue;
6673
6674	  this_mode = GET_MODE (SET_DEST (pat));
6675	  if ((this_mode != SFmode
6676	       && this_mode != DFmode)
6677	      || this_mode == fpmode)
6678	    continue;
6679
6680	  /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
6681	     we will get a stall.  Loads and stores are independant
6682	     of these rules.  */
6683	  if (GET_CODE (SET_SRC (pat)) != ABS
6684	      && GET_CODE (SET_SRC (pat)) != NEG
6685	      && ((TMASK (get_attr_type (insn)) &
6686		   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
6687		    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
6688                    TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
6689	    return 1;
6690	}
6691      hist_lim--;
6692      hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
6693    }
6694
6695  /* No conflicts, safe to dispatch.  */
6696  return 0;
6697}
6698
6699/* Find an instruction in LIST which has one of the
6700   type attributes enumerated in TYPE_MASK.  START
6701   says where to begin the search.
6702
6703   NOTE: This scheme depends upon the fact that we
6704         have less than 32 distinct type attributes.  */
6705
static int ultra_types_avail;	/* OR of TMASK(type) over the current ready list */
6707
static rtx *
ultra_find_type (type_mask, list, start)
     int type_mask;
     rtx *list;
     int start;
{
  int i;

  /* Short circuit if no such insn exists in the ready
     at the moment.  */
  if ((type_mask & ultra_types_avail) == 0)
    return 0;

  /* Scan LIST downward from START for a recognized insn whose type
     bit is in TYPE_MASK and which can be dispatched into the current
     group without a dependency or FPU-mode hazard.  */
  for (i = start; i >= 0; i--)
    {
      rtx insn = list[i];

      if (recog_memoized (insn) >= 0
	  && (TMASK(get_attr_type (insn)) & type_mask))
	{
	  enum machine_mode fpmode = SFmode;
	  rtx pat = 0;
	  int slot;
	  int check_depend = 0;
	  int check_fpmode_conflict = 0;

	  /* For a single-SET non-store insn, arrange to check output
	     dependencies against insns already slotted in this group;
	     if the result is FP, also remember its mode so we can
	     test for single/double mixing stalls below.  */
	  if (GET_CODE (insn) == INSN
	      && (pat = PATTERN(insn)) != 0
	      && GET_CODE (pat) == SET
	      && !(type_mask & (TMASK (TYPE_STORE) |
				TMASK (TYPE_FPSTORE))))
	    {
	      check_depend = 1;
	      if (GET_MODE (SET_DEST (pat)) == SFmode
		  || GET_MODE (SET_DEST (pat)) == DFmode)
		{
		  fpmode = GET_MODE (SET_DEST (pat));
		  check_fpmode_conflict = 1;
		}
	    }

	  slot = 4;
	  while(--slot >= 0)
	    {
	      rtx slot_insn = ultra_pipe.group[slot];
	      rtx slot_pat;

	      /* Already issued, bad dependency, or FPU
		 mode conflict.  */
	      if (slot_insn != 0
		  && (slot_pat = PATTERN (slot_insn)) != 0
		  && ((insn == slot_insn)
		      || (check_depend == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && ((GET_CODE (SET_DEST (slot_pat)) == REG
			       && GET_CODE (SET_SRC (pat)) == REG
			       && REGNO (SET_DEST (slot_pat)) ==
			            REGNO (SET_SRC (pat)))
			      || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
				  && GET_CODE (SET_SRC (pat)) == SUBREG
				  && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
				       REGNO (SUBREG_REG (SET_SRC (pat)))
				  && SUBREG_WORD (SET_DEST (slot_pat)) ==
				       SUBREG_WORD (SET_SRC (pat)))))
		      || (check_fpmode_conflict == 1
			  && GET_CODE (slot_insn) == INSN
			  && GET_CODE (slot_pat) == SET
			  && (GET_MODE (SET_DEST (slot_pat)) == SFmode
			      || GET_MODE (SET_DEST (slot_pat)) == DFmode)
			  && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
		goto next;
	    }

	  /* Check for peculiar result availability and dispatch
	     interference situations.  */
	  if (pat != 0
	      && ultra_cycles_elapsed > 0)
	    {
	      rtx link;

	      /* An insn consuming the result of a conditional move
		 dispatched in the previous group must wait.  */
	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
		{
		  rtx link_insn = XEXP (link, 0);
		  if (GET_CODE (link_insn) == INSN
		      && recog_memoized (link_insn) >= 0
		      && (TMASK (get_attr_type (link_insn)) &
			  (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
		      && ! ultra_cmove_results_ready_p (link_insn))
		    goto next;
		}

	      /* Mixing single and double precision FP operations in
		 flight can stall; see ultra_fpmode_conflict_exists.  */
	      if (check_fpmode_conflict
		  && ultra_fpmode_conflict_exists (fpmode))
		goto next;
	    }

	  return &list[i];
	}
    next:
      ;
    }
  return 0;
}
6812
6813static void
6814ultra_build_types_avail (ready, n_ready)
6815  rtx *ready;
6816  int n_ready;
6817{
6818  int i = n_ready - 1;
6819
6820  ultra_types_avail = 0;
6821  while(i >= 0)
6822    {
6823      rtx insn = ready[i];
6824
6825      if (recog_memoized (insn) >= 0)
6826	ultra_types_avail |= TMASK (get_attr_type (insn));
6827
6828      i -= 1;
6829    }
6830}
6831
6832/* Place insn pointed to my IP into the pipeline.
6833   Make element THIS of READY be that insn if it
6834   is not already.  TYPE indicates the pipeline class
6835   this insn falls into.  */
6836static void
6837ultra_schedule_insn (ip, ready, this, type)
6838     rtx *ip;
6839     rtx *ready;
6840     int this;
6841     enum ultra_code type;
6842{
6843  int pipe_slot;
6844  char mask = ultra_pipe.free_slot_mask;
6845
6846  /* Obtain free slot.  */
6847  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
6848    if ((mask & (1 << pipe_slot)) != 0)
6849      break;
6850  if (pipe_slot == 4)
6851    abort ();
6852
6853  /* In it goes, and it hasn't been committed yet.  */
6854  ultra_pipe.group[pipe_slot] = *ip;
6855  ultra_pipe.codes[pipe_slot] = type;
6856  ultra_pipe.contents[type] = 1;
6857  if (UMASK (type) &
6858      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6859    ultra_pipe.num_ieu_insns += 1;
6860
6861  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
6862  ultra_pipe.group_size += 1;
6863  ultra_pipe.commit[pipe_slot] = 0;
6864
6865  /* Update ready list.  */
6866  if (ip != &ready[this])
6867    {
6868      rtx temp = *ip;
6869
6870      *ip = ready[this];
6871      ready[this] = temp;
6872    }
6873}
6874
/* Advance to the next pipeline group.  */
static void
ultra_flush_pipeline ()
{
  /* The history ring cycles through entries 0 .. ULTRA_NUM_HIST - 2.  */
  ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
  ultra_cycles_elapsed += 1;
  /* The new current group starts empty with all four slots free.  */
  bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
  ultra_pipe.free_slot_mask = 0xf;
}
6884
/* Set once ultrasparc_sched_reorder has run for the current block;
   used to skip the one unnecessary call made per block (see
   ultrasparc_sched_reorder).  */
static int ultra_reorder_called_this_block;
6886
6887/* Init our data structures for this current block.  */
6888void
6889ultrasparc_sched_init (dump, sched_verbose)
6890     FILE *dump ATTRIBUTE_UNUSED;
6891     int sched_verbose ATTRIBUTE_UNUSED;
6892{
6893  bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
6894  ultra_cur_hist = 0;
6895  ultra_cycles_elapsed = 0;
6896  ultra_reorder_called_this_block = 0;
6897  ultra_pipe.free_slot_mask = 0xf;
6898}
6899
6900/* INSN has been scheduled, update pipeline commit state
6901   and return how many instructions are still to be
6902   scheduled in this group.  */
6903int
6904ultrasparc_variable_issue (insn)
6905     rtx insn;
6906{
6907  struct ultrasparc_pipeline_state *up = &ultra_pipe;
6908  int i, left_to_fire;
6909
6910  left_to_fire = 0;
6911  for (i = 0; i < 4; i++)
6912    {
6913      if (up->group[i] == 0)
6914	continue;
6915
6916      if (up->group[i] == insn)
6917	{
6918	  up->commit[i] = 1;
6919	}
6920      else if (! up->commit[i])
6921	left_to_fire++;
6922    }
6923
6924  return left_to_fire;
6925}
6926
6927/* In actual_hazard_this_instance, we may have yanked some
6928   instructions from the ready list due to conflict cost
6929   adjustments.  If so, and such an insn was in our pipeline
6930   group, remove it and update state.  */
static void
ultra_rescan_pipeline_state (ready, n_ready)
     rtx *ready;
     int n_ready;
{
  struct ultrasparc_pipeline_state *up = &ultra_pipe;
  int i;

  /* For every uncommitted member of the current group, verify it is
     still on the ready list; if not, it was requeued, so undo each
     piece of group state that ultra_schedule_insn recorded for it.  */
  for (i = 0; i < 4; i++)
    {
      rtx insn = up->group[i];
      int j;

      if (! insn)
	continue;

      /* If it has been committed, then it was removed from
	 the ready list because it was actually scheduled,
	 and that is not the case we are searching for here.  */
      if (up->commit[i] != 0)
	continue;

      for (j = n_ready - 1; j >= 0; j--)
	if (ready[j] == insn)
	  break;

      /* If we didn't find it, toss it.  */
      if (j < 0)
	{
	  enum ultra_code ucode = up->codes[i];

	  up->group[i] = 0;
	  up->codes[i] = NONE;
	  up->contents[ucode] = 0;
	  if (UMASK (ucode) &
	      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
	    up->num_ieu_insns -= 1;

	  up->free_slot_mask |= (1 << i);
	  up->group_size -= 1;
	  up->commit[i] = 0;
	}
    }
}
6975
/* Machine-dependent scheduling hook for UltraSPARC: reorder the
   ready list so that the next insns picked by the scheduler form a
   legal UltraSPARC dispatch group (up to 4 insns per cycle).

   DUMP and SCHED_VERBOSE control scheduler debug output.  READY is
   the ready list, highest priority last, with N_READY entries.
   Insns are moved to the end of the ready list (via
   ultra_schedule_insn) in the order they should issue.  */
void
ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
     FILE *dump;
     int sched_verbose;
     rtx *ready;
     int n_ready;
{
  struct ultrasparc_pipeline_state *up = &ultra_pipe;
  int i, this_insn;

  /* We get called once unnecessarily per block of insns
     scheduled.  */
  if (ultra_reorder_called_this_block == 0)
    {
      ultra_reorder_called_this_block = 1;
      return;
    }

  if (sched_verbose)
    {
      int n;

      /* Dump the ready list with each insn's UltraSPARC code class.  */
      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
      for (n = n_ready - 1; n >= 0; n--)
	{
	  rtx insn = ready[n];
	  enum ultra_code ucode;

	  if (recog_memoized (insn) < 0)
	    continue;
	  ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
	  if (n != 0)
	    fprintf (dump, "%s(%d) ",
		     ultra_code_names[ucode],
		     INSN_UID (insn));
	  else
	    fprintf (dump, "%s(%d)",
		     ultra_code_names[ucode],
		     INSN_UID (insn));
	}
      fprintf (dump, "]\n");
    }

  this_insn = n_ready - 1;

  /* Skip over junk we don't understand.  */
  while ((this_insn >= 0)
	 && recog_memoized (ready[this_insn]) < 0)
    this_insn--;

  ultra_build_types_avail (ready, this_insn + 1);

  while (this_insn >= 0) {
    int old_group_size = up->group_size;

    if (up->group_size != 0)
      {
	int num_committed;

	num_committed = (up->commit[0] + up->commit[1] +
			 up->commit[2] + up->commit[3]);
	/* If nothing has been committed from our group, or all of
	   them have.  Clear out the (current cycle's) pipeline
	   state and start afresh.  */
	if (num_committed == 0
	    || num_committed == up->group_size)
	  {
	    ultra_flush_pipeline ();
	    up = &ultra_pipe;
	    old_group_size = 0;
	  }
	else
	  {
	    /* OK, some ready list insns got requeued and thus removed
	       from the ready list.  Account for this fact.  */
	    ultra_rescan_pipeline_state (ready, n_ready);

	    /* Something "changed", make this look like a newly
	       formed group so the code at the end of the loop
	       knows that progress was in fact made.  */
	    if (up->group_size != old_group_size)
	      old_group_size = 0;
	  }
      }

    if (up->group_size == 0)
      {
	/* If the pipeline is (still) empty and we have any single
	   group insns, get them out now as this is a good time.  */
	rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
				    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
				    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
				   ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, SINGLE);
	    break;
	  }

	/* If we are not in the process of emptying out the pipe, try to
	   obtain an instruction which must be the first in its group.  */
	ip = ultra_find_type ((TMASK (TYPE_CALL) |
			       TMASK (TYPE_CALL_NO_DELAY_SLOT) |
			       TMASK (TYPE_UNCOND_BRANCH)),
			      ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, IEU1);
	    this_insn--;
	  }
	else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
					 TMASK (TYPE_FPDIVD) |
					 TMASK (TYPE_FPSQRT)),
					ready, this_insn)) != 0)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPM);
	    this_insn--;
	  }
      }

    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
       operation.  We can't do more IEU operations if the first 3 slots are
       all full or we have dispatched two IEU insns already.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2
	&& up->contents[IEU0] == 0
	&& up->contents[IEUN] == 0)
      {
	rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, IEU0);
	    this_insn--;
	  }
      }

    /* If we can, try to find an IEU1 specific or an unnamed
       IEU instruction.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
				    TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
				    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
				   ready, this_insn);
	if (ip)
	  {
	    rtx insn = *ip;

	    /* A compare claims the IEU1 slot when it is still free;
	       anything else goes to the unnamed IEU slot.  */
	    ultra_schedule_insn (ip, ready, this_insn,
				 (!up->contents[IEU1]
				  && get_attr_type (insn) == TYPE_COMPARE)
				 ? IEU1 : IEUN);
	    this_insn--;
	  }
      }

    /* If only one IEU insn has been found, try to find another unnamed
       IEU operation or an IEU1 specific one.  */
    if ((up->free_slot_mask & 0x7) != 0
	&& up->num_ieu_insns < 2)
      {
	rtx *ip;
	int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
		     TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));

	if (!up->contents[IEU1])
	  tmask |= TMASK (TYPE_COMPARE);
	ip = ultra_find_type (tmask, ready, this_insn);
	if (ip)
	  {
	    rtx insn = *ip;

	    ultra_schedule_insn (ip, ready, this_insn,
				 (!up->contents[IEU1]
				  && get_attr_type (insn) == TYPE_COMPARE)
				 ? IEU1 : IEUN);
	    this_insn--;
	  }
      }

    /* Try for a load or store, but such an insn can only be issued
       if it is within one of the first 3 slots.  */
    if ((up->free_slot_mask & 0x7) != 0
        && up->contents[LSU] == 0)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
				   TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
				   TMASK (TYPE_FPSTORE)), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, LSU);
	    this_insn--;
	  }
      }

    /* Now find FPU operations, first FPM class.  But not divisions or
       square-roots because those will break the group up.  Unlike all
       the previous types, these can go in any slot.  */
    if (up->free_slot_mask != 0
	&& up->contents[FPM] == 0)
      {
	rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPM);
	    this_insn--;
	  }
      }

    /* Continue on with FPA class if we have not filled the group already.  */
    if (up->free_slot_mask != 0
	&& up->contents[FPA] == 0)
      {
	rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
				    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
				   ready, this_insn);
	if (ip)
	  {
	    ultra_schedule_insn (ip, ready, this_insn, FPA);
	    this_insn--;
	  }
      }

    /* Finally, maybe stick a branch in here.  */
    if (up->free_slot_mask != 0
	&& up->contents[CTI] == 0)
      {
	rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);

	/* Try to slip in a branch only if it is one of the
	   next 2 in the ready list.  */
	if (ip && ((&ready[this_insn] - ip) < 2))
	  {
	    ultra_schedule_insn (ip, ready, this_insn, CTI);
	    this_insn--;
	  }
      }

    /* Recompute the group size from the free-slot mask.  */
    up->group_size = 0;
    for (i = 0; i < 4; i++)
      if ((up->free_slot_mask & (1 << i)) == 0)
	up->group_size++;

    /* See if we made any progress...  */
    if (old_group_size != up->group_size)
      break;

    /* Clean out the (current cycle's) pipeline state
       and try once more.  If we placed no instructions
       into the pipeline at all, it means a real hard
       conflict exists with some earlier issued instruction
       so we must advance to the next cycle to clear it up.  */
    if (up->group_size == 0)
      {
	ultra_flush_pipeline ();
	up = &ultra_pipe;
      }
    else
      {
	bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
	ultra_pipe.free_slot_mask = 0xf;
      }
  }

  if (sched_verbose)
    {
      int n, gsize;

      /* Dump the group we just formed.  */
      fprintf (dump, ";;\tUltraSPARC Launched   [");
      gsize = up->group_size;
      for (n = 0; n < 4; n++)
	{
	  rtx insn = up->group[n];

	  if (! insn)
	    continue;

	  gsize -= 1;
	  if (gsize != 0)
	    fprintf (dump, "%s(%d) ",
		     ultra_code_names[up->codes[n]],
		     INSN_UID (insn));
	  else
	    fprintf (dump, "%s(%d)",
		     ultra_code_names[up->codes[n]],
		     INSN_UID (insn));
	}
      fprintf (dump, "]\n");
    }
}
7267
7268int
7269sparc_issue_rate ()
7270{
7271  switch (sparc_cpu)
7272    {
7273    default:
7274      return 1;
7275    case PROCESSOR_V9:
7276      /* Assume V9 processors are capable of at least dual-issue.  */
7277      return 2;
7278    case PROCESSOR_SUPERSPARC:
7279      return 3;
7280    case PROCESSOR_HYPERSPARC:
7281    case PROCESSOR_SPARCLITE86X:
7282      return 2;
7283    case PROCESSOR_ULTRASPARC:
7284      return 4;
7285    }
7286}
7287
7288static int
7289set_extends(x, insn)
7290     rtx x, insn;
7291{
7292  register rtx pat = PATTERN (insn);
7293
7294  switch (GET_CODE (SET_SRC (pat)))
7295    {
7296      /* Load and some shift instructions zero extend. */
7297    case MEM:
7298    case ZERO_EXTEND:
7299      /* sethi clears the high bits */
7300    case HIGH:
7301      /* LO_SUM is used with sethi.  sethi cleared the high
7302	 bits and the values used with lo_sum are positive */
7303    case LO_SUM:
7304      /* Store flag stores 0 or 1 */
7305    case LT: case LTU:
7306    case GT: case GTU:
7307    case LE: case LEU:
7308    case GE: case GEU:
7309    case EQ:
7310    case NE:
7311      return 1;
7312    case AND:
7313      {
7314	rtx op1 = XEXP (SET_SRC (pat), 1);
7315	if (GET_CODE (op1) == CONST_INT)
7316	  return INTVAL (op1) >= 0;
7317	if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7318	    && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7319	  return 1;
7320	if (GET_CODE (op1) == REG
7321	    && sparc_check_64 ((op1), insn) == 1)
7322	  return 1;
7323      }
7324    case ASHIFT:
7325    case LSHIFTRT:
7326      return GET_MODE (SET_SRC (pat)) == SImode;
7327      /* Positive integers leave the high bits zero. */
7328    case CONST_DOUBLE:
7329      return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7330    case CONST_INT:
7331      return ! (INTVAL (x) & 0x80000000);
7332    case ASHIFTRT:
7333    case SIGN_EXTEND:
7334      return - (GET_MODE (SET_SRC (pat)) == SImode);
7335    default:
7336      return 0;
7337    }
7338}
7339
/* We _ought_ to have only one kind per function, but... */
/* Deferred ADDR_DIFF_VEC jump tables (pc-relative entries), chained
   as EXPR_LISTs by sparc_defer_case_vector.  */
static rtx sparc_addr_diff_list;
/* Deferred ADDR_VEC jump tables (absolute entries).  */
static rtx sparc_addr_list;
7343
7344void
7345sparc_defer_case_vector (lab, vec, diff)
7346     rtx lab, vec;
7347     int diff;
7348{
7349  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7350  if (diff)
7351    sparc_addr_diff_list
7352      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7353  else
7354    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7355}
7356
/* Output one deferred ADDR_VEC jump table (absolute addresses).
   VEC is an EXPR_LIST pairing the table's CODE_LABEL with the
   ADDR_VEC body, as built by sparc_defer_case_vector.  */
static void
sparc_output_addr_vec (vec)
     rtx vec;
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's label, using the case-label variant when the
     target defines one.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One absolute-address entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
7385
/* Output one deferred ADDR_DIFF_VEC jump table (pc-relative entries).
   VEC is an EXPR_LIST pairing the table's CODE_LABEL with the
   ADDR_DIFF_VEC body; the base label is operand 0 of that body.  */
static void
sparc_output_addr_diff_vec (vec)
     rtx vec;
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  rtx base = XEXP (XEXP (body, 0), 0);
  /* Note the element vector is operand 1 of an ADDR_DIFF_VEC
     (operand 0 is the base), unlike the ADDR_VEC case.  */
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One (case label - base label) difference entry per case.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
7418
7419static void
7420sparc_output_deferred_case_vectors ()
7421{
7422  rtx t;
7423  int align;
7424
7425  if (sparc_addr_list == NULL_RTX
7426      && sparc_addr_diff_list == NULL_RTX)
7427    return;
7428
7429  /* Align to cache line in the function's code section.  */
7430  function_section (current_function_decl);
7431
7432  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7433  if (align > 0)
7434    ASM_OUTPUT_ALIGN (asm_out_file, align);
7435
7436  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7437    sparc_output_addr_vec (XEXP (t, 0));
7438  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7439    sparc_output_addr_diff_vec (XEXP (t, 0));
7440
7441  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7442}
7443
/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.

   Scans backward from INSN looking for the single SET that defines X,
   then classifies it with set_extends.  */
int
sparc_check_64 (x, insn)
     rtx x, insn;
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;

  if (GET_CODE (x) == REG
      && flag_expensive_optimizations
      && REG_N_SETS (REGNO (x)) == 1)
    set_once = 1;

  /* With no starting insn, we can only scan the whole function, and
     only when the single-set guarantee makes that safe.  */
  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Control-flow joins and calls invalidate our knowledge
	     unless X is set exactly once in the function.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (x, insn);
	    /* A partial overlap clobbers what we know about X.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), x))
	      return 0;
	  }
	}
    }
  return 0;
}
7497
/* Output a 64-bit shift for v8plus, where DImode values live in
   register pairs.  OPERANDS are the insn operands (operand 3 is a
   64-bit scratch, or SCRATCH when operand 0 can serve), INSN is the
   shift insn, and OPCODE is the shift mnemonic ("sllx" etc.).
   Assembles the input pair into the scratch first, then returns the
   template for the shift plus the split of the result back into a
   register pair.

   NOTE(review): asm_code is a fixed 60-byte static buffer filled with
   strcpy/strcat -- assumes OPCODE stays short (the longest suffix is
   ~45 chars); verify callers never pass a long opcode.  */
char *
sparc_v8plus_shift (operands, insn, opcode)
     rtx *operands;
     rtx insn;
     char *opcode;
{
  static char asm_code[60];

  if (GET_CODE (operands[3]) == SCRATCH)
    operands[3] = operands[0];
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov %1,%3", operands);
    }
  else
    {
      /* Glue the high/low words of operand 1 into the 64-bit scratch;
	 the srl is only needed when the low word may have garbage in
	 its (v9) high bits.  */
      output_asm_insn ("sllx %H1,32,%3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl %L1,0,%L1", operands);
      output_asm_insn ("or %L1,%3,%3", operands);
    }

  strcpy(asm_code, opcode);
  if (which_alternative != 2)
    return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
  else
    return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
}
7526
7527
7528/* Return 1 if DEST and SRC reference only global and in registers. */
7529
7530int
7531sparc_return_peephole_ok (dest, src)
7532     rtx dest, src;
7533{
7534  if (! TARGET_V9)
7535    return 0;
7536  if (current_function_uses_only_leaf_regs)
7537    return 0;
7538  if (GET_CODE (src) != CONST_INT
7539      && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
7540    return 0;
7541  return IN_OR_GLOBAL_P (dest);
7542}
7543
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.

   32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
   during profiling so we need to save/restore it around the call to mcount.
   We're guaranteed that a save has just been done, and we use the space
   allocated for intreg/fpreg value passing.  */

void
sparc_function_profiler (file, labelno)
     FILE *file;
     int labelno;
{
  char buf[32];
  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

  /* Save %g2 in the reserved stack slot (32-bit only).  */
  if (! TARGET_ARCH64)
    fputs ("\tst\t%g2,[%fp-4]\n", file);

  /* %o0 <- %hi(LPn): mcount's argument is the counter label.  */
  fputs ("\tsethi\t%hi(", file);
  assemble_name (file, buf);
  fputs ("),%o0\n", file);

  fputs ("\tcall\t", file);
  assemble_name (file, MCOUNT_FUNCTION);
  putc ('\n', file);

  /* The or completes %o0 in the call's delay slot (the leading
     space before "or" appears to mark a delay-slot insn -- matches
     the style used elsewhere in this file).  */
  fputs ("\t or\t%o0,%lo(", file);
  assemble_name (file, buf);
  fputs ("),%o0\n", file);

  /* Restore %g2 (32-bit only).  */
  if (! TARGET_ARCH64)
    fputs ("\tld\t[%fp-4],%g2\n", file);
}
7578
7579
7580/* The following macro shall output assembler code to FILE
7581   to initialize basic-block profiling.
7582
7583   If profile_block_flag == 2
7584
7585	Output code to call the subroutine `__bb_init_trace_func'
7586	and pass two parameters to it. The first parameter is
7587	the address of a block allocated in the object module.
7588	The second parameter is the number of the first basic block
7589	of the function.
7590
7591	The name of the block is a local symbol made with this statement:
7592
7593	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7594
7595	Of course, since you are writing the definition of
7596	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7597	can take a short cut in the definition of this macro and use the
7598	name that you know will result.
7599
7600	The number of the first basic block of the function is
7601	passed to the macro in BLOCK_OR_LABEL.
7602
7603	If described in a virtual assembler language the code to be
7604	output looks like:
7605
7606		parameter1 <- LPBX0
7607		parameter2 <- BLOCK_OR_LABEL
7608		call __bb_init_trace_func
7609
7610    else if profile_block_flag != 0
7611
7612	Output code to call the subroutine `__bb_init_func'
7613	and pass one single parameter to it, which is the same
7614	as the first parameter to `__bb_init_trace_func'.
7615
7616	The first word of this parameter is a flag which will be nonzero if
7617	the object module has already been initialized.  So test this word
7618	first, and do not call `__bb_init_func' if the flag is nonzero.
7619	Note: When profile_block_flag == 2 the test need not be done
7620	but `__bb_init_trace_func' *must* be called.
7621
7622	BLOCK_OR_LABEL may be used to generate a label number as a
7623	branch destination in case `__bb_init_func' will not be called.
7624
7625	If described in a virtual assembler language the code to be
7626	output looks like:
7627
7628		cmp (LPBX0),0
7629		jne local_label
7630		parameter1 <- LPBX0
7631		call __bb_init_func
7632	    local_label:
7633
7634*/
7635
/* Output code to FILE to initialize basic-block profiling, per the
   FUNCTION_BLOCK_PROFILER contract documented above.  With
   profile_block_flag == 2, call __bb_init_trace_func with the LPBX0
   block address and BLOCK_OR_LABEL (a block number); otherwise test
   the module's initialized flag and call __bb_init_func only when it
   is still zero (BLOCK_OR_LABEL is then a label number for the skip
   branch).  */
void
sparc_function_block_profiler(file, block_or_label)
     FILE *file;
     int block_or_label;
{
  char LPBX[32];
  ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);

  if (profile_block_flag == 2)
    {
      /* %o0 <- address of LPBX0.  */
      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      /* %o1 <- block number (completed in the delay slot below).  */
      fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);

      fputs ("\tor\t%o0,%lo(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);

      /* Delay-slot insn finishing %o1.  */
      fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
    }
  else if (profile_block_flag != 0)
    {
      char LPBY[32];
      ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);

      /* Load the first word of LPBX0 -- nonzero once the module has
	 been initialized.  */
      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fputs ("\tld\t[%lo(", file);
      assemble_name (file, LPBX);
      fputs (")+%o0],%o1\n", file);

      fputs ("\ttst\t%o1\n", file);

      /* Skip the init call if already initialized; use the v9
	 branch-with-prediction form when available.  */
      if (TARGET_V9)
	{
	  fputs ("\tbne,pn\t%icc,", file);
	  assemble_name (file, LPBY);
	  putc ('\n', file);
	}
      else
	{
	  fputs ("\tbne\t", file);
	  assemble_name (file, LPBY);
	  putc ('\n', file);
	}

      /* Delay-slot insn completing %o0 = address of LPBX0.  */
      fputs ("\t or\t%o0,%lo(", file);
      assemble_name (file, LPBX);
      fputs ("),%o0\n", file);

      fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);

      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
    }
}
7697
7698/* The following macro shall output assembler code to FILE
7699   to increment a counter associated with basic block number BLOCKNO.
7700
7701   If profile_block_flag == 2
7702
7703	Output code to initialize the global structure `__bb' and
7704	call the function `__bb_trace_func' which will increment the
7705	counter.
7706
7707	`__bb' consists of two words. In the first word the number
7708	of the basic block has to be stored. In the second word
7709	the address of a block allocated in the object module
7710	has to be stored.
7711
7712	The basic block number is given by BLOCKNO.
7713
7714	The address of the block is given by the label created with
7715
7716	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7717
7718	by FUNCTION_BLOCK_PROFILER.
7719
7720	Of course, since you are writing the definition of
7721	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7722	can take a short cut in the definition of this macro and use the
7723	name that you know will result.
7724
7725	If described in a virtual assembler language the code to be
7726	output looks like:
7727
7728		move BLOCKNO -> (__bb)
7729		move LPBX0 -> (__bb+4)
7730		call __bb_trace_func
7731
7732	Note that function `__bb_trace_func' must not change the
7733	machine state, especially the flag register. To grant
7734	this, you must output code to save and restore registers
7735	either in this macro or in the macros MACHINE_STATE_SAVE
7736	and MACHINE_STATE_RESTORE. The last two macros will be
7737	used in the function `__bb_trace_func', so you must make
7738	sure that the function prologue does not change any
7739	register prior to saving it with MACHINE_STATE_SAVE.
7740
7741   else if profile_block_flag != 0
7742
7743	Output code to increment the counter directly.
7744	Basic blocks are numbered separately from zero within each
7745	compiled object module. The count associated with block number
7746	BLOCKNO is at index BLOCKNO in an array of words; the name of
7747	this array is a local symbol made with this statement:
7748
7749	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
7750
7751	Of course, since you are writing the definition of
7752	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7753	can take a short cut in the definition of this macro and use the
7754	name that you know will result.
7755
7756	If described in a virtual assembler language, the code to be
7757	output looks like:
7758
7759		inc (LPBX2+4*BLOCKNO)
7760
7761*/
7762
/* Output code to FILE to count an execution of basic block BLOCKNO,
   per the BLOCK_PROFILER contract documented above.  With
   profile_block_flag == 2, fill in the two-word __bb structure
   (block number, then LPBX0 address) and call __bb_trace_func;
   otherwise increment word BLOCKNO of the LPBX2 counter array
   in line.  */
void
sparc_block_profiler(file, blockno)
     FILE *file;
     int blockno;
{
  char LPBX[32];

  if (profile_block_flag == 2)
    {
      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);

      /* %g1 <- &__bb, %g2 <- blockno.  */
      fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
      fprintf (file, "\tsethi\t%%hi(%d),%%g2\n", blockno);
      fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
      fprintf (file, "\tor\t%%g2,%%lo(%d),%%g2\n", blockno);

      /* __bb word 0: the block number.  */
      fputs ("\tst\t%g2,[%g1]\n", file);

      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fputs ("),%g2\n", file);

      fputs ("\tor\t%g2,%lo(", file);
      assemble_name (file, LPBX);
      fputs ("),%g2\n", file);

      /* __bb word 1: the address of LPBX0.  */
      fputs ("\tst\t%g2,[%g1+4]\n", file);
      /* Preserve the return address around the call.  */
      fputs ("\tmov\t%o7,%g2\n", file);

      fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);

      fputs ("\tmov\t%g2,%o7\n", file);
    }
  else if (profile_block_flag != 0)
    {
      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);

      /* Load, increment, and store back LPBX2[blockno] (word sized,
	 hence the blockno*4 byte offset).  */
      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d),%%g1\n", blockno*4);

      fputs ("\tld\t[%g1+%lo(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d)],%%g2\n", blockno*4);

      fputs ("\tadd\t%g2,1,%g2\n", file);

      fputs ("\tst\t%g2,[%g1+%lo(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d)]\n", blockno*4);
    }
}
7815
7816/* The following macro shall output assembler code to FILE
7817   to indicate a return from function during basic-block profiling.
7818
7819   If profile_block_flag == 2:
7820
7821	Output assembler code to call function `__bb_trace_ret'.
7822
7823	Note that function `__bb_trace_ret' must not change the
7824	machine state, especially the flag register. To grant
7825	this, you must output code to save and restore registers
7826	either in this macro or in the macros MACHINE_STATE_SAVE_RET
7827	and MACHINE_STATE_RESTORE_RET. The last two macros will be
7828	used in the function `__bb_trace_ret', so you must make
7829	sure that the function prologue does not change any
7830	register prior to saving it with MACHINE_STATE_SAVE_RET.
7831
7832   else if profile_block_flag != 0:
7833
7834	The macro will not be used, so it need not distinguish
7835	these cases.
7836*/
7837
7838void
7839sparc_function_block_profiler_exit(file)
7840     FILE *file;
7841{
7842  if (profile_block_flag == 2)
7843    fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
7844  else
7845    abort ();
7846}
7847