1/* Subroutines for insn-output.c for SPARC.
2   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4   Free Software Foundation, Inc.
5   Contributed by Michael Tiemann (tiemann@cygnus.com)
6   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7   at Cygnus Support.
8
9This file is part of GCC.
10
11GCC is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 3, or (at your option)
14any later version.
15
16GCC is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GCC; see the file COPYING3.  If not see
23<http://www.gnu.org/licenses/>.  */
24
25#include "config.h"
26#include "system.h"
27#include "coretypes.h"
28#include "tm.h"
29#include "tree.h"
30#include "rtl.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "insn-codes.h"
36#include "conditions.h"
37#include "output.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "function.h"
41#include "expr.h"
42#include "optabs.h"
43#include "recog.h"
44#include "toplev.h"
45#include "ggc.h"
46#include "tm_p.h"
47#include "debug.h"
48#include "target.h"
49#include "target-def.h"
50#include "cfglayout.h"
51#include "gimple.h"
52#include "langhooks.h"
53#include "params.h"
54#include "df.h"
55#include "dwarf2out.h"
56
57/* Processor costs */
58static const
59struct processor_costs cypress_costs = {
60  COSTS_N_INSNS (2), /* int load */
61  COSTS_N_INSNS (2), /* int signed load */
62  COSTS_N_INSNS (2), /* int zeroed load */
63  COSTS_N_INSNS (2), /* float load */
64  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
65  COSTS_N_INSNS (5), /* fadd, fsub */
66  COSTS_N_INSNS (1), /* fcmp */
67  COSTS_N_INSNS (1), /* fmov, fmovr */
68  COSTS_N_INSNS (7), /* fmul */
69  COSTS_N_INSNS (37), /* fdivs */
70  COSTS_N_INSNS (37), /* fdivd */
71  COSTS_N_INSNS (63), /* fsqrts */
72  COSTS_N_INSNS (63), /* fsqrtd */
73  COSTS_N_INSNS (1), /* imul */
74  COSTS_N_INSNS (1), /* imulX */
75  0, /* imul bit factor */
76  COSTS_N_INSNS (1), /* idiv */
77  COSTS_N_INSNS (1), /* idivX */
78  COSTS_N_INSNS (1), /* movcc/movr */
79  0, /* shift penalty */
80};
81
82static const
83struct processor_costs supersparc_costs = {
84  COSTS_N_INSNS (1), /* int load */
85  COSTS_N_INSNS (1), /* int signed load */
86  COSTS_N_INSNS (1), /* int zeroed load */
87  COSTS_N_INSNS (0), /* float load */
88  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
89  COSTS_N_INSNS (3), /* fadd, fsub */
90  COSTS_N_INSNS (3), /* fcmp */
91  COSTS_N_INSNS (1), /* fmov, fmovr */
92  COSTS_N_INSNS (3), /* fmul */
93  COSTS_N_INSNS (6), /* fdivs */
94  COSTS_N_INSNS (9), /* fdivd */
95  COSTS_N_INSNS (12), /* fsqrts */
96  COSTS_N_INSNS (12), /* fsqrtd */
97  COSTS_N_INSNS (4), /* imul */
98  COSTS_N_INSNS (4), /* imulX */
99  0, /* imul bit factor */
100  COSTS_N_INSNS (4), /* idiv */
101  COSTS_N_INSNS (4), /* idivX */
102  COSTS_N_INSNS (1), /* movcc/movr */
103  1, /* shift penalty */
104};
105
106static const
107struct processor_costs hypersparc_costs = {
108  COSTS_N_INSNS (1), /* int load */
109  COSTS_N_INSNS (1), /* int signed load */
110  COSTS_N_INSNS (1), /* int zeroed load */
111  COSTS_N_INSNS (1), /* float load */
112  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
113  COSTS_N_INSNS (1), /* fadd, fsub */
114  COSTS_N_INSNS (1), /* fcmp */
115  COSTS_N_INSNS (1), /* fmov, fmovr */
116  COSTS_N_INSNS (1), /* fmul */
117  COSTS_N_INSNS (8), /* fdivs */
118  COSTS_N_INSNS (12), /* fdivd */
119  COSTS_N_INSNS (17), /* fsqrts */
120  COSTS_N_INSNS (17), /* fsqrtd */
121  COSTS_N_INSNS (17), /* imul */
122  COSTS_N_INSNS (17), /* imulX */
123  0, /* imul bit factor */
124  COSTS_N_INSNS (17), /* idiv */
125  COSTS_N_INSNS (17), /* idivX */
126  COSTS_N_INSNS (1), /* movcc/movr */
127  0, /* shift penalty */
128};
129
130static const
131struct processor_costs sparclet_costs = {
132  COSTS_N_INSNS (3), /* int load */
133  COSTS_N_INSNS (3), /* int signed load */
134  COSTS_N_INSNS (1), /* int zeroed load */
135  COSTS_N_INSNS (1), /* float load */
136  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
137  COSTS_N_INSNS (1), /* fadd, fsub */
138  COSTS_N_INSNS (1), /* fcmp */
139  COSTS_N_INSNS (1), /* fmov, fmovr */
140  COSTS_N_INSNS (1), /* fmul */
141  COSTS_N_INSNS (1), /* fdivs */
142  COSTS_N_INSNS (1), /* fdivd */
143  COSTS_N_INSNS (1), /* fsqrts */
144  COSTS_N_INSNS (1), /* fsqrtd */
145  COSTS_N_INSNS (5), /* imul */
146  COSTS_N_INSNS (5), /* imulX */
147  0, /* imul bit factor */
148  COSTS_N_INSNS (5), /* idiv */
149  COSTS_N_INSNS (5), /* idivX */
150  COSTS_N_INSNS (1), /* movcc/movr */
151  0, /* shift penalty */
152};
153
154static const
155struct processor_costs ultrasparc_costs = {
156  COSTS_N_INSNS (2), /* int load */
157  COSTS_N_INSNS (3), /* int signed load */
158  COSTS_N_INSNS (2), /* int zeroed load */
159  COSTS_N_INSNS (2), /* float load */
160  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
161  COSTS_N_INSNS (4), /* fadd, fsub */
162  COSTS_N_INSNS (1), /* fcmp */
163  COSTS_N_INSNS (2), /* fmov, fmovr */
164  COSTS_N_INSNS (4), /* fmul */
165  COSTS_N_INSNS (13), /* fdivs */
166  COSTS_N_INSNS (23), /* fdivd */
167  COSTS_N_INSNS (13), /* fsqrts */
168  COSTS_N_INSNS (23), /* fsqrtd */
169  COSTS_N_INSNS (4), /* imul */
170  COSTS_N_INSNS (4), /* imulX */
171  2, /* imul bit factor */
172  COSTS_N_INSNS (37), /* idiv */
173  COSTS_N_INSNS (68), /* idivX */
174  COSTS_N_INSNS (2), /* movcc/movr */
175  2, /* shift penalty */
176};
177
178static const
179struct processor_costs ultrasparc3_costs = {
180  COSTS_N_INSNS (2), /* int load */
181  COSTS_N_INSNS (3), /* int signed load */
182  COSTS_N_INSNS (3), /* int zeroed load */
183  COSTS_N_INSNS (2), /* float load */
184  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
185  COSTS_N_INSNS (4), /* fadd, fsub */
186  COSTS_N_INSNS (5), /* fcmp */
187  COSTS_N_INSNS (3), /* fmov, fmovr */
188  COSTS_N_INSNS (4), /* fmul */
189  COSTS_N_INSNS (17), /* fdivs */
190  COSTS_N_INSNS (20), /* fdivd */
191  COSTS_N_INSNS (20), /* fsqrts */
192  COSTS_N_INSNS (29), /* fsqrtd */
193  COSTS_N_INSNS (6), /* imul */
194  COSTS_N_INSNS (6), /* imulX */
195  0, /* imul bit factor */
196  COSTS_N_INSNS (40), /* idiv */
197  COSTS_N_INSNS (71), /* idivX */
198  COSTS_N_INSNS (2), /* movcc/movr */
199  0, /* shift penalty */
200};
201
202static const
203struct processor_costs niagara_costs = {
204  COSTS_N_INSNS (3), /* int load */
205  COSTS_N_INSNS (3), /* int signed load */
206  COSTS_N_INSNS (3), /* int zeroed load */
207  COSTS_N_INSNS (9), /* float load */
208  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
209  COSTS_N_INSNS (8), /* fadd, fsub */
210  COSTS_N_INSNS (26), /* fcmp */
211  COSTS_N_INSNS (8), /* fmov, fmovr */
212  COSTS_N_INSNS (29), /* fmul */
213  COSTS_N_INSNS (54), /* fdivs */
214  COSTS_N_INSNS (83), /* fdivd */
215  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
216  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
217  COSTS_N_INSNS (11), /* imul */
218  COSTS_N_INSNS (11), /* imulX */
219  0, /* imul bit factor */
220  COSTS_N_INSNS (72), /* idiv */
221  COSTS_N_INSNS (72), /* idivX */
222  COSTS_N_INSNS (1), /* movcc/movr */
223  0, /* shift penalty */
224};
225
226static const
227struct processor_costs niagara2_costs = {
228  COSTS_N_INSNS (3), /* int load */
229  COSTS_N_INSNS (3), /* int signed load */
230  COSTS_N_INSNS (3), /* int zeroed load */
231  COSTS_N_INSNS (3), /* float load */
232  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
233  COSTS_N_INSNS (6), /* fadd, fsub */
234  COSTS_N_INSNS (6), /* fcmp */
235  COSTS_N_INSNS (6), /* fmov, fmovr */
236  COSTS_N_INSNS (6), /* fmul */
237  COSTS_N_INSNS (19), /* fdivs */
238  COSTS_N_INSNS (33), /* fdivd */
239  COSTS_N_INSNS (19), /* fsqrts */
240  COSTS_N_INSNS (33), /* fsqrtd */
241  COSTS_N_INSNS (5), /* imul */
242  COSTS_N_INSNS (5), /* imulX */
243  0, /* imul bit factor */
244  COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
245  COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
246  COSTS_N_INSNS (1), /* movcc/movr */
247  0, /* shift penalty */
248};
249
250const struct processor_costs *sparc_costs = &cypress_costs;
251
252#ifdef HAVE_AS_RELAX_OPTION
253/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
254   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
255   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
256   somebody does not branch between the sethi and jmp.  */
257#define LEAF_SIBCALL_SLOT_RESERVED_P 1
258#else
259#define LEAF_SIBCALL_SLOT_RESERVED_P \
260  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
261#endif
262
263/* Global variables for machine-dependent things.  */
264
265/* Size of frame.  Need to know this to emit return insns from leaf procedures.
266   ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
267   reload pass.  This is important as the value is later used for scheduling
268   (to see what can go in a delay slot).
269   APPARENT_FSIZE is the size of the stack less the register save area and less
270   the outgoing argument area.  It is used when saving call preserved regs.  */
271static HOST_WIDE_INT apparent_fsize;
272static HOST_WIDE_INT actual_fsize;
273
274/* Number of live general or floating point registers needed to be
275   saved (as 4-byte quantities).  */
276static int num_gfregs;
277
278/* The alias set for prologue/epilogue register save/restore.  */
279static GTY(()) alias_set_type sparc_sr_alias_set;
280
281/* The alias set for the structure return value.  */
282static GTY(()) alias_set_type struct_value_alias_set;
283
284/* Vector to say how input registers are mapped to output registers.
285   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
286   eliminate it.  You must use -fomit-frame-pointer to get that.  */
287char leaf_reg_remap[] =
288{ 0, 1, 2, 3, 4, 5, 6, 7,
289  -1, -1, -1, -1, -1, -1, 14, -1,
290  -1, -1, -1, -1, -1, -1, -1, -1,
291  8, 9, 10, 11, 12, 13, -1, 15,
292
293  32, 33, 34, 35, 36, 37, 38, 39,
294  40, 41, 42, 43, 44, 45, 46, 47,
295  48, 49, 50, 51, 52, 53, 54, 55,
296  56, 57, 58, 59, 60, 61, 62, 63,
297  64, 65, 66, 67, 68, 69, 70, 71,
298  72, 73, 74, 75, 76, 77, 78, 79,
299  80, 81, 82, 83, 84, 85, 86, 87,
300  88, 89, 90, 91, 92, 93, 94, 95,
301  96, 97, 98, 99, 100};
302
303/* Vector, indexed by hard register number, which contains 1
304   for a register that is allowable in a candidate for leaf
305   function treatment.  */
306char sparc_leaf_regs[] =
307{ 1, 1, 1, 1, 1, 1, 1, 1,
308  0, 0, 0, 0, 0, 0, 1, 0,
309  0, 0, 0, 0, 0, 0, 0, 0,
310  1, 1, 1, 1, 1, 1, 0, 1,
311  1, 1, 1, 1, 1, 1, 1, 1,
312  1, 1, 1, 1, 1, 1, 1, 1,
313  1, 1, 1, 1, 1, 1, 1, 1,
314  1, 1, 1, 1, 1, 1, 1, 1,
315  1, 1, 1, 1, 1, 1, 1, 1,
316  1, 1, 1, 1, 1, 1, 1, 1,
317  1, 1, 1, 1, 1, 1, 1, 1,
318  1, 1, 1, 1, 1, 1, 1, 1,
319  1, 1, 1, 1, 1};
320
321struct GTY(()) machine_function
322{
323  /* Some local-dynamic TLS symbol name.  */
324  const char *some_ld_name;
325
326  /* True if the current function is leaf and uses only leaf regs,
327     so that the SPARC leaf function optimization can be applied.
328     Private version of current_function_uses_only_leaf_regs, see
329     sparc_expand_prologue for the rationale.  */
330  int leaf_function_p;
331
332  /* True if the data calculated by sparc_expand_prologue are valid.  */
333  bool prologue_data_valid_p;
334};
335
336#define sparc_leaf_function_p  cfun->machine->leaf_function_p
337#define sparc_prologue_data_valid_p  cfun->machine->prologue_data_valid_p
338
339/* Register we pretend to think the frame pointer is allocated to.
340   Normally, this is %fp, but if we are in a leaf procedure, this
341   is %sp+"something".  We record "something" separately as it may
342   be too big for reg+constant addressing.  */
343static rtx frame_base_reg;
344static HOST_WIDE_INT frame_base_offset;
345
346/* 1 if the next opcode is to be specially indented.  */
347int sparc_indent_opcode = 0;
348
349static bool sparc_handle_option (size_t, const char *, int);
350static void sparc_init_modes (void);
351static void scan_record_type (tree, int *, int *, int *);
352static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
353				tree, int, int, int *, int *);
354
355static int supersparc_adjust_cost (rtx, rtx, rtx, int);
356static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
357
358static void sparc_output_addr_vec (rtx);
359static void sparc_output_addr_diff_vec (rtx);
360static void sparc_output_deferred_case_vectors (void);
361static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
362static rtx sparc_builtin_saveregs (void);
363static int epilogue_renumber (rtx *, int);
364static bool sparc_assemble_integer (rtx, unsigned int, int);
365static int set_extends (rtx);
366static void load_got_register (void);
367static int save_or_restore_regs (int, int, rtx, int, int);
368static void emit_save_or_restore_regs (int);
369static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
370static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
371static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
372						 tree) ATTRIBUTE_UNUSED;
373static int sparc_adjust_cost (rtx, rtx, rtx, int);
374static int sparc_issue_rate (void);
375static void sparc_sched_init (FILE *, int, int);
376static int sparc_use_sched_lookahead (void);
377
378static void emit_soft_tfmode_libcall (const char *, int, rtx *);
379static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
380static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
381static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
382static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
383
384static bool sparc_function_ok_for_sibcall (tree, tree);
385static void sparc_init_libfuncs (void);
386static void sparc_init_builtins (void);
387static void sparc_vis_init_builtins (void);
388static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
389static tree sparc_fold_builtin (tree, tree, bool);
390static int sparc_vis_mul8x16 (int, int);
391static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
392static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
393				   HOST_WIDE_INT, tree);
394static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
395				       HOST_WIDE_INT, const_tree);
396static struct machine_function * sparc_init_machine_status (void);
397static bool sparc_cannot_force_const_mem (rtx);
398static rtx sparc_tls_get_addr (void);
399static rtx sparc_tls_got (void);
400static const char *get_some_local_dynamic_name (void);
401static int get_some_local_dynamic_name_1 (rtx *, void *);
402static bool sparc_rtx_costs (rtx, int, int, int *, bool);
403static bool sparc_promote_prototypes (const_tree);
404static rtx sparc_struct_value_rtx (tree, int);
405static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
406						      int *, const_tree, int);
407static bool sparc_return_in_memory (const_tree, const_tree);
408static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
409static void sparc_va_start (tree, rtx);
410static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
411static bool sparc_vector_mode_supported_p (enum machine_mode);
412static bool sparc_tls_referenced_p (rtx);
413static rtx legitimize_tls_address (rtx);
414static rtx legitimize_pic_address (rtx, rtx);
415static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
416static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
417				     enum machine_mode, const_tree, bool);
418static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
419				    enum machine_mode, tree, bool);
420static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
421static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
422static void sparc_file_end (void);
423static bool sparc_frame_pointer_required (void);
424static bool sparc_can_eliminate (const int, const int);
425#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
426static const char *sparc_mangle_type (const_tree);
427#endif
428static void sparc_trampoline_init (rtx, tree, rtx);
429
430#ifdef SUBTARGET_ATTRIBUTE_TABLE
431/* Table of valid machine attributes.  */
432static const struct attribute_spec sparc_attribute_table[] =
433{
434  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
435  SUBTARGET_ATTRIBUTE_TABLE,
436  { NULL,        0, 0, false, false, false, NULL }
437};
438#endif
439
440/* Option handling.  */
441
442/* Parsed value.  */
443enum cmodel sparc_cmodel;
444
445char sparc_hard_reg_printed[8];
446
447struct sparc_cpu_select sparc_select[] =
448{
449  /* switch	name,		tune	arch */
450  { (char *)0,	"default",	1,	1 },
451  { (char *)0,	"-mcpu=",	1,	1 },
452  { (char *)0,	"-mtune=",	1,	0 },
453  { 0, 0, 0, 0 }
454};
455
456/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
457enum processor_type sparc_cpu;
458
459/* Whetheran FPU option was specified.  */
460static bool fpu_option_set = false;
461
462/* Initialize the GCC target structure.  */
463
464/* The default is to use .half rather than .short for aligned HI objects.  */
465#undef TARGET_ASM_ALIGNED_HI_OP
466#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
467
468#undef TARGET_ASM_UNALIGNED_HI_OP
469#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
470#undef TARGET_ASM_UNALIGNED_SI_OP
471#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
472#undef TARGET_ASM_UNALIGNED_DI_OP
473#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
474
475/* The target hook has to handle DI-mode values.  */
476#undef TARGET_ASM_INTEGER
477#define TARGET_ASM_INTEGER sparc_assemble_integer
478
479#undef TARGET_ASM_FUNCTION_PROLOGUE
480#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
481#undef TARGET_ASM_FUNCTION_EPILOGUE
482#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
483
484#undef TARGET_SCHED_ADJUST_COST
485#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
486#undef TARGET_SCHED_ISSUE_RATE
487#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
488#undef TARGET_SCHED_INIT
489#define TARGET_SCHED_INIT sparc_sched_init
490#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
491#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
492
493#undef TARGET_FUNCTION_OK_FOR_SIBCALL
494#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
495
496#undef TARGET_INIT_LIBFUNCS
497#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
498#undef TARGET_INIT_BUILTINS
499#define TARGET_INIT_BUILTINS sparc_init_builtins
500
501#undef TARGET_LEGITIMIZE_ADDRESS
502#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
503
504#undef TARGET_EXPAND_BUILTIN
505#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
506#undef TARGET_FOLD_BUILTIN
507#define TARGET_FOLD_BUILTIN sparc_fold_builtin
508
509#if TARGET_TLS
510#undef TARGET_HAVE_TLS
511#define TARGET_HAVE_TLS true
512#endif
513
514#undef TARGET_CANNOT_FORCE_CONST_MEM
515#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
516
517#undef TARGET_ASM_OUTPUT_MI_THUNK
518#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
519#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
520#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
521
522#undef TARGET_RTX_COSTS
523#define TARGET_RTX_COSTS sparc_rtx_costs
524#undef TARGET_ADDRESS_COST
525#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
526
527#undef TARGET_PROMOTE_FUNCTION_MODE
528#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
529
530#undef TARGET_PROMOTE_PROTOTYPES
531#define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes
532
533#undef TARGET_STRUCT_VALUE_RTX
534#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
535#undef TARGET_RETURN_IN_MEMORY
536#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
537#undef TARGET_MUST_PASS_IN_STACK
538#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
539#undef TARGET_PASS_BY_REFERENCE
540#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
541#undef TARGET_ARG_PARTIAL_BYTES
542#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
543
544#undef TARGET_EXPAND_BUILTIN_SAVEREGS
545#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
546#undef TARGET_STRICT_ARGUMENT_NAMING
547#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
548
549#undef TARGET_EXPAND_BUILTIN_VA_START
550#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
551#undef TARGET_GIMPLIFY_VA_ARG_EXPR
552#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
553
554#undef TARGET_VECTOR_MODE_SUPPORTED_P
555#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
556
557#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
558#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec
559
560#ifdef SUBTARGET_INSERT_ATTRIBUTES
561#undef TARGET_INSERT_ATTRIBUTES
562#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
563#endif
564
565#ifdef SUBTARGET_ATTRIBUTE_TABLE
566#undef TARGET_ATTRIBUTE_TABLE
567#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
568#endif
569
570#undef TARGET_RELAXED_ORDERING
571#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
572
573#undef TARGET_DEFAULT_TARGET_FLAGS
574#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
575#undef TARGET_HANDLE_OPTION
576#define TARGET_HANDLE_OPTION sparc_handle_option
577
578#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
579#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
580#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
581#endif
582
583#undef TARGET_ASM_FILE_END
584#define TARGET_ASM_FILE_END sparc_file_end
585
586#undef TARGET_FRAME_POINTER_REQUIRED
587#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
588
589#undef TARGET_CAN_ELIMINATE
590#define TARGET_CAN_ELIMINATE sparc_can_eliminate
591
592#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
593#undef TARGET_MANGLE_TYPE
594#define TARGET_MANGLE_TYPE sparc_mangle_type
595#endif
596
597#undef TARGET_LEGITIMATE_ADDRESS_P
598#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
599
600#undef TARGET_TRAMPOLINE_INIT
601#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
602
603struct gcc_target targetm = TARGET_INITIALIZER;
604
605/* Implement TARGET_HANDLE_OPTION.  */
606
607static bool
608sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
609{
610  switch (code)
611    {
612    case OPT_mfpu:
613    case OPT_mhard_float:
614    case OPT_msoft_float:
615      fpu_option_set = true;
616      break;
617
618    case OPT_mcpu_:
619      sparc_select[1].string = arg;
620      break;
621
622    case OPT_mtune_:
623      sparc_select[2].string = arg;
624      break;
625    }
626
627  return true;
628}
629
630/* Validate and override various options, and do some machine dependent
631   initialization.  */
632
633void
634sparc_override_options (void)
635{
636  static struct code_model {
637    const char *const name;
638    const enum cmodel value;
639  } const cmodels[] = {
640    { "32", CM_32 },
641    { "medlow", CM_MEDLOW },
642    { "medmid", CM_MEDMID },
643    { "medany", CM_MEDANY },
644    { "embmedany", CM_EMBMEDANY },
645    { NULL, (enum cmodel) 0 }
646  };
647  const struct code_model *cmodel;
648  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
649  static struct cpu_default {
650    const int cpu;
651    const char *const name;
652  } const cpu_default[] = {
653    /* There must be one entry here for each TARGET_CPU value.  */
654    { TARGET_CPU_sparc, "cypress" },
655    { TARGET_CPU_sparclet, "tsc701" },
656    { TARGET_CPU_sparclite, "f930" },
657    { TARGET_CPU_v8, "v8" },
658    { TARGET_CPU_hypersparc, "hypersparc" },
659    { TARGET_CPU_sparclite86x, "sparclite86x" },
660    { TARGET_CPU_supersparc, "supersparc" },
661    { TARGET_CPU_v9, "v9" },
662    { TARGET_CPU_ultrasparc, "ultrasparc" },
663    { TARGET_CPU_ultrasparc3, "ultrasparc3" },
664    { TARGET_CPU_niagara, "niagara" },
665    { TARGET_CPU_niagara2, "niagara2" },
666    { 0, 0 }
667  };
668  const struct cpu_default *def;
669  /* Table of values for -m{cpu,tune}=.  */
670  static struct cpu_table {
671    const char *const name;
672    const enum processor_type processor;
673    const int disable;
674    const int enable;
675  } const cpu_table[] = {
676    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
677    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
678    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
679    /* TI TMS390Z55 supersparc */
680    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
681    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
682    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
683       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
684    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
685    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
686    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
687    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
688      MASK_SPARCLITE },
689    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
690    /* TEMIC sparclet */
691    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
692    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
693    /* TI ultrasparc I, II, IIi */
694    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
695    /* Although insns using %y are deprecated, it is a clear win on current
696       ultrasparcs.  */
697    						    |MASK_DEPRECATED_V8_INSNS},
698    /* TI ultrasparc III */
699    /* ??? Check if %y issue still holds true in ultra3.  */
700    { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
701    /* UltraSPARC T1 */
702    { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
703    { "niagara2", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9},
704    { 0, (enum processor_type) 0, 0, 0 }
705  };
706  const struct cpu_table *cpu;
707  const struct sparc_cpu_select *sel;
708  int fpu;
709
710#ifndef SPARC_BI_ARCH
711  /* Check for unsupported architecture size.  */
712  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
713    error ("%s is not supported by this configuration",
714	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
715#endif
716
717  /* We force all 64bit archs to use 128 bit long double */
718  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
719    {
720      error ("-mlong-double-64 not allowed with -m64");
721      target_flags |= MASK_LONG_DOUBLE_128;
722    }
723
724  /* Code model selection.  */
725  sparc_cmodel = SPARC_DEFAULT_CMODEL;
726
727#ifdef SPARC_BI_ARCH
728  if (TARGET_ARCH32)
729    sparc_cmodel = CM_32;
730#endif
731
732  if (sparc_cmodel_string != NULL)
733    {
734      if (TARGET_ARCH64)
735	{
736	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
737	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
738	      break;
739	  if (cmodel->name == NULL)
740	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
741	  else
742	    sparc_cmodel = cmodel->value;
743	}
744      else
745	error ("-mcmodel= is not supported on 32 bit systems");
746    }
747
748  fpu = target_flags & MASK_FPU; /* save current -mfpu status */
749
750  /* Set the default CPU.  */
751  for (def = &cpu_default[0]; def->name; ++def)
752    if (def->cpu == TARGET_CPU_DEFAULT)
753      break;
754  gcc_assert (def->name);
755  sparc_select[0].string = def->name;
756
757  for (sel = &sparc_select[0]; sel->name; ++sel)
758    {
759      if (sel->string)
760	{
761	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
762	    if (! strcmp (sel->string, cpu->name))
763	      {
764		if (sel->set_tune_p)
765		  sparc_cpu = cpu->processor;
766
767		if (sel->set_arch_p)
768		  {
769		    target_flags &= ~cpu->disable;
770		    target_flags |= cpu->enable;
771		  }
772		break;
773	      }
774
775	  if (! cpu->name)
776	    error ("bad value (%s) for %s switch", sel->string, sel->name);
777	}
778    }
779
780  /* If -mfpu or -mno-fpu was explicitly used, don't override with
781     the processor default.  */
782  if (fpu_option_set)
783    target_flags = (target_flags & ~MASK_FPU) | fpu;
784
785  /* Don't allow -mvis if FPU is disabled.  */
786  if (! TARGET_FPU)
787    target_flags &= ~MASK_VIS;
788
789  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
790     are available.
791     -m64 also implies v9.  */
792  if (TARGET_VIS || TARGET_ARCH64)
793    {
794      target_flags |= MASK_V9;
795      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
796    }
797
798  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
799  if (TARGET_V9 && TARGET_ARCH32)
800    target_flags |= MASK_DEPRECATED_V8_INSNS;
801
802  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
803  if (! TARGET_V9 || TARGET_ARCH64)
804    target_flags &= ~MASK_V8PLUS;
805
806  /* Don't use stack biasing in 32 bit mode.  */
807  if (TARGET_ARCH32)
808    target_flags &= ~MASK_STACK_BIAS;
809
810  /* Supply a default value for align_functions.  */
811  if (align_functions == 0
812      && (sparc_cpu == PROCESSOR_ULTRASPARC
813	  || sparc_cpu == PROCESSOR_ULTRASPARC3
814	  || sparc_cpu == PROCESSOR_NIAGARA
815	  || sparc_cpu == PROCESSOR_NIAGARA2))
816    align_functions = 32;
817
818  /* Validate PCC_STRUCT_RETURN.  */
819  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
820    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
821
822  /* Only use .uaxword when compiling for a 64-bit target.  */
823  if (!TARGET_ARCH64)
824    targetm.asm_out.unaligned_op.di = NULL;
825
826  /* Do various machine dependent initializations.  */
827  sparc_init_modes ();
828
829  /* Acquire unique alias sets for our private stuff.  */
830  sparc_sr_alias_set = new_alias_set ();
831  struct_value_alias_set = new_alias_set ();
832
833  /* Set up function hooks.  */
834  init_machine_status = sparc_init_machine_status;
835
836  switch (sparc_cpu)
837    {
838    case PROCESSOR_V7:
839    case PROCESSOR_CYPRESS:
840      sparc_costs = &cypress_costs;
841      break;
842    case PROCESSOR_V8:
843    case PROCESSOR_SPARCLITE:
844    case PROCESSOR_SUPERSPARC:
845      sparc_costs = &supersparc_costs;
846      break;
847    case PROCESSOR_F930:
848    case PROCESSOR_F934:
849    case PROCESSOR_HYPERSPARC:
850    case PROCESSOR_SPARCLITE86X:
851      sparc_costs = &hypersparc_costs;
852      break;
853    case PROCESSOR_SPARCLET:
854    case PROCESSOR_TSC701:
855      sparc_costs = &sparclet_costs;
856      break;
857    case PROCESSOR_V9:
858    case PROCESSOR_ULTRASPARC:
859      sparc_costs = &ultrasparc_costs;
860      break;
861    case PROCESSOR_ULTRASPARC3:
862      sparc_costs = &ultrasparc3_costs;
863      break;
864    case PROCESSOR_NIAGARA:
865      sparc_costs = &niagara_costs;
866      break;
867    case PROCESSOR_NIAGARA2:
868      sparc_costs = &niagara2_costs;
869      break;
870    };
871
872#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
873  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
874    target_flags |= MASK_LONG_DOUBLE_128;
875#endif
876
877  if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
878    set_param_value ("simultaneous-prefetches",
879		     ((sparc_cpu == PROCESSOR_ULTRASPARC
880		       || sparc_cpu == PROCESSOR_NIAGARA
881		       || sparc_cpu == PROCESSOR_NIAGARA2)
882		      ? 2
883		      : (sparc_cpu == PROCESSOR_ULTRASPARC3
884			 ? 8 : 3)));
885  if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
886    set_param_value ("l1-cache-line-size",
887		     ((sparc_cpu == PROCESSOR_ULTRASPARC
888		       || sparc_cpu == PROCESSOR_ULTRASPARC3
889		       || sparc_cpu == PROCESSOR_NIAGARA
890		       || sparc_cpu == PROCESSOR_NIAGARA2)
891		      ? 64 : 32));
892}
893
894/* Miscellaneous utilities.  */
895
896/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
897   or branch on register contents instructions.  */
898
899int
900v9_regcmp_p (enum rtx_code code)
901{
902  return (code == EQ || code == NE || code == GE || code == LT
903	  || code == LE || code == GT);
904}
905
906/* Nonzero if OP is a floating point constant which can
907   be loaded into an integer register using a single
908   sethi instruction.  */
909
910int
911fp_sethi_p (rtx op)
912{
913  if (GET_CODE (op) == CONST_DOUBLE)
914    {
915      REAL_VALUE_TYPE r;
916      long i;
917
918      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
919      REAL_VALUE_TO_TARGET_SINGLE (r, i);
920      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
921    }
922
923  return 0;
924}
925
926/* Nonzero if OP is a floating point constant which can
927   be loaded into an integer register using a single
928   mov instruction.  */
929
930int
931fp_mov_p (rtx op)
932{
933  if (GET_CODE (op) == CONST_DOUBLE)
934    {
935      REAL_VALUE_TYPE r;
936      long i;
937
938      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
939      REAL_VALUE_TO_TARGET_SINGLE (r, i);
940      return SPARC_SIMM13_P (i);
941    }
942
943  return 0;
944}
945
946/* Nonzero if OP is a floating point constant which can
947   be loaded into an integer register using a high/losum
948   instruction sequence.  */
949
950int
951fp_high_losum_p (rtx op)
952{
953  /* The constraints calling this should only be in
954     SFmode move insns, so any constant which cannot
955     be moved using a single insn will do.  */
956  if (GET_CODE (op) == CONST_DOUBLE)
957    {
958      REAL_VALUE_TYPE r;
959      long i;
960
961      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
962      REAL_VALUE_TO_TARGET_SINGLE (r, i);
963      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
964    }
965
966  return 0;
967}
968
969/* Return true if the address of LABEL can be loaded by means of the
970   mov{si,di}_pic_label_ref patterns in PIC mode.  */
971
972static bool
973can_use_mov_pic_label_ref (rtx label)
974{
975  /* VxWorks does not impose a fixed gap between segments; the run-time
976     gap can be different from the object-file gap.  We therefore can't
977     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
978     are absolutely sure that X is in the same segment as the GOT.
979     Unfortunately, the flexibility of linker scripts means that we
980     can't be sure of that in general, so assume that GOT-relative
981     accesses are never valid on VxWorks.  */
982  if (TARGET_VXWORKS_RTP)
983    return false;
984
985  /* Similarly, if the label is non-local, it might end up being placed
986     in a different section than the current one; now mov_pic_label_ref
987     requires the label and the code to be in the same section.  */
988  if (LABEL_REF_NONLOCAL_P (label))
989    return false;
990
991  /* Finally, if we are reordering basic blocks and partition into hot
992     and cold sections, this might happen for any label.  */
993  if (flag_reorder_blocks_and_partition)
994    return false;
995
996  return true;
997}
998
999/* Expand a move instruction.  Return true if all work is done.  */
1000
1001bool
1002sparc_expand_move (enum machine_mode mode, rtx *operands)
1003{
1004  /* Handle sets of MEM first.  */
1005  if (GET_CODE (operands[0]) == MEM)
1006    {
1007      /* 0 is a register (or a pair of registers) on SPARC.  */
1008      if (register_or_zero_operand (operands[1], mode))
1009	return false;
1010
1011      if (!reload_in_progress)
1012	{
1013	  operands[0] = validize_mem (operands[0]);
1014	  operands[1] = force_reg (mode, operands[1]);
1015	}
1016    }
1017
1018  /* Fixup TLS cases.  */
1019  if (TARGET_HAVE_TLS
1020      && CONSTANT_P (operands[1])
1021      && sparc_tls_referenced_p (operands [1]))
1022    {
1023      operands[1] = legitimize_tls_address (operands[1]);
1024      return false;
1025    }
1026
1027  /* Fixup PIC cases.  */
1028  if (flag_pic && CONSTANT_P (operands[1]))
1029    {
1030      if (pic_address_needs_scratch (operands[1]))
1031	operands[1] = legitimize_pic_address (operands[1], NULL_RTX);
1032
1033      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
1034      if (GET_CODE (operands[1]) == LABEL_REF
1035	  && can_use_mov_pic_label_ref (operands[1]))
1036	{
1037	  if (mode == SImode)
1038	    {
1039	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1040	      return true;
1041	    }
1042
1043	  if (mode == DImode)
1044	    {
1045	      gcc_assert (TARGET_ARCH64);
1046	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1047	      return true;
1048	    }
1049	}
1050
1051      if (symbolic_operand (operands[1], mode))
1052	{
1053	  operands[1] = legitimize_pic_address (operands[1],
1054						reload_in_progress
1055						? operands[0] : NULL_RTX);
1056	  return false;
1057	}
1058    }
1059
1060  /* If we are trying to toss an integer constant into FP registers,
1061     or loading a FP or vector constant, force it into memory.  */
1062  if (CONSTANT_P (operands[1])
1063      && REG_P (operands[0])
1064      && (SPARC_FP_REG_P (REGNO (operands[0]))
1065	  || SCALAR_FLOAT_MODE_P (mode)
1066	  || VECTOR_MODE_P (mode)))
1067    {
1068      /* emit_group_store will send such bogosity to us when it is
1069         not storing directly into memory.  So fix this up to avoid
1070         crashes in output_constant_pool.  */
1071      if (operands [1] == const0_rtx)
1072	operands[1] = CONST0_RTX (mode);
1073
1074      /* We can clear FP registers if TARGET_VIS, and always other regs.  */
1075      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1076	  && const_zero_operand (operands[1], mode))
1077	return false;
1078
1079      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1080	  /* We are able to build any SF constant in integer registers
1081	     with at most 2 instructions.  */
1082	  && (mode == SFmode
1083	      /* And any DF constant in integer registers.  */
1084	      || (mode == DFmode
1085		  && (reload_completed || reload_in_progress))))
1086	return false;
1087
1088      operands[1] = force_const_mem (mode, operands[1]);
1089      if (!reload_in_progress)
1090	operands[1] = validize_mem (operands[1]);
1091      return false;
1092    }
1093
1094  /* Accept non-constants and valid constants unmodified.  */
1095  if (!CONSTANT_P (operands[1])
1096      || GET_CODE (operands[1]) == HIGH
1097      || input_operand (operands[1], mode))
1098    return false;
1099
1100  switch (mode)
1101    {
1102    case QImode:
1103      /* All QImode constants require only one insn, so proceed.  */
1104      break;
1105
1106    case HImode:
1107    case SImode:
1108      sparc_emit_set_const32 (operands[0], operands[1]);
1109      return true;
1110
1111    case DImode:
1112      /* input_operand should have filtered out 32-bit mode.  */
1113      sparc_emit_set_const64 (operands[0], operands[1]);
1114      return true;
1115
1116    default:
1117      gcc_unreachable ();
1118    }
1119
1120  return false;
1121}
1122
1123/* Load OP1, a 32-bit constant, into OP0, a register.
1124   We know it can't be done in one insn when we get
1125   here, the move expander guarantees this.  */
1126
1127void
1128sparc_emit_set_const32 (rtx op0, rtx op1)
1129{
1130  enum machine_mode mode = GET_MODE (op0);
1131  rtx temp;
1132
1133  if (reload_in_progress || reload_completed)
1134    temp = op0;
1135  else
1136    temp = gen_reg_rtx (mode);
1137
1138  if (GET_CODE (op1) == CONST_INT)
1139    {
1140      gcc_assert (!small_int_operand (op1, mode)
1141		  && !const_high_operand (op1, mode));
1142
1143      /* Emit them as real moves instead of a HIGH/LO_SUM,
1144	 this way CSE can see everything and reuse intermediate
1145	 values if it wants.  */
1146      emit_insn (gen_rtx_SET (VOIDmode, temp,
1147			      GEN_INT (INTVAL (op1)
1148			        & ~(HOST_WIDE_INT)0x3ff)));
1149
1150      emit_insn (gen_rtx_SET (VOIDmode,
1151			      op0,
1152			      gen_rtx_IOR (mode, temp,
1153					   GEN_INT (INTVAL (op1) & 0x3ff))));
1154    }
1155  else
1156    {
1157      /* A symbol, emit in the traditional way.  */
1158      emit_insn (gen_rtx_SET (VOIDmode, temp,
1159			      gen_rtx_HIGH (mode, op1)));
1160      emit_insn (gen_rtx_SET (VOIDmode,
1161			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
1162    }
1163}
1164
1165/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1166   If TEMP is nonzero, we are forbidden to use any other scratch
1167   registers.  Otherwise, we are allowed to generate them as needed.
1168
1169   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1170   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
1171
1172void
1173sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1174{
1175  rtx temp1, temp2, temp3, temp4, temp5;
1176  rtx ti_temp = 0;
1177
1178  if (temp && GET_MODE (temp) == TImode)
1179    {
1180      ti_temp = temp;
1181      temp = gen_rtx_REG (DImode, REGNO (temp));
1182    }
1183
1184  /* SPARC-V9 code-model support.  */
1185  switch (sparc_cmodel)
1186    {
1187    case CM_MEDLOW:
1188      /* The range spanned by all instructions in the object is less
1189	 than 2^31 bytes (2GB) and the distance from any instruction
1190	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1191	 than 2^31 bytes (2GB).
1192
1193	 The executable must be in the low 4TB of the virtual address
1194	 space.
1195
1196	 sethi	%hi(symbol), %temp1
1197	 or	%temp1, %lo(symbol), %reg  */
1198      if (temp)
1199	temp1 = temp;  /* op0 is allowed.  */
1200      else
1201	temp1 = gen_reg_rtx (DImode);
1202
1203      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1204      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1205      break;
1206
1207    case CM_MEDMID:
1208      /* The range spanned by all instructions in the object is less
1209	 than 2^31 bytes (2GB) and the distance from any instruction
1210	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1211	 than 2^31 bytes (2GB).
1212
1213	 The executable must be in the low 16TB of the virtual address
1214	 space.
1215
1216	 sethi	%h44(symbol), %temp1
1217	 or	%temp1, %m44(symbol), %temp2
1218	 sllx	%temp2, 12, %temp3
1219	 or	%temp3, %l44(symbol), %reg  */
1220      if (temp)
1221	{
1222	  temp1 = op0;
1223	  temp2 = op0;
1224	  temp3 = temp;  /* op0 is allowed.  */
1225	}
1226      else
1227	{
1228	  temp1 = gen_reg_rtx (DImode);
1229	  temp2 = gen_reg_rtx (DImode);
1230	  temp3 = gen_reg_rtx (DImode);
1231	}
1232
1233      emit_insn (gen_seth44 (temp1, op1));
1234      emit_insn (gen_setm44 (temp2, temp1, op1));
1235      emit_insn (gen_rtx_SET (VOIDmode, temp3,
1236			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1237      emit_insn (gen_setl44 (op0, temp3, op1));
1238      break;
1239
1240    case CM_MEDANY:
1241      /* The range spanned by all instructions in the object is less
1242	 than 2^31 bytes (2GB) and the distance from any instruction
1243	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1244	 than 2^31 bytes (2GB).
1245
1246	 The executable can be placed anywhere in the virtual address
1247	 space.
1248
1249	 sethi	%hh(symbol), %temp1
1250	 sethi	%lm(symbol), %temp2
1251	 or	%temp1, %hm(symbol), %temp3
1252	 sllx	%temp3, 32, %temp4
1253	 or	%temp4, %temp2, %temp5
1254	 or	%temp5, %lo(symbol), %reg  */
1255      if (temp)
1256	{
1257	  /* It is possible that one of the registers we got for operands[2]
1258	     might coincide with that of operands[0] (which is why we made
1259	     it TImode).  Pick the other one to use as our scratch.  */
1260	  if (rtx_equal_p (temp, op0))
1261	    {
1262	      gcc_assert (ti_temp);
1263	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1264	    }
1265	  temp1 = op0;
1266	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
1267	  temp3 = op0;
1268	  temp4 = op0;
1269	  temp5 = op0;
1270	}
1271      else
1272	{
1273	  temp1 = gen_reg_rtx (DImode);
1274	  temp2 = gen_reg_rtx (DImode);
1275	  temp3 = gen_reg_rtx (DImode);
1276	  temp4 = gen_reg_rtx (DImode);
1277	  temp5 = gen_reg_rtx (DImode);
1278	}
1279
1280      emit_insn (gen_sethh (temp1, op1));
1281      emit_insn (gen_setlm (temp2, op1));
1282      emit_insn (gen_sethm (temp3, temp1, op1));
1283      emit_insn (gen_rtx_SET (VOIDmode, temp4,
1284			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1285      emit_insn (gen_rtx_SET (VOIDmode, temp5,
1286			      gen_rtx_PLUS (DImode, temp4, temp2)));
1287      emit_insn (gen_setlo (op0, temp5, op1));
1288      break;
1289
1290    case CM_EMBMEDANY:
1291      /* Old old old backwards compatibility kruft here.
1292	 Essentially it is MEDLOW with a fixed 64-bit
1293	 virtual base added to all data segment addresses.
1294	 Text-segment stuff is computed like MEDANY, we can't
1295	 reuse the code above because the relocation knobs
1296	 look different.
1297
1298	 Data segment:	sethi	%hi(symbol), %temp1
1299			add	%temp1, EMBMEDANY_BASE_REG, %temp2
1300			or	%temp2, %lo(symbol), %reg  */
1301      if (data_segment_operand (op1, GET_MODE (op1)))
1302	{
1303	  if (temp)
1304	    {
1305	      temp1 = temp;  /* op0 is allowed.  */
1306	      temp2 = op0;
1307	    }
1308	  else
1309	    {
1310	      temp1 = gen_reg_rtx (DImode);
1311	      temp2 = gen_reg_rtx (DImode);
1312	    }
1313
1314	  emit_insn (gen_embmedany_sethi (temp1, op1));
1315	  emit_insn (gen_embmedany_brsum (temp2, temp1));
1316	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
1317	}
1318
1319      /* Text segment:	sethi	%uhi(symbol), %temp1
1320			sethi	%hi(symbol), %temp2
1321			or	%temp1, %ulo(symbol), %temp3
1322			sllx	%temp3, 32, %temp4
1323			or	%temp4, %temp2, %temp5
1324			or	%temp5, %lo(symbol), %reg  */
1325      else
1326	{
1327	  if (temp)
1328	    {
1329	      /* It is possible that one of the registers we got for operands[2]
1330		 might coincide with that of operands[0] (which is why we made
1331		 it TImode).  Pick the other one to use as our scratch.  */
1332	      if (rtx_equal_p (temp, op0))
1333		{
1334		  gcc_assert (ti_temp);
1335		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1336		}
1337	      temp1 = op0;
1338	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
1339	      temp3 = op0;
1340	      temp4 = op0;
1341	      temp5 = op0;
1342	    }
1343	  else
1344	    {
1345	      temp1 = gen_reg_rtx (DImode);
1346	      temp2 = gen_reg_rtx (DImode);
1347	      temp3 = gen_reg_rtx (DImode);
1348	      temp4 = gen_reg_rtx (DImode);
1349	      temp5 = gen_reg_rtx (DImode);
1350	    }
1351
1352	  emit_insn (gen_embmedany_textuhi (temp1, op1));
1353	  emit_insn (gen_embmedany_texthi  (temp2, op1));
1354	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1355	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
1356				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1357	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
1358				  gen_rtx_PLUS (DImode, temp4, temp2)));
1359	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
1360	}
1361      break;
1362
1363    default:
1364      gcc_unreachable ();
1365    }
1366}
1367
1368#if HOST_BITS_PER_WIDE_INT == 32
1369void
1370sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1371{
1372  gcc_unreachable ();
1373}
1374#else
1375/* These avoid problems when cross compiling.  If we do not
1376   go through all this hair then the optimizer will see
1377   invalid REG_EQUAL notes or in some cases none at all.  */
1378static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1379static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1380static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1381static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1382
1383/* The optimizer is not to assume anything about exactly
1384   which bits are set for a HIGH, they are unspecified.
1385   Unfortunately this leads to many missed optimizations
1386   during CSE.  We mask out the non-HIGH bits, and matches
1387   a plain movdi, to alleviate this problem.  */
1388static rtx
1389gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1390{
1391  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1392}
1393
1394static rtx
1395gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1396{
1397  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1398}
1399
1400static rtx
1401gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1402{
1403  return gen_rtx_IOR (DImode, src, GEN_INT (val));
1404}
1405
1406static rtx
1407gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1408{
1409  return gen_rtx_XOR (DImode, src, GEN_INT (val));
1410}
1411
1412/* Worker routines for 64-bit constant formation on arch64.
1413   One of the key things to be doing in these emissions is
1414   to create as many temp REGs as possible.  This makes it
1415   possible for half-built constants to be used later when
1416   such values are similar to something required later on.
1417   Without doing this, the optimizer cannot see such
1418   opportunities.  */
1419
1420static void sparc_emit_set_const64_quick1 (rtx, rtx,
1421					   unsigned HOST_WIDE_INT, int);
1422
1423static void
1424sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1425			       unsigned HOST_WIDE_INT low_bits, int is_neg)
1426{
1427  unsigned HOST_WIDE_INT high_bits;
1428
1429  if (is_neg)
1430    high_bits = (~low_bits) & 0xffffffff;
1431  else
1432    high_bits = low_bits;
1433
1434  emit_insn (gen_safe_HIGH64 (temp, high_bits));
1435  if (!is_neg)
1436    {
1437      emit_insn (gen_rtx_SET (VOIDmode, op0,
1438			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1439    }
1440  else
1441    {
1442      /* If we are XOR'ing with -1, then we should emit a one's complement
1443	 instead.  This way the combiner will notice logical operations
1444	 such as ANDN later on and substitute.  */
1445      if ((low_bits & 0x3ff) == 0x3ff)
1446	{
1447	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1448				  gen_rtx_NOT (DImode, temp)));
1449	}
1450      else
1451	{
1452	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1453				  gen_safe_XOR64 (temp,
1454						  (-(HOST_WIDE_INT)0x400
1455						   | (low_bits & 0x3ff)))));
1456	}
1457    }
1458}
1459
1460static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1461					   unsigned HOST_WIDE_INT, int);
1462
1463static void
1464sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1465			       unsigned HOST_WIDE_INT high_bits,
1466			       unsigned HOST_WIDE_INT low_immediate,
1467			       int shift_count)
1468{
1469  rtx temp2 = op0;
1470
1471  if ((high_bits & 0xfffffc00) != 0)
1472    {
1473      emit_insn (gen_safe_HIGH64 (temp, high_bits));
1474      if ((high_bits & ~0xfffffc00) != 0)
1475	emit_insn (gen_rtx_SET (VOIDmode, op0,
1476				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1477      else
1478	temp2 = temp;
1479    }
1480  else
1481    {
1482      emit_insn (gen_safe_SET64 (temp, high_bits));
1483      temp2 = temp;
1484    }
1485
1486  /* Now shift it up into place.  */
1487  emit_insn (gen_rtx_SET (VOIDmode, op0,
1488			  gen_rtx_ASHIFT (DImode, temp2,
1489					  GEN_INT (shift_count))));
1490
1491  /* If there is a low immediate part piece, finish up by
1492     putting that in as well.  */
1493  if (low_immediate != 0)
1494    emit_insn (gen_rtx_SET (VOIDmode, op0,
1495			    gen_safe_OR64 (op0, low_immediate)));
1496}
1497
1498static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1499					    unsigned HOST_WIDE_INT);
1500
1501/* Full 64-bit constant decomposition.  Even though this is the
1502   'worst' case, we still optimize a few things away.  */
1503static void
1504sparc_emit_set_const64_longway (rtx op0, rtx temp,
1505				unsigned HOST_WIDE_INT high_bits,
1506				unsigned HOST_WIDE_INT low_bits)
1507{
1508  rtx sub_temp;
1509
1510  if (reload_in_progress || reload_completed)
1511    sub_temp = op0;
1512  else
1513    sub_temp = gen_reg_rtx (DImode);
1514
1515  if ((high_bits & 0xfffffc00) != 0)
1516    {
1517      emit_insn (gen_safe_HIGH64 (temp, high_bits));
1518      if ((high_bits & ~0xfffffc00) != 0)
1519	emit_insn (gen_rtx_SET (VOIDmode,
1520				sub_temp,
1521				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1522      else
1523	sub_temp = temp;
1524    }
1525  else
1526    {
1527      emit_insn (gen_safe_SET64 (temp, high_bits));
1528      sub_temp = temp;
1529    }
1530
1531  if (!reload_in_progress && !reload_completed)
1532    {
1533      rtx temp2 = gen_reg_rtx (DImode);
1534      rtx temp3 = gen_reg_rtx (DImode);
1535      rtx temp4 = gen_reg_rtx (DImode);
1536
1537      emit_insn (gen_rtx_SET (VOIDmode, temp4,
1538			      gen_rtx_ASHIFT (DImode, sub_temp,
1539					      GEN_INT (32))));
1540
1541      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1542      if ((low_bits & ~0xfffffc00) != 0)
1543	{
1544	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
1545				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1546	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1547				  gen_rtx_PLUS (DImode, temp4, temp3)));
1548	}
1549      else
1550	{
1551	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1552				  gen_rtx_PLUS (DImode, temp4, temp2)));
1553	}
1554    }
1555  else
1556    {
1557      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
1558      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
1559      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1560      int to_shift = 12;
1561
1562      /* We are in the middle of reload, so this is really
1563	 painful.  However we do still make an attempt to
1564	 avoid emitting truly stupid code.  */
1565      if (low1 != const0_rtx)
1566	{
1567	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1568				  gen_rtx_ASHIFT (DImode, sub_temp,
1569						  GEN_INT (to_shift))));
1570	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1571				  gen_rtx_IOR (DImode, op0, low1)));
1572	  sub_temp = op0;
1573	  to_shift = 12;
1574	}
1575      else
1576	{
1577	  to_shift += 12;
1578	}
1579      if (low2 != const0_rtx)
1580	{
1581	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1582				  gen_rtx_ASHIFT (DImode, sub_temp,
1583						  GEN_INT (to_shift))));
1584	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1585				  gen_rtx_IOR (DImode, op0, low2)));
1586	  sub_temp = op0;
1587	  to_shift = 8;
1588	}
1589      else
1590	{
1591	  to_shift += 8;
1592	}
1593      emit_insn (gen_rtx_SET (VOIDmode, op0,
1594			      gen_rtx_ASHIFT (DImode, sub_temp,
1595					      GEN_INT (to_shift))));
1596      if (low3 != const0_rtx)
1597	emit_insn (gen_rtx_SET (VOIDmode, op0,
1598				gen_rtx_IOR (DImode, op0, low3)));
1599      /* phew...  */
1600    }
1601}
1602
1603/* Analyze a 64-bit constant for certain properties.  */
1604static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1605				    unsigned HOST_WIDE_INT,
1606				    int *, int *, int *);
1607
1608static void
1609analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1610			unsigned HOST_WIDE_INT low_bits,
1611			int *hbsp, int *lbsp, int *abbasp)
1612{
1613  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1614  int i;
1615
1616  lowest_bit_set = highest_bit_set = -1;
1617  i = 0;
1618  do
1619    {
1620      if ((lowest_bit_set == -1)
1621	  && ((low_bits >> i) & 1))
1622	lowest_bit_set = i;
1623      if ((highest_bit_set == -1)
1624	  && ((high_bits >> (32 - i - 1)) & 1))
1625	highest_bit_set = (64 - i - 1);
1626    }
1627  while (++i < 32
1628	 && ((highest_bit_set == -1)
1629	     || (lowest_bit_set == -1)));
1630  if (i == 32)
1631    {
1632      i = 0;
1633      do
1634	{
1635	  if ((lowest_bit_set == -1)
1636	      && ((high_bits >> i) & 1))
1637	    lowest_bit_set = i + 32;
1638	  if ((highest_bit_set == -1)
1639	      && ((low_bits >> (32 - i - 1)) & 1))
1640	    highest_bit_set = 32 - i - 1;
1641	}
1642      while (++i < 32
1643	     && ((highest_bit_set == -1)
1644		 || (lowest_bit_set == -1)));
1645    }
1646  /* If there are no bits set this should have gone out
1647     as one instruction!  */
1648  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1649  all_bits_between_are_set = 1;
1650  for (i = lowest_bit_set; i <= highest_bit_set; i++)
1651    {
1652      if (i < 32)
1653	{
1654	  if ((low_bits & (1 << i)) != 0)
1655	    continue;
1656	}
1657      else
1658	{
1659	  if ((high_bits & (1 << (i - 32))) != 0)
1660	    continue;
1661	}
1662      all_bits_between_are_set = 0;
1663      break;
1664    }
1665  *hbsp = highest_bit_set;
1666  *lbsp = lowest_bit_set;
1667  *abbasp = all_bits_between_are_set;
1668}
1669
1670static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1671
1672static int
1673const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1674		   unsigned HOST_WIDE_INT low_bits)
1675{
1676  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1677
1678  if (high_bits == 0
1679      || high_bits == 0xffffffff)
1680    return 1;
1681
1682  analyze_64bit_constant (high_bits, low_bits,
1683			  &highest_bit_set, &lowest_bit_set,
1684			  &all_bits_between_are_set);
1685
1686  if ((highest_bit_set == 63
1687       || lowest_bit_set == 0)
1688      && all_bits_between_are_set != 0)
1689    return 1;
1690
1691  if ((highest_bit_set - lowest_bit_set) < 21)
1692    return 1;
1693
1694  return 0;
1695}
1696
1697static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1698							unsigned HOST_WIDE_INT,
1699							int, int);
1700
1701static unsigned HOST_WIDE_INT
1702create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1703			  unsigned HOST_WIDE_INT low_bits,
1704			  int lowest_bit_set, int shift)
1705{
1706  HOST_WIDE_INT hi, lo;
1707
1708  if (lowest_bit_set < 32)
1709    {
1710      lo = (low_bits >> lowest_bit_set) << shift;
1711      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1712    }
1713  else
1714    {
1715      lo = 0;
1716      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1717    }
1718  gcc_assert (! (hi & lo));
1719  return (hi | lo);
1720}
1721
1722/* Here we are sure to be arch64 and this is an integer constant
1723   being loaded into a register.  Emit the most efficient
1724   insn sequence possible.  Detection of all the 1-insn cases
1725   has been done already.  */
1726void
1727sparc_emit_set_const64 (rtx op0, rtx op1)
1728{
1729  unsigned HOST_WIDE_INT high_bits, low_bits;
1730  int lowest_bit_set, highest_bit_set;
1731  int all_bits_between_are_set;
1732  rtx temp = 0;
1733
1734  /* Sanity check that we know what we are working with.  */
1735  gcc_assert (TARGET_ARCH64
1736	      && (GET_CODE (op0) == SUBREG
1737		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1738
1739  if (reload_in_progress || reload_completed)
1740    temp = op0;
1741
1742  if (GET_CODE (op1) != CONST_INT)
1743    {
1744      sparc_emit_set_symbolic_const64 (op0, op1, temp);
1745      return;
1746    }
1747
1748  if (! temp)
1749    temp = gen_reg_rtx (DImode);
1750
1751  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1752  low_bits = (INTVAL (op1) & 0xffffffff);
1753
1754  /* low_bits	bits 0  --> 31
1755     high_bits	bits 32 --> 63  */
1756
1757  analyze_64bit_constant (high_bits, low_bits,
1758			  &highest_bit_set, &lowest_bit_set,
1759			  &all_bits_between_are_set);
1760
1761  /* First try for a 2-insn sequence.  */
1762
1763  /* These situations are preferred because the optimizer can
1764   * do more things with them:
1765   * 1) mov	-1, %reg
1766   *    sllx	%reg, shift, %reg
1767   * 2) mov	-1, %reg
1768   *    srlx	%reg, shift, %reg
1769   * 3) mov	some_small_const, %reg
1770   *    sllx	%reg, shift, %reg
1771   */
1772  if (((highest_bit_set == 63
1773	|| lowest_bit_set == 0)
1774       && all_bits_between_are_set != 0)
1775      || ((highest_bit_set - lowest_bit_set) < 12))
1776    {
1777      HOST_WIDE_INT the_const = -1;
1778      int shift = lowest_bit_set;
1779
1780      if ((highest_bit_set != 63
1781	   && lowest_bit_set != 0)
1782	  || all_bits_between_are_set == 0)
1783	{
1784	  the_const =
1785	    create_simple_focus_bits (high_bits, low_bits,
1786				      lowest_bit_set, 0);
1787	}
1788      else if (lowest_bit_set == 0)
1789	shift = -(63 - highest_bit_set);
1790
1791      gcc_assert (SPARC_SIMM13_P (the_const));
1792      gcc_assert (shift != 0);
1793
1794      emit_insn (gen_safe_SET64 (temp, the_const));
1795      if (shift > 0)
1796	emit_insn (gen_rtx_SET (VOIDmode,
1797				op0,
1798				gen_rtx_ASHIFT (DImode,
1799						temp,
1800						GEN_INT (shift))));
1801      else if (shift < 0)
1802	emit_insn (gen_rtx_SET (VOIDmode,
1803				op0,
1804				gen_rtx_LSHIFTRT (DImode,
1805						  temp,
1806						  GEN_INT (-shift))));
1807      return;
1808    }
1809
1810  /* Now a range of 22 or less bits set somewhere.
1811   * 1) sethi	%hi(focus_bits), %reg
1812   *    sllx	%reg, shift, %reg
1813   * 2) sethi	%hi(focus_bits), %reg
1814   *    srlx	%reg, shift, %reg
1815   */
1816  if ((highest_bit_set - lowest_bit_set) < 21)
1817    {
1818      unsigned HOST_WIDE_INT focus_bits =
1819	create_simple_focus_bits (high_bits, low_bits,
1820				  lowest_bit_set, 10);
1821
1822      gcc_assert (SPARC_SETHI_P (focus_bits));
1823      gcc_assert (lowest_bit_set != 10);
1824
1825      emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1826
1827      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1828      if (lowest_bit_set < 10)
1829	emit_insn (gen_rtx_SET (VOIDmode,
1830				op0,
1831				gen_rtx_LSHIFTRT (DImode, temp,
1832						  GEN_INT (10 - lowest_bit_set))));
1833      else if (lowest_bit_set > 10)
1834	emit_insn (gen_rtx_SET (VOIDmode,
1835				op0,
1836				gen_rtx_ASHIFT (DImode, temp,
1837						GEN_INT (lowest_bit_set - 10))));
1838      return;
1839    }
1840
1841  /* 1) sethi	%hi(low_bits), %reg
1842   *    or	%reg, %lo(low_bits), %reg
1843   * 2) sethi	%hi(~low_bits), %reg
1844   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1845   */
1846  if (high_bits == 0
1847      || high_bits == 0xffffffff)
1848    {
1849      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1850				     (high_bits == 0xffffffff));
1851      return;
1852    }
1853
1854  /* Now, try 3-insn sequences.  */
1855
1856  /* 1) sethi	%hi(high_bits), %reg
1857   *    or	%reg, %lo(high_bits), %reg
1858   *    sllx	%reg, 32, %reg
1859   */
1860  if (low_bits == 0)
1861    {
1862      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1863      return;
1864    }
1865
1866  /* We may be able to do something quick
1867     when the constant is negated, so try that.  */
1868  if (const64_is_2insns ((~high_bits) & 0xffffffff,
1869			 (~low_bits) & 0xfffffc00))
1870    {
1871      /* NOTE: The trailing bits get XOR'd so we need the
1872	 non-negated bits, not the negated ones.  */
1873      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1874
1875      if ((((~high_bits) & 0xffffffff) == 0
1876	   && ((~low_bits) & 0x80000000) == 0)
1877	  || (((~high_bits) & 0xffffffff) == 0xffffffff
1878	      && ((~low_bits) & 0x80000000) != 0))
1879	{
1880	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1881
1882	  if ((SPARC_SETHI_P (fast_int)
1883	       && (~high_bits & 0xffffffff) == 0)
1884	      || SPARC_SIMM13_P (fast_int))
1885	    emit_insn (gen_safe_SET64 (temp, fast_int));
1886	  else
1887	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1888	}
1889      else
1890	{
1891	  rtx negated_const;
1892	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1893				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1894	  sparc_emit_set_const64 (temp, negated_const);
1895	}
1896
1897      /* If we are XOR'ing with -1, then we should emit a one's complement
1898	 instead.  This way the combiner will notice logical operations
1899	 such as ANDN later on and substitute.  */
1900      if (trailing_bits == 0x3ff)
1901	{
1902	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1903				  gen_rtx_NOT (DImode, temp)));
1904	}
1905      else
1906	{
1907	  emit_insn (gen_rtx_SET (VOIDmode,
1908				  op0,
1909				  gen_safe_XOR64 (temp,
1910						  (-0x400 | trailing_bits))));
1911	}
1912      return;
1913    }
1914
1915  /* 1) sethi	%hi(xxx), %reg
1916   *    or	%reg, %lo(xxx), %reg
1917   *	sllx	%reg, yyy, %reg
1918   *
1919   * ??? This is just a generalized version of the low_bits==0
1920   * thing above, FIXME...
1921   */
1922  if ((highest_bit_set - lowest_bit_set) < 32)
1923    {
1924      unsigned HOST_WIDE_INT focus_bits =
1925	create_simple_focus_bits (high_bits, low_bits,
1926				  lowest_bit_set, 0);
1927
1928      /* We can't get here in this state.  */
1929      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1930
1931      /* So what we know is that the set bits straddle the
1932	 middle of the 64-bit word.  */
1933      sparc_emit_set_const64_quick2 (op0, temp,
1934				     focus_bits, 0,
1935				     lowest_bit_set);
1936      return;
1937    }
1938
1939  /* 1) sethi	%hi(high_bits), %reg
1940   *    or	%reg, %lo(high_bits), %reg
1941   *    sllx	%reg, 32, %reg
1942   *	or	%reg, low_bits, %reg
1943   */
1944  if (SPARC_SIMM13_P(low_bits)
1945      && ((int)low_bits > 0))
1946    {
1947      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1948      return;
1949    }
1950
1951  /* The easiest way when all else fails, is full decomposition.  */
1952#if 0
1953  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1954	  high_bits, low_bits, ~high_bits, ~low_bits);
1955#endif
1956  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1957}
1958#endif /* HOST_BITS_PER_WIDE_INT == 32 */
1959
1960/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1961   return the mode to be used for the comparison.  For floating-point,
1962   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
1963   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
1964   processing is needed.  */
1965
1966enum machine_mode
1967select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1968{
1969  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1970    {
1971      switch (op)
1972	{
1973	case EQ:
1974	case NE:
1975	case UNORDERED:
1976	case ORDERED:
1977	case UNLT:
1978	case UNLE:
1979	case UNGT:
1980	case UNGE:
1981	case UNEQ:
1982	case LTGT:
1983	  return CCFPmode;
1984
1985	case LT:
1986	case LE:
1987	case GT:
1988	case GE:
1989	  return CCFPEmode;
1990
1991	default:
1992	  gcc_unreachable ();
1993	}
1994    }
1995  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
1996	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
1997    {
1998      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1999	return CCX_NOOVmode;
2000      else
2001	return CC_NOOVmode;
2002    }
2003  else
2004    {
2005      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2006	return CCXmode;
2007      else
2008	return CCmode;
2009    }
2010}
2011
2012/* Emit the compare insn and return the CC reg for a CODE comparison
2013   with operands X and Y.  */
2014
2015static rtx
2016gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2017{
2018  enum machine_mode mode;
2019  rtx cc_reg;
2020
2021  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2022    return x;
2023
2024  mode = SELECT_CC_MODE (code, x, y);
2025
2026  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2027     fcc regs (cse can't tell they're really call clobbered regs and will
2028     remove a duplicate comparison even if there is an intervening function
2029     call - it will then try to reload the cc reg via an int reg which is why
2030     we need the movcc patterns).  It is possible to provide the movcc
2031     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2032     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2033     to tell cse that CCFPE mode registers (even pseudos) are call
2034     clobbered.  */
2035
2036  /* ??? This is an experiment.  Rather than making changes to cse which may
2037     or may not be easy/clean, we do our own cse.  This is possible because
2038     we will generate hard registers.  Cse knows they're call clobbered (it
2039     doesn't know the same thing about pseudos). If we guess wrong, no big
2040     deal, but if we win, great!  */
2041
2042  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2043#if 1 /* experiment */
2044    {
2045      int reg;
2046      /* We cycle through the registers to ensure they're all exercised.  */
2047      static int next_fcc_reg = 0;
2048      /* Previous x,y for each fcc reg.  */
2049      static rtx prev_args[4][2];
2050
2051      /* Scan prev_args for x,y.  */
2052      for (reg = 0; reg < 4; reg++)
2053	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2054	  break;
2055      if (reg == 4)
2056	{
2057	  reg = next_fcc_reg;
2058	  prev_args[reg][0] = x;
2059	  prev_args[reg][1] = y;
2060	  next_fcc_reg = (next_fcc_reg + 1) & 3;
2061	}
2062      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2063    }
2064#else
2065    cc_reg = gen_reg_rtx (mode);
2066#endif /* ! experiment */
2067  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2068    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2069  else
2070    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2071
2072  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
2073     will only result in an unrecognizable insn so no point in asserting.  */
2074  emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2075
2076  return cc_reg;
2077}
2078
2079
2080/* Emit the compare insn and return the CC reg for the comparison in CMP.  */
2081
2082rtx
2083gen_compare_reg (rtx cmp)
2084{
2085  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2086}
2087
2088/* This function is used for v9 only.
2089   DEST is the target of the Scc insn.
2090   CODE is the code for an Scc's comparison.
2091   X and Y are the values we compare.
2092
2093   This function is needed to turn
2094
2095	   (set (reg:SI 110)
2096	       (gt (reg:CCX 100 %icc)
2097	           (const_int 0)))
2098   into
2099	   (set (reg:SI 110)
2100	       (gt:DI (reg:CCX 100 %icc)
2101	           (const_int 0)))
2102
2103   IE: The instruction recognizer needs to see the mode of the comparison to
2104   find the right instruction. We could use "gt:DI" right in the
2105   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2106
2107static int
2108gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2109{
2110  if (! TARGET_ARCH64
2111      && (GET_MODE (x) == DImode
2112	  || GET_MODE (dest) == DImode))
2113    return 0;
2114
2115  /* Try to use the movrCC insns.  */
2116  if (TARGET_ARCH64
2117      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2118      && y == const0_rtx
2119      && v9_regcmp_p (compare_code))
2120    {
2121      rtx op0 = x;
2122      rtx temp;
2123
2124      /* Special case for op0 != 0.  This can be done with one instruction if
2125	 dest == x.  */
2126
2127      if (compare_code == NE
2128	  && GET_MODE (dest) == DImode
2129	  && rtx_equal_p (op0, dest))
2130	{
2131	  emit_insn (gen_rtx_SET (VOIDmode, dest,
2132			      gen_rtx_IF_THEN_ELSE (DImode,
2133				       gen_rtx_fmt_ee (compare_code, DImode,
2134						       op0, const0_rtx),
2135				       const1_rtx,
2136				       dest)));
2137	  return 1;
2138	}
2139
2140      if (reg_overlap_mentioned_p (dest, op0))
2141	{
2142	  /* Handle the case where dest == x.
2143	     We "early clobber" the result.  */
2144	  op0 = gen_reg_rtx (GET_MODE (x));
2145	  emit_move_insn (op0, x);
2146	}
2147
2148      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2149      if (GET_MODE (op0) != DImode)
2150	{
2151	  temp = gen_reg_rtx (DImode);
2152	  convert_move (temp, op0, 0);
2153	}
2154      else
2155	temp = op0;
2156      emit_insn (gen_rtx_SET (VOIDmode, dest,
2157			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2158				   gen_rtx_fmt_ee (compare_code, DImode,
2159						   temp, const0_rtx),
2160				   const1_rtx,
2161				   dest)));
2162      return 1;
2163    }
2164  else
2165    {
2166      x = gen_compare_reg_1 (compare_code, x, y);
2167      y = const0_rtx;
2168
2169      gcc_assert (GET_MODE (x) != CC_NOOVmode
2170		  && GET_MODE (x) != CCX_NOOVmode);
2171
2172      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2173      emit_insn (gen_rtx_SET (VOIDmode, dest,
2174			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2175				   gen_rtx_fmt_ee (compare_code,
2176						   GET_MODE (x), x, y),
2177				    const1_rtx, dest)));
2178      return 1;
2179    }
2180}
2181
2182
2183/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
2184   without jumps using the addx/subx instructions.  */
2185
2186bool
2187emit_scc_insn (rtx operands[])
2188{
2189  rtx tem;
2190  rtx x;
2191  rtx y;
2192  enum rtx_code code;
2193
2194  /* The quad-word fp compare library routines all return nonzero to indicate
2195     true, which is different from the equivalent libgcc routines, so we must
2196     handle them specially here.  */
2197  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2198    {
2199      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2200					      GET_CODE (operands[1]));
2201      operands[2] = XEXP (operands[1], 0);
2202      operands[3] = XEXP (operands[1], 1);
2203    }
2204
2205  code = GET_CODE (operands[1]);
2206  x = operands[2];
2207  y = operands[3];
2208
2209  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2210     more applications).  The exception to this is "reg != 0" which can
2211     be done in one instruction on v9 (so we do it).  */
2212  if (code == EQ)
2213    {
2214      if (GET_MODE (x) == SImode)
2215        {
2216          rtx pat = gen_seqsi_special (operands[0], x, y);
2217          emit_insn (pat);
2218          return true;
2219        }
2220      else if (GET_MODE (x) == DImode)
2221        {
2222          rtx pat = gen_seqdi_special (operands[0], x, y);
2223          emit_insn (pat);
2224          return true;
2225        }
2226    }
2227
2228  if (code == NE)
2229    {
2230      if (GET_MODE (x) == SImode)
2231        {
2232          rtx pat = gen_snesi_special (operands[0], x, y);
2233          emit_insn (pat);
2234          return true;
2235        }
2236      else if (GET_MODE (x) == DImode)
2237        {
2238          rtx pat = gen_snedi_special (operands[0], x, y);
2239          emit_insn (pat);
2240          return true;
2241        }
2242    }
2243
2244  /* For the rest, on v9 we can use conditional moves.  */
2245
2246  if (TARGET_V9)
2247    {
2248      if (gen_v9_scc (operands[0], code, x, y))
2249        return true;
2250    }
2251
2252  /* We can do LTU and GEU using the addx/subx instructions too.  And
2253     for GTU/LEU, if both operands are registers swap them and fall
2254     back to the easy case.  */
2255  if (code == GTU || code == LEU)
2256    {
2257      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2258          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2259        {
2260          tem = x;
2261          x = y;
2262          y = tem;
2263          code = swap_condition (code);
2264        }
2265    }
2266
2267  if (code == LTU || code == GEU)
2268    {
2269      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2270			      gen_rtx_fmt_ee (code, SImode,
2271					      gen_compare_reg_1 (code, x, y),
2272					      const0_rtx)));
2273      return true;
2274    }
2275
2276  /* Nope, do branches.  */
2277  return false;
2278}
2279
2280/* Emit a conditional jump insn for the v9 architecture using comparison code
2281   CODE and jump target LABEL.
2282   This function exists to take advantage of the v9 brxx insns.  */
2283
2284static void
2285emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2286{
2287  emit_jump_insn (gen_rtx_SET (VOIDmode,
2288			   pc_rtx,
2289			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2290				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2291						    op0, const0_rtx),
2292				    gen_rtx_LABEL_REF (VOIDmode, label),
2293				    pc_rtx)));
2294}
2295
2296void
2297emit_conditional_branch_insn (rtx operands[])
2298{
2299  /* The quad-word fp compare library routines all return nonzero to indicate
2300     true, which is different from the equivalent libgcc routines, so we must
2301     handle them specially here.  */
2302  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2303    {
2304      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2305					      GET_CODE (operands[0]));
2306      operands[1] = XEXP (operands[0], 0);
2307      operands[2] = XEXP (operands[0], 1);
2308    }
2309
2310  if (TARGET_ARCH64 && operands[2] == const0_rtx
2311      && GET_CODE (operands[1]) == REG
2312      && GET_MODE (operands[1]) == DImode)
2313    {
2314      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2315      return;
2316    }
2317
2318  operands[1] = gen_compare_reg (operands[0]);
2319  operands[2] = const0_rtx;
2320  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2321				operands[1], operands[2]);
2322  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2323				  operands[3]));
2324}
2325
2326
2327/* Generate a DFmode part of a hard TFmode register.
2328   REG is the TFmode hard register, LOW is 1 for the
2329   low 64bit of the register and 0 otherwise.
2330 */
2331rtx
2332gen_df_reg (rtx reg, int low)
2333{
2334  int regno = REGNO (reg);
2335
2336  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2337    regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2338  return gen_rtx_REG (DFmode, regno);
2339}
2340
2341/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
2342   Unlike normal calls, TFmode operands are passed by reference.  It is
2343   assumed that no more than 3 operands are required.  */
2344
2345static void
2346emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2347{
2348  rtx ret_slot = NULL, arg[3], func_sym;
2349  int i;
2350
2351  /* We only expect to be called for conversions, unary, and binary ops.  */
2352  gcc_assert (nargs == 2 || nargs == 3);
2353
2354  for (i = 0; i < nargs; ++i)
2355    {
2356      rtx this_arg = operands[i];
2357      rtx this_slot;
2358
2359      /* TFmode arguments and return values are passed by reference.  */
2360      if (GET_MODE (this_arg) == TFmode)
2361	{
2362	  int force_stack_temp;
2363
2364	  force_stack_temp = 0;
2365	  if (TARGET_BUGGY_QP_LIB && i == 0)
2366	    force_stack_temp = 1;
2367
2368	  if (GET_CODE (this_arg) == MEM
2369	      && ! force_stack_temp)
2370	    this_arg = XEXP (this_arg, 0);
2371	  else if (CONSTANT_P (this_arg)
2372		   && ! force_stack_temp)
2373	    {
2374	      this_slot = force_const_mem (TFmode, this_arg);
2375	      this_arg = XEXP (this_slot, 0);
2376	    }
2377	  else
2378	    {
2379	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2380
2381	      /* Operand 0 is the return value.  We'll copy it out later.  */
2382	      if (i > 0)
2383		emit_move_insn (this_slot, this_arg);
2384	      else
2385		ret_slot = this_slot;
2386
2387	      this_arg = XEXP (this_slot, 0);
2388	    }
2389	}
2390
2391      arg[i] = this_arg;
2392    }
2393
2394  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2395
2396  if (GET_MODE (operands[0]) == TFmode)
2397    {
2398      if (nargs == 2)
2399	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2400			   arg[0], GET_MODE (arg[0]),
2401			   arg[1], GET_MODE (arg[1]));
2402      else
2403	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2404			   arg[0], GET_MODE (arg[0]),
2405			   arg[1], GET_MODE (arg[1]),
2406			   arg[2], GET_MODE (arg[2]));
2407
2408      if (ret_slot)
2409	emit_move_insn (operands[0], ret_slot);
2410    }
2411  else
2412    {
2413      rtx ret;
2414
2415      gcc_assert (nargs == 2);
2416
2417      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2418				     GET_MODE (operands[0]), 1,
2419				     arg[1], GET_MODE (arg[1]));
2420
2421      if (ret != operands[0])
2422	emit_move_insn (operands[0], ret);
2423    }
2424}
2425
2426/* Expand soft-float TFmode calls to sparc abi routines.  */
2427
2428static void
2429emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2430{
2431  const char *func;
2432
2433  switch (code)
2434    {
2435    case PLUS:
2436      func = "_Qp_add";
2437      break;
2438    case MINUS:
2439      func = "_Qp_sub";
2440      break;
2441    case MULT:
2442      func = "_Qp_mul";
2443      break;
2444    case DIV:
2445      func = "_Qp_div";
2446      break;
2447    default:
2448      gcc_unreachable ();
2449    }
2450
2451  emit_soft_tfmode_libcall (func, 3, operands);
2452}
2453
2454static void
2455emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2456{
2457  const char *func;
2458
2459  gcc_assert (code == SQRT);
2460  func = "_Qp_sqrt";
2461
2462  emit_soft_tfmode_libcall (func, 2, operands);
2463}
2464
2465static void
2466emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2467{
2468  const char *func;
2469
2470  switch (code)
2471    {
2472    case FLOAT_EXTEND:
2473      switch (GET_MODE (operands[1]))
2474	{
2475	case SFmode:
2476	  func = "_Qp_stoq";
2477	  break;
2478	case DFmode:
2479	  func = "_Qp_dtoq";
2480	  break;
2481	default:
2482	  gcc_unreachable ();
2483	}
2484      break;
2485
2486    case FLOAT_TRUNCATE:
2487      switch (GET_MODE (operands[0]))
2488	{
2489	case SFmode:
2490	  func = "_Qp_qtos";
2491	  break;
2492	case DFmode:
2493	  func = "_Qp_qtod";
2494	  break;
2495	default:
2496	  gcc_unreachable ();
2497	}
2498      break;
2499
2500    case FLOAT:
2501      switch (GET_MODE (operands[1]))
2502	{
2503	case SImode:
2504	  func = "_Qp_itoq";
2505	  if (TARGET_ARCH64)
2506	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2507	  break;
2508	case DImode:
2509	  func = "_Qp_xtoq";
2510	  break;
2511	default:
2512	  gcc_unreachable ();
2513	}
2514      break;
2515
2516    case UNSIGNED_FLOAT:
2517      switch (GET_MODE (operands[1]))
2518	{
2519	case SImode:
2520	  func = "_Qp_uitoq";
2521	  if (TARGET_ARCH64)
2522	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2523	  break;
2524	case DImode:
2525	  func = "_Qp_uxtoq";
2526	  break;
2527	default:
2528	  gcc_unreachable ();
2529	}
2530      break;
2531
2532    case FIX:
2533      switch (GET_MODE (operands[0]))
2534	{
2535	case SImode:
2536	  func = "_Qp_qtoi";
2537	  break;
2538	case DImode:
2539	  func = "_Qp_qtox";
2540	  break;
2541	default:
2542	  gcc_unreachable ();
2543	}
2544      break;
2545
2546    case UNSIGNED_FIX:
2547      switch (GET_MODE (operands[0]))
2548	{
2549	case SImode:
2550	  func = "_Qp_qtoui";
2551	  break;
2552	case DImode:
2553	  func = "_Qp_qtoux";
2554	  break;
2555	default:
2556	  gcc_unreachable ();
2557	}
2558      break;
2559
2560    default:
2561      gcc_unreachable ();
2562    }
2563
2564  emit_soft_tfmode_libcall (func, 2, operands);
2565}
2566
2567/* Expand a hard-float tfmode operation.  All arguments must be in
2568   registers.  */
2569
2570static void
2571emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2572{
2573  rtx op, dest;
2574
2575  if (GET_RTX_CLASS (code) == RTX_UNARY)
2576    {
2577      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2578      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2579    }
2580  else
2581    {
2582      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2583      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2584      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2585			   operands[1], operands[2]);
2586    }
2587
2588  if (register_operand (operands[0], VOIDmode))
2589    dest = operands[0];
2590  else
2591    dest = gen_reg_rtx (GET_MODE (operands[0]));
2592
2593  emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2594
2595  if (dest != operands[0])
2596    emit_move_insn (operands[0], dest);
2597}
2598
2599void
2600emit_tfmode_binop (enum rtx_code code, rtx *operands)
2601{
2602  if (TARGET_HARD_QUAD)
2603    emit_hard_tfmode_operation (code, operands);
2604  else
2605    emit_soft_tfmode_binop (code, operands);
2606}
2607
2608void
2609emit_tfmode_unop (enum rtx_code code, rtx *operands)
2610{
2611  if (TARGET_HARD_QUAD)
2612    emit_hard_tfmode_operation (code, operands);
2613  else
2614    emit_soft_tfmode_unop (code, operands);
2615}
2616
2617void
2618emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2619{
2620  if (TARGET_HARD_QUAD)
2621    emit_hard_tfmode_operation (code, operands);
2622  else
2623    emit_soft_tfmode_cvt (code, operands);
2624}
2625
2626/* Return nonzero if a branch/jump/call instruction will be emitting
2627   nop into its delay slot.  */
2628
2629int
2630empty_delay_slot (rtx insn)
2631{
2632  rtx seq;
2633
2634  /* If no previous instruction (should not happen), return true.  */
2635  if (PREV_INSN (insn) == NULL)
2636    return 1;
2637
2638  seq = NEXT_INSN (PREV_INSN (insn));
2639  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2640    return 0;
2641
2642  return 1;
2643}
2644
2645/* Return nonzero if TRIAL can go into the call delay slot.  */
2646
2647int
2648tls_call_delay (rtx trial)
2649{
2650  rtx pat;
2651
2652  /* Binutils allows
2653       call __tls_get_addr, %tgd_call (foo)
2654        add %l7, %o0, %o0, %tgd_add (foo)
2655     while Sun as/ld does not.  */
2656  if (TARGET_GNU_TLS || !TARGET_TLS)
2657    return 1;
2658
2659  pat = PATTERN (trial);
2660
2661  /* We must reject tgd_add{32|64}, i.e.
2662       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2663     and tldm_add{32|64}, i.e.
2664       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2665     for Sun as/ld.  */
2666  if (GET_CODE (pat) == SET
2667      && GET_CODE (SET_SRC (pat)) == PLUS)
2668    {
2669      rtx unspec = XEXP (SET_SRC (pat), 1);
2670
2671      if (GET_CODE (unspec) == UNSPEC
2672	  && (XINT (unspec, 1) == UNSPEC_TLSGD
2673	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
2674	return 0;
2675    }
2676
2677  return 1;
2678}
2679
2680/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2681   instruction.  RETURN_P is true if the v9 variant 'return' is to be
2682   considered in the test too.
2683
2684   TRIAL must be a SET whose destination is a REG appropriate for the
2685   'restore' instruction or, if RETURN_P is true, for the 'return'
2686   instruction.  */
2687
2688static int
2689eligible_for_restore_insn (rtx trial, bool return_p)
2690{
2691  rtx pat = PATTERN (trial);
2692  rtx src = SET_SRC (pat);
2693
2694  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
2695  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2696      && arith_operand (src, GET_MODE (src)))
2697    {
2698      if (TARGET_ARCH64)
2699        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2700      else
2701        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2702    }
2703
2704  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
2705  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2706	   && arith_double_operand (src, GET_MODE (src)))
2707    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2708
2709  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
2710  else if (! TARGET_FPU && register_operand (src, SFmode))
2711    return 1;
2712
2713  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
2714  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2715    return 1;
2716
2717  /* If we have the 'return' instruction, anything that does not use
2718     local or output registers and can go into a delay slot wins.  */
2719  else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2720	   && (get_attr_in_uncond_branch_delay (trial)
2721	       == IN_UNCOND_BRANCH_DELAY_TRUE))
2722    return 1;
2723
2724  /* The 'restore src1,src2,dest' pattern for SImode.  */
2725  else if (GET_CODE (src) == PLUS
2726	   && register_operand (XEXP (src, 0), SImode)
2727	   && arith_operand (XEXP (src, 1), SImode))
2728    return 1;
2729
2730  /* The 'restore src1,src2,dest' pattern for DImode.  */
2731  else if (GET_CODE (src) == PLUS
2732	   && register_operand (XEXP (src, 0), DImode)
2733	   && arith_double_operand (XEXP (src, 1), DImode))
2734    return 1;
2735
2736  /* The 'restore src1,%lo(src2),dest' pattern.  */
2737  else if (GET_CODE (src) == LO_SUM
2738	   && ! TARGET_CM_MEDMID
2739	   && ((register_operand (XEXP (src, 0), SImode)
2740	        && immediate_operand (XEXP (src, 1), SImode))
2741	       || (TARGET_ARCH64
2742		   && register_operand (XEXP (src, 0), DImode)
2743		   && immediate_operand (XEXP (src, 1), DImode))))
2744    return 1;
2745
2746  /* The 'restore src,src,dest' pattern.  */
2747  else if (GET_CODE (src) == ASHIFT
2748	   && (register_operand (XEXP (src, 0), SImode)
2749	       || register_operand (XEXP (src, 0), DImode))
2750	   && XEXP (src, 1) == const1_rtx)
2751    return 1;
2752
2753  return 0;
2754}
2755
2756/* Return nonzero if TRIAL can go into the function return's
2757   delay slot.  */
2758
2759int
2760eligible_for_return_delay (rtx trial)
2761{
2762  rtx pat;
2763
2764  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2765    return 0;
2766
2767  if (get_attr_length (trial) != 1)
2768    return 0;
2769
2770  /* If there are any call-saved registers, we should scan TRIAL if it
2771     does not reference them.  For now just make it easy.  */
2772  if (num_gfregs)
2773    return 0;
2774
2775  /* If the function uses __builtin_eh_return, the eh_return machinery
2776     occupies the delay slot.  */
2777  if (crtl->calls_eh_return)
2778    return 0;
2779
2780  /* In the case of a true leaf function, anything can go into the slot.  */
2781  if (sparc_leaf_function_p)
2782    return get_attr_in_uncond_branch_delay (trial)
2783	   == IN_UNCOND_BRANCH_DELAY_TRUE;
2784
2785  pat = PATTERN (trial);
2786
2787  /* Otherwise, only operations which can be done in tandem with
2788     a `restore' or `return' insn can go into the delay slot.  */
2789  if (GET_CODE (SET_DEST (pat)) != REG
2790      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2791    return 0;
2792
2793  /* If this instruction sets up floating point register and we have a return
2794     instruction, it can probably go in.  But restore will not work
2795     with FP_REGS.  */
2796  if (REGNO (SET_DEST (pat)) >= 32)
2797    return (TARGET_V9
2798	    && ! epilogue_renumber (&pat, 1)
2799	    && (get_attr_in_uncond_branch_delay (trial)
2800		== IN_UNCOND_BRANCH_DELAY_TRUE));
2801
2802  return eligible_for_restore_insn (trial, true);
2803}
2804
2805/* Return nonzero if TRIAL can go into the sibling call's
2806   delay slot.  */
2807
2808int
2809eligible_for_sibcall_delay (rtx trial)
2810{
2811  rtx pat;
2812
2813  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2814    return 0;
2815
2816  if (get_attr_length (trial) != 1)
2817    return 0;
2818
2819  pat = PATTERN (trial);
2820
2821  if (sparc_leaf_function_p)
2822    {
2823      /* If the tail call is done using the call instruction,
2824	 we have to restore %o7 in the delay slot.  */
2825      if (LEAF_SIBCALL_SLOT_RESERVED_P)
2826	return 0;
2827
2828      /* %g1 is used to build the function address */
2829      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2830	return 0;
2831
2832      return 1;
2833    }
2834
2835  /* Otherwise, only operations which can be done in tandem with
2836     a `restore' insn can go into the delay slot.  */
2837  if (GET_CODE (SET_DEST (pat)) != REG
2838      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2839      || REGNO (SET_DEST (pat)) >= 32)
2840    return 0;
2841
2842  /* If it mentions %o7, it can't go in, because sibcall will clobber it
2843     in most cases.  */
2844  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2845    return 0;
2846
2847  return eligible_for_restore_insn (trial, false);
2848}
2849
2850int
2851short_branch (int uid1, int uid2)
2852{
2853  int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
2854
2855  /* Leave a few words of "slop".  */
2856  if (delta >= -1023 && delta <= 1022)
2857    return 1;
2858
2859  return 0;
2860}
2861
2862/* Return nonzero if REG is not used after INSN.
2863   We assume REG is a reload reg, and therefore does
2864   not live past labels or calls or jumps.  */
2865int
2866reg_unused_after (rtx reg, rtx insn)
2867{
2868  enum rtx_code code, prev_code = UNKNOWN;
2869
2870  while ((insn = NEXT_INSN (insn)))
2871    {
2872      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2873	return 1;
2874
2875      code = GET_CODE (insn);
2876      if (GET_CODE (insn) == CODE_LABEL)
2877	return 1;
2878
2879      if (INSN_P (insn))
2880	{
2881	  rtx set = single_set (insn);
2882	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2883	  if (set && in_src)
2884	    return 0;
2885	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2886	    return 1;
2887	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2888	    return 0;
2889	}
2890      prev_code = code;
2891    }
2892  return 1;
2893}
2894
2895/* Determine if it's legal to put X into the constant pool.  This
2896   is not possible if X contains the address of a symbol that is
2897   not constant (TLS) or not known at final link time (PIC).  */
2898
2899static bool
2900sparc_cannot_force_const_mem (rtx x)
2901{
2902  switch (GET_CODE (x))
2903    {
2904    case CONST_INT:
2905    case CONST_DOUBLE:
2906    case CONST_VECTOR:
2907      /* Accept all non-symbolic constants.  */
2908      return false;
2909
2910    case LABEL_REF:
2911      /* Labels are OK iff we are non-PIC.  */
2912      return flag_pic != 0;
2913
2914    case SYMBOL_REF:
2915      /* 'Naked' TLS symbol references are never OK,
2916	 non-TLS symbols are OK iff we are non-PIC.  */
2917      if (SYMBOL_REF_TLS_MODEL (x))
2918	return true;
2919      else
2920	return flag_pic != 0;
2921
2922    case CONST:
2923      return sparc_cannot_force_const_mem (XEXP (x, 0));
2924    case PLUS:
2925    case MINUS:
2926      return sparc_cannot_force_const_mem (XEXP (x, 0))
2927         || sparc_cannot_force_const_mem (XEXP (x, 1));
2928    case UNSPEC:
2929      return true;
2930    default:
2931      gcc_unreachable ();
2932    }
2933}
2934
2935/* Global Offset Table support.  */
2936static GTY(()) rtx got_helper_rtx = NULL_RTX;
2937static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
2938
2939/* Return the SYMBOL_REF for the Global Offset Table.  */
2940
2941static GTY(()) rtx sparc_got_symbol = NULL_RTX;
2942
2943static rtx
2944sparc_got (void)
2945{
2946  if (!sparc_got_symbol)
2947    sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2948
2949  return sparc_got_symbol;
2950}
2951
2952/* Ensure that we are not using patterns that are not OK with PIC.  */
2953
2954int
2955check_pic (int i)
2956{
2957  rtx op;
2958
2959  switch (flag_pic)
2960    {
2961    case 1:
2962      op = recog_data.operand[i];
2963      gcc_assert (GET_CODE (op) != SYMBOL_REF
2964	  	  && (GET_CODE (op) != CONST
2965		      || (GET_CODE (XEXP (op, 0)) == MINUS
2966			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
2967			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
2968    case 2:
2969    default:
2970      return 1;
2971    }
2972}
2973
2974/* Return true if X is an address which needs a temporary register when
2975   reloaded while generating PIC code.  */
2976
2977int
2978pic_address_needs_scratch (rtx x)
2979{
2980  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2981  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2982      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2983      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2984      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2985    return 1;
2986
2987  return 0;
2988}
2989
2990/* Determine if a given RTX is a valid constant.  We already know this
2991   satisfies CONSTANT_P.  */
2992
2993bool
2994legitimate_constant_p (rtx x)
2995{
2996  switch (GET_CODE (x))
2997    {
2998    case CONST:
2999    case SYMBOL_REF:
3000      if (sparc_tls_referenced_p (x))
3001	return false;
3002      break;
3003
3004    case CONST_DOUBLE:
3005      if (GET_MODE (x) == VOIDmode)
3006        return true;
3007
3008      /* Floating point constants are generally not ok.
3009	 The only exception is 0.0 in VIS.  */
3010      if (TARGET_VIS
3011	  && SCALAR_FLOAT_MODE_P (GET_MODE (x))
3012	  && const_zero_operand (x, GET_MODE (x)))
3013	return true;
3014
3015      return false;
3016
3017    case CONST_VECTOR:
3018      /* Vector constants are generally not ok.
3019	 The only exception is 0 in VIS.  */
3020      if (TARGET_VIS
3021	  && const_zero_operand (x, GET_MODE (x)))
3022	return true;
3023
3024      return false;
3025
3026    default:
3027      break;
3028    }
3029
3030  return true;
3031}
3032
3033/* Determine if a given RTX is a valid constant address.  */
3034
3035bool
3036constant_address_p (rtx x)
3037{
3038  switch (GET_CODE (x))
3039    {
3040    case LABEL_REF:
3041    case CONST_INT:
3042    case HIGH:
3043      return true;
3044
3045    case CONST:
3046      if (flag_pic && pic_address_needs_scratch (x))
3047	return false;
3048      return legitimate_constant_p (x);
3049
3050    case SYMBOL_REF:
3051      return !flag_pic && legitimate_constant_p (x);
3052
3053    default:
3054      return false;
3055    }
3056}
3057
3058/* Nonzero if the constant value X is a legitimate general operand
3059   when generating PIC code.  It is given that flag_pic is on and
3060   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3061
3062bool
3063legitimate_pic_operand_p (rtx x)
3064{
3065  if (pic_address_needs_scratch (x))
3066    return false;
3067  if (sparc_tls_referenced_p (x))
3068    return false;
3069  return true;
3070}
3071
3072/* Return nonzero if ADDR is a valid memory address.
3073   STRICT specifies whether strict register checking applies.  */
3074
3075static bool
3076sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3077{
3078  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3079
3080  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3081    rs1 = addr;
3082  else if (GET_CODE (addr) == PLUS)
3083    {
3084      rs1 = XEXP (addr, 0);
3085      rs2 = XEXP (addr, 1);
3086
3087      /* Canonicalize.  REG comes first, if there are no regs,
3088	 LO_SUM comes first.  */
3089      if (!REG_P (rs1)
3090	  && GET_CODE (rs1) != SUBREG
3091	  && (REG_P (rs2)
3092	      || GET_CODE (rs2) == SUBREG
3093	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3094	{
3095	  rs1 = XEXP (addr, 1);
3096	  rs2 = XEXP (addr, 0);
3097	}
3098
3099      if ((flag_pic == 1
3100	   && rs1 == pic_offset_table_rtx
3101	   && !REG_P (rs2)
3102	   && GET_CODE (rs2) != SUBREG
3103	   && GET_CODE (rs2) != LO_SUM
3104	   && GET_CODE (rs2) != MEM
3105	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3106	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3107	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3108	  || ((REG_P (rs1)
3109	       || GET_CODE (rs1) == SUBREG)
3110	      && RTX_OK_FOR_OFFSET_P (rs2)))
3111	{
3112	  imm1 = rs2;
3113	  rs2 = NULL;
3114	}
3115      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3116	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3117	{
3118	  /* We prohibit REG + REG for TFmode when there are no quad move insns
3119	     and we consequently need to split.  We do this because REG+REG
3120	     is not an offsettable address.  If we get the situation in reload
3121	     where source and destination of a movtf pattern are both MEMs with
3122	     REG+REG address, then only one of them gets converted to an
3123	     offsettable address.  */
3124	  if (mode == TFmode
3125	      && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
3126	    return 0;
3127
3128	  /* We prohibit REG + REG on ARCH32 if not optimizing for
3129	     DFmode/DImode because then mem_min_alignment is likely to be zero
3130	     after reload and the  forced split would lack a matching splitter
3131	     pattern.  */
3132	  if (TARGET_ARCH32 && !optimize
3133	      && (mode == DFmode || mode == DImode))
3134	    return 0;
3135	}
3136      else if (USE_AS_OFFSETABLE_LO10
3137	       && GET_CODE (rs1) == LO_SUM
3138	       && TARGET_ARCH64
3139	       && ! TARGET_CM_MEDMID
3140	       && RTX_OK_FOR_OLO10_P (rs2))
3141	{
3142	  rs2 = NULL;
3143	  imm1 = XEXP (rs1, 1);
3144	  rs1 = XEXP (rs1, 0);
3145	  if (!CONSTANT_P (imm1)
3146	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3147	    return 0;
3148	}
3149    }
3150  else if (GET_CODE (addr) == LO_SUM)
3151    {
3152      rs1 = XEXP (addr, 0);
3153      imm1 = XEXP (addr, 1);
3154
3155      if (!CONSTANT_P (imm1)
3156	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3157	return 0;
3158
3159      /* We can't allow TFmode in 32-bit mode, because an offset greater
3160	 than the alignment (8) may cause the LO_SUM to overflow.  */
3161      if (mode == TFmode && TARGET_ARCH32)
3162	return 0;
3163    }
3164  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3165    return 1;
3166  else
3167    return 0;
3168
3169  if (GET_CODE (rs1) == SUBREG)
3170    rs1 = SUBREG_REG (rs1);
3171  if (!REG_P (rs1))
3172    return 0;
3173
3174  if (rs2)
3175    {
3176      if (GET_CODE (rs2) == SUBREG)
3177	rs2 = SUBREG_REG (rs2);
3178      if (!REG_P (rs2))
3179	return 0;
3180    }
3181
3182  if (strict)
3183    {
3184      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3185	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3186	return 0;
3187    }
3188  else
3189    {
3190      if ((REGNO (rs1) >= 32
3191	   && REGNO (rs1) != FRAME_POINTER_REGNUM
3192	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3193	  || (rs2
3194	      && (REGNO (rs2) >= 32
3195		  && REGNO (rs2) != FRAME_POINTER_REGNUM
3196		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3197	return 0;
3198    }
3199  return 1;
3200}
3201
3202/* Return the SYMBOL_REF for the tls_get_addr function.  */
3203
3204static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3205
3206static rtx
3207sparc_tls_get_addr (void)
3208{
3209  if (!sparc_tls_symbol)
3210    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3211
3212  return sparc_tls_symbol;
3213}
3214
3215/* Return the Global Offset Table to be used in TLS mode.  */
3216
3217static rtx
3218sparc_tls_got (void)
3219{
3220  /* In PIC mode, this is just the PIC offset table.  */
3221  if (flag_pic)
3222    {
3223      crtl->uses_pic_offset_table = 1;
3224      return pic_offset_table_rtx;
3225    }
3226
3227  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3228     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
3229  if (TARGET_SUN_TLS && TARGET_ARCH32)
3230    {
3231      load_got_register ();
3232      return global_offset_table_rtx;
3233    }
3234
3235  /* In all other cases, we load a new pseudo with the GOT symbol.  */
3236  return copy_to_reg (sparc_got ());
3237}
3238
3239/* Return true if X contains a thread-local symbol.  */
3240
3241static bool
3242sparc_tls_referenced_p (rtx x)
3243{
3244  if (!TARGET_HAVE_TLS)
3245    return false;
3246
3247  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3248    x = XEXP (XEXP (x, 0), 0);
3249
3250  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3251    return true;
3252
3253  /* That's all we handle in legitimize_tls_address for now.  */
3254  return false;
3255}
3256
3257/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
3258   this (thread-local) address.  */
3259
3260static rtx
3261legitimize_tls_address (rtx addr)
3262{
3263  rtx temp1, temp2, temp3, ret, o0, got, insn;
3264
3265  gcc_assert (can_create_pseudo_p ());
3266
3267  if (GET_CODE (addr) == SYMBOL_REF)
3268    switch (SYMBOL_REF_TLS_MODEL (addr))
3269      {
3270      case TLS_MODEL_GLOBAL_DYNAMIC:
3271	start_sequence ();
3272	temp1 = gen_reg_rtx (SImode);
3273	temp2 = gen_reg_rtx (SImode);
3274	ret = gen_reg_rtx (Pmode);
3275	o0 = gen_rtx_REG (Pmode, 8);
3276	got = sparc_tls_got ();
3277	emit_insn (gen_tgd_hi22 (temp1, addr));
3278	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3279	if (TARGET_ARCH32)
3280	  {
3281	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3282	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3283						   addr, const1_rtx));
3284	  }
3285	else
3286	  {
3287	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3288	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3289						   addr, const1_rtx));
3290	  }
3291        CALL_INSN_FUNCTION_USAGE (insn)
3292	  = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3293			       CALL_INSN_FUNCTION_USAGE (insn));
3294	insn = get_insns ();
3295	end_sequence ();
3296	emit_libcall_block (insn, ret, o0, addr);
3297	break;
3298
3299      case TLS_MODEL_LOCAL_DYNAMIC:
3300	start_sequence ();
3301	temp1 = gen_reg_rtx (SImode);
3302	temp2 = gen_reg_rtx (SImode);
3303	temp3 = gen_reg_rtx (Pmode);
3304	ret = gen_reg_rtx (Pmode);
3305	o0 = gen_rtx_REG (Pmode, 8);
3306	got = sparc_tls_got ();
3307	emit_insn (gen_tldm_hi22 (temp1));
3308	emit_insn (gen_tldm_lo10 (temp2, temp1));
3309	if (TARGET_ARCH32)
3310	  {
3311	    emit_insn (gen_tldm_add32 (o0, got, temp2));
3312	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3313						    const1_rtx));
3314	  }
3315	else
3316	  {
3317	    emit_insn (gen_tldm_add64 (o0, got, temp2));
3318	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3319						    const1_rtx));
3320	  }
3321        CALL_INSN_FUNCTION_USAGE (insn)
3322	  = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3323			       CALL_INSN_FUNCTION_USAGE (insn));
3324	insn = get_insns ();
3325	end_sequence ();
3326	emit_libcall_block (insn, temp3, o0,
3327			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3328					    UNSPEC_TLSLD_BASE));
3329	temp1 = gen_reg_rtx (SImode);
3330	temp2 = gen_reg_rtx (SImode);
3331	emit_insn (gen_tldo_hix22 (temp1, addr));
3332	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3333	if (TARGET_ARCH32)
3334	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3335	else
3336	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3337	break;
3338
3339      case TLS_MODEL_INITIAL_EXEC:
3340	temp1 = gen_reg_rtx (SImode);
3341	temp2 = gen_reg_rtx (SImode);
3342	temp3 = gen_reg_rtx (Pmode);
3343	got = sparc_tls_got ();
3344	emit_insn (gen_tie_hi22 (temp1, addr));
3345	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3346	if (TARGET_ARCH32)
3347	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3348	else
3349	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3350        if (TARGET_SUN_TLS)
3351	  {
3352	    ret = gen_reg_rtx (Pmode);
3353	    if (TARGET_ARCH32)
3354	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3355					temp3, addr));
3356	    else
3357	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3358					temp3, addr));
3359	  }
3360	else
3361	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3362	break;
3363
3364      case TLS_MODEL_LOCAL_EXEC:
3365	temp1 = gen_reg_rtx (Pmode);
3366	temp2 = gen_reg_rtx (Pmode);
3367	if (TARGET_ARCH32)
3368	  {
3369	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3370	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3371	  }
3372	else
3373	  {
3374	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3375	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3376	  }
3377	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3378	break;
3379
3380      default:
3381	gcc_unreachable ();
3382      }
3383
3384  else if (GET_CODE (addr) == CONST)
3385    {
3386      rtx base, offset;
3387
3388      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3389
3390      base = legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3391      offset = XEXP (XEXP (addr, 0), 1);
3392
3393      base = force_operand (base, NULL_RTX);
3394      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3395	offset = force_reg (Pmode, offset);
3396      ret = gen_rtx_PLUS (Pmode, base, offset);
3397    }
3398
3399  else
3400    gcc_unreachable ();  /* for now ... */
3401
3402  return ret;
3403}
3404
3405/* Legitimize PIC addresses.  If the address is already position-independent,
3406   we return ORIG.  Newly generated position-independent addresses go into a
3407   reg.  This is REG if nonzero, otherwise we allocate register(s) as
3408   necessary.  */
3409
3410static rtx
3411legitimize_pic_address (rtx orig, rtx reg)
3412{
3413  bool gotdata_op = false;
3414
3415  if (GET_CODE (orig) == SYMBOL_REF
3416      /* See the comment in sparc_expand_move.  */
3417      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
3418    {
3419      rtx pic_ref, address;
3420      rtx insn;
3421
3422      if (reg == 0)
3423	{
3424	  gcc_assert (! reload_in_progress && ! reload_completed);
3425	  reg = gen_reg_rtx (Pmode);
3426	}
3427
3428      if (flag_pic == 2)
3429	{
3430	  /* If not during reload, allocate another temp reg here for loading
3431	     in the address, so that these instructions can be optimized
3432	     properly.  */
3433	  rtx temp_reg = ((reload_in_progress || reload_completed)
3434			  ? reg : gen_reg_rtx (Pmode));
3435
3436	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3437	     won't get confused into thinking that these two instructions
3438	     are loading in the true address of the symbol.  If in the
3439	     future a PIC rtx exists, that should be used instead.  */
3440	  if (TARGET_ARCH64)
3441	    {
3442	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
3443	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3444	    }
3445	  else
3446	    {
3447	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
3448	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3449	    }
3450	  address = temp_reg;
3451	  gotdata_op = true;
3452	}
3453      else
3454	address = orig;
3455
3456      crtl->uses_pic_offset_table = 1;
3457      if (gotdata_op)
3458	{
3459	  if (TARGET_ARCH64)
3460	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg, pic_offset_table_rtx,
3461							address, orig));
3462	  else
3463	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg, pic_offset_table_rtx,
3464							address, orig));
3465	}
3466      else
3467	{
3468	  pic_ref
3469	    = gen_const_mem (Pmode,
3470			     gen_rtx_PLUS (Pmode,
3471					   pic_offset_table_rtx, address));
3472	  insn = emit_move_insn (reg, pic_ref);
3473	}
3474
3475      /* Put a REG_EQUAL note on this insn, so that it can be optimized
3476	 by loop.  */
3477      set_unique_reg_note (insn, REG_EQUAL, orig);
3478      return reg;
3479    }
3480  else if (GET_CODE (orig) == CONST)
3481    {
3482      rtx base, offset;
3483
3484      if (GET_CODE (XEXP (orig, 0)) == PLUS
3485	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3486	return orig;
3487
3488      if (reg == 0)
3489	{
3490	  gcc_assert (! reload_in_progress && ! reload_completed);
3491	  reg = gen_reg_rtx (Pmode);
3492	}
3493
3494      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3495      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3496      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3497			 	       base == reg ? NULL_RTX : reg);
3498
3499      if (GET_CODE (offset) == CONST_INT)
3500	{
3501	  if (SMALL_INT (offset))
3502	    return plus_constant (base, INTVAL (offset));
3503	  else if (! reload_in_progress && ! reload_completed)
3504	    offset = force_reg (Pmode, offset);
3505	  else
3506	    /* If we reach here, then something is seriously wrong.  */
3507	    gcc_unreachable ();
3508	}
3509      return gen_rtx_PLUS (Pmode, base, offset);
3510    }
3511  else if (GET_CODE (orig) == LABEL_REF)
3512    /* ??? We ought to be checking that the register is live instead, in case
3513       it is eliminated.  */
3514    crtl->uses_pic_offset_table = 1;
3515
3516  return orig;
3517}
3518
3519/* Try machine-dependent ways of modifying an illegitimate address X
3520   to be legitimate.  If we find one, return the new, valid address.
3521
3522   OLDX is the address as it was before break_out_memory_refs was called.
3523   In some cases it is useful to look at this to decide what needs to be done.
3524
3525   MODE is the mode of the operand pointed to by X.
3526
3527   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
3528
3529static rtx
3530sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3531			  enum machine_mode mode)
3532{
3533  rtx orig_x = x;
3534
3535  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3536    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3537		      force_operand (XEXP (x, 0), NULL_RTX));
3538  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3539    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3540		      force_operand (XEXP (x, 1), NULL_RTX));
3541  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3542    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3543		      XEXP (x, 1));
3544  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3545    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3546		      force_operand (XEXP (x, 1), NULL_RTX));
3547
3548  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3549    return x;
3550
3551  if (sparc_tls_referenced_p (x))
3552    x = legitimize_tls_address (x);
3553  else if (flag_pic)
3554    x = legitimize_pic_address (x, NULL_RTX);
3555  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3556    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3557		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
3558  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3559    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3560		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
3561  else if (GET_CODE (x) == SYMBOL_REF
3562	   || GET_CODE (x) == CONST
3563	   || GET_CODE (x) == LABEL_REF)
3564    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3565
3566  return x;
3567}
3568
3569#ifdef HAVE_GAS_HIDDEN
3570# define USE_HIDDEN_LINKONCE 1
3571#else
3572# define USE_HIDDEN_LINKONCE 0
3573#endif
3574
3575static void
3576get_pc_thunk_name (char name[32], unsigned int regno)
3577{
3578  const char *reg_name = reg_names[regno];
3579
3580  /* Skip the leading '%' as that cannot be used in a
3581     symbol name.  */
3582  reg_name += 1;
3583
3584  if (USE_HIDDEN_LINKONCE)
3585    sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
3586  else
3587    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
3588}
3589
3590/* Wrapper around the load_pcrel_sym{si,di} patterns.  */
3591
3592static rtx
3593gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
3594{
3595  int orig_flag_pic = flag_pic;
3596  rtx insn;
3597
3598  /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
3599  flag_pic = 0;
3600  if (TARGET_ARCH64)
3601    insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
3602  else
3603    insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
3604  flag_pic = orig_flag_pic;
3605
3606  return insn;
3607}
3608
3609/* Emit code to load the GOT register.  */
3610
3611static void
3612load_got_register (void)
3613{
3614  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
3615  if (!global_offset_table_rtx)
3616    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
3617
3618  if (TARGET_VXWORKS_RTP)
3619    emit_insn (gen_vxworks_load_got ());
3620  else
3621    {
3622      /* The GOT symbol is subject to a PC-relative relocation so we need a
3623	 helper function to add the PC value and thus get the final value.  */
3624      if (!got_helper_rtx)
3625	{
3626	  char name[32];
3627	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
3628	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3629	}
3630
3631      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
3632				     got_helper_rtx,
3633				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
3634    }
3635
3636  /* Need to emit this whether or not we obey regdecls,
3637     since setjmp/longjmp can cause life info to screw up.
3638     ??? In the case where we don't obey regdecls, this is not sufficient
3639     since we may not fall out the bottom.  */
3640  emit_use (global_offset_table_rtx);
3641}
3642
3643/* Emit a call instruction with the pattern given by PAT.  ADDR is the
3644   address of the call target.  */
3645
3646void
3647sparc_emit_call_insn (rtx pat, rtx addr)
3648{
3649  rtx insn;
3650
3651  insn = emit_call_insn (pat);
3652
3653  /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
3654  if (TARGET_VXWORKS_RTP
3655      && flag_pic
3656      && GET_CODE (addr) == SYMBOL_REF
3657      && (SYMBOL_REF_DECL (addr)
3658	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3659	  : !SYMBOL_REF_LOCAL_P (addr)))
3660    {
3661      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3662      crtl->uses_pic_offset_table = 1;
3663    }
3664}
3665
3666/* Return 1 if RTX is a MEM which is known to be aligned to at
3667   least a DESIRED byte boundary.  */
3668
3669int
3670mem_min_alignment (rtx mem, int desired)
3671{
3672  rtx addr, base, offset;
3673
3674  /* If it's not a MEM we can't accept it.  */
3675  if (GET_CODE (mem) != MEM)
3676    return 0;
3677
3678  /* Obviously...  */
3679  if (!TARGET_UNALIGNED_DOUBLES
3680      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3681    return 1;
3682
3683  /* ??? The rest of the function predates MEM_ALIGN so
3684     there is probably a bit of redundancy.  */
3685  addr = XEXP (mem, 0);
3686  base = offset = NULL_RTX;
3687  if (GET_CODE (addr) == PLUS)
3688    {
3689      if (GET_CODE (XEXP (addr, 0)) == REG)
3690	{
3691	  base = XEXP (addr, 0);
3692
3693	  /* What we are saying here is that if the base
3694	     REG is aligned properly, the compiler will make
3695	     sure any REG based index upon it will be so
3696	     as well.  */
3697	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3698	    offset = XEXP (addr, 1);
3699	  else
3700	    offset = const0_rtx;
3701	}
3702    }
3703  else if (GET_CODE (addr) == REG)
3704    {
3705      base = addr;
3706      offset = const0_rtx;
3707    }
3708
3709  if (base != NULL_RTX)
3710    {
3711      int regno = REGNO (base);
3712
3713      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3714	{
3715	  /* Check if the compiler has recorded some information
3716	     about the alignment of the base REG.  If reload has
3717	     completed, we already matched with proper alignments.
3718	     If not running global_alloc, reload might give us
3719	     unaligned pointer to local stack though.  */
3720	  if (((cfun != 0
3721		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3722	       || (optimize && reload_completed))
3723	      && (INTVAL (offset) & (desired - 1)) == 0)
3724	    return 1;
3725	}
3726      else
3727	{
3728	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3729	    return 1;
3730	}
3731    }
3732  else if (! TARGET_UNALIGNED_DOUBLES
3733	   || CONSTANT_P (addr)
3734	   || GET_CODE (addr) == LO_SUM)
3735    {
3736      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3737	 is true, in which case we can only assume that an access is aligned if
3738	 it is to a constant address, or the address involves a LO_SUM.  */
3739      return 1;
3740    }
3741
3742  /* An obviously unaligned address.  */
3743  return 0;
3744}
3745
3746
3747/* Vectors to keep interesting information about registers where it can easily
3748   be got.  We used to use the actual mode value as the bit number, but there
3749   are more than 32 modes now.  Instead we use two tables: one indexed by
3750   hard register number, and one indexed by mode.  */
3751
3752/* The purpose of sparc_mode_class is to shrink the range of modes so that
3753   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
3754   mapped into one sparc_mode_class mode.  */
3755
3756enum sparc_mode_class {
3757  S_MODE, D_MODE, T_MODE, O_MODE,
3758  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
3759  CC_MODE, CCFP_MODE
3760};
3761
3762/* Modes for single-word and smaller quantities.  */
3763#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
3764
3765/* Modes for double-word and smaller quantities.  */
3766#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
3767
3768/* Modes for quad-word and smaller quantities.  */
3769#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
3770
3771/* Modes for 8-word and smaller quantities.  */
3772#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
3773
3774/* Modes for single-float quantities.  We must allow any single word or
3775   smaller quantity.  This is because the fix/float conversion instructions
3776   take integer inputs/outputs from the float registers.  */
3777#define SF_MODES (S_MODES)
3778
3779/* Modes for double-float and smaller quantities.  */
3780#define DF_MODES (D_MODES)
3781
3782/* Modes for quad-float and smaller quantities.  */
3783#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
3784
3785/* Modes for quad-float pairs and smaller quantities.  */
3786#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
3787
3788/* Modes for double-float only quantities.  */
3789#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
3790
3791/* Modes for quad-float and double-float only quantities.  */
3792#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
3793
3794/* Modes for quad-float pairs and double-float only quantities.  */
3795#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
3796
3797/* Modes for condition codes.  */
3798#define CC_MODES (1 << (int) CC_MODE)
3799#define CCFP_MODES (1 << (int) CCFP_MODE)
3800
3801/* Value is 1 if register/mode pair is acceptable on sparc.
3802   The funny mixture of D and T modes is because integer operations
3803   do not specially operate on tetra quantities, so non-quad-aligned
3804   registers can hold quadword quantities (except %o4 and %i4 because
3805   they cross fixed registers).  */
3806
3807/* This points to either the 32 bit or the 64 bit version.  */
3808const int *hard_regno_mode_classes;
3809
3810static const int hard_32bit_mode_classes[] = {
3811  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3812  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3813  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3814  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3815
3816  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3817  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3818  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3819  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3820
3821  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3822     and none can hold SFmode/SImode values.  */
3823  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3824  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3825  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3826  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3827
3828  /* %fcc[0123] */
3829  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3830
3831  /* %icc */
3832  CC_MODES
3833};
3834
3835static const int hard_64bit_mode_classes[] = {
3836  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3837  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3838  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3839  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3840
3841  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3842  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3843  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3844  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3845
3846  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3847     and none can hold SFmode/SImode values.  */
3848  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3849  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3850  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3851  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3852
3853  /* %fcc[0123] */
3854  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3855
3856  /* %icc */
3857  CC_MODES
3858};
3859
3860int sparc_mode_class [NUM_MACHINE_MODES];
3861
3862enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3863
3864static void
3865sparc_init_modes (void)
3866{
3867  int i;
3868
3869  for (i = 0; i < NUM_MACHINE_MODES; i++)
3870    {
3871      switch (GET_MODE_CLASS (i))
3872	{
3873	case MODE_INT:
3874	case MODE_PARTIAL_INT:
3875	case MODE_COMPLEX_INT:
3876	  if (GET_MODE_SIZE (i) <= 4)
3877	    sparc_mode_class[i] = 1 << (int) S_MODE;
3878	  else if (GET_MODE_SIZE (i) == 8)
3879	    sparc_mode_class[i] = 1 << (int) D_MODE;
3880	  else if (GET_MODE_SIZE (i) == 16)
3881	    sparc_mode_class[i] = 1 << (int) T_MODE;
3882	  else if (GET_MODE_SIZE (i) == 32)
3883	    sparc_mode_class[i] = 1 << (int) O_MODE;
3884	  else
3885	    sparc_mode_class[i] = 0;
3886	  break;
3887	case MODE_VECTOR_INT:
3888	  if (GET_MODE_SIZE (i) <= 4)
3889	    sparc_mode_class[i] = 1 << (int)SF_MODE;
3890	  else if (GET_MODE_SIZE (i) == 8)
3891	    sparc_mode_class[i] = 1 << (int)DF_MODE;
3892	  break;
3893	case MODE_FLOAT:
3894	case MODE_COMPLEX_FLOAT:
3895	  if (GET_MODE_SIZE (i) <= 4)
3896	    sparc_mode_class[i] = 1 << (int) SF_MODE;
3897	  else if (GET_MODE_SIZE (i) == 8)
3898	    sparc_mode_class[i] = 1 << (int) DF_MODE;
3899	  else if (GET_MODE_SIZE (i) == 16)
3900	    sparc_mode_class[i] = 1 << (int) TF_MODE;
3901	  else if (GET_MODE_SIZE (i) == 32)
3902	    sparc_mode_class[i] = 1 << (int) OF_MODE;
3903	  else
3904	    sparc_mode_class[i] = 0;
3905	  break;
3906	case MODE_CC:
3907	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
3908	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3909	  else
3910	    sparc_mode_class[i] = 1 << (int) CC_MODE;
3911	  break;
3912	default:
3913	  sparc_mode_class[i] = 0;
3914	  break;
3915	}
3916    }
3917
3918  if (TARGET_ARCH64)
3919    hard_regno_mode_classes = hard_64bit_mode_classes;
3920  else
3921    hard_regno_mode_classes = hard_32bit_mode_classes;
3922
3923  /* Initialize the array used by REGNO_REG_CLASS.  */
3924  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3925    {
3926      if (i < 16 && TARGET_V8PLUS)
3927	sparc_regno_reg_class[i] = I64_REGS;
3928      else if (i < 32 || i == FRAME_POINTER_REGNUM)
3929	sparc_regno_reg_class[i] = GENERAL_REGS;
3930      else if (i < 64)
3931	sparc_regno_reg_class[i] = FP_REGS;
3932      else if (i < 96)
3933	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3934      else if (i < 100)
3935	sparc_regno_reg_class[i] = FPCC_REGS;
3936      else
3937	sparc_regno_reg_class[i] = NO_REGS;
3938    }
3939}
3940
3941/* Compute the frame size required by the function.  This function is called
3942   during the reload pass and also by sparc_expand_prologue.  */
3943
3944HOST_WIDE_INT
3945sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3946{
3947  int outgoing_args_size = (crtl->outgoing_args_size
3948			    + REG_PARM_STACK_SPACE (current_function_decl));
3949  int n_regs = 0;  /* N_REGS is the number of 4-byte regs saved thus far.  */
3950  int i;
3951
3952  if (TARGET_ARCH64)
3953    {
3954      for (i = 0; i < 8; i++)
3955	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3956	  n_regs += 2;
3957    }
3958  else
3959    {
3960      for (i = 0; i < 8; i += 2)
3961	if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3962	    || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3963	  n_regs += 2;
3964    }
3965
3966  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3967    if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3968	|| (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3969      n_regs += 2;
3970
3971  /* Set up values for use in prologue and epilogue.  */
3972  num_gfregs = n_regs;
3973
3974  if (leaf_function_p
3975      && n_regs == 0
3976      && size == 0
3977      && crtl->outgoing_args_size == 0)
3978    actual_fsize = apparent_fsize = 0;
3979  else
3980    {
3981      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
3982      apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
3983      apparent_fsize += n_regs * 4;
3984      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3985    }
3986
3987  /* Make sure nothing can clobber our register windows.
3988     If a SAVE must be done, or there is a stack-local variable,
3989     the register window area must be allocated.  */
3990  if (! leaf_function_p || size > 0)
3991    actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
3992
3993  return SPARC_STACK_ALIGN (actual_fsize);
3994}
3995
3996/* Output any necessary .register pseudo-ops.  */
3997
3998void
3999sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4000{
4001#ifdef HAVE_AS_REGISTER_PSEUDO_OP
4002  int i;
4003
4004  if (TARGET_ARCH32)
4005    return;
4006
4007  /* Check if %g[2367] were used without
4008     .register being printed for them already.  */
4009  for (i = 2; i < 8; i++)
4010    {
4011      if (df_regs_ever_live_p (i)
4012	  && ! sparc_hard_reg_printed [i])
4013	{
4014	  sparc_hard_reg_printed [i] = 1;
4015	  /* %g7 is used as TLS base register, use #ignore
4016	     for it instead of #scratch.  */
4017	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4018		   i == 7 ? "ignore" : "scratch");
4019	}
4020      if (i == 3) i = 5;
4021    }
4022#endif
4023}
4024
4025/* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
4026   as needed.  LOW should be double-word aligned for 32-bit registers.
4027   Return the new OFFSET.  */
4028
4029#define SORR_SAVE    0
4030#define SORR_RESTORE 1
4031
4032static int
4033save_or_restore_regs (int low, int high, rtx base, int offset, int action)
4034{
4035  rtx mem, insn;
4036  int i;
4037
4038  if (TARGET_ARCH64 && high <= 32)
4039    {
4040      for (i = low; i < high; i++)
4041	{
4042	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4043	    {
4044	      mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
4045	      set_mem_alias_set (mem, sparc_sr_alias_set);
4046	      if (action == SORR_SAVE)
4047		{
4048		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4049		  RTX_FRAME_RELATED_P (insn) = 1;
4050		}
4051	      else  /* action == SORR_RESTORE */
4052		emit_move_insn (gen_rtx_REG (DImode, i), mem);
4053	      offset += 8;
4054	    }
4055	}
4056    }
4057  else
4058    {
4059      for (i = low; i < high; i += 2)
4060	{
4061	  bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
4062	  bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
4063	  enum machine_mode mode;
4064	  int regno;
4065
4066	  if (reg0 && reg1)
4067	    {
4068	      mode = i < 32 ? DImode : DFmode;
4069	      regno = i;
4070	    }
4071	  else if (reg0)
4072	    {
4073	      mode = i < 32 ? SImode : SFmode;
4074	      regno = i;
4075	    }
4076	  else if (reg1)
4077	    {
4078	      mode = i < 32 ? SImode : SFmode;
4079	      regno = i + 1;
4080	      offset += 4;
4081	    }
4082	  else
4083	    continue;
4084
4085	  mem = gen_rtx_MEM (mode, plus_constant (base, offset));
4086	  set_mem_alias_set (mem, sparc_sr_alias_set);
4087	  if (action == SORR_SAVE)
4088	    {
4089	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4090	      RTX_FRAME_RELATED_P (insn) = 1;
4091	    }
4092	  else  /* action == SORR_RESTORE */
4093	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
4094
4095	  /* Always preserve double-word alignment.  */
4096	  offset = (offset + 7) & -8;
4097	}
4098    }
4099
4100  return offset;
4101}
4102
4103/* Emit code to save call-saved registers.  */
4104
4105static void
4106emit_save_or_restore_regs (int action)
4107{
4108  HOST_WIDE_INT offset;
4109  rtx base;
4110
4111  offset = frame_base_offset - apparent_fsize;
4112
4113  if (offset < -4096 || offset + num_gfregs * 4 > 4095)
4114    {
4115      /* ??? This might be optimized a little as %g1 might already have a
4116	 value close enough that a single add insn will do.  */
4117      /* ??? Although, all of this is probably only a temporary fix
4118	 because if %g1 can hold a function result, then
4119	 sparc_expand_epilogue will lose (the result will be
4120	 clobbered).  */
4121      base = gen_rtx_REG (Pmode, 1);
4122      emit_move_insn (base, GEN_INT (offset));
4123      emit_insn (gen_rtx_SET (VOIDmode,
4124			      base,
4125			      gen_rtx_PLUS (Pmode, frame_base_reg, base)));
4126      offset = 0;
4127    }
4128  else
4129    base = frame_base_reg;
4130
4131  offset = save_or_restore_regs (0, 8, base, offset, action);
4132  save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
4133}
4134
4135/* Generate a save_register_window insn.  */
4136
4137static rtx
4138gen_save_register_window (rtx increment)
4139{
4140  if (TARGET_ARCH64)
4141    return gen_save_register_windowdi (increment);
4142  else
4143    return gen_save_register_windowsi (increment);
4144}
4145
4146/* Generate an increment for the stack pointer.  */
4147
4148static rtx
4149gen_stack_pointer_inc (rtx increment)
4150{
4151  return gen_rtx_SET (VOIDmode,
4152		      stack_pointer_rtx,
4153		      gen_rtx_PLUS (Pmode,
4154				    stack_pointer_rtx,
4155				    increment));
4156}
4157
4158/* Generate a decrement for the stack pointer.  */
4159
4160static rtx
4161gen_stack_pointer_dec (rtx decrement)
4162{
4163  return gen_rtx_SET (VOIDmode,
4164		      stack_pointer_rtx,
4165		      gen_rtx_MINUS (Pmode,
4166				     stack_pointer_rtx,
4167				     decrement));
4168}
4169
4170/* Expand the function prologue.  The prologue is responsible for reserving
4171   storage for the frame, saving the call-saved registers and loading the
4172   GOT register if needed.  */
4173
4174void
4175sparc_expand_prologue (void)
4176{
4177  rtx insn;
4178  int i;
4179
4180  /* Compute a snapshot of current_function_uses_only_leaf_regs.  Relying
4181     on the final value of the flag means deferring the prologue/epilogue
4182     expansion until just before the second scheduling pass, which is too
4183     late to emit multiple epilogues or return insns.
4184
4185     Of course we are making the assumption that the value of the flag
4186     will not change between now and its final value.  Of the three parts
4187     of the formula, only the last one can reasonably vary.  Let's take a
4188     closer look, after assuming that the first two ones are set to true
4189     (otherwise the last value is effectively silenced).
4190
4191     If only_leaf_regs_used returns false, the global predicate will also
4192     be false so the actual frame size calculated below will be positive.
4193     As a consequence, the save_register_window insn will be emitted in
4194     the instruction stream; now this insn explicitly references %fp
4195     which is not a leaf register so only_leaf_regs_used will always
4196     return false subsequently.
4197
4198     If only_leaf_regs_used returns true, we hope that the subsequent
4199     optimization passes won't cause non-leaf registers to pop up.  For
4200     example, the regrename pass has special provisions to not rename to
4201     non-leaf registers in a leaf function.  */
4202  sparc_leaf_function_p
4203    = optimize > 0 && leaf_function_p () && only_leaf_regs_used ();
4204
4205  /* Need to use actual_fsize, since we are also allocating
4206     space for our callee (and our own register save area).  */
4207  actual_fsize
4208    = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
4209
4210  /* Advertise that the data calculated just above are now valid.  */
4211  sparc_prologue_data_valid_p = true;
4212
4213  if (sparc_leaf_function_p)
4214    {
4215      frame_base_reg = stack_pointer_rtx;
4216      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
4217    }
4218  else
4219    {
4220      frame_base_reg = hard_frame_pointer_rtx;
4221      frame_base_offset = SPARC_STACK_BIAS;
4222    }
4223
4224  if (actual_fsize == 0)
4225    /* do nothing.  */ ;
4226  else if (sparc_leaf_function_p)
4227    {
4228      if (actual_fsize <= 4096)
4229	insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4230      else if (actual_fsize <= 8192)
4231	{
4232	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4233	  /* %sp is still the CFA register.  */
4234	  RTX_FRAME_RELATED_P (insn) = 1;
4235	  insn
4236	    = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4237	}
4238      else
4239	{
4240	  rtx reg = gen_rtx_REG (Pmode, 1);
4241	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4242	  insn = emit_insn (gen_stack_pointer_inc (reg));
4243	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4244			gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4245	}
4246
4247      RTX_FRAME_RELATED_P (insn) = 1;
4248    }
4249  else
4250    {
4251      if (actual_fsize <= 4096)
4252	insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
4253      else if (actual_fsize <= 8192)
4254	{
4255	  insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
4256	  /* %sp is not the CFA register anymore.  */
4257	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4258	}
4259      else
4260	{
4261	  rtx reg = gen_rtx_REG (Pmode, 1);
4262	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4263	  insn = emit_insn (gen_save_register_window (reg));
4264	}
4265
4266      RTX_FRAME_RELATED_P (insn) = 1;
4267      for (i=0; i < XVECLEN (PATTERN (insn), 0); i++)
4268        RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
4269    }
4270
4271  if (num_gfregs)
4272    emit_save_or_restore_regs (SORR_SAVE);
4273
4274  /* Load the GOT register if needed.  */
4275  if (crtl->uses_pic_offset_table)
4276    load_got_register ();
4277}
4278
4279/* This function generates the assembly code for function entry, which boils
4280   down to emitting the necessary .register directives.  */
4281
4282static void
4283sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4284{
4285  /* Check that the assumption we made in sparc_expand_prologue is valid.  */
4286  gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
4287
4288  sparc_output_scratch_registers (file);
4289}
4290
4291/* Expand the function epilogue, either normal or part of a sibcall.
4292   We emit all the instructions except the return or the call.  */
4293
4294void
4295sparc_expand_epilogue (void)
4296{
4297  if (num_gfregs)
4298    emit_save_or_restore_regs (SORR_RESTORE);
4299
4300  if (actual_fsize == 0)
4301    /* do nothing.  */ ;
4302  else if (sparc_leaf_function_p)
4303    {
4304      if (actual_fsize <= 4096)
4305	emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
4306      else if (actual_fsize <= 8192)
4307	{
4308	  emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4309	  emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4310	}
4311      else
4312	{
4313	  rtx reg = gen_rtx_REG (Pmode, 1);
4314	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4315	  emit_insn (gen_stack_pointer_dec (reg));
4316	}
4317    }
4318}
4319
4320/* Return true if it is appropriate to emit `return' instructions in the
4321   body of a function.  */
4322
4323bool
4324sparc_can_use_return_insn_p (void)
4325{
4326  return sparc_prologue_data_valid_p
4327	 && (actual_fsize == 0 || !sparc_leaf_function_p);
4328}
4329
4330/* This function generates the assembly code for function exit.  */
4331
4332static void
4333sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4334{
4335  /* If code does not drop into the epilogue, we have to still output
4336     a dummy nop for the sake of sane backtraces.  Otherwise, if the
4337     last two instructions of a function were "call foo; dslot;" this
4338     can make the return PC of foo (i.e. address of call instruction
4339     plus 8) point to the first instruction in the next function.  */
4340
4341  rtx insn, last_real_insn;
4342
4343  insn = get_last_insn ();
4344
4345  last_real_insn = prev_real_insn (insn);
4346  if (last_real_insn
4347      && GET_CODE (last_real_insn) == INSN
4348      && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
4349    last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
4350
4351  if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN)
4352    fputs("\tnop\n", file);
4353
4354  sparc_output_deferred_case_vectors ();
4355}
4356
4357/* Output a 'restore' instruction.  */
4358
4359static void
4360output_restore (rtx pat)
4361{
4362  rtx operands[3];
4363
4364  if (! pat)
4365    {
4366      fputs ("\t restore\n", asm_out_file);
4367      return;
4368    }
4369
4370  gcc_assert (GET_CODE (pat) == SET);
4371
4372  operands[0] = SET_DEST (pat);
4373  pat = SET_SRC (pat);
4374
4375  switch (GET_CODE (pat))
4376    {
4377      case PLUS:
4378	operands[1] = XEXP (pat, 0);
4379	operands[2] = XEXP (pat, 1);
4380	output_asm_insn (" restore %r1, %2, %Y0", operands);
4381	break;
4382      case LO_SUM:
4383	operands[1] = XEXP (pat, 0);
4384	operands[2] = XEXP (pat, 1);
4385	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4386	break;
4387      case ASHIFT:
4388	operands[1] = XEXP (pat, 0);
4389	gcc_assert (XEXP (pat, 1) == const1_rtx);
4390	output_asm_insn (" restore %r1, %r1, %Y0", operands);
4391	break;
4392      default:
4393	operands[1] = pat;
4394	output_asm_insn (" restore %%g0, %1, %Y0", operands);
4395	break;
4396    }
4397}
4398
4399/* Output a return.  */
4400
4401const char *
4402output_return (rtx insn)
4403{
4404  if (sparc_leaf_function_p)
4405    {
4406      /* This is a leaf function so we don't have to bother restoring the
4407	 register window, which frees us from dealing with the convoluted
4408	 semantics of restore/return.  We simply output the jump to the
4409	 return address and the insn in the delay slot (if any).  */
4410
4411      gcc_assert (! crtl->calls_eh_return);
4412
4413      return "jmp\t%%o7+%)%#";
4414    }
4415  else
4416    {
4417      /* This is a regular function so we have to restore the register window.
4418	 We may have a pending insn for the delay slot, which will be either
4419	 combined with the 'restore' instruction or put in the delay slot of
4420	 the 'return' instruction.  */
4421
4422      if (crtl->calls_eh_return)
4423	{
4424	  /* If the function uses __builtin_eh_return, the eh_return
4425	     machinery occupies the delay slot.  */
4426	  gcc_assert (! final_sequence);
4427
4428	  if (! flag_delayed_branch)
4429	    fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file);
4430
4431	  if (TARGET_V9)
4432	    fputs ("\treturn\t%i7+8\n", asm_out_file);
4433	  else
4434	    fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
4435
4436	  if (flag_delayed_branch)
4437	    fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
4438	  else
4439	    fputs ("\t nop\n", asm_out_file);
4440	}
4441      else if (final_sequence)
4442	{
4443	  rtx delay, pat;
4444
4445	  delay = NEXT_INSN (insn);
4446	  gcc_assert (delay);
4447
4448	  pat = PATTERN (delay);
4449
4450	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4451	    {
4452	      epilogue_renumber (&pat, 0);
4453	      return "return\t%%i7+%)%#";
4454	    }
4455	  else
4456	    {
4457	      output_asm_insn ("jmp\t%%i7+%)", NULL);
4458	      output_restore (pat);
4459	      PATTERN (delay) = gen_blockage ();
4460	      INSN_CODE (delay) = -1;
4461	    }
4462	}
4463      else
4464        {
4465	  /* The delay slot is empty.  */
4466	  if (TARGET_V9)
4467	    return "return\t%%i7+%)\n\t nop";
4468	  else if (flag_delayed_branch)
4469	    return "jmp\t%%i7+%)\n\t restore";
4470	  else
4471	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
4472	}
4473    }
4474
4475  return "";
4476}
4477
4478/* Output a sibling call.  */
4479
4480const char *
4481output_sibcall (rtx insn, rtx call_operand)
4482{
4483  rtx operands[1];
4484
4485  gcc_assert (flag_delayed_branch);
4486
4487  operands[0] = call_operand;
4488
4489  if (sparc_leaf_function_p)
4490    {
4491      /* This is a leaf function so we don't have to bother restoring the
4492	 register window.  We simply output the jump to the function and
4493	 the insn in the delay slot (if any).  */
4494
4495      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4496
4497      if (final_sequence)
4498	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4499			 operands);
4500      else
4501	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4502	   it into branch if possible.  */
4503	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4504			 operands);
4505    }
4506  else
4507    {
4508      /* This is a regular function so we have to restore the register window.
4509	 We may have a pending insn for the delay slot, which will be combined
4510	 with the 'restore' instruction.  */
4511
4512      output_asm_insn ("call\t%a0, 0", operands);
4513
4514      if (final_sequence)
4515	{
4516	  rtx delay = NEXT_INSN (insn);
4517	  gcc_assert (delay);
4518
4519	  output_restore (PATTERN (delay));
4520
4521	  PATTERN (delay) = gen_blockage ();
4522	  INSN_CODE (delay) = -1;
4523	}
4524      else
4525	output_restore (NULL_RTX);
4526    }
4527
4528  return "";
4529}
4530
4531/* Functions for handling argument passing.
4532
4533   For 32-bit, the first 6 args are normally in registers and the rest are
4534   pushed.  Any arg that starts within the first 6 words is at least
4535   partially passed in a register unless its data type forbids.
4536
4537   For 64-bit, the argument registers are laid out as an array of 16 elements
4538   and arguments are added sequentially.  The first 6 int args and up to the
4539   first 16 fp args (depending on size) are passed in regs.
4540
4541   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
4542   ----    -----   --------   -----   ------------------   ------   -----------
4543    15   [SP+248]              %f31       %f30,%f31         %d30
4544    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
4545    13   [SP+232]              %f27       %f26,%f27         %d26
4546    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
4547    11   [SP+216]              %f23       %f22,%f23         %d22
4548    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
4549     9   [SP+200]              %f19       %f18,%f19         %d18
4550     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
4551     7   [SP+184]              %f15       %f14,%f15         %d14
4552     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
4553     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
4554     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
4555     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
4556     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
4557     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
4558     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
4559
4560   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4561
4562   Integral arguments are always passed as 64-bit quantities appropriately
4563   extended.
4564
4565   Passing of floating point values is handled as follows.
4566   If a prototype is in scope:
4567     If the value is in a named argument (i.e. not a stdarg function or a
4568     value not part of the `...') then the value is passed in the appropriate
4569     fp reg.
4570     If the value is part of the `...' and is passed in one of the first 6
4571     slots then the value is passed in the appropriate int reg.
4572     If the value is part of the `...' and is not passed in one of the first 6
4573     slots then the value is passed in memory.
4574   If a prototype is not in scope:
4575     If the value is one of the first 6 arguments the value is passed in the
4576     appropriate integer reg and the appropriate fp reg.
4577     If the value is not one of the first 6 arguments the value is passed in
4578     the appropriate fp reg and in memory.
4579
4580
4581   Summary of the calling conventions implemented by GCC on the SPARC:
4582
4583   32-bit ABI:
4584                                size      argument     return value
4585
4586      small integer              <4       int. reg.      int. reg.
4587      word                        4       int. reg.      int. reg.
4588      double word                 8       int. reg.      int. reg.
4589
4590      _Complex small integer     <8       int. reg.      int. reg.
4591      _Complex word               8       int. reg.      int. reg.
4592      _Complex double word       16        memory        int. reg.
4593
4594      vector integer            <=8       int. reg.       FP reg.
4595      vector integer             >8        memory         memory
4596
4597      float                       4       int. reg.       FP reg.
4598      double                      8       int. reg.       FP reg.
4599      long double                16        memory         memory
4600
4601      _Complex float              8        memory         FP reg.
4602      _Complex double            16        memory         FP reg.
4603      _Complex long double       32        memory         FP reg.
4604
4605      vector float              any        memory         memory
4606
4607      aggregate                 any        memory         memory
4608
4609
4610
4611    64-bit ABI:
4612                                size      argument     return value
4613
4614      small integer              <8       int. reg.      int. reg.
4615      word                        8       int. reg.      int. reg.
4616      double word                16       int. reg.      int. reg.
4617
4618      _Complex small integer    <16       int. reg.      int. reg.
4619      _Complex word              16       int. reg.      int. reg.
4620      _Complex double word       32        memory        int. reg.
4621
4622      vector integer           <=16        FP reg.        FP reg.
4623      vector integer       16<s<=32        memory         FP reg.
4624      vector integer            >32        memory         memory
4625
4626      float                       4        FP reg.        FP reg.
4627      double                      8        FP reg.        FP reg.
4628      long double                16        FP reg.        FP reg.
4629
4630      _Complex float              8        FP reg.        FP reg.
4631      _Complex double            16        FP reg.        FP reg.
4632      _Complex long double       32        memory         FP reg.
4633
4634      vector float             <=16        FP reg.        FP reg.
4635      vector float         16<s<=32        memory         FP reg.
4636      vector float              >32        memory         memory
4637
4638      aggregate                <=16         reg.           reg.
4639      aggregate            16<s<=32        memory          reg.
4640      aggregate                 >32        memory         memory
4641
4642
4643
4644Note #1: complex floating-point types follow the extended SPARC ABIs as
4645implemented by the Sun compiler.
4646
4647Note #2: integral vector types follow the scalar floating-point types
4648conventions to match what is implemented by the Sun VIS SDK.
4649
4650Note #3: floating-point vector types follow the aggregate types
4651conventions.  */
4652
4653
4654/* Maximum number of int regs for args.  */
4655#define SPARC_INT_ARG_MAX 6
4656/* Maximum number of fp regs for args.  */
4657#define SPARC_FP_ARG_MAX 16
4658
4659#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4660
4661/* Handle the INIT_CUMULATIVE_ARGS macro.
4662   Initialize a variable CUM of type CUMULATIVE_ARGS
4663   for a call to a function whose data type is FNTYPE.
4664   For a library call, FNTYPE is 0.  */
4665
4666void
4667init_cumulative_args (struct sparc_args *cum, tree fntype,
4668		      rtx libname ATTRIBUTE_UNUSED,
4669		      tree fndecl ATTRIBUTE_UNUSED)
4670{
4671  cum->words = 0;
4672  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4673  cum->libcall_p = fntype == 0;
4674}
4675
4676/* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4677   When a prototype says `char' or `short', really pass an `int'.  */
4678
4679static bool
4680sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED)
4681{
4682  return TARGET_ARCH32 ? true : false;
4683}
4684
4685/* Handle promotion of pointer and integer arguments.  */
4686
4687static enum machine_mode
4688sparc_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
4689                             enum machine_mode mode,
4690                             int *punsignedp ATTRIBUTE_UNUSED,
4691                             const_tree fntype ATTRIBUTE_UNUSED,
4692                             int for_return ATTRIBUTE_UNUSED)
4693{
4694  if (POINTER_TYPE_P (type))
4695    {
4696      *punsignedp = POINTERS_EXTEND_UNSIGNED;
4697      return Pmode;
4698    }
4699
4700  /* For TARGET_ARCH64 we need this, as we don't have instructions
4701     for arithmetic operations which do zero/sign extension at the same time,
4702     so without this we end up with a srl/sra after every assignment to an
4703     user variable,  which means very very bad code.  */
4704  if (TARGET_ARCH64
4705      && GET_MODE_CLASS (mode) == MODE_INT
4706      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4707    return word_mode;
4708
4709  return mode;
4710}
4711
4712/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
4713
4714static bool
4715sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4716{
4717  return TARGET_ARCH64 ? true : false;
4718}
4719
4720/* Scan the record type TYPE and return the following predicates:
4721    - INTREGS_P: the record contains at least one field or sub-field
4722      that is eligible for promotion in integer registers.
4723    - FP_REGS_P: the record contains at least one field or sub-field
4724      that is eligible for promotion in floating-point registers.
4725    - PACKED_P: the record contains at least one field that is packed.
4726
4727   Sub-fields are not taken into account for the PACKED_P predicate.  */
4728
4729static void
4730scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4731{
4732  tree field;
4733
4734  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4735    {
4736      if (TREE_CODE (field) == FIELD_DECL)
4737	{
4738	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4739	    scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4740	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4741		   || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4742		  && TARGET_FPU)
4743	    *fpregs_p = 1;
4744	  else
4745	    *intregs_p = 1;
4746
4747	  if (packed_p && DECL_PACKED (field))
4748	    *packed_p = 1;
4749	}
4750    }
4751}
4752
4753/* Compute the slot number to pass an argument in.
4754   Return the slot number or -1 if passing on the stack.
4755
4756   CUM is a variable of type CUMULATIVE_ARGS which gives info about
4757    the preceding args and about the function being called.
4758   MODE is the argument's machine mode.
4759   TYPE is the data type of the argument (as a tree).
4760    This is null for libcalls where that information may
4761    not be available.
4762   NAMED is nonzero if this argument is a named parameter
4763    (otherwise it is an extra parameter matching an ellipsis).
4764   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4765   *PREGNO records the register number to use if scalar type.
4766   *PPADDING records the amount of padding needed in words.  */
4767
4768static int
4769function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
4770		     tree type, int named, int incoming_p,
4771		     int *pregno, int *ppadding)
4772{
4773  int regbase = (incoming_p
4774		 ? SPARC_INCOMING_INT_ARG_FIRST
4775		 : SPARC_OUTGOING_INT_ARG_FIRST);
4776  int slotno = cum->words;
4777  enum mode_class mclass;
4778  int regno;
4779
4780  *ppadding = 0;
4781
4782  if (type && TREE_ADDRESSABLE (type))
4783    return -1;
4784
4785  if (TARGET_ARCH32
4786      && mode == BLKmode
4787      && type
4788      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
4789    return -1;
4790
4791  /* For SPARC64, objects requiring 16-byte alignment get it.  */
4792  if (TARGET_ARCH64
4793      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
4794      && (slotno & 1) != 0)
4795    slotno++, *ppadding = 1;
4796
4797  mclass = GET_MODE_CLASS (mode);
4798  if (type && TREE_CODE (type) == VECTOR_TYPE)
4799    {
4800      /* Vector types deserve special treatment because they are
4801	 polymorphic wrt their mode, depending upon whether VIS
4802	 instructions are enabled.  */
4803      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4804	{
4805	  /* The SPARC port defines no floating-point vector modes.  */
4806	  gcc_assert (mode == BLKmode);
4807	}
4808      else
4809	{
4810	  /* Integral vector types should either have a vector
4811	     mode or an integral mode, because we are guaranteed
4812	     by pass_by_reference that their size is not greater
4813	     than 16 bytes and TImode is 16-byte wide.  */
4814	  gcc_assert (mode != BLKmode);
4815
4816	  /* Vector integers are handled like floats according to
4817	     the Sun VIS SDK.  */
4818	  mclass = MODE_FLOAT;
4819	}
4820    }
4821
4822  switch (mclass)
4823    {
4824    case MODE_FLOAT:
4825    case MODE_COMPLEX_FLOAT:
4826    case MODE_VECTOR_INT:
4827      if (TARGET_ARCH64 && TARGET_FPU && named)
4828	{
4829	  if (slotno >= SPARC_FP_ARG_MAX)
4830	    return -1;
4831	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
4832	  /* Arguments filling only one single FP register are
4833	     right-justified in the outer double FP register.  */
4834	  if (GET_MODE_SIZE (mode) <= 4)
4835	    regno++;
4836	  break;
4837	}
4838      /* fallthrough */
4839
4840    case MODE_INT:
4841    case MODE_COMPLEX_INT:
4842      if (slotno >= SPARC_INT_ARG_MAX)
4843	return -1;
4844      regno = regbase + slotno;
4845      break;
4846
4847    case MODE_RANDOM:
4848      if (mode == VOIDmode)
4849	/* MODE is VOIDmode when generating the actual call.  */
4850	return -1;
4851
4852      gcc_assert (mode == BLKmode);
4853
4854      if (TARGET_ARCH32
4855	  || !type
4856	  || (TREE_CODE (type) != VECTOR_TYPE
4857	      && TREE_CODE (type) != RECORD_TYPE))
4858	{
4859	  if (slotno >= SPARC_INT_ARG_MAX)
4860	    return -1;
4861	  regno = regbase + slotno;
4862	}
4863      else  /* TARGET_ARCH64 && type */
4864	{
4865	  int intregs_p = 0, fpregs_p = 0, packed_p = 0;
4866
4867	  /* First see what kinds of registers we would need.  */
4868	  if (TREE_CODE (type) == VECTOR_TYPE)
4869	    fpregs_p = 1;
4870	  else
4871	    scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
4872
4873	  /* The ABI obviously doesn't specify how packed structures
4874	     are passed.  These are defined to be passed in int regs
4875	     if possible, otherwise memory.  */
4876	  if (packed_p || !named)
4877	    fpregs_p = 0, intregs_p = 1;
4878
4879	  /* If all arg slots are filled, then must pass on stack.  */
4880	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
4881	    return -1;
4882
4883	  /* If there are only int args and all int arg slots are filled,
4884	     then must pass on stack.  */
4885	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
4886	    return -1;
4887
4888	  /* Note that even if all int arg slots are filled, fp members may
4889	     still be passed in regs if such regs are available.
4890	     *PREGNO isn't set because there may be more than one, it's up
4891	     to the caller to compute them.  */
4892	  return slotno;
4893	}
4894      break;
4895
4896    default :
4897      gcc_unreachable ();
4898    }
4899
4900  *pregno = regno;
4901  return slotno;
4902}
4903
4904/* Handle recursive register counting for structure field layout.  */
4905
4906struct function_arg_record_value_parms
4907{
4908  rtx ret;		/* return expression being built.  */
4909  int slotno;		/* slot number of the argument.  */
4910  int named;		/* whether the argument is named.  */
4911  int regbase;		/* regno of the base register.  */
4912  int stack;		/* 1 if part of the argument is on the stack.  */
4913  int intoffset;	/* offset of the first pending integer field.  */
4914  unsigned int nregs;	/* number of words passed in registers.  */
4915};
4916
4917static void function_arg_record_value_3
4918 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
4919static void function_arg_record_value_2
4920 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4921static void function_arg_record_value_1
4922 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4923static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
4924static rtx function_arg_union_value (int, enum machine_mode, int, int);
4925
4926/* A subroutine of function_arg_record_value.  Traverse the structure
4927   recursively and determine how many registers will be required.  */
4928
4929static void
4930function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
4931			     struct function_arg_record_value_parms *parms,
4932			     bool packed_p)
4933{
4934  tree field;
4935
4936  /* We need to compute how many registers are needed so we can
4937     allocate the PARALLEL but before we can do that we need to know
4938     whether there are any packed fields.  The ABI obviously doesn't
4939     specify how structures are passed in this case, so they are
4940     defined to be passed in int regs if possible, otherwise memory,
4941     regardless of whether there are fp values present.  */
4942
4943  if (! packed_p)
4944    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4945      {
4946	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
4947	  {
4948	    packed_p = true;
4949	    break;
4950	  }
4951      }
4952
4953  /* Compute how many registers we need.  */
4954  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4955    {
4956      if (TREE_CODE (field) == FIELD_DECL)
4957	{
4958	  HOST_WIDE_INT bitpos = startbitpos;
4959
4960	  if (DECL_SIZE (field) != 0)
4961	    {
4962	      if (integer_zerop (DECL_SIZE (field)))
4963		continue;
4964
4965	      if (host_integerp (bit_position (field), 1))
4966		bitpos += int_bit_position (field);
4967	    }
4968
4969	  /* ??? FIXME: else assume zero offset.  */
4970
4971	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4972	    function_arg_record_value_1 (TREE_TYPE (field),
4973	    				 bitpos,
4974					 parms,
4975					 packed_p);
4976	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4977		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4978		   && TARGET_FPU
4979		   && parms->named
4980		   && ! packed_p)
4981	    {
4982	      if (parms->intoffset != -1)
4983		{
4984		  unsigned int startbit, endbit;
4985		  int intslots, this_slotno;
4986
4987		  startbit = parms->intoffset & -BITS_PER_WORD;
4988		  endbit   = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4989
4990		  intslots = (endbit - startbit) / BITS_PER_WORD;
4991		  this_slotno = parms->slotno + parms->intoffset
4992		    / BITS_PER_WORD;
4993
4994		  if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
4995		    {
4996		      intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
4997		      /* We need to pass this field on the stack.  */
4998		      parms->stack = 1;
4999		    }
5000
5001		  parms->nregs += intslots;
5002		  parms->intoffset = -1;
5003		}
5004
5005	      /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
5006		 If it wasn't true we wouldn't be here.  */
5007	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5008		  && DECL_MODE (field) == BLKmode)
5009		parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5010	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5011		parms->nregs += 2;
5012	      else
5013		parms->nregs += 1;
5014	    }
5015	  else
5016	    {
5017	      if (parms->intoffset == -1)
5018		parms->intoffset = bitpos;
5019	    }
5020	}
5021    }
5022}
5023
5024/* A subroutine of function_arg_record_value.  Assign the bits of the
5025   structure between parms->intoffset and bitpos to integer registers.  */
5026
5027static void
5028function_arg_record_value_3 (HOST_WIDE_INT bitpos,
5029			     struct function_arg_record_value_parms *parms)
5030{
5031  enum machine_mode mode;
5032  unsigned int regno;
5033  unsigned int startbit, endbit;
5034  int this_slotno, intslots, intoffset;
5035  rtx reg;
5036
5037  if (parms->intoffset == -1)
5038    return;
5039
5040  intoffset = parms->intoffset;
5041  parms->intoffset = -1;
5042
5043  startbit = intoffset & -BITS_PER_WORD;
5044  endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5045  intslots = (endbit - startbit) / BITS_PER_WORD;
5046  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
5047
5048  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
5049  if (intslots <= 0)
5050    return;
5051
5052  /* If this is the trailing part of a word, only load that much into
5053     the register.  Otherwise load the whole register.  Note that in
5054     the latter case we may pick up unwanted bits.  It's not a problem
5055     at the moment but may wish to revisit.  */
5056
5057  if (intoffset % BITS_PER_WORD != 0)
5058    mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
5059			  	   MODE_INT);
5060  else
5061    mode = word_mode;
5062
5063  intoffset /= BITS_PER_UNIT;
5064  do
5065    {
5066      regno = parms->regbase + this_slotno;
5067      reg = gen_rtx_REG (mode, regno);
5068      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5069	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5070
5071      this_slotno += 1;
5072      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
5073      mode = word_mode;
5074      parms->nregs += 1;
5075      intslots -= 1;
5076    }
5077  while (intslots > 0);
5078}
5079
5080/* A subroutine of function_arg_record_value.  Traverse the structure
5081   recursively and assign bits to floating point registers.  Track which
5082   bits in between need integer registers; invoke function_arg_record_value_3
5083   to make that happen.  */
5084
5085static void
5086function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
5087			     struct function_arg_record_value_parms *parms,
5088			     bool packed_p)
5089{
5090  tree field;
5091
5092  if (! packed_p)
5093    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5094      {
5095	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5096	  {
5097	    packed_p = true;
5098	    break;
5099	  }
5100      }
5101
5102  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5103    {
5104      if (TREE_CODE (field) == FIELD_DECL)
5105	{
5106	  HOST_WIDE_INT bitpos = startbitpos;
5107
5108	  if (DECL_SIZE (field) != 0)
5109	    {
5110	      if (integer_zerop (DECL_SIZE (field)))
5111		continue;
5112
5113	      if (host_integerp (bit_position (field), 1))
5114		bitpos += int_bit_position (field);
5115	    }
5116
5117	  /* ??? FIXME: else assume zero offset.  */
5118
5119	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5120	    function_arg_record_value_2 (TREE_TYPE (field),
5121	    				 bitpos,
5122					 parms,
5123					 packed_p);
5124	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5125		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5126		   && TARGET_FPU
5127		   && parms->named
5128		   && ! packed_p)
5129	    {
5130	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
5131	      int regno, nregs, pos;
5132	      enum machine_mode mode = DECL_MODE (field);
5133	      rtx reg;
5134
5135	      function_arg_record_value_3 (bitpos, parms);
5136
5137	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5138		  && mode == BLKmode)
5139	        {
5140		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5141		  nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5142		}
5143	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5144	        {
5145		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5146		  nregs = 2;
5147		}
5148	      else
5149	        nregs = 1;
5150
5151	      regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
5152	      if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
5153		regno++;
5154	      reg = gen_rtx_REG (mode, regno);
5155	      pos = bitpos / BITS_PER_UNIT;
5156	      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5157		= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5158	      parms->nregs += 1;
5159	      while (--nregs > 0)
5160		{
5161		  regno += GET_MODE_SIZE (mode) / 4;
5162	  	  reg = gen_rtx_REG (mode, regno);
5163		  pos += GET_MODE_SIZE (mode);
5164		  XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5165		    = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5166		  parms->nregs += 1;
5167		}
5168	    }
5169	  else
5170	    {
5171	      if (parms->intoffset == -1)
5172		parms->intoffset = bitpos;
5173	    }
5174	}
5175    }
5176}
5177
5178/* Used by function_arg and function_value to implement the complex
5179   conventions of the 64-bit ABI for passing and returning structures.
5180   Return an expression valid as a return value for the two macros
5181   FUNCTION_ARG and FUNCTION_VALUE.
5182
5183   TYPE is the data type of the argument (as a tree).
5184    This is null for libcalls where that information may
5185    not be available.
5186   MODE is the argument's machine mode.
5187   SLOTNO is the index number of the argument's slot in the parameter array.
5188   NAMED is nonzero if this argument is a named parameter
5189    (otherwise it is an extra parameter matching an ellipsis).
5190   REGBASE is the regno of the base register for the parameter array.  */
5191
5192static rtx
5193function_arg_record_value (const_tree type, enum machine_mode mode,
5194			   int slotno, int named, int regbase)
5195{
5196  HOST_WIDE_INT typesize = int_size_in_bytes (type);
5197  struct function_arg_record_value_parms parms;
5198  unsigned int nregs;
5199
5200  parms.ret = NULL_RTX;
5201  parms.slotno = slotno;
5202  parms.named = named;
5203  parms.regbase = regbase;
5204  parms.stack = 0;
5205
5206  /* Compute how many registers we need.  */
5207  parms.nregs = 0;
5208  parms.intoffset = 0;
5209  function_arg_record_value_1 (type, 0, &parms, false);
5210
5211  /* Take into account pending integer fields.  */
5212  if (parms.intoffset != -1)
5213    {
5214      unsigned int startbit, endbit;
5215      int intslots, this_slotno;
5216
5217      startbit = parms.intoffset & -BITS_PER_WORD;
5218      endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5219      intslots = (endbit - startbit) / BITS_PER_WORD;
5220      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
5221
5222      if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5223        {
5224	  intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5225	  /* We need to pass this field on the stack.  */
5226	  parms.stack = 1;
5227        }
5228
5229      parms.nregs += intslots;
5230    }
5231  nregs = parms.nregs;
5232
5233  /* Allocate the vector and handle some annoying special cases.  */
5234  if (nregs == 0)
5235    {
5236      /* ??? Empty structure has no value?  Duh?  */
5237      if (typesize <= 0)
5238	{
5239	  /* Though there's nothing really to store, return a word register
5240	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
5241	     leads to breakage due to the fact that there are zero bytes to
5242	     load.  */
5243	  return gen_rtx_REG (mode, regbase);
5244	}
5245      else
5246	{
5247	  /* ??? C++ has structures with no fields, and yet a size.  Give up
5248	     for now and pass everything back in integer registers.  */
5249	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5250	}
5251      if (nregs + slotno > SPARC_INT_ARG_MAX)
5252	nregs = SPARC_INT_ARG_MAX - slotno;
5253    }
5254  gcc_assert (nregs != 0);
5255
5256  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
5257
5258  /* If at least one field must be passed on the stack, generate
5259     (parallel [(expr_list (nil) ...) ...]) so that all fields will
5260     also be passed on the stack.  We can't do much better because the
5261     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
5262     of structures for which the fields passed exclusively in registers
5263     are not at the beginning of the structure.  */
5264  if (parms.stack)
5265    XVECEXP (parms.ret, 0, 0)
5266      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5267
5268  /* Fill in the entries.  */
5269  parms.nregs = 0;
5270  parms.intoffset = 0;
5271  function_arg_record_value_2 (type, 0, &parms, false);
5272  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
5273
5274  gcc_assert (parms.nregs == nregs);
5275
5276  return parms.ret;
5277}
5278
5279/* Used by function_arg and function_value to implement the conventions
5280   of the 64-bit ABI for passing and returning unions.
5281   Return an expression valid as a return value for the two macros
5282   FUNCTION_ARG and FUNCTION_VALUE.
5283
5284   SIZE is the size in bytes of the union.
5285   MODE is the argument's machine mode.
5286   REGNO is the hard register the union will be passed in.  */
5287
5288static rtx
5289function_arg_union_value (int size, enum machine_mode mode, int slotno,
5290			  int regno)
5291{
5292  int nwords = ROUND_ADVANCE (size), i;
5293  rtx regs;
5294
5295  /* See comment in previous function for empty structures.  */
5296  if (nwords == 0)
5297    return gen_rtx_REG (mode, regno);
5298
5299  if (slotno == SPARC_INT_ARG_MAX - 1)
5300    nwords = 1;
5301
5302  regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5303
5304  for (i = 0; i < nwords; i++)
5305    {
5306      /* Unions are passed left-justified.  */
5307      XVECEXP (regs, 0, i)
5308	= gen_rtx_EXPR_LIST (VOIDmode,
5309			     gen_rtx_REG (word_mode, regno),
5310			     GEN_INT (UNITS_PER_WORD * i));
5311      regno++;
5312    }
5313
5314  return regs;
5315}
5316
5317/* Used by function_arg and function_value to implement the conventions
5318   for passing and returning large (BLKmode) vectors.
5319   Return an expression valid as a return value for the two macros
5320   FUNCTION_ARG and FUNCTION_VALUE.
5321
5322   SIZE is the size in bytes of the vector (at least 8 bytes).
5323   REGNO is the FP hard register the vector will be passed in.  */
5324
5325static rtx
5326function_arg_vector_value (int size, int regno)
5327{
5328  int i, nregs = size / 8;
5329  rtx regs;
5330
5331  regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5332
5333  for (i = 0; i < nregs; i++)
5334    {
5335      XVECEXP (regs, 0, i)
5336	= gen_rtx_EXPR_LIST (VOIDmode,
5337			     gen_rtx_REG (DImode, regno + 2*i),
5338			     GEN_INT (i*8));
5339    }
5340
5341  return regs;
5342}
5343
5344/* Handle the FUNCTION_ARG macro.
5345   Determine where to put an argument to a function.
5346   Value is zero to push the argument on the stack,
5347   or a hard register in which to store the argument.
5348
5349   CUM is a variable of type CUMULATIVE_ARGS which gives info about
5350    the preceding args and about the function being called.
5351   MODE is the argument's machine mode.
5352   TYPE is the data type of the argument (as a tree).
5353    This is null for libcalls where that information may
5354    not be available.
5355   NAMED is nonzero if this argument is a named parameter
5356    (otherwise it is an extra parameter matching an ellipsis).
5357   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
5358
5359rtx
5360function_arg (const struct sparc_args *cum, enum machine_mode mode,
5361	      tree type, int named, int incoming_p)
5362{
5363  int regbase = (incoming_p
5364		 ? SPARC_INCOMING_INT_ARG_FIRST
5365		 : SPARC_OUTGOING_INT_ARG_FIRST);
5366  int slotno, regno, padding;
5367  enum mode_class mclass = GET_MODE_CLASS (mode);
5368
5369  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
5370				&regno, &padding);
5371  if (slotno == -1)
5372    return 0;
5373
5374  /* Vector types deserve special treatment because they are polymorphic wrt
5375     their mode, depending upon whether VIS instructions are enabled.  */
5376  if (type && TREE_CODE (type) == VECTOR_TYPE)
5377    {
5378      HOST_WIDE_INT size = int_size_in_bytes (type);
5379      gcc_assert ((TARGET_ARCH32 && size <= 8)
5380		  || (TARGET_ARCH64 && size <= 16));
5381
5382      if (mode == BLKmode)
5383	return function_arg_vector_value (size,
5384					  SPARC_FP_ARG_FIRST + 2*slotno);
5385      else
5386	mclass = MODE_FLOAT;
5387    }
5388
5389  if (TARGET_ARCH32)
5390    return gen_rtx_REG (mode, regno);
5391
5392  /* Structures up to 16 bytes in size are passed in arg slots on the stack
5393     and are promoted to registers if possible.  */
5394  if (type && TREE_CODE (type) == RECORD_TYPE)
5395    {
5396      HOST_WIDE_INT size = int_size_in_bytes (type);
5397      gcc_assert (size <= 16);
5398
5399      return function_arg_record_value (type, mode, slotno, named, regbase);
5400    }
5401
5402  /* Unions up to 16 bytes in size are passed in integer registers.  */
5403  else if (type && TREE_CODE (type) == UNION_TYPE)
5404    {
5405      HOST_WIDE_INT size = int_size_in_bytes (type);
5406      gcc_assert (size <= 16);
5407
5408      return function_arg_union_value (size, mode, slotno, regno);
5409    }
5410
5411  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
5412     but also have the slot allocated for them.
5413     If no prototype is in scope fp values in register slots get passed
5414     in two places, either fp regs and int regs or fp regs and memory.  */
5415  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5416	   && SPARC_FP_REG_P (regno))
5417    {
5418      rtx reg = gen_rtx_REG (mode, regno);
5419      if (cum->prototype_p || cum->libcall_p)
5420	{
5421	  /* "* 2" because fp reg numbers are recorded in 4 byte
5422	     quantities.  */
5423#if 0
5424	  /* ??? This will cause the value to be passed in the fp reg and
5425	     in the stack.  When a prototype exists we want to pass the
5426	     value in the reg but reserve space on the stack.  That's an
5427	     optimization, and is deferred [for a bit].  */
5428	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
5429	    return gen_rtx_PARALLEL (mode,
5430			    gen_rtvec (2,
5431				       gen_rtx_EXPR_LIST (VOIDmode,
5432						NULL_RTX, const0_rtx),
5433				       gen_rtx_EXPR_LIST (VOIDmode,
5434						reg, const0_rtx)));
5435	  else
5436#else
5437	  /* ??? It seems that passing back a register even when past
5438	     the area declared by REG_PARM_STACK_SPACE will allocate
5439	     space appropriately, and will not copy the data onto the
5440	     stack, exactly as we desire.
5441
5442	     This is due to locate_and_pad_parm being called in
5443	     expand_call whenever reg_parm_stack_space > 0, which
5444	     while beneficial to our example here, would seem to be
5445	     in error from what had been intended.  Ho hum...  -- r~ */
5446#endif
5447	    return reg;
5448	}
5449      else
5450	{
5451	  rtx v0, v1;
5452
5453	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
5454	    {
5455	      int intreg;
5456
5457	      /* On incoming, we don't need to know that the value
5458		 is passed in %f0 and %i0, and it confuses other parts
5459		 causing needless spillage even on the simplest cases.  */
5460	      if (incoming_p)
5461		return reg;
5462
5463	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5464			+ (regno - SPARC_FP_ARG_FIRST) / 2);
5465
5466	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5467	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5468				      const0_rtx);
5469	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5470	    }
5471	  else
5472	    {
5473	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5474	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5475	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5476	    }
5477	}
5478    }
5479
5480  /* All other aggregate types are passed in an integer register in a mode
5481     corresponding to the size of the type.  */
5482  else if (type && AGGREGATE_TYPE_P (type))
5483    {
5484      HOST_WIDE_INT size = int_size_in_bytes (type);
5485      gcc_assert (size <= 16);
5486
5487      mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5488    }
5489
5490  return gen_rtx_REG (mode, regno);
5491}
5492
5493/* For an arg passed partly in registers and partly in memory,
5494   this is the number of bytes of registers used.
5495   For args passed entirely in registers or entirely in memory, zero.
5496
5497   Any arg that starts in the first 6 regs but won't entirely fit in them
5498   needs partial registers on v8.  On v9, structures with integer
5499   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5500   values that begin in the last fp reg [where "last fp reg" varies with the
5501   mode] will be split between that reg and memory.  */
5502
5503static int
5504sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5505			 tree type, bool named)
5506{
5507  int slotno, regno, padding;
5508
5509  /* We pass 0 for incoming_p here, it doesn't matter.  */
5510  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5511
5512  if (slotno == -1)
5513    return 0;
5514
5515  if (TARGET_ARCH32)
5516    {
5517      if ((slotno + (mode == BLKmode
5518		     ? ROUND_ADVANCE (int_size_in_bytes (type))
5519		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5520	  > SPARC_INT_ARG_MAX)
5521	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5522    }
5523  else
5524    {
5525      /* We are guaranteed by pass_by_reference that the size of the
5526	 argument is not greater than 16 bytes, so we only need to return
5527	 one word if the argument is partially passed in registers.  */
5528
5529      if (type && AGGREGATE_TYPE_P (type))
5530	{
5531	  int size = int_size_in_bytes (type);
5532
5533	  if (size > UNITS_PER_WORD
5534	      && slotno == SPARC_INT_ARG_MAX - 1)
5535	    return UNITS_PER_WORD;
5536	}
5537      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5538	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5539		   && ! (TARGET_FPU && named)))
5540	{
5541	  /* The complex types are passed as packed types.  */
5542	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5543	      && slotno == SPARC_INT_ARG_MAX - 1)
5544	    return UNITS_PER_WORD;
5545	}
5546      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5547	{
5548	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5549	      > SPARC_FP_ARG_MAX)
5550	    return UNITS_PER_WORD;
5551	}
5552    }
5553
5554  return 0;
5555}
5556
5557/* Handle the TARGET_PASS_BY_REFERENCE target hook.
5558   Specify whether to pass the argument by reference.  */
5559
5560static bool
5561sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5562			 enum machine_mode mode, const_tree type,
5563			 bool named ATTRIBUTE_UNUSED)
5564{
5565  if (TARGET_ARCH32)
5566    /* Original SPARC 32-bit ABI says that structures and unions,
5567       and quad-precision floats are passed by reference.  For Pascal,
5568       also pass arrays by reference.  All other base types are passed
5569       in registers.
5570
5571       Extended ABI (as implemented by the Sun compiler) says that all
5572       complex floats are passed by reference.  Pass complex integers
5573       in registers up to 8 bytes.  More generally, enforce the 2-word
5574       cap for passing arguments in registers.
5575
5576       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5577       integers are passed like floats of the same size, that is in
5578       registers up to 8 bytes.  Pass all vector floats by reference
5579       like structure and unions.  */
5580    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5581	    || mode == SCmode
5582	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
5583	    || GET_MODE_SIZE (mode) > 8
5584	    || (type
5585		&& TREE_CODE (type) == VECTOR_TYPE
5586		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5587  else
5588    /* Original SPARC 64-bit ABI says that structures and unions
5589       smaller than 16 bytes are passed in registers, as well as
5590       all other base types.
5591
5592       Extended ABI (as implemented by the Sun compiler) says that
5593       complex floats are passed in registers up to 16 bytes.  Pass
5594       all complex integers in registers up to 16 bytes.  More generally,
5595       enforce the 2-word cap for passing arguments in registers.
5596
5597       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5598       integers are passed like floats of the same size, that is in
5599       registers (up to 16 bytes).  Pass all vector floats like structure
5600       and unions.  */
5601    return ((type
5602	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5603	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5604	    /* Catch CTImode and TCmode.  */
5605	    || GET_MODE_SIZE (mode) > 16);
5606}
5607
5608/* Handle the FUNCTION_ARG_ADVANCE macro.
5609   Update the data in CUM to advance over an argument
5610   of mode MODE and data type TYPE.
5611   TYPE is null for libcalls where that information may not be available.  */
5612
5613void
5614function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5615		      tree type, int named)
5616{
5617  int regno, padding;
5618
5619  /* We pass 0 for incoming_p here, it doesn't matter.  */
5620  function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5621
5622  /* If argument requires leading padding, add it.  */
5623  cum->words += padding;
5624
5625  if (TARGET_ARCH32)
5626    {
5627      cum->words += (mode != BLKmode
5628		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5629		     : ROUND_ADVANCE (int_size_in_bytes (type)));
5630    }
5631  else
5632    {
5633      if (type && AGGREGATE_TYPE_P (type))
5634	{
5635	  int size = int_size_in_bytes (type);
5636
5637	  if (size <= 8)
5638	    ++cum->words;
5639	  else if (size <= 16)
5640	    cum->words += 2;
5641	  else /* passed by reference */
5642	    ++cum->words;
5643	}
5644      else
5645	{
5646	  cum->words += (mode != BLKmode
5647			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5648			 : ROUND_ADVANCE (int_size_in_bytes (type)));
5649	}
5650    }
5651}
5652
5653/* Handle the FUNCTION_ARG_PADDING macro.
5654   For the 64 bit ABI structs are always stored left shifted in their
5655   argument slot.  */
5656
5657enum direction
5658function_arg_padding (enum machine_mode mode, const_tree type)
5659{
5660  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5661    return upward;
5662
5663  /* Fall back to the default.  */
5664  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5665}
5666
5667/* Handle the TARGET_RETURN_IN_MEMORY target hook.
5668   Specify whether to return the return value in memory.  */
5669
5670static bool
5671sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5672{
5673  if (TARGET_ARCH32)
5674    /* Original SPARC 32-bit ABI says that structures and unions,
5675       and quad-precision floats are returned in memory.  All other
5676       base types are returned in registers.
5677
5678       Extended ABI (as implemented by the Sun compiler) says that
5679       all complex floats are returned in registers (8 FP registers
5680       at most for '_Complex long double').  Return all complex integers
5681       in registers (4 at most for '_Complex long long').
5682
5683       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5684       integers are returned like floats of the same size, that is in
5685       registers up to 8 bytes and in memory otherwise.  Return all
5686       vector floats in memory like structure and unions; note that
5687       they always have BLKmode like the latter.  */
5688    return (TYPE_MODE (type) == BLKmode
5689	    || TYPE_MODE (type) == TFmode
5690	    || (TREE_CODE (type) == VECTOR_TYPE
5691		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5692  else
5693    /* Original SPARC 64-bit ABI says that structures and unions
5694       smaller than 32 bytes are returned in registers, as well as
5695       all other base types.
5696
5697       Extended ABI (as implemented by the Sun compiler) says that all
5698       complex floats are returned in registers (8 FP registers at most
5699       for '_Complex long double').  Return all complex integers in
5700       registers (4 at most for '_Complex TItype').
5701
5702       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5703       integers are returned like floats of the same size, that is in
5704       registers.  Return all vector floats like structure and unions;
5705       note that they always have BLKmode like the latter.  */
5706    return ((TYPE_MODE (type) == BLKmode
5707	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5708}
5709
5710/* Handle the TARGET_STRUCT_VALUE target hook.
5711   Return where to find the structure return value address.  */
5712
5713static rtx
5714sparc_struct_value_rtx (tree fndecl, int incoming)
5715{
5716  if (TARGET_ARCH64)
5717    return 0;
5718  else
5719    {
5720      rtx mem;
5721
5722      if (incoming)
5723	mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
5724						 STRUCT_VALUE_OFFSET));
5725      else
5726	mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
5727						 STRUCT_VALUE_OFFSET));
5728
5729      /* Only follow the SPARC ABI for fixed-size structure returns.
5730         Variable size structure returns are handled per the normal
5731         procedures in GCC. This is enabled by -mstd-struct-return */
5732      if (incoming == 2
5733	  && sparc_std_struct_return
5734	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
5735	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
5736	{
5737	  /* We must check and adjust the return address, as it is
5738	     optional as to whether the return object is really
5739	     provided.  */
5740	  rtx ret_rtx = gen_rtx_REG (Pmode, 31);
5741	  rtx scratch = gen_reg_rtx (SImode);
5742	  rtx endlab = gen_label_rtx ();
5743
5744	  /* Calculate the return object size */
5745	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
5746	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
5747	  /* Construct a temporary return value */
5748	  rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
5749
5750	  /* Implement SPARC 32-bit psABI callee returns struck checking
5751	     requirements:
5752
5753	      Fetch the instruction where we will return to and see if
5754	     it's an unimp instruction (the most significant 10 bits
5755	     will be zero).  */
5756	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
5757						plus_constant (ret_rtx, 8)));
5758	  /* Assume the size is valid and pre-adjust */
5759	  emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5760	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab);
5761	  emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5762	  /* Assign stack temp:
5763	     Write the address of the memory pointed to by temp_val into
5764	     the memory pointed to by mem */
5765	  emit_move_insn (mem, XEXP (temp_val, 0));
5766	  emit_label (endlab);
5767	}
5768
5769      set_mem_alias_set (mem, struct_value_alias_set);
5770      return mem;
5771    }
5772}
5773
5774/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5775   For v9, function return values are subject to the same rules as arguments,
5776   except that up to 32 bytes may be returned in registers.  */
5777
5778rtx
5779function_value (const_tree type, enum machine_mode mode, int incoming_p)
5780{
5781  /* Beware that the two values are swapped here wrt function_arg.  */
5782  int regbase = (incoming_p
5783		 ? SPARC_OUTGOING_INT_ARG_FIRST
5784		 : SPARC_INCOMING_INT_ARG_FIRST);
5785  enum mode_class mclass = GET_MODE_CLASS (mode);
5786  int regno;
5787
5788  /* Vector types deserve special treatment because they are polymorphic wrt
5789     their mode, depending upon whether VIS instructions are enabled.  */
5790  if (type && TREE_CODE (type) == VECTOR_TYPE)
5791    {
5792      HOST_WIDE_INT size = int_size_in_bytes (type);
5793      gcc_assert ((TARGET_ARCH32 && size <= 8)
5794		  || (TARGET_ARCH64 && size <= 32));
5795
5796      if (mode == BLKmode)
5797	return function_arg_vector_value (size,
5798					  SPARC_FP_ARG_FIRST);
5799      else
5800	mclass = MODE_FLOAT;
5801    }
5802
5803  if (TARGET_ARCH64 && type)
5804    {
5805      /* Structures up to 32 bytes in size are returned in registers.  */
5806      if (TREE_CODE (type) == RECORD_TYPE)
5807	{
5808	  HOST_WIDE_INT size = int_size_in_bytes (type);
5809	  gcc_assert (size <= 32);
5810
5811	  return function_arg_record_value (type, mode, 0, 1, regbase);
5812	}
5813
5814      /* Unions up to 32 bytes in size are returned in integer registers.  */
5815      else if (TREE_CODE (type) == UNION_TYPE)
5816	{
5817	  HOST_WIDE_INT size = int_size_in_bytes (type);
5818	  gcc_assert (size <= 32);
5819
5820	  return function_arg_union_value (size, mode, 0, regbase);
5821	}
5822
5823      /* Objects that require it are returned in FP registers.  */
5824      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5825	;
5826
5827      /* All other aggregate types are returned in an integer register in a
5828	 mode corresponding to the size of the type.  */
5829      else if (AGGREGATE_TYPE_P (type))
5830	{
5831	  /* All other aggregate types are passed in an integer register
5832	     in a mode corresponding to the size of the type.  */
5833	  HOST_WIDE_INT size = int_size_in_bytes (type);
5834	  gcc_assert (size <= 32);
5835
5836	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5837
5838	  /* ??? We probably should have made the same ABI change in
5839	     3.4.0 as the one we made for unions.   The latter was
5840	     required by the SCD though, while the former is not
5841	     specified, so we favored compatibility and efficiency.
5842
5843	     Now we're stuck for aggregates larger than 16 bytes,
5844	     because OImode vanished in the meantime.  Let's not
5845	     try to be unduly clever, and simply follow the ABI
5846	     for unions in that case.  */
5847	  if (mode == BLKmode)
5848	    return function_arg_union_value (size, mode, 0, regbase);
5849	  else
5850	    mclass = MODE_INT;
5851	}
5852
5853      /* This must match sparc_promote_function_mode.
5854	 ??? Maybe 32-bit pointers should actually remain in Pmode?  */
5855      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5856	mode = word_mode;
5857    }
5858
5859  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
5860    regno = SPARC_FP_ARG_FIRST;
5861  else
5862    regno = regbase;
5863
5864  return gen_rtx_REG (mode, regno);
5865}
5866
5867/* Do what is necessary for `va_start'.  We look at the current function
5868   to determine if stdarg or varargs is used and return the address of
5869   the first unnamed parameter.  */
5870
5871static rtx
5872sparc_builtin_saveregs (void)
5873{
5874  int first_reg = crtl->args.info.words;
5875  rtx address;
5876  int regno;
5877
5878  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
5879    emit_move_insn (gen_rtx_MEM (word_mode,
5880				 gen_rtx_PLUS (Pmode,
5881					       frame_pointer_rtx,
5882					       GEN_INT (FIRST_PARM_OFFSET (0)
5883							+ (UNITS_PER_WORD
5884							   * regno)))),
5885		    gen_rtx_REG (word_mode,
5886				 SPARC_INCOMING_INT_ARG_FIRST + regno));
5887
5888  address = gen_rtx_PLUS (Pmode,
5889			  frame_pointer_rtx,
5890			  GEN_INT (FIRST_PARM_OFFSET (0)
5891				   + UNITS_PER_WORD * first_reg));
5892
5893  return address;
5894}
5895
5896/* Implement `va_start' for stdarg.  */
5897
5898static void
5899sparc_va_start (tree valist, rtx nextarg)
5900{
5901  nextarg = expand_builtin_saveregs ();
5902  std_expand_builtin_va_start (valist, nextarg);
5903}
5904
5905/* Implement `va_arg' for stdarg.  */
5906
5907static tree
5908sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5909		       gimple_seq *post_p)
5910{
5911  HOST_WIDE_INT size, rsize, align;
5912  tree addr, incr;
5913  bool indirect;
5914  tree ptrtype = build_pointer_type (type);
5915
5916  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5917    {
5918      indirect = true;
5919      size = rsize = UNITS_PER_WORD;
5920      align = 0;
5921    }
5922  else
5923    {
5924      indirect = false;
5925      size = int_size_in_bytes (type);
5926      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5927      align = 0;
5928
5929      if (TARGET_ARCH64)
5930	{
5931	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
5932	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
5933	    align = 2 * UNITS_PER_WORD;
5934
5935	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
5936	     are left-justified in their slots.  */
5937	  if (AGGREGATE_TYPE_P (type))
5938	    {
5939	      if (size == 0)
5940		size = rsize = UNITS_PER_WORD;
5941	      else
5942		size = rsize;
5943	    }
5944	}
5945    }
5946
5947  incr = valist;
5948  if (align)
5949    {
5950      incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5951			  size_int (align - 1));
5952      incr = fold_convert (sizetype, incr);
5953      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
5954			  size_int (-align));
5955      incr = fold_convert (ptr_type_node, incr);
5956    }
5957
5958  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
5959  addr = incr;
5960
5961  if (BYTES_BIG_ENDIAN && size < rsize)
5962    addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
5963			size_int (rsize - size));
5964
5965  if (indirect)
5966    {
5967      addr = fold_convert (build_pointer_type (ptrtype), addr);
5968      addr = build_va_arg_indirect_ref (addr);
5969    }
5970
5971  /* If the address isn't aligned properly for the type, we need a temporary.
5972     FIXME: This is inefficient, usually we can do this in registers.  */
5973  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
5974    {
5975      tree tmp = create_tmp_var (type, "va_arg_tmp");
5976      tree dest_addr = build_fold_addr_expr (tmp);
5977      tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
5978				   3, dest_addr, addr, size_int (rsize));
5979      TREE_ADDRESSABLE (tmp) = 1;
5980      gimplify_and_add (copy, pre_p);
5981      addr = dest_addr;
5982    }
5983
5984  else
5985    addr = fold_convert (ptrtype, addr);
5986
5987  incr
5988    = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
5989  gimplify_assign (valist, incr, post_p);
5990
5991  return build_va_arg_indirect_ref (addr);
5992}
5993
5994/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
5995   Specify whether the vector mode is supported by the hardware.  */
5996
5997static bool
5998sparc_vector_mode_supported_p (enum machine_mode mode)
5999{
6000  return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
6001}
6002
6003/* Return the string to output an unconditional branch to LABEL, which is
6004   the operand number of the label.
6005
6006   DEST is the destination insn (i.e. the label), INSN is the source.  */
6007
6008const char *
6009output_ubranch (rtx dest, int label, rtx insn)
6010{
6011  static char string[64];
6012  bool v9_form = false;
6013  char *p;
6014
6015  if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
6016    {
6017      int delta = (INSN_ADDRESSES (INSN_UID (dest))
6018		   - INSN_ADDRESSES (INSN_UID (insn)));
6019      /* Leave some instructions for "slop".  */
6020      if (delta >= -260000 && delta < 260000)
6021	v9_form = true;
6022    }
6023
6024  if (v9_form)
6025    strcpy (string, "ba%*,pt\t%%xcc, ");
6026  else
6027    strcpy (string, "b%*\t");
6028
6029  p = strchr (string, '\0');
6030  *p++ = '%';
6031  *p++ = 'l';
6032  *p++ = '0' + label;
6033  *p++ = '%';
6034  *p++ = '(';
6035  *p = '\0';
6036
6037  return string;
6038}
6039
6040/* Return the string to output a conditional branch to LABEL, which is
6041   the operand number of the label.  OP is the conditional expression.
6042   XEXP (OP, 0) is assumed to be a condition code register (integer or
6043   floating point) and its mode specifies what kind of comparison we made.
6044
6045   DEST is the destination insn (i.e. the label), INSN is the source.
6046
6047   REVERSED is nonzero if we should reverse the sense of the comparison.
6048
6049   ANNUL is nonzero if we should generate an annulling branch.  */
6050
6051const char *
6052output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
6053		rtx insn)
6054{
6055  static char string[64];
6056  enum rtx_code code = GET_CODE (op);
6057  rtx cc_reg = XEXP (op, 0);
6058  enum machine_mode mode = GET_MODE (cc_reg);
6059  const char *labelno, *branch;
6060  int spaces = 8, far;
6061  char *p;
6062
6063  /* v9 branches are limited to +-1MB.  If it is too far away,
6064     change
6065
6066     bne,pt %xcc, .LC30
6067
6068     to
6069
6070     be,pn %xcc, .+12
6071      nop
6072     ba .LC30
6073
6074     and
6075
6076     fbne,a,pn %fcc2, .LC29
6077
6078     to
6079
6080     fbe,pt %fcc2, .+16
6081      nop
6082     ba .LC29  */
6083
6084  far = TARGET_V9 && (get_attr_length (insn) >= 3);
6085  if (reversed ^ far)
6086    {
6087      /* Reversal of FP compares takes care -- an ordered compare
6088	 becomes an unordered compare and vice versa.  */
6089      if (mode == CCFPmode || mode == CCFPEmode)
6090	code = reverse_condition_maybe_unordered (code);
6091      else
6092	code = reverse_condition (code);
6093    }
6094
6095  /* Start by writing the branch condition.  */
6096  if (mode == CCFPmode || mode == CCFPEmode)
6097    {
6098      switch (code)
6099	{
6100	case NE:
6101	  branch = "fbne";
6102	  break;
6103	case EQ:
6104	  branch = "fbe";
6105	  break;
6106	case GE:
6107	  branch = "fbge";
6108	  break;
6109	case GT:
6110	  branch = "fbg";
6111	  break;
6112	case LE:
6113	  branch = "fble";
6114	  break;
6115	case LT:
6116	  branch = "fbl";
6117	  break;
6118	case UNORDERED:
6119	  branch = "fbu";
6120	  break;
6121	case ORDERED:
6122	  branch = "fbo";
6123	  break;
6124	case UNGT:
6125	  branch = "fbug";
6126	  break;
6127	case UNLT:
6128	  branch = "fbul";
6129	  break;
6130	case UNEQ:
6131	  branch = "fbue";
6132	  break;
6133	case UNGE:
6134	  branch = "fbuge";
6135	  break;
6136	case UNLE:
6137	  branch = "fbule";
6138	  break;
6139	case LTGT:
6140	  branch = "fblg";
6141	  break;
6142
6143	default:
6144	  gcc_unreachable ();
6145	}
6146
6147      /* ??? !v9: FP branches cannot be preceded by another floating point
6148	 insn.  Because there is currently no concept of pre-delay slots,
6149	 we can fix this only by always emitting a nop before a floating
6150	 point branch.  */
6151
6152      string[0] = '\0';
6153      if (! TARGET_V9)
6154	strcpy (string, "nop\n\t");
6155      strcat (string, branch);
6156    }
6157  else
6158    {
6159      switch (code)
6160	{
6161	case NE:
6162	  branch = "bne";
6163	  break;
6164	case EQ:
6165	  branch = "be";
6166	  break;
6167	case GE:
6168	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6169	    branch = "bpos";
6170	  else
6171	    branch = "bge";
6172	  break;
6173	case GT:
6174	  branch = "bg";
6175	  break;
6176	case LE:
6177	  branch = "ble";
6178	  break;
6179	case LT:
6180	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6181	    branch = "bneg";
6182	  else
6183	    branch = "bl";
6184	  break;
6185	case GEU:
6186	  branch = "bgeu";
6187	  break;
6188	case GTU:
6189	  branch = "bgu";
6190	  break;
6191	case LEU:
6192	  branch = "bleu";
6193	  break;
6194	case LTU:
6195	  branch = "blu";
6196	  break;
6197
6198	default:
6199	  gcc_unreachable ();
6200	}
6201      strcpy (string, branch);
6202    }
6203  spaces -= strlen (branch);
6204  p = strchr (string, '\0');
6205
6206  /* Now add the annulling, the label, and a possible noop.  */
6207  if (annul && ! far)
6208    {
6209      strcpy (p, ",a");
6210      p += 2;
6211      spaces -= 2;
6212    }
6213
6214  if (TARGET_V9)
6215    {
6216      rtx note;
6217      int v8 = 0;
6218
6219      if (! far && insn && INSN_ADDRESSES_SET_P ())
6220	{
6221	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
6222		       - INSN_ADDRESSES (INSN_UID (insn)));
6223	  /* Leave some instructions for "slop".  */
6224	  if (delta < -260000 || delta >= 260000)
6225	    v8 = 1;
6226	}
6227
6228      if (mode == CCFPmode || mode == CCFPEmode)
6229	{
6230	  static char v9_fcc_labelno[] = "%%fccX, ";
6231	  /* Set the char indicating the number of the fcc reg to use.  */
6232	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
6233	  labelno = v9_fcc_labelno;
6234	  if (v8)
6235	    {
6236	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
6237	      labelno = "";
6238	    }
6239	}
6240      else if (mode == CCXmode || mode == CCX_NOOVmode)
6241	{
6242	  labelno = "%%xcc, ";
6243	  gcc_assert (! v8);
6244	}
6245      else
6246	{
6247	  labelno = "%%icc, ";
6248	  if (v8)
6249	    labelno = "";
6250	}
6251
6252      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6253	{
6254	  strcpy (p,
6255		  ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6256		  ? ",pt" : ",pn");
6257	  p += 3;
6258	  spaces -= 3;
6259	}
6260    }
6261  else
6262    labelno = "";
6263
6264  if (spaces > 0)
6265    *p++ = '\t';
6266  else
6267    *p++ = ' ';
6268  strcpy (p, labelno);
6269  p = strchr (p, '\0');
6270  if (far)
6271    {
6272      strcpy (p, ".+12\n\t nop\n\tb\t");
6273      /* Skip the next insn if requested or
6274	 if we know that it will be a nop.  */
6275      if (annul || ! final_sequence)
6276        p[3] = '6';
6277      p += 14;
6278    }
6279  *p++ = '%';
6280  *p++ = 'l';
6281  *p++ = label + '0';
6282  *p++ = '%';
6283  *p++ = '#';
6284  *p = '\0';
6285
6286  return string;
6287}
6288
6289/* Emit a library call comparison between floating point X and Y.
6290   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6291   Return the new operator to be used in the comparison sequence.
6292
6293   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6294   values as arguments instead of the TFmode registers themselves,
6295   that's why we cannot call emit_float_lib_cmp.  */
6296
6297rtx
6298sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
6299{
6300  const char *qpfunc;
6301  rtx slot0, slot1, result, tem, tem2, libfunc;
6302  enum machine_mode mode;
6303  enum rtx_code new_comparison;
6304
6305  switch (comparison)
6306    {
6307    case EQ:
6308      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
6309      break;
6310
6311    case NE:
6312      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
6313      break;
6314
6315    case GT:
6316      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
6317      break;
6318
6319    case GE:
6320      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
6321      break;
6322
6323    case LT:
6324      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
6325      break;
6326
6327    case LE:
6328      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
6329      break;
6330
6331    case ORDERED:
6332    case UNORDERED:
6333    case UNGT:
6334    case UNLT:
6335    case UNEQ:
6336    case UNGE:
6337    case UNLE:
6338    case LTGT:
6339      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
6340      break;
6341
6342    default:
6343      gcc_unreachable ();
6344    }
6345
6346  if (TARGET_ARCH64)
6347    {
6348      if (MEM_P (x))
6349	slot0 = x;
6350      else
6351	{
6352	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6353	  emit_move_insn (slot0, x);
6354	}
6355
6356      if (MEM_P (y))
6357	slot1 = y;
6358      else
6359	{
6360	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6361	  emit_move_insn (slot1, y);
6362	}
6363
6364      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6365      emit_library_call (libfunc, LCT_NORMAL,
6366			 DImode, 2,
6367			 XEXP (slot0, 0), Pmode,
6368			 XEXP (slot1, 0), Pmode);
6369      mode = DImode;
6370    }
6371  else
6372    {
6373      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6374      emit_library_call (libfunc, LCT_NORMAL,
6375			 SImode, 2,
6376			 x, TFmode, y, TFmode);
6377      mode = SImode;
6378    }
6379
6380
6381  /* Immediately move the result of the libcall into a pseudo
6382     register so reload doesn't clobber the value if it needs
6383     the return register for a spill reg.  */
6384  result = gen_reg_rtx (mode);
6385  emit_move_insn (result, hard_libcall_value (mode, libfunc));
6386
6387  switch (comparison)
6388    {
6389    default:
6390      return gen_rtx_NE (VOIDmode, result, const0_rtx);
6391    case ORDERED:
6392    case UNORDERED:
6393      new_comparison = (comparison == UNORDERED ? EQ : NE);
6394      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
6395    case UNGT:
6396    case UNGE:
6397      new_comparison = (comparison == UNGT ? GT : NE);
6398      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
6399    case UNLE:
6400      return gen_rtx_NE (VOIDmode, result, const2_rtx);
6401    case UNLT:
6402      tem = gen_reg_rtx (mode);
6403      if (TARGET_ARCH32)
6404	emit_insn (gen_andsi3 (tem, result, const1_rtx));
6405      else
6406	emit_insn (gen_anddi3 (tem, result, const1_rtx));
6407      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
6408    case UNEQ:
6409    case LTGT:
6410      tem = gen_reg_rtx (mode);
6411      if (TARGET_ARCH32)
6412	emit_insn (gen_addsi3 (tem, result, const1_rtx));
6413      else
6414	emit_insn (gen_adddi3 (tem, result, const1_rtx));
6415      tem2 = gen_reg_rtx (mode);
6416      if (TARGET_ARCH32)
6417	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
6418      else
6419	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
6420      new_comparison = (comparison == UNEQ ? EQ : NE);
6421      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
6422    }
6423
6424  gcc_unreachable ();
6425}
6426
6427/* Generate an unsigned DImode to FP conversion.  This is the same code
6428   optabs would emit if we didn't have TFmode patterns.  */
6429
6430void
6431sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
6432{
6433  rtx neglab, donelab, i0, i1, f0, in, out;
6434
6435  out = operands[0];
6436  in = force_reg (DImode, operands[1]);
6437  neglab = gen_label_rtx ();
6438  donelab = gen_label_rtx ();
6439  i0 = gen_reg_rtx (DImode);
6440  i1 = gen_reg_rtx (DImode);
6441  f0 = gen_reg_rtx (mode);
6442
6443  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
6444
6445  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
6446  emit_jump_insn (gen_jump (donelab));
6447  emit_barrier ();
6448
6449  emit_label (neglab);
6450
6451  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
6452  emit_insn (gen_anddi3 (i1, in, const1_rtx));
6453  emit_insn (gen_iordi3 (i0, i0, i1));
6454  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
6455  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
6456
6457  emit_label (donelab);
6458}
6459
6460/* Generate an FP to unsigned DImode conversion.  This is the same code
6461   optabs would emit if we didn't have TFmode patterns.  */
6462
6463void
6464sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
6465{
6466  rtx neglab, donelab, i0, i1, f0, in, out, limit;
6467
6468  out = operands[0];
6469  in = force_reg (mode, operands[1]);
6470  neglab = gen_label_rtx ();
6471  donelab = gen_label_rtx ();
6472  i0 = gen_reg_rtx (DImode);
6473  i1 = gen_reg_rtx (DImode);
6474  limit = gen_reg_rtx (mode);
6475  f0 = gen_reg_rtx (mode);
6476
6477  emit_move_insn (limit,
6478		  CONST_DOUBLE_FROM_REAL_VALUE (
6479		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
6480  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
6481
6482  emit_insn (gen_rtx_SET (VOIDmode,
6483			  out,
6484			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
6485  emit_jump_insn (gen_jump (donelab));
6486  emit_barrier ();
6487
6488  emit_label (neglab);
6489
6490  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
6491  emit_insn (gen_rtx_SET (VOIDmode,
6492			  i0,
6493			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
6494  emit_insn (gen_movdi (i1, const1_rtx));
6495  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
6496  emit_insn (gen_xordi3 (out, i0, i1));
6497
6498  emit_label (donelab);
6499}
6500
6501/* Return the string to output a conditional branch to LABEL, testing
6502   register REG.  LABEL is the operand number of the label; REG is the
6503   operand number of the reg.  OP is the conditional expression.  The mode
6504   of REG says what kind of comparison we made.
6505
6506   DEST is the destination insn (i.e. the label), INSN is the source.
6507
6508   REVERSED is nonzero if we should reverse the sense of the comparison.
6509
6510   ANNUL is nonzero if we should generate an annulling branch.  */
6511
6512const char *
6513output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6514		 int annul, rtx insn)
6515{
6516  static char string[64];
6517  enum rtx_code code = GET_CODE (op);
6518  enum machine_mode mode = GET_MODE (XEXP (op, 0));
6519  rtx note;
6520  int far;
6521  char *p;
6522
6523  /* branch on register are limited to +-128KB.  If it is too far away,
6524     change
6525
6526     brnz,pt %g1, .LC30
6527
6528     to
6529
6530     brz,pn %g1, .+12
6531      nop
6532     ba,pt %xcc, .LC30
6533
6534     and
6535
6536     brgez,a,pn %o1, .LC29
6537
6538     to
6539
6540     brlz,pt %o1, .+16
6541      nop
6542     ba,pt %xcc, .LC29  */
6543
6544  far = get_attr_length (insn) >= 3;
6545
6546  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
6547  if (reversed ^ far)
6548    code = reverse_condition (code);
6549
6550  /* Only 64 bit versions of these instructions exist.  */
6551  gcc_assert (mode == DImode);
6552
6553  /* Start by writing the branch condition.  */
6554
6555  switch (code)
6556    {
6557    case NE:
6558      strcpy (string, "brnz");
6559      break;
6560
6561    case EQ:
6562      strcpy (string, "brz");
6563      break;
6564
6565    case GE:
6566      strcpy (string, "brgez");
6567      break;
6568
6569    case LT:
6570      strcpy (string, "brlz");
6571      break;
6572
6573    case LE:
6574      strcpy (string, "brlez");
6575      break;
6576
6577    case GT:
6578      strcpy (string, "brgz");
6579      break;
6580
6581    default:
6582      gcc_unreachable ();
6583    }
6584
6585  p = strchr (string, '\0');
6586
6587  /* Now add the annulling, reg, label, and nop.  */
6588  if (annul && ! far)
6589    {
6590      strcpy (p, ",a");
6591      p += 2;
6592    }
6593
6594  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6595    {
6596      strcpy (p,
6597	      ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6598	      ? ",pt" : ",pn");
6599      p += 3;
6600    }
6601
6602  *p = p < string + 8 ? '\t' : ' ';
6603  p++;
6604  *p++ = '%';
6605  *p++ = '0' + reg;
6606  *p++ = ',';
6607  *p++ = ' ';
6608  if (far)
6609    {
6610      int veryfar = 1, delta;
6611
6612      if (INSN_ADDRESSES_SET_P ())
6613	{
6614	  delta = (INSN_ADDRESSES (INSN_UID (dest))
6615		   - INSN_ADDRESSES (INSN_UID (insn)));
6616	  /* Leave some instructions for "slop".  */
6617	  if (delta >= -260000 && delta < 260000)
6618	    veryfar = 0;
6619	}
6620
6621      strcpy (p, ".+12\n\t nop\n\t");
6622      /* Skip the next insn if requested or
6623	 if we know that it will be a nop.  */
6624      if (annul || ! final_sequence)
6625        p[3] = '6';
6626      p += 12;
6627      if (veryfar)
6628	{
6629	  strcpy (p, "b\t");
6630	  p += 2;
6631	}
6632      else
6633	{
6634	  strcpy (p, "ba,pt\t%%xcc, ");
6635	  p += 13;
6636	}
6637    }
6638  *p++ = '%';
6639  *p++ = 'l';
6640  *p++ = '0' + label;
6641  *p++ = '%';
6642  *p++ = '#';
6643  *p = '\0';
6644
6645  return string;
6646}
6647
6648/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
6649   Such instructions cannot be used in the delay slot of return insn on v9.
6650   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
6651 */
6652
6653static int
6654epilogue_renumber (register rtx *where, int test)
6655{
6656  register const char *fmt;
6657  register int i;
6658  register enum rtx_code code;
6659
6660  if (*where == 0)
6661    return 0;
6662
6663  code = GET_CODE (*where);
6664
6665  switch (code)
6666    {
6667    case REG:
6668      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
6669	return 1;
6670      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
6671	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
6672    case SCRATCH:
6673    case CC0:
6674    case PC:
6675    case CONST_INT:
6676    case CONST_DOUBLE:
6677      return 0;
6678
6679      /* Do not replace the frame pointer with the stack pointer because
6680	 it can cause the delayed instruction to load below the stack.
6681	 This occurs when instructions like:
6682
6683	 (set (reg/i:SI 24 %i0)
6684	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
6685                       (const_int -20 [0xffffffec])) 0))
6686
6687	 are in the return delayed slot.  */
6688    case PLUS:
6689      if (GET_CODE (XEXP (*where, 0)) == REG
6690	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
6691	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
6692	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
6693	return 1;
6694      break;
6695
6696    case MEM:
6697      if (SPARC_STACK_BIAS
6698	  && GET_CODE (XEXP (*where, 0)) == REG
6699	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
6700	return 1;
6701      break;
6702
6703    default:
6704      break;
6705    }
6706
6707  fmt = GET_RTX_FORMAT (code);
6708
6709  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6710    {
6711      if (fmt[i] == 'E')
6712	{
6713	  register int j;
6714	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
6715	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
6716	      return 1;
6717	}
6718      else if (fmt[i] == 'e'
6719	       && epilogue_renumber (&(XEXP (*where, i)), test))
6720	return 1;
6721    }
6722  return 0;
6723}
6724
6725/* Leaf functions and non-leaf functions have different needs.  */
6726
6727static const int
6728reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
6729
6730static const int
6731reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
6732
6733static const int *const reg_alloc_orders[] = {
6734  reg_leaf_alloc_order,
6735  reg_nonleaf_alloc_order};
6736
6737void
6738order_regs_for_local_alloc (void)
6739{
6740  static int last_order_nonleaf = 1;
6741
6742  if (df_regs_ever_live_p (15) != last_order_nonleaf)
6743    {
6744      last_order_nonleaf = !last_order_nonleaf;
6745      memcpy ((char *) reg_alloc_order,
6746	      (const char *) reg_alloc_orders[last_order_nonleaf],
6747	      FIRST_PSEUDO_REGISTER * sizeof (int));
6748    }
6749}
6750
6751/* Return 1 if REG and MEM are legitimate enough to allow the various
6752   mem<-->reg splits to be run.  */
6753
6754int
6755sparc_splitdi_legitimate (rtx reg, rtx mem)
6756{
6757  /* Punt if we are here by mistake.  */
6758  gcc_assert (reload_completed);
6759
6760  /* We must have an offsettable memory reference.  */
6761  if (! offsettable_memref_p (mem))
6762    return 0;
6763
6764  /* If we have legitimate args for ldd/std, we do not want
6765     the split to happen.  */
6766  if ((REGNO (reg) % 2) == 0
6767      && mem_min_alignment (mem, 8))
6768    return 0;
6769
6770  /* Success.  */
6771  return 1;
6772}
6773
6774/* Return 1 if x and y are some kind of REG and they refer to
6775   different hard registers.  This test is guaranteed to be
6776   run after reload.  */
6777
6778int
6779sparc_absnegfloat_split_legitimate (rtx x, rtx y)
6780{
6781  if (GET_CODE (x) != REG)
6782    return 0;
6783  if (GET_CODE (y) != REG)
6784    return 0;
6785  if (REGNO (x) == REGNO (y))
6786    return 0;
6787  return 1;
6788}
6789
6790/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6791   This makes them candidates for using ldd and std insns.
6792
6793   Note reg1 and reg2 *must* be hard registers.  */
6794
6795int
6796registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
6797{
6798  /* We might have been passed a SUBREG.  */
6799  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6800    return 0;
6801
6802  if (REGNO (reg1) % 2 != 0)
6803    return 0;
6804
6805  /* Integer ldd is deprecated in SPARC V9 */
6806  if (TARGET_V9 && REGNO (reg1) < 32)
6807    return 0;
6808
6809  return (REGNO (reg1) == REGNO (reg2) - 1);
6810}
6811
6812/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6813   an ldd or std insn.
6814
6815   This can only happen when addr1 and addr2, the addresses in mem1
6816   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6817   addr1 must also be aligned on a 64-bit boundary.
6818
6819   Also iff dependent_reg_rtx is not null it should not be used to
6820   compute the address for mem1, i.e. we cannot optimize a sequence
6821   like:
6822   	ld [%o0], %o0
6823	ld [%o0 + 4], %o1
6824   to
6825   	ldd [%o0], %o0
6826   nor:
6827	ld [%g3 + 4], %g3
6828	ld [%g3], %g2
6829   to
6830        ldd [%g3], %g2
6831
6832   But, note that the transformation from:
6833	ld [%g2 + 4], %g3
6834        ld [%g2], %g2
6835   to
6836	ldd [%g2], %g2
6837   is perfectly fine.  Thus, the peephole2 patterns always pass us
6838   the destination register of the first load, never the second one.
6839
6840   For stores we don't have a similar problem, so dependent_reg_rtx is
6841   NULL_RTX.  */
6842
6843int
6844mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
6845{
6846  rtx addr1, addr2;
6847  unsigned int reg1;
6848  HOST_WIDE_INT offset1;
6849
6850  /* The mems cannot be volatile.  */
6851  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6852    return 0;
6853
6854  /* MEM1 should be aligned on a 64-bit boundary.  */
6855  if (MEM_ALIGN (mem1) < 64)
6856    return 0;
6857
6858  addr1 = XEXP (mem1, 0);
6859  addr2 = XEXP (mem2, 0);
6860
6861  /* Extract a register number and offset (if used) from the first addr.  */
6862  if (GET_CODE (addr1) == PLUS)
6863    {
6864      /* If not a REG, return zero.  */
6865      if (GET_CODE (XEXP (addr1, 0)) != REG)
6866	return 0;
6867      else
6868	{
6869          reg1 = REGNO (XEXP (addr1, 0));
6870	  /* The offset must be constant!  */
6871	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6872            return 0;
6873          offset1 = INTVAL (XEXP (addr1, 1));
6874	}
6875    }
6876  else if (GET_CODE (addr1) != REG)
6877    return 0;
6878  else
6879    {
6880      reg1 = REGNO (addr1);
6881      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
6882      offset1 = 0;
6883    }
6884
6885  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
6886  if (GET_CODE (addr2) != PLUS)
6887    return 0;
6888
6889  if (GET_CODE (XEXP (addr2, 0)) != REG
6890      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6891    return 0;
6892
6893  if (reg1 != REGNO (XEXP (addr2, 0)))
6894    return 0;
6895
6896  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6897    return 0;
6898
6899  /* The first offset must be evenly divisible by 8 to ensure the
6900     address is 64 bit aligned.  */
6901  if (offset1 % 8 != 0)
6902    return 0;
6903
6904  /* The offset for the second addr must be 4 more than the first addr.  */
6905  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6906    return 0;
6907
6908  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
6909     instructions.  */
6910  return 1;
6911}
6912
6913/* Return 1 if reg is a pseudo, or is the first register in
6914   a hard register pair.  This makes it suitable for use in
6915   ldd and std insns.  */
6916
6917int
6918register_ok_for_ldd (rtx reg)
6919{
6920  /* We might have been passed a SUBREG.  */
6921  if (!REG_P (reg))
6922    return 0;
6923
6924  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6925    return (REGNO (reg) % 2 == 0);
6926
6927  return 1;
6928}
6929
6930/* Return 1 if OP is a memory whose address is known to be
6931   aligned to 8-byte boundary, or a pseudo during reload.
6932   This makes it suitable for use in ldd and std insns.  */
6933
6934int
6935memory_ok_for_ldd (rtx op)
6936{
6937  if (MEM_P (op))
6938    {
6939      /* In 64-bit mode, we assume that the address is word-aligned.  */
6940      if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
6941	return 0;
6942
6943      if ((reload_in_progress || reload_completed)
6944	  && !strict_memory_address_p (Pmode, XEXP (op, 0)))
6945	return 0;
6946    }
6947  else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
6948    {
6949      if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
6950	return 0;
6951    }
6952  else
6953    return 0;
6954
6955  return 1;
6956}
6957
6958/* Print operand X (an rtx) in assembler syntax to file FILE.
6959   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6960   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
6961
6962void
6963print_operand (FILE *file, rtx x, int code)
6964{
6965  switch (code)
6966    {
6967    case '#':
6968      /* Output an insn in a delay slot.  */
6969      if (final_sequence)
6970        sparc_indent_opcode = 1;
6971      else
6972	fputs ("\n\t nop", file);
6973      return;
6974    case '*':
6975      /* Output an annul flag if there's nothing for the delay slot and we
6976	 are optimizing.  This is always used with '(' below.
6977         Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
6978	 this is a dbx bug.  So, we only do this when optimizing.
6979         On UltraSPARC, a branch in a delay slot causes a pipeline flush.
6980	 Always emit a nop in case the next instruction is a branch.  */
6981      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
6982	fputs (",a", file);
6983      return;
6984    case '(':
6985      /* Output a 'nop' if there's nothing for the delay slot and we are
6986	 not optimizing.  This is always used with '*' above.  */
6987      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
6988	fputs ("\n\t nop", file);
6989      else if (final_sequence)
6990        sparc_indent_opcode = 1;
6991      return;
6992    case ')':
6993      /* Output the right displacement from the saved PC on function return.
6994	 The caller may have placed an "unimp" insn immediately after the call
6995	 so we have to account for it.  This insn is used in the 32-bit ABI
6996	 when calling a function that returns a non zero-sized structure.  The
6997	 64-bit ABI doesn't have it.  Be careful to have this test be the same
6998	 as that for the call.  The exception is when sparc_std_struct_return
6999	 is enabled, the psABI is followed exactly and the adjustment is made
7000	 by the code in sparc_struct_value_rtx.  The call emitted is the same
7001	 when sparc_std_struct_return is enabled. */
7002     if (!TARGET_ARCH64
7003	 && cfun->returns_struct
7004	 && !sparc_std_struct_return
7005	 && DECL_SIZE (DECL_RESULT (current_function_decl))
7006	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
7007	     == INTEGER_CST
7008	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
7009	fputs ("12", file);
7010      else
7011        fputc ('8', file);
7012      return;
7013    case '_':
7014      /* Output the Embedded Medium/Anywhere code model base register.  */
7015      fputs (EMBMEDANY_BASE_REG, file);
7016      return;
7017    case '&':
7018      /* Print some local dynamic TLS name.  */
7019      assemble_name (file, get_some_local_dynamic_name ());
7020      return;
7021
7022    case 'Y':
7023      /* Adjust the operand to take into account a RESTORE operation.  */
7024      if (GET_CODE (x) == CONST_INT)
7025	break;
7026      else if (GET_CODE (x) != REG)
7027	output_operand_lossage ("invalid %%Y operand");
7028      else if (REGNO (x) < 8)
7029	fputs (reg_names[REGNO (x)], file);
7030      else if (REGNO (x) >= 24 && REGNO (x) < 32)
7031	fputs (reg_names[REGNO (x)-16], file);
7032      else
7033	output_operand_lossage ("invalid %%Y operand");
7034      return;
7035    case 'L':
7036      /* Print out the low order register name of a register pair.  */
7037      if (WORDS_BIG_ENDIAN)
7038	fputs (reg_names[REGNO (x)+1], file);
7039      else
7040	fputs (reg_names[REGNO (x)], file);
7041      return;
7042    case 'H':
7043      /* Print out the high order register name of a register pair.  */
7044      if (WORDS_BIG_ENDIAN)
7045	fputs (reg_names[REGNO (x)], file);
7046      else
7047	fputs (reg_names[REGNO (x)+1], file);
7048      return;
7049    case 'R':
7050      /* Print out the second register name of a register pair or quad.
7051	 I.e., R (%o0) => %o1.  */
7052      fputs (reg_names[REGNO (x)+1], file);
7053      return;
7054    case 'S':
7055      /* Print out the third register name of a register quad.
7056	 I.e., S (%o0) => %o2.  */
7057      fputs (reg_names[REGNO (x)+2], file);
7058      return;
7059    case 'T':
7060      /* Print out the fourth register name of a register quad.
7061	 I.e., T (%o0) => %o3.  */
7062      fputs (reg_names[REGNO (x)+3], file);
7063      return;
7064    case 'x':
7065      /* Print a condition code register.  */
7066      if (REGNO (x) == SPARC_ICC_REG)
7067	{
7068	  /* We don't handle CC[X]_NOOVmode because they're not supposed
7069	     to occur here.  */
7070	  if (GET_MODE (x) == CCmode)
7071	    fputs ("%icc", file);
7072	  else if (GET_MODE (x) == CCXmode)
7073	    fputs ("%xcc", file);
7074	  else
7075	    gcc_unreachable ();
7076	}
7077      else
7078	/* %fccN register */
7079	fputs (reg_names[REGNO (x)], file);
7080      return;
7081    case 'm':
7082      /* Print the operand's address only.  */
7083      output_address (XEXP (x, 0));
7084      return;
7085    case 'r':
7086      /* In this case we need a register.  Use %g0 if the
7087	 operand is const0_rtx.  */
7088      if (x == const0_rtx
7089	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
7090	{
7091	  fputs ("%g0", file);
7092	  return;
7093	}
7094      else
7095	break;
7096
7097    case 'A':
7098      switch (GET_CODE (x))
7099	{
7100	case IOR: fputs ("or", file); break;
7101	case AND: fputs ("and", file); break;
7102	case XOR: fputs ("xor", file); break;
7103	default: output_operand_lossage ("invalid %%A operand");
7104	}
7105      return;
7106
7107    case 'B':
7108      switch (GET_CODE (x))
7109	{
7110	case IOR: fputs ("orn", file); break;
7111	case AND: fputs ("andn", file); break;
7112	case XOR: fputs ("xnor", file); break;
7113	default: output_operand_lossage ("invalid %%B operand");
7114	}
7115      return;
7116
7117      /* These are used by the conditional move instructions.  */
7118    case 'c' :
7119    case 'C':
7120      {
7121	enum rtx_code rc = GET_CODE (x);
7122
7123	if (code == 'c')
7124	  {
7125	    enum machine_mode mode = GET_MODE (XEXP (x, 0));
7126	    if (mode == CCFPmode || mode == CCFPEmode)
7127	      rc = reverse_condition_maybe_unordered (GET_CODE (x));
7128	    else
7129	      rc = reverse_condition (GET_CODE (x));
7130	  }
7131	switch (rc)
7132	  {
7133	  case NE: fputs ("ne", file); break;
7134	  case EQ: fputs ("e", file); break;
7135	  case GE: fputs ("ge", file); break;
7136	  case GT: fputs ("g", file); break;
7137	  case LE: fputs ("le", file); break;
7138	  case LT: fputs ("l", file); break;
7139	  case GEU: fputs ("geu", file); break;
7140	  case GTU: fputs ("gu", file); break;
7141	  case LEU: fputs ("leu", file); break;
7142	  case LTU: fputs ("lu", file); break;
7143	  case LTGT: fputs ("lg", file); break;
7144	  case UNORDERED: fputs ("u", file); break;
7145	  case ORDERED: fputs ("o", file); break;
7146	  case UNLT: fputs ("ul", file); break;
7147	  case UNLE: fputs ("ule", file); break;
7148	  case UNGT: fputs ("ug", file); break;
7149	  case UNGE: fputs ("uge", file); break;
7150	  case UNEQ: fputs ("ue", file); break;
7151	  default: output_operand_lossage (code == 'c'
7152					   ? "invalid %%c operand"
7153					   : "invalid %%C operand");
7154	  }
7155	return;
7156      }
7157
7158      /* These are used by the movr instruction pattern.  */
7159    case 'd':
7160    case 'D':
7161      {
7162	enum rtx_code rc = (code == 'd'
7163			    ? reverse_condition (GET_CODE (x))
7164			    : GET_CODE (x));
7165	switch (rc)
7166	  {
7167	  case NE: fputs ("ne", file); break;
7168	  case EQ: fputs ("e", file); break;
7169	  case GE: fputs ("gez", file); break;
7170	  case LT: fputs ("lz", file); break;
7171	  case LE: fputs ("lez", file); break;
7172	  case GT: fputs ("gz", file); break;
7173	  default: output_operand_lossage (code == 'd'
7174					   ? "invalid %%d operand"
7175					   : "invalid %%D operand");
7176	  }
7177	return;
7178      }
7179
7180    case 'b':
7181      {
7182	/* Print a sign-extended character.  */
7183	int i = trunc_int_for_mode (INTVAL (x), QImode);
7184	fprintf (file, "%d", i);
7185	return;
7186      }
7187
7188    case 'f':
7189      /* Operand must be a MEM; write its address.  */
7190      if (GET_CODE (x) != MEM)
7191	output_operand_lossage ("invalid %%f operand");
7192      output_address (XEXP (x, 0));
7193      return;
7194
7195    case 's':
7196      {
7197	/* Print a sign-extended 32-bit value.  */
7198	HOST_WIDE_INT i;
7199	if (GET_CODE(x) == CONST_INT)
7200	  i = INTVAL (x);
7201	else if (GET_CODE(x) == CONST_DOUBLE)
7202	  i = CONST_DOUBLE_LOW (x);
7203	else
7204	  {
7205	    output_operand_lossage ("invalid %%s operand");
7206	    return;
7207	  }
7208	i = trunc_int_for_mode (i, SImode);
7209	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
7210	return;
7211      }
7212
7213    case 0:
7214      /* Do nothing special.  */
7215      break;
7216
7217    default:
7218      /* Undocumented flag.  */
7219      output_operand_lossage ("invalid operand output code");
7220    }
7221
7222  if (GET_CODE (x) == REG)
7223    fputs (reg_names[REGNO (x)], file);
7224  else if (GET_CODE (x) == MEM)
7225    {
7226      fputc ('[', file);
7227	/* Poor Sun assembler doesn't understand absolute addressing.  */
7228      if (CONSTANT_P (XEXP (x, 0)))
7229	fputs ("%g0+", file);
7230      output_address (XEXP (x, 0));
7231      fputc (']', file);
7232    }
7233  else if (GET_CODE (x) == HIGH)
7234    {
7235      fputs ("%hi(", file);
7236      output_addr_const (file, XEXP (x, 0));
7237      fputc (')', file);
7238    }
7239  else if (GET_CODE (x) == LO_SUM)
7240    {
7241      print_operand (file, XEXP (x, 0), 0);
7242      if (TARGET_CM_MEDMID)
7243	fputs ("+%l44(", file);
7244      else
7245	fputs ("+%lo(", file);
7246      output_addr_const (file, XEXP (x, 1));
7247      fputc (')', file);
7248    }
7249  else if (GET_CODE (x) == CONST_DOUBLE
7250	   && (GET_MODE (x) == VOIDmode
7251	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
7252    {
7253      if (CONST_DOUBLE_HIGH (x) == 0)
7254	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
7255      else if (CONST_DOUBLE_HIGH (x) == -1
7256	       && CONST_DOUBLE_LOW (x) < 0)
7257	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
7258      else
7259	output_operand_lossage ("long long constant not a valid immediate operand");
7260    }
7261  else if (GET_CODE (x) == CONST_DOUBLE)
7262    output_operand_lossage ("floating point constant not a valid immediate operand");
7263  else { output_addr_const (file, x); }
7264}
7265
7266/* Target hook for assembling integer objects.  The sparc version has
7267   special handling for aligned DI-mode objects.  */
7268
7269static bool
7270sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7271{
7272  /* ??? We only output .xword's for symbols and only then in environments
7273     where the assembler can handle them.  */
7274  if (aligned_p && size == 8
7275      && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7276    {
7277      if (TARGET_V9)
7278	{
7279	  assemble_integer_with_op ("\t.xword\t", x);
7280	  return true;
7281	}
7282      else
7283	{
7284	  assemble_aligned_integer (4, const0_rtx);
7285	  assemble_aligned_integer (4, x);
7286	  return true;
7287	}
7288    }
7289  return default_assemble_integer (x, size, aligned_p);
7290}
7291
7292/* Return the value of a code used in the .proc pseudo-op that says
7293   what kind of result this function returns.  For non-C types, we pick
7294   the closest C type.  */
7295
7296#ifndef SHORT_TYPE_SIZE
7297#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
7298#endif
7299
7300#ifndef INT_TYPE_SIZE
7301#define INT_TYPE_SIZE BITS_PER_WORD
7302#endif
7303
7304#ifndef LONG_TYPE_SIZE
7305#define LONG_TYPE_SIZE BITS_PER_WORD
7306#endif
7307
7308#ifndef LONG_LONG_TYPE_SIZE
7309#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
7310#endif
7311
7312#ifndef FLOAT_TYPE_SIZE
7313#define FLOAT_TYPE_SIZE BITS_PER_WORD
7314#endif
7315
7316#ifndef DOUBLE_TYPE_SIZE
7317#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7318#endif
7319
7320#ifndef LONG_DOUBLE_TYPE_SIZE
7321#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7322#endif
7323
7324unsigned long
7325sparc_type_code (register tree type)
7326{
7327  register unsigned long qualifiers = 0;
7328  register unsigned shift;
7329
7330  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
7331     setting more, since some assemblers will give an error for this.  Also,
7332     we must be careful to avoid shifts of 32 bits or more to avoid getting
7333     unpredictable results.  */
7334
7335  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
7336    {
7337      switch (TREE_CODE (type))
7338	{
7339	case ERROR_MARK:
7340	  return qualifiers;
7341
7342	case ARRAY_TYPE:
7343	  qualifiers |= (3 << shift);
7344	  break;
7345
7346	case FUNCTION_TYPE:
7347	case METHOD_TYPE:
7348	  qualifiers |= (2 << shift);
7349	  break;
7350
7351	case POINTER_TYPE:
7352	case REFERENCE_TYPE:
7353	case OFFSET_TYPE:
7354	  qualifiers |= (1 << shift);
7355	  break;
7356
7357	case RECORD_TYPE:
7358	  return (qualifiers | 8);
7359
7360	case UNION_TYPE:
7361	case QUAL_UNION_TYPE:
7362	  return (qualifiers | 9);
7363
7364	case ENUMERAL_TYPE:
7365	  return (qualifiers | 10);
7366
7367	case VOID_TYPE:
7368	  return (qualifiers | 16);
7369
7370	case INTEGER_TYPE:
7371	  /* If this is a range type, consider it to be the underlying
7372	     type.  */
7373	  if (TREE_TYPE (type) != 0)
7374	    break;
7375
7376	  /* Carefully distinguish all the standard types of C,
7377	     without messing up if the language is not C.  We do this by
7378	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
7379	     look at both the names and the above fields, but that's redundant.
7380	     Any type whose size is between two C types will be considered
7381	     to be the wider of the two types.  Also, we do not have a
7382	     special code to use for "long long", so anything wider than
7383	     long is treated the same.  Note that we can't distinguish
7384	     between "int" and "long" in this code if they are the same
7385	     size, but that's fine, since neither can the assembler.  */
7386
7387	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
7388	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
7389
7390	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
7391	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
7392
7393	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
7394	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
7395
7396	  else
7397	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
7398
7399	case REAL_TYPE:
7400	  /* If this is a range type, consider it to be the underlying
7401	     type.  */
7402	  if (TREE_TYPE (type) != 0)
7403	    break;
7404
7405	  /* Carefully distinguish all the standard types of C,
7406	     without messing up if the language is not C.  */
7407
7408	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
7409	    return (qualifiers | 6);
7410
7411	  else
7412	    return (qualifiers | 7);
7413
7414	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
7415	  /* ??? We need to distinguish between double and float complex types,
7416	     but I don't know how yet because I can't reach this code from
7417	     existing front-ends.  */
7418	  return (qualifiers | 7);	/* Who knows? */
7419
7420	case VECTOR_TYPE:
7421	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
7422	case LANG_TYPE:		/* ? */
7423	  return qualifiers;
7424
7425	default:
7426	  gcc_unreachable ();		/* Not a type! */
7427        }
7428    }
7429
7430  return qualifiers;
7431}
7432
7433/* Nested function support.  */
7434
7435/* Emit RTL insns to initialize the variable parts of a trampoline.
7436   FNADDR is an RTX for the address of the function's pure code.
7437   CXT is an RTX for the static chain value for the function.
7438
7439   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7440   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7441   (to store insns).  This is a bit excessive.  Perhaps a different
7442   mechanism would be better here.
7443
7444   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
7445
7446static void
7447sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7448{
7449  /* SPARC 32-bit trampoline:
7450
7451 	sethi	%hi(fn), %g1
7452 	sethi	%hi(static), %g2
7453 	jmp	%g1+%lo(fn)
7454 	or	%g2, %lo(static), %g2
7455
7456    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
7457    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
7458   */
7459
7460  emit_move_insn
7461    (adjust_address (m_tramp, SImode, 0),
7462     expand_binop (SImode, ior_optab,
7463		   expand_shift (RSHIFT_EXPR, SImode, fnaddr,
7464				 size_int (10), 0, 1),
7465		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
7466		   NULL_RTX, 1, OPTAB_DIRECT));
7467
7468  emit_move_insn
7469    (adjust_address (m_tramp, SImode, 4),
7470     expand_binop (SImode, ior_optab,
7471		   expand_shift (RSHIFT_EXPR, SImode, cxt,
7472				 size_int (10), 0, 1),
7473		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
7474		   NULL_RTX, 1, OPTAB_DIRECT));
7475
7476  emit_move_insn
7477    (adjust_address (m_tramp, SImode, 8),
7478     expand_binop (SImode, ior_optab,
7479		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
7480		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
7481		   NULL_RTX, 1, OPTAB_DIRECT));
7482
7483  emit_move_insn
7484    (adjust_address (m_tramp, SImode, 12),
7485     expand_binop (SImode, ior_optab,
7486		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
7487		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
7488		   NULL_RTX, 1, OPTAB_DIRECT));
7489
7490  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
7491     aligned on a 16 byte boundary so one flush clears it all.  */
7492  emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
7493  if (sparc_cpu != PROCESSOR_ULTRASPARC
7494      && sparc_cpu != PROCESSOR_ULTRASPARC3
7495      && sparc_cpu != PROCESSOR_NIAGARA
7496      && sparc_cpu != PROCESSOR_NIAGARA2)
7497    emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
7498
7499  /* Call __enable_execute_stack after writing onto the stack to make sure
7500     the stack address is accessible.  */
7501#ifdef ENABLE_EXECUTE_STACK
7502  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7503                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7504#endif
7505
7506}
7507
7508/* The 64-bit version is simpler because it makes more sense to load the
7509   values as "immediate" data out of the trampoline.  It's also easier since
7510   we can read the PC without clobbering a register.  */
7511
7512static void
7513sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7514{
7515  /* SPARC 64-bit trampoline:
7516
7517	rd	%pc, %g1
7518	ldx	[%g1+24], %g5
7519	jmp	%g5
7520	ldx	[%g1+16], %g5
7521	+16 bytes data
7522   */
7523
7524  emit_move_insn (adjust_address (m_tramp, SImode, 0),
7525		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
7526  emit_move_insn (adjust_address (m_tramp, SImode, 4),
7527		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
7528  emit_move_insn (adjust_address (m_tramp, SImode, 8),
7529		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
7530  emit_move_insn (adjust_address (m_tramp, SImode, 12),
7531		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
7532  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
7533  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
7534  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
7535
7536  if (sparc_cpu != PROCESSOR_ULTRASPARC
7537      && sparc_cpu != PROCESSOR_ULTRASPARC3
7538      && sparc_cpu != PROCESSOR_NIAGARA
7539      && sparc_cpu != PROCESSOR_NIAGARA2)
7540    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
7541
7542  /* Call __enable_execute_stack after writing onto the stack to make sure
7543     the stack address is accessible.  */
7544#ifdef ENABLE_EXECUTE_STACK
7545  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7546                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7547#endif
7548}
7549
7550/* Worker for TARGET_TRAMPOLINE_INIT.  */
7551
7552static void
7553sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
7554{
7555  rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
7556  cxt = force_reg (Pmode, cxt);
7557  if (TARGET_ARCH64)
7558    sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
7559  else
7560    sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
7561}
7562
7563/* Adjust the cost of a scheduling dependency.  Return the new cost of
7564   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7565
7566static int
7567supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7568{
7569  enum attr_type insn_type;
7570
7571  if (! recog_memoized (insn))
7572    return 0;
7573
7574  insn_type = get_attr_type (insn);
7575
7576  if (REG_NOTE_KIND (link) == 0)
7577    {
7578      /* Data dependency; DEP_INSN writes a register that INSN reads some
7579	 cycles later.  */
7580
7581      /* if a load, then the dependence must be on the memory address;
7582	 add an extra "cycle".  Note that the cost could be two cycles
7583	 if the reg was written late in an instruction group; we ca not tell
7584	 here.  */
7585      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7586	return cost + 3;
7587
7588      /* Get the delay only if the address of the store is the dependence.  */
7589      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7590	{
7591	  rtx pat = PATTERN(insn);
7592	  rtx dep_pat = PATTERN (dep_insn);
7593
7594	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7595	    return cost;  /* This should not happen!  */
7596
7597	  /* The dependency between the two instructions was on the data that
7598	     is being stored.  Assume that this implies that the address of the
7599	     store is not dependent.  */
7600	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7601	    return cost;
7602
7603	  return cost + 3;  /* An approximation.  */
7604	}
7605
7606      /* A shift instruction cannot receive its data from an instruction
7607	 in the same cycle; add a one cycle penalty.  */
7608      if (insn_type == TYPE_SHIFT)
7609	return cost + 3;   /* Split before cascade into shift.  */
7610    }
7611  else
7612    {
7613      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7614	 INSN writes some cycles later.  */
7615
7616      /* These are only significant for the fpu unit; writing a fp reg before
7617         the fpu has finished with it stalls the processor.  */
7618
7619      /* Reusing an integer register causes no problems.  */
7620      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7621	return 0;
7622    }
7623
7624  return cost;
7625}
7626
7627static int
7628hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7629{
7630  enum attr_type insn_type, dep_type;
7631  rtx pat = PATTERN(insn);
7632  rtx dep_pat = PATTERN (dep_insn);
7633
7634  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7635    return cost;
7636
7637  insn_type = get_attr_type (insn);
7638  dep_type = get_attr_type (dep_insn);
7639
7640  switch (REG_NOTE_KIND (link))
7641    {
7642    case 0:
7643      /* Data dependency; DEP_INSN writes a register that INSN reads some
7644	 cycles later.  */
7645
7646      switch (insn_type)
7647	{
7648	case TYPE_STORE:
7649	case TYPE_FPSTORE:
7650	  /* Get the delay iff the address of the store is the dependence.  */
7651	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7652	    return cost;
7653
7654	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7655	    return cost;
7656	  return cost + 3;
7657
7658	case TYPE_LOAD:
7659	case TYPE_SLOAD:
7660	case TYPE_FPLOAD:
7661	  /* If a load, then the dependence must be on the memory address.  If
7662	     the addresses aren't equal, then it might be a false dependency */
7663	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7664	    {
7665	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7666		  || GET_CODE (SET_DEST (dep_pat)) != MEM
7667		  || GET_CODE (SET_SRC (pat)) != MEM
7668		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7669				    XEXP (SET_SRC (pat), 0)))
7670		return cost + 2;
7671
7672	      return cost + 8;
7673	    }
7674	  break;
7675
7676	case TYPE_BRANCH:
7677	  /* Compare to branch latency is 0.  There is no benefit from
7678	     separating compare and branch.  */
7679	  if (dep_type == TYPE_COMPARE)
7680	    return 0;
7681	  /* Floating point compare to branch latency is less than
7682	     compare to conditional move.  */
7683	  if (dep_type == TYPE_FPCMP)
7684	    return cost - 1;
7685	  break;
7686	default:
7687	  break;
7688	}
7689	break;
7690
7691    case REG_DEP_ANTI:
7692      /* Anti-dependencies only penalize the fpu unit.  */
7693      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7694        return 0;
7695      break;
7696
7697    default:
7698      break;
7699    }
7700
7701  return cost;
7702}
7703
7704static int
7705sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
7706{
7707  switch (sparc_cpu)
7708    {
7709    case PROCESSOR_SUPERSPARC:
7710      cost = supersparc_adjust_cost (insn, link, dep, cost);
7711      break;
7712    case PROCESSOR_HYPERSPARC:
7713    case PROCESSOR_SPARCLITE86X:
7714      cost = hypersparc_adjust_cost (insn, link, dep, cost);
7715      break;
7716    default:
7717      break;
7718    }
7719  return cost;
7720}
7721
7722static void
7723sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7724		  int sched_verbose ATTRIBUTE_UNUSED,
7725		  int max_ready ATTRIBUTE_UNUSED)
7726{}
7727
7728static int
7729sparc_use_sched_lookahead (void)
7730{
7731  if (sparc_cpu == PROCESSOR_NIAGARA
7732      || sparc_cpu == PROCESSOR_NIAGARA2)
7733    return 0;
7734  if (sparc_cpu == PROCESSOR_ULTRASPARC
7735      || sparc_cpu == PROCESSOR_ULTRASPARC3)
7736    return 4;
7737  if ((1 << sparc_cpu) &
7738      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7739       (1 << PROCESSOR_SPARCLITE86X)))
7740    return 3;
7741  return 0;
7742}
7743
7744static int
7745sparc_issue_rate (void)
7746{
7747  switch (sparc_cpu)
7748    {
7749    case PROCESSOR_NIAGARA:
7750    case PROCESSOR_NIAGARA2:
7751    default:
7752      return 1;
7753    case PROCESSOR_V9:
7754      /* Assume V9 processors are capable of at least dual-issue.  */
7755      return 2;
7756    case PROCESSOR_SUPERSPARC:
7757      return 3;
7758    case PROCESSOR_HYPERSPARC:
7759    case PROCESSOR_SPARCLITE86X:
7760      return 2;
7761    case PROCESSOR_ULTRASPARC:
7762    case PROCESSOR_ULTRASPARC3:
7763      return 4;
7764    }
7765}
7766
7767static int
7768set_extends (rtx insn)
7769{
7770  register rtx pat = PATTERN (insn);
7771
7772  switch (GET_CODE (SET_SRC (pat)))
7773    {
7774      /* Load and some shift instructions zero extend.  */
7775    case MEM:
7776    case ZERO_EXTEND:
7777      /* sethi clears the high bits */
7778    case HIGH:
7779      /* LO_SUM is used with sethi.  sethi cleared the high
7780	 bits and the values used with lo_sum are positive */
7781    case LO_SUM:
7782      /* Store flag stores 0 or 1 */
7783    case LT: case LTU:
7784    case GT: case GTU:
7785    case LE: case LEU:
7786    case GE: case GEU:
7787    case EQ:
7788    case NE:
7789      return 1;
7790    case AND:
7791      {
7792	rtx op0 = XEXP (SET_SRC (pat), 0);
7793	rtx op1 = XEXP (SET_SRC (pat), 1);
7794	if (GET_CODE (op1) == CONST_INT)
7795	  return INTVAL (op1) >= 0;
7796	if (GET_CODE (op0) != REG)
7797	  return 0;
7798	if (sparc_check_64 (op0, insn) == 1)
7799	  return 1;
7800	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7801      }
7802    case IOR:
7803    case XOR:
7804      {
7805	rtx op0 = XEXP (SET_SRC (pat), 0);
7806	rtx op1 = XEXP (SET_SRC (pat), 1);
7807	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
7808	  return 0;
7809	if (GET_CODE (op1) == CONST_INT)
7810	  return INTVAL (op1) >= 0;
7811	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7812      }
7813    case LSHIFTRT:
7814      return GET_MODE (SET_SRC (pat)) == SImode;
7815      /* Positive integers leave the high bits zero.  */
7816    case CONST_DOUBLE:
7817      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
7818    case CONST_INT:
7819      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
7820    case ASHIFTRT:
7821    case SIGN_EXTEND:
7822      return - (GET_MODE (SET_SRC (pat)) == SImode);
7823    case REG:
7824      return sparc_check_64 (SET_SRC (pat), insn);
7825    default:
7826      return 0;
7827    }
7828}
7829
7830/* We _ought_ to have only one kind per function, but...  */
7831static GTY(()) rtx sparc_addr_diff_list;
7832static GTY(()) rtx sparc_addr_list;
7833
7834void
7835sparc_defer_case_vector (rtx lab, rtx vec, int diff)
7836{
7837  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7838  if (diff)
7839    sparc_addr_diff_list
7840      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7841  else
7842    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7843}
7844
7845static void
7846sparc_output_addr_vec (rtx vec)
7847{
7848  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7849  int idx, vlen = XVECLEN (body, 0);
7850
7851#ifdef ASM_OUTPUT_ADDR_VEC_START
7852  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7853#endif
7854
7855#ifdef ASM_OUTPUT_CASE_LABEL
7856  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7857			 NEXT_INSN (lab));
7858#else
7859  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7860#endif
7861
7862  for (idx = 0; idx < vlen; idx++)
7863    {
7864      ASM_OUTPUT_ADDR_VEC_ELT
7865	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7866    }
7867
7868#ifdef ASM_OUTPUT_ADDR_VEC_END
7869  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7870#endif
7871}
7872
7873static void
7874sparc_output_addr_diff_vec (rtx vec)
7875{
7876  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7877  rtx base = XEXP (XEXP (body, 0), 0);
7878  int idx, vlen = XVECLEN (body, 1);
7879
7880#ifdef ASM_OUTPUT_ADDR_VEC_START
7881  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7882#endif
7883
7884#ifdef ASM_OUTPUT_CASE_LABEL
7885  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7886			 NEXT_INSN (lab));
7887#else
7888  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7889#endif
7890
7891  for (idx = 0; idx < vlen; idx++)
7892    {
7893      ASM_OUTPUT_ADDR_DIFF_ELT
7894        (asm_out_file,
7895         body,
7896         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7897         CODE_LABEL_NUMBER (base));
7898    }
7899
7900#ifdef ASM_OUTPUT_ADDR_VEC_END
7901  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7902#endif
7903}
7904
7905static void
7906sparc_output_deferred_case_vectors (void)
7907{
7908  rtx t;
7909  int align;
7910
7911  if (sparc_addr_list == NULL_RTX
7912      && sparc_addr_diff_list == NULL_RTX)
7913    return;
7914
7915  /* Align to cache line in the function's code section.  */
7916  switch_to_section (current_function_section ());
7917
7918  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7919  if (align > 0)
7920    ASM_OUTPUT_ALIGN (asm_out_file, align);
7921
7922  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7923    sparc_output_addr_vec (XEXP (t, 0));
7924  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7925    sparc_output_addr_diff_vec (XEXP (t, 0));
7926
7927  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7928}
7929
7930/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7931   unknown.  Return 1 if the high bits are zero, -1 if the register is
7932   sign extended.  */
7933int
7934sparc_check_64 (rtx x, rtx insn)
7935{
7936  /* If a register is set only once it is safe to ignore insns this
7937     code does not know how to handle.  The loop will either recognize
7938     the single set and return the correct value or fail to recognize
7939     it and return 0.  */
7940  int set_once = 0;
7941  rtx y = x;
7942
7943  gcc_assert (GET_CODE (x) == REG);
7944
7945  if (GET_MODE (x) == DImode)
7946    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
7947
7948  if (flag_expensive_optimizations
7949      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
7950    set_once = 1;
7951
7952  if (insn == 0)
7953    {
7954      if (set_once)
7955	insn = get_last_insn_anywhere ();
7956      else
7957	return 0;
7958    }
7959
7960  while ((insn = PREV_INSN (insn)))
7961    {
7962      switch (GET_CODE (insn))
7963	{
7964	case JUMP_INSN:
7965	case NOTE:
7966	  break;
7967	case CODE_LABEL:
7968	case CALL_INSN:
7969	default:
7970	  if (! set_once)
7971	    return 0;
7972	  break;
7973	case INSN:
7974	  {
7975	    rtx pat = PATTERN (insn);
7976	    if (GET_CODE (pat) != SET)
7977	      return 0;
7978	    if (rtx_equal_p (x, SET_DEST (pat)))
7979	      return set_extends (insn);
7980	    if (y && rtx_equal_p (y, SET_DEST (pat)))
7981	      return set_extends (insn);
7982	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
7983	      return 0;
7984	  }
7985	}
7986    }
7987  return 0;
7988}
7989
7990/* Returns assembly code to perform a DImode shift using
7991   a 64-bit global or out register on SPARC-V8+.  */
7992const char *
7993output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
7994{
7995  static char asm_code[60];
7996
7997  /* The scratch register is only required when the destination
7998     register is not a 64-bit global or out register.  */
7999  if (which_alternative != 2)
8000    operands[3] = operands[0];
8001
8002  /* We can only shift by constants <= 63. */
8003  if (GET_CODE (operands[2]) == CONST_INT)
8004    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
8005
8006  if (GET_CODE (operands[1]) == CONST_INT)
8007    {
8008      output_asm_insn ("mov\t%1, %3", operands);
8009    }
8010  else
8011    {
8012      output_asm_insn ("sllx\t%H1, 32, %3", operands);
8013      if (sparc_check_64 (operands[1], insn) <= 0)
8014	output_asm_insn ("srl\t%L1, 0, %L1", operands);
8015      output_asm_insn ("or\t%L1, %3, %3", operands);
8016    }
8017
8018  strcpy(asm_code, opcode);
8019
8020  if (which_alternative != 2)
8021    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
8022  else
8023    return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
8024}
8025
8026/* Output rtl to increment the profiler label LABELNO
8027   for profiling a function entry.  */
8028
8029void
8030sparc_profile_hook (int labelno)
8031{
8032  char buf[32];
8033  rtx lab, fun;
8034
8035  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
8036  if (NO_PROFILE_COUNTERS)
8037    {
8038      emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
8039    }
8040  else
8041    {
8042      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8043      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
8044      emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
8045    }
8046}
8047
8048/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
8049
8050static void
8051sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
8052				     tree decl ATTRIBUTE_UNUSED)
8053{
8054  fprintf (asm_out_file, "\t.section\t\"%s\"", name);
8055
8056  if (!(flags & SECTION_DEBUG))
8057    fputs (",#alloc", asm_out_file);
8058  if (flags & SECTION_WRITE)
8059    fputs (",#write", asm_out_file);
8060  if (flags & SECTION_TLS)
8061    fputs (",#tls", asm_out_file);
8062  if (flags & SECTION_CODE)
8063    fputs (",#execinstr", asm_out_file);
8064
8065  /* ??? Handle SECTION_BSS.  */
8066
8067  fputc ('\n', asm_out_file);
8068}
8069
8070/* We do not allow indirect calls to be optimized into sibling calls.
8071
8072   We cannot use sibling calls when delayed branches are disabled
8073   because they will likely require the call delay slot to be filled.
8074
8075   Also, on SPARC 32-bit we cannot emit a sibling call when the
8076   current function returns a structure.  This is because the "unimp
8077   after call" convention would cause the callee to return to the
8078   wrong place.  The generic code already disallows cases where the
8079   function being called returns a structure.
8080
8081   It may seem strange how this last case could occur.  Usually there
8082   is code after the call which jumps to epilogue code which dumps the
8083   return value into the struct return area.  That ought to invalidate
8084   the sibling call right?  Well, in the C++ case we can end up passing
8085   the pointer to the struct return area to a constructor (which returns
8086   void) and then nothing else happens.  Such a sibling call would look
8087   valid without the added check here.
8088
8089   VxWorks PIC PLT entries require the global pointer to be initialized
8090   on entry.  We therefore can't emit sibling calls to them.  */
8091static bool
8092sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8093{
8094  return (decl
8095	  && flag_delayed_branch
8096	  && (TARGET_ARCH64 || ! cfun->returns_struct)
8097	  && !(TARGET_VXWORKS_RTP
8098	       && flag_pic
8099	       && !targetm.binds_local_p (decl)));
8100}
8101
8102/* libfunc renaming.  */
8103#include "config/gofast.h"
8104
8105static void
8106sparc_init_libfuncs (void)
8107{
8108  if (TARGET_ARCH32)
8109    {
8110      /* Use the subroutines that Sun's library provides for integer
8111	 multiply and divide.  The `*' prevents an underscore from
8112	 being prepended by the compiler. .umul is a little faster
8113	 than .mul.  */
8114      set_optab_libfunc (smul_optab, SImode, "*.umul");
8115      set_optab_libfunc (sdiv_optab, SImode, "*.div");
8116      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
8117      set_optab_libfunc (smod_optab, SImode, "*.rem");
8118      set_optab_libfunc (umod_optab, SImode, "*.urem");
8119
8120      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
8121      set_optab_libfunc (add_optab, TFmode, "_Q_add");
8122      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
8123      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
8124      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
8125      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
8126
8127      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
8128	 is because with soft-float, the SFmode and DFmode sqrt
8129	 instructions will be absent, and the compiler will notice and
8130	 try to use the TFmode sqrt instruction for calls to the
8131	 builtin function sqrt, but this fails.  */
8132      if (TARGET_FPU)
8133	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
8134
8135      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
8136      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
8137      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
8138      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
8139      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
8140      set_optab_libfunc (le_optab, TFmode, "_Q_fle");
8141
8142      set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
8143      set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
8144      set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
8145      set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
8146
8147      set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
8148      set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
8149      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
8150      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
8151
8152      if (DITF_CONVERSION_LIBFUNCS)
8153	{
8154	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
8155	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
8156	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
8157	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
8158	}
8159
8160      if (SUN_CONVERSION_LIBFUNCS)
8161	{
8162	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
8163	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
8164	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
8165	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
8166	}
8167    }
8168  if (TARGET_ARCH64)
8169    {
8170      /* In the SPARC 64bit ABI, SImode multiply and divide functions
8171	 do not exist in the library.  Make sure the compiler does not
8172	 emit calls to them by accident.  (It should always use the
8173         hardware instructions.)  */
8174      set_optab_libfunc (smul_optab, SImode, 0);
8175      set_optab_libfunc (sdiv_optab, SImode, 0);
8176      set_optab_libfunc (udiv_optab, SImode, 0);
8177      set_optab_libfunc (smod_optab, SImode, 0);
8178      set_optab_libfunc (umod_optab, SImode, 0);
8179
8180      if (SUN_INTEGER_MULTIPLY_64)
8181	{
8182	  set_optab_libfunc (smul_optab, DImode, "__mul64");
8183	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
8184	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
8185	  set_optab_libfunc (smod_optab, DImode, "__rem64");
8186	  set_optab_libfunc (umod_optab, DImode, "__urem64");
8187	}
8188
8189      if (SUN_CONVERSION_LIBFUNCS)
8190	{
8191	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
8192	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
8193	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
8194	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
8195	}
8196    }
8197
8198  gofast_maybe_init_libfuncs ();
8199}
8200
8201#define def_builtin(NAME, CODE, TYPE) \
8202  add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
8203                       NULL_TREE)
8204
8205/* Implement the TARGET_INIT_BUILTINS target hook.
8206   Create builtin functions for special SPARC instructions.  */
8207
8208static void
8209sparc_init_builtins (void)
8210{
8211  if (TARGET_VIS)
8212    sparc_vis_init_builtins ();
8213}
8214
8215/* Create builtin functions for VIS 1.0 instructions.  */
8216
8217static void
8218sparc_vis_init_builtins (void)
8219{
8220  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
8221  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
8222  tree v4hi = build_vector_type (intHI_type_node, 4);
8223  tree v2hi = build_vector_type (intHI_type_node, 2);
8224  tree v2si = build_vector_type (intSI_type_node, 2);
8225
8226  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
8227  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
8228  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
8229  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
8230  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
8231  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
8232  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
8233  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
8234  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
8235  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
8236  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
8237  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
8238  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
8239							 v8qi, v8qi,
8240							 intDI_type_node, 0);
8241  tree di_ftype_di_di = build_function_type_list (intDI_type_node,
8242						  intDI_type_node,
8243						  intDI_type_node, 0);
8244  tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
8245		        			    ptr_type_node,
8246					            intSI_type_node, 0);
8247  tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
8248		        			    ptr_type_node,
8249					            intDI_type_node, 0);
8250
8251  /* Packing and expanding vectors.  */
8252  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
8253  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
8254	       v8qi_ftype_v2si_v8qi);
8255  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
8256	       v2hi_ftype_v2si);
8257  def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
8258  def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
8259	       v8qi_ftype_v4qi_v4qi);
8260
8261  /* Multiplications.  */
8262  def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
8263	       v4hi_ftype_v4qi_v4hi);
8264  def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
8265	       v4hi_ftype_v4qi_v2hi);
8266  def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
8267	       v4hi_ftype_v4qi_v2hi);
8268  def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
8269	       v4hi_ftype_v8qi_v4hi);
8270  def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
8271	       v4hi_ftype_v8qi_v4hi);
8272  def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
8273	       v2si_ftype_v4qi_v2hi);
8274  def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
8275	       v2si_ftype_v4qi_v2hi);
8276
8277  /* Data aligning.  */
8278  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
8279	       v4hi_ftype_v4hi_v4hi);
8280  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
8281	       v8qi_ftype_v8qi_v8qi);
8282  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
8283	       v2si_ftype_v2si_v2si);
8284  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
8285               di_ftype_di_di);
8286  if (TARGET_ARCH64)
8287    def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
8288	         ptr_ftype_ptr_di);
8289  else
8290    def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
8291	         ptr_ftype_ptr_si);
8292
8293  /* Pixel distance.  */
8294  def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
8295	       di_ftype_v8qi_v8qi_di);
8296}
8297
8298/* Handle TARGET_EXPAND_BUILTIN target hook.
8299   Expand builtin functions for sparc intrinsics.  */
8300
8301static rtx
8302sparc_expand_builtin (tree exp, rtx target,
8303		      rtx subtarget ATTRIBUTE_UNUSED,
8304		      enum machine_mode tmode ATTRIBUTE_UNUSED,
8305		      int ignore ATTRIBUTE_UNUSED)
8306{
8307  tree arg;
8308  call_expr_arg_iterator iter;
8309  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8310  unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8311  rtx pat, op[4];
8312  enum machine_mode mode[4];
8313  int arg_count = 0;
8314
8315  mode[0] = insn_data[icode].operand[0].mode;
8316  if (!target
8317      || GET_MODE (target) != mode[0]
8318      || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8319    op[0] = gen_reg_rtx (mode[0]);
8320  else
8321    op[0] = target;
8322
8323  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8324    {
8325      arg_count++;
8326      mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8327      op[arg_count] = expand_normal (arg);
8328
8329      if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8330							      mode[arg_count]))
8331	op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8332    }
8333
8334  switch (arg_count)
8335    {
8336    case 1:
8337      pat = GEN_FCN (icode) (op[0], op[1]);
8338      break;
8339    case 2:
8340      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8341      break;
8342    case 3:
8343      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8344      break;
8345    default:
8346      gcc_unreachable ();
8347    }
8348
8349  if (!pat)
8350    return NULL_RTX;
8351
8352  emit_insn (pat);
8353
8354  return op[0];
8355}
8356
8357static int
8358sparc_vis_mul8x16 (int e8, int e16)
8359{
8360  return (e8 * e16 + 128) / 256;
8361}
8362
8363/* Multiply the vector elements in ELTS0 to the elements in ELTS1 as specified
8364   by FNCODE.  All of the elements in ELTS0 and ELTS1 lists must be integer
8365   constants.  A tree list with the results of the multiplications is returned,
8366   and each element in the list is of INNER_TYPE.  */
8367
8368static tree
8369sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
8370{
8371  tree n_elts = NULL_TREE;
8372  int scale;
8373
8374  switch (fncode)
8375    {
8376    case CODE_FOR_fmul8x16_vis:
8377      for (; elts0 && elts1;
8378	   elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8379	{
8380	  int val
8381	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8382				 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
8383	  n_elts = tree_cons (NULL_TREE,
8384			      build_int_cst (inner_type, val),
8385			      n_elts);
8386	}
8387      break;
8388
8389    case CODE_FOR_fmul8x16au_vis:
8390      scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8391
8392      for (; elts0; elts0 = TREE_CHAIN (elts0))
8393	{
8394	  int val
8395	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8396				 scale);
8397	  n_elts = tree_cons (NULL_TREE,
8398			      build_int_cst (inner_type, val),
8399			      n_elts);
8400	}
8401      break;
8402
8403    case CODE_FOR_fmul8x16al_vis:
8404      scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
8405
8406      for (; elts0; elts0 = TREE_CHAIN (elts0))
8407	{
8408	  int val
8409	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8410				 scale);
8411	  n_elts = tree_cons (NULL_TREE,
8412			      build_int_cst (inner_type, val),
8413			      n_elts);
8414	}
8415      break;
8416
8417    default:
8418      gcc_unreachable ();
8419    }
8420
8421  return nreverse (n_elts);
8422
8423}
8424/* Handle TARGET_FOLD_BUILTIN target hook.
8425   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
8426   result of the function call is ignored.  NULL_TREE is returned if the
8427   function could not be folded.  */
8428
8429static tree
8430sparc_fold_builtin (tree fndecl, tree arglist, bool ignore)
8431{
8432  tree arg0, arg1, arg2;
8433  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
8434  enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
8435
8436  if (ignore
8437      && icode != CODE_FOR_alignaddrsi_vis
8438      && icode != CODE_FOR_alignaddrdi_vis)
8439    return fold_convert (rtype, integer_zero_node);
8440
8441  switch (icode)
8442    {
8443    case CODE_FOR_fexpand_vis:
8444      arg0 = TREE_VALUE (arglist);
8445      STRIP_NOPS (arg0);
8446
8447      if (TREE_CODE (arg0) == VECTOR_CST)
8448	{
8449	  tree inner_type = TREE_TYPE (rtype);
8450	  tree elts = TREE_VECTOR_CST_ELTS (arg0);
8451	  tree n_elts = NULL_TREE;
8452
8453	  for (; elts; elts = TREE_CHAIN (elts))
8454	    {
8455	      unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
8456	      n_elts = tree_cons (NULL_TREE,
8457				  build_int_cst (inner_type, val),
8458				  n_elts);
8459	    }
8460	  return build_vector (rtype, nreverse (n_elts));
8461	}
8462      break;
8463
8464    case CODE_FOR_fmul8x16_vis:
8465    case CODE_FOR_fmul8x16au_vis:
8466    case CODE_FOR_fmul8x16al_vis:
8467      arg0 = TREE_VALUE (arglist);
8468      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8469      STRIP_NOPS (arg0);
8470      STRIP_NOPS (arg1);
8471
8472      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8473	{
8474	  tree inner_type = TREE_TYPE (rtype);
8475	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8476	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8477	  tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
8478						  elts1);
8479
8480	  return build_vector (rtype, n_elts);
8481	}
8482      break;
8483
8484    case CODE_FOR_fpmerge_vis:
8485      arg0 = TREE_VALUE (arglist);
8486      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8487      STRIP_NOPS (arg0);
8488      STRIP_NOPS (arg1);
8489
8490      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8491	{
8492	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8493	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8494	  tree n_elts = NULL_TREE;
8495
8496	  for (; elts0 && elts1;
8497	       elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8498	    {
8499	      n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
8500	      n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
8501	    }
8502
8503	  return build_vector (rtype, nreverse (n_elts));
8504	}
8505      break;
8506
8507    case CODE_FOR_pdist_vis:
8508      arg0 = TREE_VALUE (arglist);
8509      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8510      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8511      STRIP_NOPS (arg0);
8512      STRIP_NOPS (arg1);
8513      STRIP_NOPS (arg2);
8514
8515      if (TREE_CODE (arg0) == VECTOR_CST
8516	  && TREE_CODE (arg1) == VECTOR_CST
8517	  && TREE_CODE (arg2) == INTEGER_CST)
8518	{
8519	  int overflow = 0;
8520	  unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
8521	  HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
8522	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8523	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8524
8525	  for (; elts0 && elts1;
8526	       elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8527	    {
8528	      unsigned HOST_WIDE_INT
8529		low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8530		low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8531	      HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
8532	      HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
8533
8534	      unsigned HOST_WIDE_INT l;
8535	      HOST_WIDE_INT h;
8536
8537	      overflow |= neg_double (low1, high1, &l, &h);
8538	      overflow |= add_double (low0, high0, l, h, &l, &h);
8539	      if (h < 0)
8540		overflow |= neg_double (l, h, &l, &h);
8541
8542	      overflow |= add_double (low, high, l, h, &low, &high);
8543	    }
8544
8545	  gcc_assert (overflow == 0);
8546
8547	  return build_int_cst_wide (rtype, low, high);
8548	}
8549
8550    default:
8551      break;
8552    }
8553
8554  return NULL_TREE;
8555}
8556
8557/* ??? This duplicates information provided to the compiler by the
8558   ??? scheduler description.  Some day, teach genautomata to output
8559   ??? the latencies and then CSE will just use that.  */
8560
8561static bool
8562sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
8563		 bool speed ATTRIBUTE_UNUSED)
8564{
8565  enum machine_mode mode = GET_MODE (x);
8566  bool float_mode_p = FLOAT_MODE_P (mode);
8567
8568  switch (code)
8569    {
8570    case CONST_INT:
8571      if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
8572	{
8573	  *total = 0;
8574	  return true;
8575	}
8576      /* FALLTHRU */
8577
8578    case HIGH:
8579      *total = 2;
8580      return true;
8581
8582    case CONST:
8583    case LABEL_REF:
8584    case SYMBOL_REF:
8585      *total = 4;
8586      return true;
8587
8588    case CONST_DOUBLE:
8589      if (GET_MODE (x) == VOIDmode
8590	  && ((CONST_DOUBLE_HIGH (x) == 0
8591	       && CONST_DOUBLE_LOW (x) < 0x1000)
8592	      || (CONST_DOUBLE_HIGH (x) == -1
8593		  && CONST_DOUBLE_LOW (x) < 0
8594		  && CONST_DOUBLE_LOW (x) >= -0x1000)))
8595	*total = 0;
8596      else
8597	*total = 8;
8598      return true;
8599
8600    case MEM:
8601      /* If outer-code was a sign or zero extension, a cost
8602	 of COSTS_N_INSNS (1) was already added in.  This is
8603	 why we are subtracting it back out.  */
8604      if (outer_code == ZERO_EXTEND)
8605	{
8606	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
8607	}
8608      else if (outer_code == SIGN_EXTEND)
8609	{
8610	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
8611	}
8612      else if (float_mode_p)
8613	{
8614	  *total = sparc_costs->float_load;
8615	}
8616      else
8617	{
8618	  *total = sparc_costs->int_load;
8619	}
8620
8621      return true;
8622
8623    case PLUS:
8624    case MINUS:
8625      if (float_mode_p)
8626	*total = sparc_costs->float_plusminus;
8627      else
8628	*total = COSTS_N_INSNS (1);
8629      return false;
8630
8631    case MULT:
8632      if (float_mode_p)
8633	*total = sparc_costs->float_mul;
8634      else if (! TARGET_HARD_MUL)
8635	*total = COSTS_N_INSNS (25);
8636      else
8637	{
8638	  int bit_cost;
8639
8640	  bit_cost = 0;
8641	  if (sparc_costs->int_mul_bit_factor)
8642	    {
8643	      int nbits;
8644
8645	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8646		{
8647		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
8648		  for (nbits = 0; value != 0; value &= value - 1)
8649		    nbits++;
8650		}
8651	      else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
8652		       && GET_MODE (XEXP (x, 1)) == VOIDmode)
8653		{
8654		  rtx x1 = XEXP (x, 1);
8655		  unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
8656		  unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
8657
8658		  for (nbits = 0; value1 != 0; value1 &= value1 - 1)
8659		    nbits++;
8660		  for (; value2 != 0; value2 &= value2 - 1)
8661		    nbits++;
8662		}
8663	      else
8664		nbits = 7;
8665
8666	      if (nbits < 3)
8667		nbits = 3;
8668	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
8669	      bit_cost = COSTS_N_INSNS (bit_cost);
8670	    }
8671
8672	  if (mode == DImode)
8673	    *total = sparc_costs->int_mulX + bit_cost;
8674	  else
8675	    *total = sparc_costs->int_mul + bit_cost;
8676	}
8677      return false;
8678
8679    case ASHIFT:
8680    case ASHIFTRT:
8681    case LSHIFTRT:
8682      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
8683      return false;
8684
8685    case DIV:
8686    case UDIV:
8687    case MOD:
8688    case UMOD:
8689      if (float_mode_p)
8690	{
8691	  if (mode == DFmode)
8692	    *total = sparc_costs->float_div_df;
8693	  else
8694	    *total = sparc_costs->float_div_sf;
8695	}
8696      else
8697	{
8698	  if (mode == DImode)
8699	    *total = sparc_costs->int_divX;
8700	  else
8701	    *total = sparc_costs->int_div;
8702	}
8703      return false;
8704
8705    case NEG:
8706      if (! float_mode_p)
8707	{
8708	  *total = COSTS_N_INSNS (1);
8709	  return false;
8710	}
8711      /* FALLTHRU */
8712
8713    case ABS:
8714    case FLOAT:
8715    case UNSIGNED_FLOAT:
8716    case FIX:
8717    case UNSIGNED_FIX:
8718    case FLOAT_EXTEND:
8719    case FLOAT_TRUNCATE:
8720      *total = sparc_costs->float_move;
8721      return false;
8722
8723    case SQRT:
8724      if (mode == DFmode)
8725	*total = sparc_costs->float_sqrt_df;
8726      else
8727	*total = sparc_costs->float_sqrt_sf;
8728      return false;
8729
8730    case COMPARE:
8731      if (float_mode_p)
8732	*total = sparc_costs->float_cmp;
8733      else
8734	*total = COSTS_N_INSNS (1);
8735      return false;
8736
8737    case IF_THEN_ELSE:
8738      if (float_mode_p)
8739	*total = sparc_costs->float_cmove;
8740      else
8741	*total = sparc_costs->int_cmove;
8742      return false;
8743
8744    case IOR:
8745      /* Handle the NAND vector patterns.  */
8746      if (sparc_vector_mode_supported_p (GET_MODE (x))
8747	  && GET_CODE (XEXP (x, 0)) == NOT
8748	  && GET_CODE (XEXP (x, 1)) == NOT)
8749	{
8750	  *total = COSTS_N_INSNS (1);
8751	  return true;
8752	}
8753      else
8754        return false;
8755
8756    default:
8757      return false;
8758    }
8759}
8760
8761/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
8762   This is achieved by means of a manual dynamic stack space allocation in
8763   the current frame.  We make the assumption that SEQ doesn't contain any
8764   function calls, with the possible exception of calls to the GOT helper.  */
8765
8766static void
8767emit_and_preserve (rtx seq, rtx reg, rtx reg2)
8768{
8769  /* We must preserve the lowest 16 words for the register save area.  */
8770  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
8771  /* We really need only 2 words of fresh stack space.  */
8772  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
8773
8774  rtx slot
8775    = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
8776					     SPARC_STACK_BIAS + offset));
8777
8778  emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
8779  emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
8780  if (reg2)
8781    emit_insn (gen_rtx_SET (VOIDmode,
8782			    adjust_address (slot, word_mode, UNITS_PER_WORD),
8783			    reg2));
8784  emit_insn (seq);
8785  if (reg2)
8786    emit_insn (gen_rtx_SET (VOIDmode,
8787			    reg2,
8788			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
8789  emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
8790  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
8791}
8792
8793/* Output the assembler code for a thunk function.  THUNK_DECL is the
8794   declaration for the thunk function itself, FUNCTION is the decl for
8795   the target function.  DELTA is an immediate constant offset to be
8796   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
8797   (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
8798
8799static void
8800sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8801		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8802		       tree function)
8803{
8804  rtx this_rtx, insn, funexp;
8805  unsigned int int_arg_first;
8806
8807  reload_completed = 1;
8808  epilogue_completed = 1;
8809
8810  emit_note (NOTE_INSN_PROLOGUE_END);
8811
8812  if (flag_delayed_branch)
8813    {
8814      /* We will emit a regular sibcall below, so we need to instruct
8815	 output_sibcall that we are in a leaf function.  */
8816      sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
8817
8818      /* This will cause final.c to invoke leaf_renumber_regs so we
8819	 must behave as if we were in a not-yet-leafified function.  */
8820      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
8821    }
8822  else
8823    {
8824      /* We will emit the sibcall manually below, so we will need to
8825	 manually spill non-leaf registers.  */
8826      sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
8827
8828      /* We really are in a leaf function.  */
8829      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
8830    }
8831
8832  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
8833     returns a structure, the structure return pointer is there instead.  */
8834  if (TARGET_ARCH64
8835      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8836    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
8837  else
8838    this_rtx = gen_rtx_REG (Pmode, int_arg_first);
8839
8840  /* Add DELTA.  When possible use a plain add, otherwise load it into
8841     a register first.  */
8842  if (delta)
8843    {
8844      rtx delta_rtx = GEN_INT (delta);
8845
8846      if (! SPARC_SIMM13_P (delta))
8847	{
8848	  rtx scratch = gen_rtx_REG (Pmode, 1);
8849	  emit_move_insn (scratch, delta_rtx);
8850	  delta_rtx = scratch;
8851	}
8852
8853      /* THIS_RTX += DELTA.  */
8854      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
8855    }
8856
8857  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
8858  if (vcall_offset)
8859    {
8860      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8861      rtx scratch = gen_rtx_REG (Pmode, 1);
8862
8863      gcc_assert (vcall_offset < 0);
8864
8865      /* SCRATCH = *THIS_RTX.  */
8866      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
8867
8868      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
8869	 may not have any available scratch register at this point.  */
8870      if (SPARC_SIMM13_P (vcall_offset))
8871	;
8872      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
8873      else if (! fixed_regs[5]
8874	       /* The below sequence is made up of at least 2 insns,
8875		  while the default method may need only one.  */
8876	       && vcall_offset < -8192)
8877	{
8878	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
8879	  emit_move_insn (scratch2, vcall_offset_rtx);
8880	  vcall_offset_rtx = scratch2;
8881	}
8882      else
8883	{
8884	  rtx increment = GEN_INT (-4096);
8885
8886	  /* VCALL_OFFSET is a negative number whose typical range can be
8887	     estimated as -32768..0 in 32-bit mode.  In almost all cases
8888	     it is therefore cheaper to emit multiple add insns than
8889	     spilling and loading the constant into a register (at least
8890	     6 insns).  */
8891	  while (! SPARC_SIMM13_P (vcall_offset))
8892	    {
8893	      emit_insn (gen_add2_insn (scratch, increment));
8894	      vcall_offset += 4096;
8895	    }
8896	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
8897	}
8898
8899      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
8900      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
8901					    gen_rtx_PLUS (Pmode,
8902							  scratch,
8903							  vcall_offset_rtx)));
8904
8905      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
8906      emit_insn (gen_add2_insn (this_rtx, scratch));
8907    }
8908
8909  /* Generate a tail call to the target function.  */
8910  if (! TREE_USED (function))
8911    {
8912      assemble_external (function);
8913      TREE_USED (function) = 1;
8914    }
8915  funexp = XEXP (DECL_RTL (function), 0);
8916
8917  if (flag_delayed_branch)
8918    {
8919      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8920      insn = emit_call_insn (gen_sibcall (funexp));
8921      SIBLING_CALL_P (insn) = 1;
8922    }
8923  else
8924    {
8925      /* The hoops we have to jump through in order to generate a sibcall
8926	 without using delay slots...  */
8927      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
8928
8929      if (flag_pic)
8930        {
8931	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
8932	  start_sequence ();
8933	  /* Delay emitting the GOT helper function because it needs to
8934	     change the section and we are emitting assembly code.  */
8935	  load_got_register ();  /* clobbers %o7 */
8936	  scratch = legitimize_pic_address (funexp, scratch);
8937	  seq = get_insns ();
8938	  end_sequence ();
8939	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
8940	}
8941      else if (TARGET_ARCH32)
8942	{
8943	  emit_insn (gen_rtx_SET (VOIDmode,
8944				  scratch,
8945				  gen_rtx_HIGH (SImode, funexp)));
8946	  emit_insn (gen_rtx_SET (VOIDmode,
8947				  scratch,
8948				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
8949	}
8950      else  /* TARGET_ARCH64 */
8951        {
8952	  switch (sparc_cmodel)
8953	    {
8954	    case CM_MEDLOW:
8955	    case CM_MEDMID:
8956	      /* The destination can serve as a temporary.  */
8957	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
8958	      break;
8959
8960	    case CM_MEDANY:
8961	    case CM_EMBMEDANY:
8962	      /* The destination cannot serve as a temporary.  */
8963	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
8964	      start_sequence ();
8965	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
8966	      seq = get_insns ();
8967	      end_sequence ();
8968	      emit_and_preserve (seq, spill_reg, 0);
8969	      break;
8970
8971	    default:
8972	      gcc_unreachable ();
8973	    }
8974	}
8975
8976      emit_jump_insn (gen_indirect_jump (scratch));
8977    }
8978
8979  emit_barrier ();
8980
8981  /* Run just enough of rest_of_compilation to get the insns emitted.
8982     There's not really enough bulk here to make other passes such as
8983     instruction scheduling worth while.  Note that use_thunk calls
8984     assemble_start_function and assemble_end_function.  */
8985  insn = get_insns ();
8986  insn_locators_alloc ();
8987  shorten_branches (insn);
8988  final_start_function (insn, file, 1);
8989  final (insn, file, 1);
8990  final_end_function ();
8991
8992  reload_completed = 0;
8993  epilogue_completed = 0;
8994}
8995
8996/* Return true if sparc_output_mi_thunk would be able to output the
8997   assembler code for the thunk function specified by the arguments
8998   it is passed, and false otherwise.  */
8999static bool
9000sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
9001			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
9002			   HOST_WIDE_INT vcall_offset,
9003			   const_tree function ATTRIBUTE_UNUSED)
9004{
9005  /* Bound the loop used in the default method above.  */
9006  return (vcall_offset >= -32768 || ! fixed_regs[5]);
9007}
9008
9009/* How to allocate a 'struct machine_function'.  */
9010
9011static struct machine_function *
9012sparc_init_machine_status (void)
9013{
9014  return GGC_CNEW (struct machine_function);
9015}
9016
9017/* Locate some local-dynamic symbol still in use by this function
9018   so that we can print its name in local-dynamic base patterns.  */
9019
9020static const char *
9021get_some_local_dynamic_name (void)
9022{
9023  rtx insn;
9024
9025  if (cfun->machine->some_ld_name)
9026    return cfun->machine->some_ld_name;
9027
9028  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9029    if (INSN_P (insn)
9030	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9031      return cfun->machine->some_ld_name;
9032
9033  gcc_unreachable ();
9034}
9035
9036static int
9037get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9038{
9039  rtx x = *px;
9040
9041  if (x
9042      && GET_CODE (x) == SYMBOL_REF
9043      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9044    {
9045      cfun->machine->some_ld_name = XSTR (x, 0);
9046      return 1;
9047    }
9048
9049  return 0;
9050}
9051
9052/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
9053   This is called from dwarf2out.c to emit call frame instructions
9054   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
9055static void
9056sparc_dwarf_handle_frame_unspec (const char *label,
9057				 rtx pattern ATTRIBUTE_UNUSED,
9058				 int index ATTRIBUTE_UNUSED)
9059{
9060  gcc_assert (index == UNSPECV_SAVEW);
9061  dwarf2out_window_save (label);
9062}
9063
9064/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9065   We need to emit DTP-relative relocations.  */
9066
9067static void
9068sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
9069{
9070  switch (size)
9071    {
9072    case 4:
9073      fputs ("\t.word\t%r_tls_dtpoff32(", file);
9074      break;
9075    case 8:
9076      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9077      break;
9078    default:
9079      gcc_unreachable ();
9080    }
9081  output_addr_const (file, x);
9082  fputs (")", file);
9083}
9084
9085/* Do whatever processing is required at the end of a file.  */
9086
9087static void
9088sparc_file_end (void)
9089{
9090  /* If we need to emit the special GOT helper function, do so now.  */
9091  if (got_helper_rtx)
9092    {
9093      const char *name = XSTR (got_helper_rtx, 0);
9094      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
9095#ifdef DWARF2_UNWIND_INFO
9096      bool do_cfi;
9097#endif
9098
9099      if (USE_HIDDEN_LINKONCE)
9100	{
9101	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9102				  get_identifier (name),
9103				  build_function_type (void_type_node,
9104						       void_list_node));
9105	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9106					   NULL_TREE, void_type_node);
9107	  TREE_STATIC (decl) = 1;
9108	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
9109	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
9110	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
9111	  resolve_unique_section (decl, 0, flag_function_sections);
9112	  allocate_struct_function (decl, true);
9113	  cfun->is_thunk = 1;
9114	  current_function_decl = decl;
9115	  init_varasm_status ();
9116	  assemble_start_function (decl, name);
9117	}
9118      else
9119	{
9120	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9121          switch_to_section (text_section);
9122	  if (align > 0)
9123	    ASM_OUTPUT_ALIGN (asm_out_file, align);
9124	  ASM_OUTPUT_LABEL (asm_out_file, name);
9125	}
9126
9127#ifdef DWARF2_UNWIND_INFO
9128      do_cfi = dwarf2out_do_cfi_asm ();
9129      if (do_cfi)
9130	fprintf (asm_out_file, "\t.cfi_startproc\n");
9131#endif
9132      if (flag_delayed_branch)
9133	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
9134		 reg_name, reg_name);
9135      else
9136	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
9137		 reg_name, reg_name);
9138#ifdef DWARF2_UNWIND_INFO
9139      if (do_cfi)
9140	fprintf (asm_out_file, "\t.cfi_endproc\n");
9141#endif
9142    }
9143
9144  if (NEED_INDICATE_EXEC_STACK)
9145    file_end_indicate_exec_stack ();
9146}
9147
9148#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9149/* Implement TARGET_MANGLE_TYPE.  */
9150
9151static const char *
9152sparc_mangle_type (const_tree type)
9153{
9154  if (!TARGET_64BIT
9155      && TYPE_MAIN_VARIANT (type) == long_double_type_node
9156      && TARGET_LONG_DOUBLE_128)
9157    return "g";
9158
9159  /* For all other types, use normal C++ mangling.  */
9160  return NULL;
9161}
9162#endif
9163
9164/* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
9165   compare and swap on the word containing the byte or half-word.  */
9166
9167void
9168sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
9169{
9170  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9171  rtx addr = gen_reg_rtx (Pmode);
9172  rtx off = gen_reg_rtx (SImode);
9173  rtx oldv = gen_reg_rtx (SImode);
9174  rtx newv = gen_reg_rtx (SImode);
9175  rtx oldvalue = gen_reg_rtx (SImode);
9176  rtx newvalue = gen_reg_rtx (SImode);
9177  rtx res = gen_reg_rtx (SImode);
9178  rtx resv = gen_reg_rtx (SImode);
9179  rtx memsi, val, mask, end_label, loop_label, cc;
9180
9181  emit_insn (gen_rtx_SET (VOIDmode, addr,
9182			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9183
9184  if (Pmode != SImode)
9185    addr1 = gen_lowpart (SImode, addr1);
9186  emit_insn (gen_rtx_SET (VOIDmode, off,
9187			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9188
9189  memsi = gen_rtx_MEM (SImode, addr);
9190  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9191  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9192
9193  val = force_reg (SImode, memsi);
9194
9195  emit_insn (gen_rtx_SET (VOIDmode, off,
9196			  gen_rtx_XOR (SImode, off,
9197				       GEN_INT (GET_MODE (mem) == QImode
9198						? 3 : 2))));
9199
9200  emit_insn (gen_rtx_SET (VOIDmode, off,
9201			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9202
9203  if (GET_MODE (mem) == QImode)
9204    mask = force_reg (SImode, GEN_INT (0xff));
9205  else
9206    mask = force_reg (SImode, GEN_INT (0xffff));
9207
9208  emit_insn (gen_rtx_SET (VOIDmode, mask,
9209			  gen_rtx_ASHIFT (SImode, mask, off)));
9210
9211  emit_insn (gen_rtx_SET (VOIDmode, val,
9212			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9213				       val)));
9214
9215  oldval = gen_lowpart (SImode, oldval);
9216  emit_insn (gen_rtx_SET (VOIDmode, oldv,
9217			  gen_rtx_ASHIFT (SImode, oldval, off)));
9218
9219  newval = gen_lowpart_common (SImode, newval);
9220  emit_insn (gen_rtx_SET (VOIDmode, newv,
9221			  gen_rtx_ASHIFT (SImode, newval, off)));
9222
9223  emit_insn (gen_rtx_SET (VOIDmode, oldv,
9224			  gen_rtx_AND (SImode, oldv, mask)));
9225
9226  emit_insn (gen_rtx_SET (VOIDmode, newv,
9227			  gen_rtx_AND (SImode, newv, mask)));
9228
9229  end_label = gen_label_rtx ();
9230  loop_label = gen_label_rtx ();
9231  emit_label (loop_label);
9232
9233  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
9234			  gen_rtx_IOR (SImode, oldv, val)));
9235
9236  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
9237			  gen_rtx_IOR (SImode, newv, val)));
9238
9239  emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
9240
9241  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
9242
9243  emit_insn (gen_rtx_SET (VOIDmode, resv,
9244			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9245				       res)));
9246
9247  cc = gen_compare_reg_1 (NE, resv, val);
9248  emit_insn (gen_rtx_SET (VOIDmode, val, resv));
9249
9250  /* Use cbranchcc4 to separate the compare and branch!  */
9251  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
9252				  cc, const0_rtx, loop_label));
9253
9254  emit_label (end_label);
9255
9256  emit_insn (gen_rtx_SET (VOIDmode, res,
9257			  gen_rtx_AND (SImode, res, mask)));
9258
9259  emit_insn (gen_rtx_SET (VOIDmode, res,
9260			  gen_rtx_LSHIFTRT (SImode, res, off)));
9261
9262  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9263}
9264
9265/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
9266
9267bool
9268sparc_frame_pointer_required (void)
9269{
9270  return !(leaf_function_p () && only_leaf_regs_used ());
9271}
9272
9273/* The way this is structured, we can't eliminate SFP in favor of SP
9274   if the frame pointer is required: we want to use the SFP->HFP elimination
9275   in that case.  But the test in update_eliminables doesn't know we are
9276   assuming below that we only do the former elimination.  */
9277
9278bool
9279sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
9280{
9281  return (to == HARD_FRAME_POINTER_REGNUM
9282          || !targetm.frame_pointer_required ());
9283}
9284
9285#include "gt-sparc.h"
9286