sparc.c revision 1.1.1.1
1/* Subroutines for insn-output.c for SPARC.
2   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4   Free Software Foundation, Inc.
5   Contributed by Michael Tiemann (tiemann@cygnus.com)
6   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7   at Cygnus Support.
8
9This file is part of GCC.
10
11GCC is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 3, or (at your option)
14any later version.
15
16GCC is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GCC; see the file COPYING3.  If not see
23<http://www.gnu.org/licenses/>.  */
24
25#include "config.h"
26#include "system.h"
27#include "coretypes.h"
28#include "tm.h"
29#include "tree.h"
30#include "rtl.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "insn-codes.h"
36#include "conditions.h"
37#include "output.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "function.h"
41#include "expr.h"
42#include "optabs.h"
43#include "recog.h"
44#include "toplev.h"
45#include "ggc.h"
46#include "tm_p.h"
47#include "debug.h"
48#include "target.h"
49#include "target-def.h"
50#include "cfglayout.h"
51#include "gimple.h"
52#include "langhooks.h"
53#include "params.h"
54#include "df.h"
55#include "dwarf2out.h"
56
/* Processor costs.  One table per scheduling model; sparc_override_options
   points the global sparc_costs at the table matching sparc_cpu.  All
   latencies are expressed in COSTS_N_INSNS units.  */

/* Costs used when tuning for PROCESSOR_V7 / PROCESSOR_CYPRESS
   (see the sparc_costs switch in sparc_override_options).  */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
81
/* Costs used when tuning for PROCESSOR_V8, PROCESSOR_SPARCLITE or
   PROCESSOR_SUPERSPARC (see sparc_override_options).  */
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};
105
/* Costs used when tuning for PROCESSOR_F930, PROCESSOR_F934,
   PROCESSOR_HYPERSPARC or PROCESSOR_SPARCLITE86X
   (see sparc_override_options).  */
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
129
/* Costs used when tuning for PROCESSOR_SPARCLET or PROCESSOR_TSC701
   (see sparc_override_options).  */
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
153
/* Costs used when tuning for PROCESSOR_V9 or PROCESSOR_ULTRASPARC
   (see sparc_override_options).  */
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};
177
/* Costs used when tuning for PROCESSOR_ULTRASPARC3
   (see sparc_override_options).  */
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
201
/* Costs used when tuning for PROCESSOR_NIAGARA (UltraSPARC T1)
   (see sparc_override_options).  */
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
225
/* Costs used when tuning for PROCESSOR_NIAGARA2 (UltraSPARC T2)
   (see sparc_override_options).  */
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
249
250const struct processor_costs *sparc_costs = &cypress_costs;
251
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
/* Without assembler relaxation, reserve the slot only where the sibcall
   sequence needs the extra register: 64-bit non-MEDLOW code models and
   PIC code.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
262
/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used for scheduling
   (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static HOST_WIDE_INT apparent_fsize;
static HOST_WIDE_INT actual_fsize;

/* Number of live general or floating point registers needed to be
   saved (as 4-byte quantities).  */
static int num_gfregs;

/* The alias set for prologue/epilogue register save/restore.  */
static GTY(()) alias_set_type sparc_sr_alias_set;

/* The alias set for the structure return value.  */
static GTY(()) alias_set_type struct_value_alias_set;

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.
   Entries of -1 are registers with no leaf mapping; they correspond to
   the zero entries of sparc_leaf_regs below.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};
302
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  The zero entries mirror the -1 entries
   of leaf_reg_remap above.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1};
320
/* Per-function machine-dependent state, allocated by
   sparc_init_machine_status and reached through cfun->machine.  */
struct GTY(()) machine_function
{
  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of current_function_uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

/* Convenience accessors for the current function's machine state.  */
#define sparc_leaf_function_p  cfun->machine->leaf_function_p
#define sparc_prologue_data_valid_p  cfun->machine->prologue_data_valid_p
338
/* Register we pretend to think the frame pointer is allocated to.
   Normally, this is %fp, but if we are in a leaf procedure, this
   is %sp+"something".  We record "something" separately as it may
   be too big for reg+constant addressing.  */
static rtx frame_base_reg;
static HOST_WIDE_INT frame_base_offset;

/* 1 if the next opcode is to be specially indented.
   NOTE(review): presumably consumed by the asm output machinery;
   the consumer is not visible in this part of the file.  */
int sparc_indent_opcode = 0;
348
349static bool sparc_handle_option (size_t, const char *, int);
350static void sparc_init_modes (void);
351static void scan_record_type (tree, int *, int *, int *);
352static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
353				tree, int, int, int *, int *);
354
355static int supersparc_adjust_cost (rtx, rtx, rtx, int);
356static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
357
358static void sparc_output_addr_vec (rtx);
359static void sparc_output_addr_diff_vec (rtx);
360static void sparc_output_deferred_case_vectors (void);
361static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
362static rtx sparc_builtin_saveregs (void);
363static int epilogue_renumber (rtx *, int);
364static bool sparc_assemble_integer (rtx, unsigned int, int);
365static int set_extends (rtx);
366static void load_got_register (void);
367static int save_or_restore_regs (int, int, rtx, int, int);
368static void emit_save_or_restore_regs (int);
369static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
370static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
371static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
372						 tree) ATTRIBUTE_UNUSED;
373static int sparc_adjust_cost (rtx, rtx, rtx, int);
374static int sparc_issue_rate (void);
375static void sparc_sched_init (FILE *, int, int);
376static int sparc_use_sched_lookahead (void);
377
378static void emit_soft_tfmode_libcall (const char *, int, rtx *);
379static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
380static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
381static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
382static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
383
384static bool sparc_function_ok_for_sibcall (tree, tree);
385static void sparc_init_libfuncs (void);
386static void sparc_init_builtins (void);
387static void sparc_vis_init_builtins (void);
388static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
389static tree sparc_fold_builtin (tree, tree, bool);
390static int sparc_vis_mul8x16 (int, int);
391static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
392static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
393				   HOST_WIDE_INT, tree);
394static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
395				       HOST_WIDE_INT, const_tree);
396static struct machine_function * sparc_init_machine_status (void);
397static bool sparc_cannot_force_const_mem (rtx);
398static rtx sparc_tls_get_addr (void);
399static rtx sparc_tls_got (void);
400static const char *get_some_local_dynamic_name (void);
401static int get_some_local_dynamic_name_1 (rtx *, void *);
402static bool sparc_rtx_costs (rtx, int, int, int *, bool);
403static bool sparc_promote_prototypes (const_tree);
404static rtx sparc_struct_value_rtx (tree, int);
405static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
406						      int *, const_tree, int);
407static bool sparc_return_in_memory (const_tree, const_tree);
408static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
409static void sparc_va_start (tree, rtx);
410static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
411static bool sparc_vector_mode_supported_p (enum machine_mode);
412static bool sparc_tls_referenced_p (rtx);
413static rtx legitimize_tls_address (rtx);
414static rtx legitimize_pic_address (rtx, rtx);
415static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
416static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
417				     enum machine_mode, const_tree, bool);
418static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
419				    enum machine_mode, tree, bool);
420static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
421static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
422static void sparc_file_end (void);
423static bool sparc_frame_pointer_required (void);
424static bool sparc_can_eliminate (const int, const int);
425#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
426static const char *sparc_mangle_type (const_tree);
427#endif
428static void sparc_trampoline_init (rtx, tree, rtx);
429
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  Only defined when the subtarget
   supplies attributes of its own; the NULL entry terminates the table.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL,        0, 0, false, false, false, NULL }
};
#endif
439
/* Option handling.  */

/* Parsed value of -mcmodel=, set by sparc_override_options.  */
enum cmodel sparc_cmodel;

/* NOTE(review): indexed by global register number; presumably records
   which %g registers have already been emitted -- the consumer is not
   visible in this part of the file.  */
char sparc_hard_reg_printed[8];

/* Entries 1 and 2 receive the -mcpu=/-mtune= arguments from
   sparc_handle_option; entry 0 receives the configured default in
   sparc_override_options.  */
struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;

/* Whether an FPU option was specified.  */
static bool fpu_option_set = false;
461
462/* Initialize the GCC target structure.  */
463
464/* The default is to use .half rather than .short for aligned HI objects.  */
465#undef TARGET_ASM_ALIGNED_HI_OP
466#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
467
468#undef TARGET_ASM_UNALIGNED_HI_OP
469#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
470#undef TARGET_ASM_UNALIGNED_SI_OP
471#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
472#undef TARGET_ASM_UNALIGNED_DI_OP
473#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
474
475/* The target hook has to handle DI-mode values.  */
476#undef TARGET_ASM_INTEGER
477#define TARGET_ASM_INTEGER sparc_assemble_integer
478
479#undef TARGET_ASM_FUNCTION_PROLOGUE
480#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
481#undef TARGET_ASM_FUNCTION_EPILOGUE
482#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
483
484#undef TARGET_SCHED_ADJUST_COST
485#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
486#undef TARGET_SCHED_ISSUE_RATE
487#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
488#undef TARGET_SCHED_INIT
489#define TARGET_SCHED_INIT sparc_sched_init
490#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
491#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
492
493#undef TARGET_FUNCTION_OK_FOR_SIBCALL
494#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
495
496#undef TARGET_INIT_LIBFUNCS
497#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
498#undef TARGET_INIT_BUILTINS
499#define TARGET_INIT_BUILTINS sparc_init_builtins
500
501#undef TARGET_LEGITIMIZE_ADDRESS
502#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
503
504#undef TARGET_EXPAND_BUILTIN
505#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
506#undef TARGET_FOLD_BUILTIN
507#define TARGET_FOLD_BUILTIN sparc_fold_builtin
508
509#if TARGET_TLS
510#undef TARGET_HAVE_TLS
511#define TARGET_HAVE_TLS true
512#endif
513
514#undef TARGET_CANNOT_FORCE_CONST_MEM
515#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
516
517#undef TARGET_ASM_OUTPUT_MI_THUNK
518#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
519#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
520#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
521
522#undef TARGET_RTX_COSTS
523#define TARGET_RTX_COSTS sparc_rtx_costs
524#undef TARGET_ADDRESS_COST
525#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
526
527#undef TARGET_PROMOTE_FUNCTION_MODE
528#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
529
530#undef TARGET_PROMOTE_PROTOTYPES
531#define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes
532
533#undef TARGET_STRUCT_VALUE_RTX
534#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
535#undef TARGET_RETURN_IN_MEMORY
536#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
537#undef TARGET_MUST_PASS_IN_STACK
538#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
539#undef TARGET_PASS_BY_REFERENCE
540#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
541#undef TARGET_ARG_PARTIAL_BYTES
542#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
543
544#undef TARGET_EXPAND_BUILTIN_SAVEREGS
545#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
546#undef TARGET_STRICT_ARGUMENT_NAMING
547#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
548
549#undef TARGET_EXPAND_BUILTIN_VA_START
550#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
551#undef TARGET_GIMPLIFY_VA_ARG_EXPR
552#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
553
554#undef TARGET_VECTOR_MODE_SUPPORTED_P
555#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
556
557#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
558#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec
559
560#ifdef SUBTARGET_INSERT_ATTRIBUTES
561#undef TARGET_INSERT_ATTRIBUTES
562#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
563#endif
564
565#ifdef SUBTARGET_ATTRIBUTE_TABLE
566#undef TARGET_ATTRIBUTE_TABLE
567#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
568#endif
569
570#undef TARGET_RELAXED_ORDERING
571#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
572
573#undef TARGET_DEFAULT_TARGET_FLAGS
574#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
575#undef TARGET_HANDLE_OPTION
576#define TARGET_HANDLE_OPTION sparc_handle_option
577
578#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
579#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
580#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
581#endif
582
583#undef TARGET_ASM_FILE_END
584#define TARGET_ASM_FILE_END sparc_file_end
585
586#undef TARGET_FRAME_POINTER_REQUIRED
587#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
588
589#undef TARGET_CAN_ELIMINATE
590#define TARGET_CAN_ELIMINATE sparc_can_eliminate
591
592#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
593#undef TARGET_MANGLE_TYPE
594#define TARGET_MANGLE_TYPE sparc_mangle_type
595#endif
596
597#undef TARGET_LEGITIMATE_ADDRESS_P
598#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
599
600#undef TARGET_TRAMPOLINE_INIT
601#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
602
603struct gcc_target targetm = TARGET_INITIALIZER;
604
605/* Implement TARGET_HANDLE_OPTION.  */
606
607static bool
608sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
609{
610  switch (code)
611    {
612    case OPT_mfpu:
613    case OPT_mhard_float:
614    case OPT_msoft_float:
615      fpu_option_set = true;
616      break;
617
618    case OPT_mcpu_:
619      sparc_select[1].string = arg;
620      break;
621
622    case OPT_mtune_:
623      sparc_select[2].string = arg;
624      break;
625    }
626
627  return true;
628}
629
630/* Specify default optimizations.  */
631
void
sparc_optimization_options (int l ATTRIBUTE_UNUSED, int s ATTRIBUTE_UNUSED)
{
  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  flag_ira_share_save_slots = 0;
}
640
641/* Validate and override various options, and do some machine dependent
642   initialization.  */
643
644void
645sparc_override_options (void)
646{
647  static struct code_model {
648    const char *const name;
649    const enum cmodel value;
650  } const cmodels[] = {
651    { "32", CM_32 },
652    { "medlow", CM_MEDLOW },
653    { "medmid", CM_MEDMID },
654    { "medany", CM_MEDANY },
655    { "embmedany", CM_EMBMEDANY },
656    { NULL, (enum cmodel) 0 }
657  };
658  const struct code_model *cmodel;
659  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
660  static struct cpu_default {
661    const int cpu;
662    const char *const name;
663  } const cpu_default[] = {
664    /* There must be one entry here for each TARGET_CPU value.  */
665    { TARGET_CPU_sparc, "cypress" },
666    { TARGET_CPU_sparclet, "tsc701" },
667    { TARGET_CPU_sparclite, "f930" },
668    { TARGET_CPU_v8, "v8" },
669    { TARGET_CPU_hypersparc, "hypersparc" },
670    { TARGET_CPU_sparclite86x, "sparclite86x" },
671    { TARGET_CPU_supersparc, "supersparc" },
672    { TARGET_CPU_v9, "v9" },
673    { TARGET_CPU_ultrasparc, "ultrasparc" },
674    { TARGET_CPU_ultrasparc3, "ultrasparc3" },
675    { TARGET_CPU_niagara, "niagara" },
676    { TARGET_CPU_niagara2, "niagara2" },
677    { 0, 0 }
678  };
679  const struct cpu_default *def;
680  /* Table of values for -m{cpu,tune}=.  */
681  static struct cpu_table {
682    const char *const name;
683    const enum processor_type processor;
684    const int disable;
685    const int enable;
686  } const cpu_table[] = {
687    { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
688    { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
689    { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
690    /* TI TMS390Z55 supersparc */
691    { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
692    { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
693    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
694       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
695    { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
696    { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
697    { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
698    { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
699      MASK_SPARCLITE },
700    { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
701    /* TEMIC sparclet */
702    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
703    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
704    /* TI ultrasparc I, II, IIi */
705    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
706    /* Although insns using %y are deprecated, it is a clear win on current
707       ultrasparcs.  */
708    						    |MASK_DEPRECATED_V8_INSNS},
709    /* TI ultrasparc III */
710    /* ??? Check if %y issue still holds true in ultra3.  */
711    { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
712    /* UltraSPARC T1 */
713    { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
714    { "niagara2", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9},
715    { 0, (enum processor_type) 0, 0, 0 }
716  };
717  const struct cpu_table *cpu;
718  const struct sparc_cpu_select *sel;
719  int fpu;
720
721#ifndef SPARC_BI_ARCH
722  /* Check for unsupported architecture size.  */
723  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
724    error ("%s is not supported by this configuration",
725	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
726#endif
727
728  /* We force all 64bit archs to use 128 bit long double */
729  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
730    {
731      error ("-mlong-double-64 not allowed with -m64");
732      target_flags |= MASK_LONG_DOUBLE_128;
733    }
734
735  /* Code model selection.  */
736  sparc_cmodel = SPARC_DEFAULT_CMODEL;
737
738#ifdef SPARC_BI_ARCH
739  if (TARGET_ARCH32)
740    sparc_cmodel = CM_32;
741#endif
742
743  if (sparc_cmodel_string != NULL)
744    {
745      if (TARGET_ARCH64)
746	{
747	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
748	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
749	      break;
750	  if (cmodel->name == NULL)
751	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
752	  else
753	    sparc_cmodel = cmodel->value;
754	}
755      else
756	error ("-mcmodel= is not supported on 32 bit systems");
757    }
758
759  fpu = target_flags & MASK_FPU; /* save current -mfpu status */
760
761  /* Set the default CPU.  */
762  for (def = &cpu_default[0]; def->name; ++def)
763    if (def->cpu == TARGET_CPU_DEFAULT)
764      break;
765  gcc_assert (def->name);
766  sparc_select[0].string = def->name;
767
768  for (sel = &sparc_select[0]; sel->name; ++sel)
769    {
770      if (sel->string)
771	{
772	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
773	    if (! strcmp (sel->string, cpu->name))
774	      {
775		if (sel->set_tune_p)
776		  sparc_cpu = cpu->processor;
777
778		if (sel->set_arch_p)
779		  {
780		    target_flags &= ~cpu->disable;
781		    target_flags |= cpu->enable;
782		  }
783		break;
784	      }
785
786	  if (! cpu->name)
787	    error ("bad value (%s) for %s switch", sel->string, sel->name);
788	}
789    }
790
791  /* If -mfpu or -mno-fpu was explicitly used, don't override with
792     the processor default.  */
793  if (fpu_option_set)
794    target_flags = (target_flags & ~MASK_FPU) | fpu;
795
796  /* Don't allow -mvis if FPU is disabled.  */
797  if (! TARGET_FPU)
798    target_flags &= ~MASK_VIS;
799
800  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
801     are available.
802     -m64 also implies v9.  */
803  if (TARGET_VIS || TARGET_ARCH64)
804    {
805      target_flags |= MASK_V9;
806      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
807    }
808
809  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
810  if (TARGET_V9 && TARGET_ARCH32)
811    target_flags |= MASK_DEPRECATED_V8_INSNS;
812
813  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
814  if (! TARGET_V9 || TARGET_ARCH64)
815    target_flags &= ~MASK_V8PLUS;
816
817  /* Don't use stack biasing in 32 bit mode.  */
818  if (TARGET_ARCH32)
819    target_flags &= ~MASK_STACK_BIAS;
820
821  /* Supply a default value for align_functions.  */
822  if (align_functions == 0
823      && (sparc_cpu == PROCESSOR_ULTRASPARC
824	  || sparc_cpu == PROCESSOR_ULTRASPARC3
825	  || sparc_cpu == PROCESSOR_NIAGARA
826	  || sparc_cpu == PROCESSOR_NIAGARA2))
827    align_functions = 32;
828
829  /* Validate PCC_STRUCT_RETURN.  */
830  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
831    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
832
833  /* Only use .uaxword when compiling for a 64-bit target.  */
834  if (!TARGET_ARCH64)
835    targetm.asm_out.unaligned_op.di = NULL;
836
837  /* Do various machine dependent initializations.  */
838  sparc_init_modes ();
839
840  /* Acquire unique alias sets for our private stuff.  */
841  sparc_sr_alias_set = new_alias_set ();
842  struct_value_alias_set = new_alias_set ();
843
844  /* Set up function hooks.  */
845  init_machine_status = sparc_init_machine_status;
846
847  switch (sparc_cpu)
848    {
849    case PROCESSOR_V7:
850    case PROCESSOR_CYPRESS:
851      sparc_costs = &cypress_costs;
852      break;
853    case PROCESSOR_V8:
854    case PROCESSOR_SPARCLITE:
855    case PROCESSOR_SUPERSPARC:
856      sparc_costs = &supersparc_costs;
857      break;
858    case PROCESSOR_F930:
859    case PROCESSOR_F934:
860    case PROCESSOR_HYPERSPARC:
861    case PROCESSOR_SPARCLITE86X:
862      sparc_costs = &hypersparc_costs;
863      break;
864    case PROCESSOR_SPARCLET:
865    case PROCESSOR_TSC701:
866      sparc_costs = &sparclet_costs;
867      break;
868    case PROCESSOR_V9:
869    case PROCESSOR_ULTRASPARC:
870      sparc_costs = &ultrasparc_costs;
871      break;
872    case PROCESSOR_ULTRASPARC3:
873      sparc_costs = &ultrasparc3_costs;
874      break;
875    case PROCESSOR_NIAGARA:
876      sparc_costs = &niagara_costs;
877      break;
878    case PROCESSOR_NIAGARA2:
879      sparc_costs = &niagara2_costs;
880      break;
881    };
882
883#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
884  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
885    target_flags |= MASK_LONG_DOUBLE_128;
886#endif
887
888  if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
889    set_param_value ("simultaneous-prefetches",
890		     ((sparc_cpu == PROCESSOR_ULTRASPARC
891		       || sparc_cpu == PROCESSOR_NIAGARA
892		       || sparc_cpu == PROCESSOR_NIAGARA2)
893		      ? 2
894		      : (sparc_cpu == PROCESSOR_ULTRASPARC3
895			 ? 8 : 3)));
896  if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
897    set_param_value ("l1-cache-line-size",
898		     ((sparc_cpu == PROCESSOR_ULTRASPARC
899		       || sparc_cpu == PROCESSOR_ULTRASPARC3
900		       || sparc_cpu == PROCESSOR_NIAGARA
901		       || sparc_cpu == PROCESSOR_NIAGARA2)
902		      ? 64 : 32));
903}
904
905/* Miscellaneous utilities.  */
906
907/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
908   or branch on register contents instructions.  */
909
910int
911v9_regcmp_p (enum rtx_code code)
912{
913  return (code == EQ || code == NE || code == GE || code == LT
914	  || code == LE || code == GT);
915}
916
917/* Nonzero if OP is a floating point constant which can
918   be loaded into an integer register using a single
919   sethi instruction.  */
920
921int
922fp_sethi_p (rtx op)
923{
924  if (GET_CODE (op) == CONST_DOUBLE)
925    {
926      REAL_VALUE_TYPE r;
927      long i;
928
929      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
930      REAL_VALUE_TO_TARGET_SINGLE (r, i);
931      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
932    }
933
934  return 0;
935}
936
937/* Nonzero if OP is a floating point constant which can
938   be loaded into an integer register using a single
939   mov instruction.  */
940
941int
942fp_mov_p (rtx op)
943{
944  if (GET_CODE (op) == CONST_DOUBLE)
945    {
946      REAL_VALUE_TYPE r;
947      long i;
948
949      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
950      REAL_VALUE_TO_TARGET_SINGLE (r, i);
951      return SPARC_SIMM13_P (i);
952    }
953
954  return 0;
955}
956
957/* Nonzero if OP is a floating point constant which can
958   be loaded into an integer register using a high/losum
959   instruction sequence.  */
960
961int
962fp_high_losum_p (rtx op)
963{
964  /* The constraints calling this should only be in
965     SFmode move insns, so any constant which cannot
966     be moved using a single insn will do.  */
967  if (GET_CODE (op) == CONST_DOUBLE)
968    {
969      REAL_VALUE_TYPE r;
970      long i;
971
972      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
973      REAL_VALUE_TO_TARGET_SINGLE (r, i);
974      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
975    }
976
977  return 0;
978}
979
980/* Return true if the address of LABEL can be loaded by means of the
981   mov{si,di}_pic_label_ref patterns in PIC mode.  */
982
983static bool
984can_use_mov_pic_label_ref (rtx label)
985{
986  /* VxWorks does not impose a fixed gap between segments; the run-time
987     gap can be different from the object-file gap.  We therefore can't
988     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
989     are absolutely sure that X is in the same segment as the GOT.
990     Unfortunately, the flexibility of linker scripts means that we
991     can't be sure of that in general, so assume that GOT-relative
992     accesses are never valid on VxWorks.  */
993  if (TARGET_VXWORKS_RTP)
994    return false;
995
996  /* Similarly, if the label is non-local, it might end up being placed
997     in a different section than the current one; now mov_pic_label_ref
998     requires the label and the code to be in the same section.  */
999  if (LABEL_REF_NONLOCAL_P (label))
1000    return false;
1001
1002  /* Finally, if we are reordering basic blocks and partition into hot
1003     and cold sections, this might happen for any label.  */
1004  if (flag_reorder_blocks_and_partition)
1005    return false;
1006
1007  return true;
1008}
1009
1010/* Expand a move instruction.  Return true if all work is done.  */
1011
bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      /* Outside of reload we may create new pseudos, so force the
	 source into a register and legitimize the MEM address.  */
      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      /* Rewrite the TLS reference into a legitimate address; the move
	 itself is still emitted by the caller (return false).  */
      operands[1] = legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  /* Emit the dedicated pattern and report the move as fully
	     expanded (return true).  */
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload no new pseudos may be created; reuse the
	     destination register as the scratch.  */
	  operands[1] = legitimize_pic_address (operands[1],
						reload_in_progress
						? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear FP registers if TARGET_VIS, and always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && const_zero_operand (operands[1], mode))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && (reload_completed || reload_in_progress))))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* Remaining cases: a constant that needs a multi-insn sequence.  */
  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
1133
1134/* Load OP1, a 32-bit constant, into OP0, a register.
1135   We know it can't be done in one insn when we get
1136   here, the move expander guarantees this.  */
1137
void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  /* During/after reload we may not create new pseudos, so reuse the
     destination as the intermediate register.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* The move expander guarantees the constant needs two insns:
	 neither a simm13 nor a pure sethi value should reach here.  */
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
			        & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
1175
1176/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1177   If TEMP is nonzero, we are forbidden to use any other scratch
1178   registers.  Otherwise, we are allowed to generate them as needed.
1179
1180   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1181   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
1182
void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* A TImode TEMP is really a pair of DImode scratch registers;
     remember the pair and work with its first half for now (the
     second half is picked if the first collides with OP0 below).  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
			      gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
1378
1379#if HOST_BITS_PER_WIDE_INT == 32
void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  /* Stub for hosts with a 32-bit HOST_WIDE_INT; 64-bit CONST_INT
     loads are never expanded this way there, so this must not be
     reached.  */
  gcc_unreachable ();
}
1385#else
1386/* These avoid problems when cross compiling.  If we do not
1387   go through all this hair then the optimizer will see
1388   invalid REG_EQUAL notes or in some cases none at all.  */
1389static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1390static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1391static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1392static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1393
1394/* The optimizer is not to assume anything about exactly
1395   which bits are set for a HIGH, they are unspecified.
1396   Unfortunately this leads to many missed optimizations
1397   during CSE.  We mask out the non-HIGH bits, and matches
1398   a plain movdi, to alleviate this problem.  */
1399static rtx
1400gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1401{
1402  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1403}
1404
1405static rtx
1406gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1407{
1408  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1409}
1410
1411static rtx
1412gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1413{
1414  return gen_rtx_IOR (DImode, src, GEN_INT (val));
1415}
1416
1417static rtx
1418gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1419{
1420  return gen_rtx_XOR (DImode, src, GEN_INT (val));
1421}
1422
1423/* Worker routines for 64-bit constant formation on arch64.
1424   One of the key things to be doing in these emissions is
1425   to create as many temp REGs as possible.  This makes it
1426   possible for half-built constants to be used later when
1427   such values are similar to something required later on.
1428   Without doing this, the optimizer cannot see such
1429   opportunities.  */
1430
1431static void sparc_emit_set_const64_quick1 (rtx, rtx,
1432					   unsigned HOST_WIDE_INT, int);
1433
/* Emit a 2-insn sequence loading LOW_BITS into OP0 via scratch TEMP:
   sethi+or when IS_NEG is zero, or sethi of the complement followed by
   xor/not when IS_NEG is nonzero (the constant is 0xffffffffXXXXXXXX).  */
static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negated form, sethi loads the complement so that the
     final xor restores the requested bit pattern including the sign
     extension into the upper word.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
1470
1471static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1472					   unsigned HOST_WIDE_INT, int);
1473
/* Emit a 3-insn (or shorter) sequence loading into OP0 the constant
   (HIGH_BITS << SHIFT_COUNT) | LOW_IMMEDIATE, using scratch TEMP:
   sethi[+or] or mov of HIGH_BITS, shift left, then optional final or.  */
static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  /* TEMP2 is the register holding HIGH_BITS before the shift; it is
     TEMP when no intermediate or-insn was needed.  */
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      /* HIGH_BITS fits in a simm13, a single mov suffices.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
1508
1509static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1510					    unsigned HOST_WIDE_INT);
1511
1512/* Full 64-bit constant decomposition.  Even though this is the
1513   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp;

  /* No new pseudos during/after reload: reuse the destination.  */
  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  /* Build the upper 32 bits in SUB_TEMP (sethi[+or], or a single mov
     when HIGH_BITS fits in a simm13).  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      /* Plenty of scratch registers available: shift the high half into
	 place, build the low half separately, and add them.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* Only OP0 is usable as scratch: feed in the low 32 bits in
	 three chunks (12+12+8 bits), shifting between each.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  /* Chunk is zero: fold its width into the next shift.  */
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
1613
1614/* Analyze a 64-bit constant for certain properties.  */
1615static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1616				    unsigned HOST_WIDE_INT,
1617				    int *, int *, int *);
1618
1619static void
1620analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1621			unsigned HOST_WIDE_INT low_bits,
1622			int *hbsp, int *lbsp, int *abbasp)
1623{
1624  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1625  int i;
1626
1627  lowest_bit_set = highest_bit_set = -1;
1628  i = 0;
1629  do
1630    {
1631      if ((lowest_bit_set == -1)
1632	  && ((low_bits >> i) & 1))
1633	lowest_bit_set = i;
1634      if ((highest_bit_set == -1)
1635	  && ((high_bits >> (32 - i - 1)) & 1))
1636	highest_bit_set = (64 - i - 1);
1637    }
1638  while (++i < 32
1639	 && ((highest_bit_set == -1)
1640	     || (lowest_bit_set == -1)));
1641  if (i == 32)
1642    {
1643      i = 0;
1644      do
1645	{
1646	  if ((lowest_bit_set == -1)
1647	      && ((high_bits >> i) & 1))
1648	    lowest_bit_set = i + 32;
1649	  if ((highest_bit_set == -1)
1650	      && ((low_bits >> (32 - i - 1)) & 1))
1651	    highest_bit_set = 32 - i - 1;
1652	}
1653      while (++i < 32
1654	     && ((highest_bit_set == -1)
1655		 || (lowest_bit_set == -1)));
1656    }
1657  /* If there are no bits set this should have gone out
1658     as one instruction!  */
1659  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1660  all_bits_between_are_set = 1;
1661  for (i = lowest_bit_set; i <= highest_bit_set; i++)
1662    {
1663      if (i < 32)
1664	{
1665	  if ((low_bits & (1 << i)) != 0)
1666	    continue;
1667	}
1668      else
1669	{
1670	  if ((high_bits & (1 << (i - 32))) != 0)
1671	    continue;
1672	}
1673      all_bits_between_are_set = 0;
1674      break;
1675    }
1676  *hbsp = highest_bit_set;
1677  *lbsp = lowest_bit_set;
1678  *abbasp = all_bits_between_are_set;
1679}
1680
1681static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1682
1683static int
1684const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1685		   unsigned HOST_WIDE_INT low_bits)
1686{
1687  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1688
1689  if (high_bits == 0
1690      || high_bits == 0xffffffff)
1691    return 1;
1692
1693  analyze_64bit_constant (high_bits, low_bits,
1694			  &highest_bit_set, &lowest_bit_set,
1695			  &all_bits_between_are_set);
1696
1697  if ((highest_bit_set == 63
1698       || lowest_bit_set == 0)
1699      && all_bits_between_are_set != 0)
1700    return 1;
1701
1702  if ((highest_bit_set - lowest_bit_set) < 21)
1703    return 1;
1704
1705  return 0;
1706}
1707
1708static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1709							unsigned HOST_WIDE_INT,
1710							int, int);
1711
1712static unsigned HOST_WIDE_INT
1713create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1714			  unsigned HOST_WIDE_INT low_bits,
1715			  int lowest_bit_set, int shift)
1716{
1717  HOST_WIDE_INT hi, lo;
1718
1719  if (lowest_bit_set < 32)
1720    {
1721      lo = (low_bits >> lowest_bit_set) << shift;
1722      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1723    }
1724  else
1725    {
1726      lo = 0;
1727      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1728    }
1729  gcc_assert (! (hi & lo));
1730  return (hi | lo);
1731}
1732
1733/* Here we are sure to be arch64 and this is an integer constant
1734   being loaded into a register.  Emit the most efficient
1735   insn sequence possible.  Detection of all the 1-insn cases
1736   has been done already.  */
1737void
1738sparc_emit_set_const64 (rtx op0, rtx op1)
1739{
1740  unsigned HOST_WIDE_INT high_bits, low_bits;
1741  int lowest_bit_set, highest_bit_set;
1742  int all_bits_between_are_set;
1743  rtx temp = 0;
1744
1745  /* Sanity check that we know what we are working with.  */
1746  gcc_assert (TARGET_ARCH64
1747	      && (GET_CODE (op0) == SUBREG
1748		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1749
1750  if (reload_in_progress || reload_completed)
1751    temp = op0;
1752
1753  if (GET_CODE (op1) != CONST_INT)
1754    {
1755      sparc_emit_set_symbolic_const64 (op0, op1, temp);
1756      return;
1757    }
1758
1759  if (! temp)
1760    temp = gen_reg_rtx (DImode);
1761
1762  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1763  low_bits = (INTVAL (op1) & 0xffffffff);
1764
1765  /* low_bits	bits 0  --> 31
1766     high_bits	bits 32 --> 63  */
1767
1768  analyze_64bit_constant (high_bits, low_bits,
1769			  &highest_bit_set, &lowest_bit_set,
1770			  &all_bits_between_are_set);
1771
1772  /* First try for a 2-insn sequence.  */
1773
1774  /* These situations are preferred because the optimizer can
1775   * do more things with them:
1776   * 1) mov	-1, %reg
1777   *    sllx	%reg, shift, %reg
1778   * 2) mov	-1, %reg
1779   *    srlx	%reg, shift, %reg
1780   * 3) mov	some_small_const, %reg
1781   *    sllx	%reg, shift, %reg
1782   */
1783  if (((highest_bit_set == 63
1784	|| lowest_bit_set == 0)
1785       && all_bits_between_are_set != 0)
1786      || ((highest_bit_set - lowest_bit_set) < 12))
1787    {
1788      HOST_WIDE_INT the_const = -1;
1789      int shift = lowest_bit_set;
1790
1791      if ((highest_bit_set != 63
1792	   && lowest_bit_set != 0)
1793	  || all_bits_between_are_set == 0)
1794	{
1795	  the_const =
1796	    create_simple_focus_bits (high_bits, low_bits,
1797				      lowest_bit_set, 0);
1798	}
1799      else if (lowest_bit_set == 0)
1800	shift = -(63 - highest_bit_set);
1801
1802      gcc_assert (SPARC_SIMM13_P (the_const));
1803      gcc_assert (shift != 0);
1804
1805      emit_insn (gen_safe_SET64 (temp, the_const));
1806      if (shift > 0)
1807	emit_insn (gen_rtx_SET (VOIDmode,
1808				op0,
1809				gen_rtx_ASHIFT (DImode,
1810						temp,
1811						GEN_INT (shift))));
1812      else if (shift < 0)
1813	emit_insn (gen_rtx_SET (VOIDmode,
1814				op0,
1815				gen_rtx_LSHIFTRT (DImode,
1816						  temp,
1817						  GEN_INT (-shift))));
1818      return;
1819    }
1820
1821  /* Now a range of 22 or less bits set somewhere.
1822   * 1) sethi	%hi(focus_bits), %reg
1823   *    sllx	%reg, shift, %reg
1824   * 2) sethi	%hi(focus_bits), %reg
1825   *    srlx	%reg, shift, %reg
1826   */
1827  if ((highest_bit_set - lowest_bit_set) < 21)
1828    {
1829      unsigned HOST_WIDE_INT focus_bits =
1830	create_simple_focus_bits (high_bits, low_bits,
1831				  lowest_bit_set, 10);
1832
1833      gcc_assert (SPARC_SETHI_P (focus_bits));
1834      gcc_assert (lowest_bit_set != 10);
1835
1836      emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1837
1838      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1839      if (lowest_bit_set < 10)
1840	emit_insn (gen_rtx_SET (VOIDmode,
1841				op0,
1842				gen_rtx_LSHIFTRT (DImode, temp,
1843						  GEN_INT (10 - lowest_bit_set))));
1844      else if (lowest_bit_set > 10)
1845	emit_insn (gen_rtx_SET (VOIDmode,
1846				op0,
1847				gen_rtx_ASHIFT (DImode, temp,
1848						GEN_INT (lowest_bit_set - 10))));
1849      return;
1850    }
1851
1852  /* 1) sethi	%hi(low_bits), %reg
1853   *    or	%reg, %lo(low_bits), %reg
1854   * 2) sethi	%hi(~low_bits), %reg
1855   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1856   */
1857  if (high_bits == 0
1858      || high_bits == 0xffffffff)
1859    {
1860      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1861				     (high_bits == 0xffffffff));
1862      return;
1863    }
1864
1865  /* Now, try 3-insn sequences.  */
1866
1867  /* 1) sethi	%hi(high_bits), %reg
1868   *    or	%reg, %lo(high_bits), %reg
1869   *    sllx	%reg, 32, %reg
1870   */
1871  if (low_bits == 0)
1872    {
1873      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1874      return;
1875    }
1876
1877  /* We may be able to do something quick
1878     when the constant is negated, so try that.  */
1879  if (const64_is_2insns ((~high_bits) & 0xffffffff,
1880			 (~low_bits) & 0xfffffc00))
1881    {
1882      /* NOTE: The trailing bits get XOR'd so we need the
1883	 non-negated bits, not the negated ones.  */
1884      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1885
1886      if ((((~high_bits) & 0xffffffff) == 0
1887	   && ((~low_bits) & 0x80000000) == 0)
1888	  || (((~high_bits) & 0xffffffff) == 0xffffffff
1889	      && ((~low_bits) & 0x80000000) != 0))
1890	{
1891	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1892
1893	  if ((SPARC_SETHI_P (fast_int)
1894	       && (~high_bits & 0xffffffff) == 0)
1895	      || SPARC_SIMM13_P (fast_int))
1896	    emit_insn (gen_safe_SET64 (temp, fast_int));
1897	  else
1898	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1899	}
1900      else
1901	{
1902	  rtx negated_const;
1903	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1904				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1905	  sparc_emit_set_const64 (temp, negated_const);
1906	}
1907
1908      /* If we are XOR'ing with -1, then we should emit a one's complement
1909	 instead.  This way the combiner will notice logical operations
1910	 such as ANDN later on and substitute.  */
1911      if (trailing_bits == 0x3ff)
1912	{
1913	  emit_insn (gen_rtx_SET (VOIDmode, op0,
1914				  gen_rtx_NOT (DImode, temp)));
1915	}
1916      else
1917	{
1918	  emit_insn (gen_rtx_SET (VOIDmode,
1919				  op0,
1920				  gen_safe_XOR64 (temp,
1921						  (-0x400 | trailing_bits))));
1922	}
1923      return;
1924    }
1925
1926  /* 1) sethi	%hi(xxx), %reg
1927   *    or	%reg, %lo(xxx), %reg
1928   *	sllx	%reg, yyy, %reg
1929   *
1930   * ??? This is just a generalized version of the low_bits==0
1931   * thing above, FIXME...
1932   */
1933  if ((highest_bit_set - lowest_bit_set) < 32)
1934    {
1935      unsigned HOST_WIDE_INT focus_bits =
1936	create_simple_focus_bits (high_bits, low_bits,
1937				  lowest_bit_set, 0);
1938
1939      /* We can't get here in this state.  */
1940      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1941
1942      /* So what we know is that the set bits straddle the
1943	 middle of the 64-bit word.  */
1944      sparc_emit_set_const64_quick2 (op0, temp,
1945				     focus_bits, 0,
1946				     lowest_bit_set);
1947      return;
1948    }
1949
1950  /* 1) sethi	%hi(high_bits), %reg
1951   *    or	%reg, %lo(high_bits), %reg
1952   *    sllx	%reg, 32, %reg
1953   *	or	%reg, low_bits, %reg
1954   */
1955  if (SPARC_SIMM13_P(low_bits)
1956      && ((int)low_bits > 0))
1957    {
1958      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1959      return;
1960    }
1961
1962  /* The easiest way when all else fails, is full decomposition.  */
1963#if 0
1964  printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1965	  high_bits, low_bits, ~high_bits, ~low_bits);
1966#endif
1967  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1968}
1969#endif /* HOST_BITS_PER_WIDE_INT == 32 */
1970
1971/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1972   return the mode to be used for the comparison.  For floating-point,
1973   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
1974   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
1975   processing is needed.  */
1976
1977enum machine_mode
1978select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1979{
1980  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1981    {
1982      switch (op)
1983	{
1984	case EQ:
1985	case NE:
1986	case UNORDERED:
1987	case ORDERED:
1988	case UNLT:
1989	case UNLE:
1990	case UNGT:
1991	case UNGE:
1992	case UNEQ:
1993	case LTGT:
1994	  return CCFPmode;
1995
1996	case LT:
1997	case LE:
1998	case GT:
1999	case GE:
2000	  return CCFPEmode;
2001
2002	default:
2003	  gcc_unreachable ();
2004	}
2005    }
2006  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2007	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2008    {
2009      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2010	return CCX_NOOVmode;
2011      else
2012	return CC_NOOVmode;
2013    }
2014  else
2015    {
2016      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2017	return CCXmode;
2018      else
2019	return CCmode;
2020    }
2021}
2022
2023/* Emit the compare insn and return the CC reg for a CODE comparison
2024   with operands X and Y.  */
2025
static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode;
  rtx cc_reg;

  /* If X is already a condition-code value, the comparison has been
     emitted; just hand it back.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos). If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      /* No cached match: allocate the next fcc reg round-robin and
	 remember the operands for future reuse.  */
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    /* Pre-v9 has a single float condition-code register.  */
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2089
2090
2091/* Emit the compare insn and return the CC reg for the comparison in CMP.  */
2092
2093rtx
2094gen_compare_reg (rtx cmp)
2095{
2096  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2097}
2098
2099/* This function is used for v9 only.
2100   DEST is the target of the Scc insn.
2101   CODE is the code for an Scc's comparison.
2102   X and Y are the values we compare.
2103
2104   This function is needed to turn
2105
2106	   (set (reg:SI 110)
2107	       (gt (reg:CCX 100 %icc)
2108	           (const_int 0)))
2109   into
2110	   (set (reg:SI 110)
2111	       (gt:DI (reg:CCX 100 %icc)
2112	           (const_int 0)))
2113
2114   IE: The instruction recognizer needs to see the mode of the comparison to
2115   find the right instruction. We could use "gt:DI" right in the
2116   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2117
static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  /* DImode values cannot be handled here on a 32-bit target.  */
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Clear dest, then conditionally move 1 into it based on the
	 register contents of temp.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  /* movrCC compares a full 64-bit register, so widen op0 first.  */
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   dest)));
      return 1;
    }
  else
    {
      /* Fall back to comparing into a CC register and using movCC.  */
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      gcc_assert (GET_MODE (x) != CC_NOOVmode
		  && GET_MODE (x) != CCX_NOOVmode);

      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
				    const1_rtx, dest)));
      return 1;
    }
}
2192
2193
2194/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
2195   without jumps using the addx/subx instructions.  */
2196
2197bool
2198emit_scc_insn (rtx operands[])
2199{
2200  rtx tem;
2201  rtx x;
2202  rtx y;
2203  enum rtx_code code;
2204
2205  /* The quad-word fp compare library routines all return nonzero to indicate
2206     true, which is different from the equivalent libgcc routines, so we must
2207     handle them specially here.  */
2208  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2209    {
2210      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2211					      GET_CODE (operands[1]));
2212      operands[2] = XEXP (operands[1], 0);
2213      operands[3] = XEXP (operands[1], 1);
2214    }
2215
2216  code = GET_CODE (operands[1]);
2217  x = operands[2];
2218  y = operands[3];
2219
2220  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2221     more applications).  The exception to this is "reg != 0" which can
2222     be done in one instruction on v9 (so we do it).  */
2223  if (code == EQ)
2224    {
2225      if (GET_MODE (x) == SImode)
2226        {
2227          rtx pat = gen_seqsi_special (operands[0], x, y);
2228          emit_insn (pat);
2229          return true;
2230        }
2231      else if (GET_MODE (x) == DImode)
2232        {
2233          rtx pat = gen_seqdi_special (operands[0], x, y);
2234          emit_insn (pat);
2235          return true;
2236        }
2237    }
2238
2239  if (code == NE)
2240    {
2241      if (GET_MODE (x) == SImode)
2242        {
2243          rtx pat = gen_snesi_special (operands[0], x, y);
2244          emit_insn (pat);
2245          return true;
2246        }
2247      else if (GET_MODE (x) == DImode)
2248        {
2249          rtx pat = gen_snedi_special (operands[0], x, y);
2250          emit_insn (pat);
2251          return true;
2252        }
2253    }
2254
2255  /* For the rest, on v9 we can use conditional moves.  */
2256
2257  if (TARGET_V9)
2258    {
2259      if (gen_v9_scc (operands[0], code, x, y))
2260        return true;
2261    }
2262
2263  /* We can do LTU and GEU using the addx/subx instructions too.  And
2264     for GTU/LEU, if both operands are registers swap them and fall
2265     back to the easy case.  */
2266  if (code == GTU || code == LEU)
2267    {
2268      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2269          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2270        {
2271          tem = x;
2272          x = y;
2273          y = tem;
2274          code = swap_condition (code);
2275        }
2276    }
2277
2278  if (code == LTU || code == GEU)
2279    {
2280      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2281			      gen_rtx_fmt_ee (code, SImode,
2282					      gen_compare_reg_1 (code, x, y),
2283					      const0_rtx)));
2284      return true;
2285    }
2286
2287  /* Nope, do branches.  */
2288  return false;
2289}
2290
2291/* Emit a conditional jump insn for the v9 architecture using comparison code
2292   CODE and jump target LABEL.
2293   This function exists to take advantage of the v9 brxx insns.  */
2294
2295static void
2296emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2297{
2298  emit_jump_insn (gen_rtx_SET (VOIDmode,
2299			   pc_rtx,
2300			   gen_rtx_IF_THEN_ELSE (VOIDmode,
2301				    gen_rtx_fmt_ee (code, GET_MODE (op0),
2302						    op0, const0_rtx),
2303				    gen_rtx_LABEL_REF (VOIDmode, label),
2304				    pc_rtx)));
2305}
2306
2307void
2308emit_conditional_branch_insn (rtx operands[])
2309{
2310  /* The quad-word fp compare library routines all return nonzero to indicate
2311     true, which is different from the equivalent libgcc routines, so we must
2312     handle them specially here.  */
2313  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2314    {
2315      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2316					      GET_CODE (operands[0]));
2317      operands[1] = XEXP (operands[0], 0);
2318      operands[2] = XEXP (operands[0], 1);
2319    }
2320
2321  if (TARGET_ARCH64 && operands[2] == const0_rtx
2322      && GET_CODE (operands[1]) == REG
2323      && GET_MODE (operands[1]) == DImode)
2324    {
2325      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2326      return;
2327    }
2328
2329  operands[1] = gen_compare_reg (operands[0]);
2330  operands[2] = const0_rtx;
2331  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2332				operands[1], operands[2]);
2333  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2334				  operands[3]));
2335}
2336
2337
2338/* Generate a DFmode part of a hard TFmode register.
2339   REG is the TFmode hard register, LOW is 1 for the
2340   low 64bit of the register and 0 otherwise.
2341 */
2342rtx
2343gen_df_reg (rtx reg, int low)
2344{
2345  int regno = REGNO (reg);
2346
2347  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2348    regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2349  return gen_rtx_REG (DFmode, regno);
2350}
2351
2352/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
2353   Unlike normal calls, TFmode operands are passed by reference.  It is
2354   assumed that no more than 3 operands are required.  */
2355
static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* With TARGET_BUGGY_QP_LIB, always give the result operand
	     (i == 0) a private stack slot rather than passing its own
	     address.  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  /* An existing MEM can be passed by its address directly.  */
	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    this_arg = XEXP (this_arg, 0);
	  /* Constants are spilled to the constant pool and passed by
	     the pool entry's address.  */
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result: the callee writes through the first pointer
	 argument, so emit a call with no register return value.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      /* If the result went into a temporary stack slot, copy it out.  */
      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      /* Non-TFmode result: the value comes back the normal way.  */
      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
2436
2437/* Expand soft-float TFmode calls to sparc abi routines.  */
2438
2439static void
2440emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2441{
2442  const char *func;
2443
2444  switch (code)
2445    {
2446    case PLUS:
2447      func = "_Qp_add";
2448      break;
2449    case MINUS:
2450      func = "_Qp_sub";
2451      break;
2452    case MULT:
2453      func = "_Qp_mul";
2454      break;
2455    case DIV:
2456      func = "_Qp_div";
2457      break;
2458    default:
2459      gcc_unreachable ();
2460    }
2461
2462  emit_soft_tfmode_libcall (func, 3, operands);
2463}
2464
2465static void
2466emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2467{
2468  const char *func;
2469
2470  gcc_assert (code == SQRT);
2471  func = "_Qp_sqrt";
2472
2473  emit_soft_tfmode_libcall (func, 2, operands);
2474}
2475
2476static void
2477emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2478{
2479  const char *func;
2480
2481  switch (code)
2482    {
2483    case FLOAT_EXTEND:
2484      switch (GET_MODE (operands[1]))
2485	{
2486	case SFmode:
2487	  func = "_Qp_stoq";
2488	  break;
2489	case DFmode:
2490	  func = "_Qp_dtoq";
2491	  break;
2492	default:
2493	  gcc_unreachable ();
2494	}
2495      break;
2496
2497    case FLOAT_TRUNCATE:
2498      switch (GET_MODE (operands[0]))
2499	{
2500	case SFmode:
2501	  func = "_Qp_qtos";
2502	  break;
2503	case DFmode:
2504	  func = "_Qp_qtod";
2505	  break;
2506	default:
2507	  gcc_unreachable ();
2508	}
2509      break;
2510
2511    case FLOAT:
2512      switch (GET_MODE (operands[1]))
2513	{
2514	case SImode:
2515	  func = "_Qp_itoq";
2516	  if (TARGET_ARCH64)
2517	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2518	  break;
2519	case DImode:
2520	  func = "_Qp_xtoq";
2521	  break;
2522	default:
2523	  gcc_unreachable ();
2524	}
2525      break;
2526
2527    case UNSIGNED_FLOAT:
2528      switch (GET_MODE (operands[1]))
2529	{
2530	case SImode:
2531	  func = "_Qp_uitoq";
2532	  if (TARGET_ARCH64)
2533	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2534	  break;
2535	case DImode:
2536	  func = "_Qp_uxtoq";
2537	  break;
2538	default:
2539	  gcc_unreachable ();
2540	}
2541      break;
2542
2543    case FIX:
2544      switch (GET_MODE (operands[0]))
2545	{
2546	case SImode:
2547	  func = "_Qp_qtoi";
2548	  break;
2549	case DImode:
2550	  func = "_Qp_qtox";
2551	  break;
2552	default:
2553	  gcc_unreachable ();
2554	}
2555      break;
2556
2557    case UNSIGNED_FIX:
2558      switch (GET_MODE (operands[0]))
2559	{
2560	case SImode:
2561	  func = "_Qp_qtoui";
2562	  break;
2563	case DImode:
2564	  func = "_Qp_qtoux";
2565	  break;
2566	default:
2567	  gcc_unreachable ();
2568	}
2569      break;
2570
2571    default:
2572      gcc_unreachable ();
2573    }
2574
2575  emit_soft_tfmode_libcall (func, 2, operands);
2576}
2577
2578/* Expand a hard-float tfmode operation.  All arguments must be in
2579   registers.  */
2580
2581static void
2582emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2583{
2584  rtx op, dest;
2585
2586  if (GET_RTX_CLASS (code) == RTX_UNARY)
2587    {
2588      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2589      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2590    }
2591  else
2592    {
2593      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2594      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2595      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2596			   operands[1], operands[2]);
2597    }
2598
2599  if (register_operand (operands[0], VOIDmode))
2600    dest = operands[0];
2601  else
2602    dest = gen_reg_rtx (GET_MODE (operands[0]));
2603
2604  emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2605
2606  if (dest != operands[0])
2607    emit_move_insn (operands[0], dest);
2608}
2609
2610void
2611emit_tfmode_binop (enum rtx_code code, rtx *operands)
2612{
2613  if (TARGET_HARD_QUAD)
2614    emit_hard_tfmode_operation (code, operands);
2615  else
2616    emit_soft_tfmode_binop (code, operands);
2617}
2618
2619void
2620emit_tfmode_unop (enum rtx_code code, rtx *operands)
2621{
2622  if (TARGET_HARD_QUAD)
2623    emit_hard_tfmode_operation (code, operands);
2624  else
2625    emit_soft_tfmode_unop (code, operands);
2626}
2627
2628void
2629emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2630{
2631  if (TARGET_HARD_QUAD)
2632    emit_hard_tfmode_operation (code, operands);
2633  else
2634    emit_soft_tfmode_cvt (code, operands);
2635}
2636
2637/* Return nonzero if a branch/jump/call instruction will be emitting
2638   nop into its delay slot.  */
2639
2640int
2641empty_delay_slot (rtx insn)
2642{
2643  rtx seq;
2644
2645  /* If no previous instruction (should not happen), return true.  */
2646  if (PREV_INSN (insn) == NULL)
2647    return 1;
2648
2649  seq = NEXT_INSN (PREV_INSN (insn));
2650  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2651    return 0;
2652
2653  return 1;
2654}
2655
2656/* Return nonzero if TRIAL can go into the call delay slot.  */
2657
2658int
2659tls_call_delay (rtx trial)
2660{
2661  rtx pat;
2662
2663  /* Binutils allows
2664       call __tls_get_addr, %tgd_call (foo)
2665        add %l7, %o0, %o0, %tgd_add (foo)
2666     while Sun as/ld does not.  */
2667  if (TARGET_GNU_TLS || !TARGET_TLS)
2668    return 1;
2669
2670  pat = PATTERN (trial);
2671
2672  /* We must reject tgd_add{32|64}, i.e.
2673       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2674     and tldm_add{32|64}, i.e.
2675       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2676     for Sun as/ld.  */
2677  if (GET_CODE (pat) == SET
2678      && GET_CODE (SET_SRC (pat)) == PLUS)
2679    {
2680      rtx unspec = XEXP (SET_SRC (pat), 1);
2681
2682      if (GET_CODE (unspec) == UNSPEC
2683	  && (XINT (unspec, 1) == UNSPEC_TLSGD
2684	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
2685	return 0;
2686    }
2687
2688  return 1;
2689}
2690
2691/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2692   instruction.  RETURN_P is true if the v9 variant 'return' is to be
2693   considered in the test too.
2694
2695   TRIAL must be a SET whose destination is a REG appropriate for the
2696   'restore' instruction or, if RETURN_P is true, for the 'return'
2697   instruction.  */
2698
static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
	   && (get_attr_in_uncond_branch_delay (trial)
	       == IN_UNCOND_BRANCH_DELAY_TRUE))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
	        && immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern: a shift left by 1 is the same
     as src+src, which restore can perform.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  /* No recognized form: TRIAL cannot be combined with a restore.  */
  return 0;
}
2766
2767/* Return nonzero if TRIAL can go into the function return's
2768   delay slot.  */
2769
2770int
2771eligible_for_return_delay (rtx trial)
2772{
2773  rtx pat;
2774
2775  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2776    return 0;
2777
2778  if (get_attr_length (trial) != 1)
2779    return 0;
2780
2781  /* If the function uses __builtin_eh_return, the eh_return machinery
2782     occupies the delay slot.  */
2783  if (crtl->calls_eh_return)
2784    return 0;
2785
2786  /* In the case of a true leaf function, anything can go into the slot.  */
2787  if (sparc_leaf_function_p)
2788    return get_attr_in_uncond_branch_delay (trial)
2789	   == IN_UNCOND_BRANCH_DELAY_TRUE;
2790
2791  pat = PATTERN (trial);
2792
2793  /* Otherwise, only operations which can be done in tandem with
2794     a `restore' or `return' insn can go into the delay slot.  */
2795  if (GET_CODE (SET_DEST (pat)) != REG
2796      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2797    return 0;
2798
2799  /* If this instruction sets up floating point register and we have a return
2800     instruction, it can probably go in.  But restore will not work
2801     with FP_REGS.  */
2802  if (REGNO (SET_DEST (pat)) >= 32)
2803    return (TARGET_V9
2804	    && ! epilogue_renumber (&pat, 1)
2805	    && (get_attr_in_uncond_branch_delay (trial)
2806		== IN_UNCOND_BRANCH_DELAY_TRUE));
2807
2808  return eligible_for_restore_insn (trial, true);
2809}
2810
2811/* Return nonzero if TRIAL can go into the sibling call's
2812   delay slot.  */
2813
2814int
2815eligible_for_sibcall_delay (rtx trial)
2816{
2817  rtx pat;
2818
2819  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2820    return 0;
2821
2822  if (get_attr_length (trial) != 1)
2823    return 0;
2824
2825  pat = PATTERN (trial);
2826
2827  if (sparc_leaf_function_p)
2828    {
2829      /* If the tail call is done using the call instruction,
2830	 we have to restore %o7 in the delay slot.  */
2831      if (LEAF_SIBCALL_SLOT_RESERVED_P)
2832	return 0;
2833
2834      /* %g1 is used to build the function address */
2835      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2836	return 0;
2837
2838      return 1;
2839    }
2840
2841  /* Otherwise, only operations which can be done in tandem with
2842     a `restore' insn can go into the delay slot.  */
2843  if (GET_CODE (SET_DEST (pat)) != REG
2844      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2845      || REGNO (SET_DEST (pat)) >= 32)
2846    return 0;
2847
2848  /* If it mentions %o7, it can't go in, because sibcall will clobber it
2849     in most cases.  */
2850  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2851    return 0;
2852
2853  return eligible_for_restore_insn (trial, false);
2854}
2855
int
short_branch (int uid1, int uid2)
{
  int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);

  /* Leave a few words of "slop".  */
  return delta >= -1023 && delta <= 1022;
}
2867
2868/* Return nonzero if REG is not used after INSN.
2869   We assume REG is a reload reg, and therefore does
2870   not live past labels or calls or jumps.  */
2871int
2872reg_unused_after (rtx reg, rtx insn)
2873{
2874  enum rtx_code code, prev_code = UNKNOWN;
2875
2876  while ((insn = NEXT_INSN (insn)))
2877    {
2878      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2879	return 1;
2880
2881      code = GET_CODE (insn);
2882      if (GET_CODE (insn) == CODE_LABEL)
2883	return 1;
2884
2885      if (INSN_P (insn))
2886	{
2887	  rtx set = single_set (insn);
2888	  int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2889	  if (set && in_src)
2890	    return 0;
2891	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2892	    return 1;
2893	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2894	    return 0;
2895	}
2896      prev_code = code;
2897    }
2898  return 1;
2899}
2900
2901/* Determine if it's legal to put X into the constant pool.  This
2902   is not possible if X contains the address of a symbol that is
2903   not constant (TLS) or not known at final link time (PIC).  */
2904
2905static bool
2906sparc_cannot_force_const_mem (rtx x)
2907{
2908  switch (GET_CODE (x))
2909    {
2910    case CONST_INT:
2911    case CONST_DOUBLE:
2912    case CONST_VECTOR:
2913      /* Accept all non-symbolic constants.  */
2914      return false;
2915
2916    case LABEL_REF:
2917      /* Labels are OK iff we are non-PIC.  */
2918      return flag_pic != 0;
2919
2920    case SYMBOL_REF:
2921      /* 'Naked' TLS symbol references are never OK,
2922	 non-TLS symbols are OK iff we are non-PIC.  */
2923      if (SYMBOL_REF_TLS_MODEL (x))
2924	return true;
2925      else
2926	return flag_pic != 0;
2927
2928    case CONST:
2929      return sparc_cannot_force_const_mem (XEXP (x, 0));
2930    case PLUS:
2931    case MINUS:
2932      return sparc_cannot_force_const_mem (XEXP (x, 0))
2933         || sparc_cannot_force_const_mem (XEXP (x, 1));
2934    case UNSPEC:
2935      return true;
2936    default:
2937      gcc_unreachable ();
2938    }
2939}
2940
2941/* Global Offset Table support.  */
2942static GTY(()) rtx got_helper_rtx = NULL_RTX;
2943static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
2944
2945/* Return the SYMBOL_REF for the Global Offset Table.  */
2946
2947static GTY(()) rtx sparc_got_symbol = NULL_RTX;
2948
2949static rtx
2950sparc_got (void)
2951{
2952  if (!sparc_got_symbol)
2953    sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2954
2955  return sparc_got_symbol;
2956}
2957
2958/* Ensure that we are not using patterns that are not OK with PIC.  */
2959
int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      /* With -fpic, the only legitimate symbolic operand is a
	 GOT-relative reference; anything else should have been
	 legitimized before recognition.  */
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
	  	  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* Fall through.  */
    case 2:
    default:
      return 1;
    }
}
2979
2980/* Return true if X is an address which needs a temporary register when
2981   reloaded while generating PIC code.  */
2982
2983int
2984pic_address_needs_scratch (rtx x)
2985{
2986  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2987  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2988      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2989      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2990      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2991    return 1;
2992
2993  return 0;
2994}
2995
2996/* Determine if a given RTX is a valid constant.  We already know this
2997   satisfies CONSTANT_P.  */
2998
2999bool
3000legitimate_constant_p (rtx x)
3001{
3002  switch (GET_CODE (x))
3003    {
3004    case CONST:
3005    case SYMBOL_REF:
3006      if (sparc_tls_referenced_p (x))
3007	return false;
3008      break;
3009
3010    case CONST_DOUBLE:
3011      if (GET_MODE (x) == VOIDmode)
3012        return true;
3013
3014      /* Floating point constants are generally not ok.
3015	 The only exception is 0.0 in VIS.  */
3016      if (TARGET_VIS
3017	  && SCALAR_FLOAT_MODE_P (GET_MODE (x))
3018	  && const_zero_operand (x, GET_MODE (x)))
3019	return true;
3020
3021      return false;
3022
3023    case CONST_VECTOR:
3024      /* Vector constants are generally not ok.
3025	 The only exception is 0 in VIS.  */
3026      if (TARGET_VIS
3027	  && const_zero_operand (x, GET_MODE (x)))
3028	return true;
3029
3030      return false;
3031
3032    default:
3033      break;
3034    }
3035
3036  return true;
3037}
3038
3039/* Determine if a given RTX is a valid constant address.  */
3040
3041bool
3042constant_address_p (rtx x)
3043{
3044  switch (GET_CODE (x))
3045    {
3046    case LABEL_REF:
3047    case CONST_INT:
3048    case HIGH:
3049      return true;
3050
3051    case CONST:
3052      if (flag_pic && pic_address_needs_scratch (x))
3053	return false;
3054      return legitimate_constant_p (x);
3055
3056    case SYMBOL_REF:
3057      return !flag_pic && legitimate_constant_p (x);
3058
3059    default:
3060      return false;
3061    }
3062}
3063
3064/* Nonzero if the constant value X is a legitimate general operand
3065   when generating PIC code.  It is given that flag_pic is on and
3066   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3067
3068bool
3069legitimate_pic_operand_p (rtx x)
3070{
3071  if (pic_address_needs_scratch (x))
3072    return false;
3073  if (sparc_tls_referenced_p (x))
3074    return false;
3075  return true;
3076}
3077
/* Return nonzero if ADDR is a valid memory address.
   STRICT specifies whether strict register checking applies.  */

static bool
sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
{
  /* Decomposition of ADDR: RS1 is the base register, RS2 an optional
     index register, IMM1 an optional immediate part.  */
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first.  */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* Either the PIC register plus a suitable operand under -fpic,
	 or a base register plus a small immediate offset.  */
      if ((flag_pic == 1
	   && rs1 == pic_offset_table_rtx
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address.  */
	  if (mode == TFmode
	      && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the  forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2))
	{
	  /* (LO_SUM reg sym) plus a small offset: fold the LO_SUM
	     into base + immediate, rejecting TLS symbols inside.  */
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      /* The immediate must be a constant and must not reference a
	 TLS symbol.  */
      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* From here on RS1 (and RS2 when present) must be registers; strip
     SUBREGs before validating the register numbers.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      /* Strict checking: only registers acceptable as a base.  */
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict: allow pseudos and the frame pointer, but reject
	 hard registers at or beyond 32 (the non-integer ones).  */
      if ((REGNO (rs1) >= 32
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (REGNO (rs2) >= 32
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}
3207
3208/* Return the SYMBOL_REF for the tls_get_addr function.  */
3209
3210static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3211
3212static rtx
3213sparc_tls_get_addr (void)
3214{
3215  if (!sparc_tls_symbol)
3216    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3217
3218  return sparc_tls_symbol;
3219}
3220
/* Return the Global Offset Table to be used in TLS mode.  Returns an
   rtx usable as the GOT base by the TLS expanders below.  */

static rtx
sparc_tls_got (void)
{
  /* In PIC mode, this is just the PIC offset table.  */
  if (flag_pic)
    {
      crtl->uses_pic_offset_table = 1;
      return pic_offset_table_rtx;
    }

  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
  if (TARGET_SUN_TLS && TARGET_ARCH32)
    {
      load_got_register ();
      return global_offset_table_rtx;
    }

  /* In all other cases, we load a new pseudo with the GOT symbol.  */
  return copy_to_reg (sparc_got ());
}
3244
3245/* Return true if X contains a thread-local symbol.  */
3246
3247static bool
3248sparc_tls_referenced_p (rtx x)
3249{
3250  if (!TARGET_HAVE_TLS)
3251    return false;
3252
3253  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3254    x = XEXP (XEXP (x, 0), 0);
3255
3256  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3257    return true;
3258
3259  /* That's all we handle in legitimize_tls_address for now.  */
3260  return false;
3261}
3262
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  Returns an rtx holding the address.  */

static rtx
legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got, insn;

  /* Every model below allocates fresh pseudos.  */
  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* Build the GOT offset with the tgd_hi22/tgd_lo10 pair, add it
	   to the GOT base into %o0 (reg 8) and call __tls_get_addr;
	   the result comes back in %o0.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tgd_hi22 (temp1, addr));
	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	/* Record that the call reads the argument register %o0.  */
        CALL_INSN_FUNCTION_USAGE (insn)
	  = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
			       CALL_INSN_FUNCTION_USAGE (insn));
	insn = get_insns ();
	end_sequence ();
	/* Wrap the whole sequence as a libcall equivalent to ADDR.  */
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* First call __tls_get_addr once to obtain the module base
	   (TEMP3), then add the symbol's tldo offset to it.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tldm_hi22 (temp1));
	emit_insn (gen_tldm_lo10 (temp2, temp1));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldm_add32 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tldm_add64 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	/* Record that the call reads the argument register %o0.  */
        CALL_INSN_FUNCTION_USAGE (insn)
	  = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
			       CALL_INSN_FUNCTION_USAGE (insn));
	insn = get_insns ();
	end_sequence ();
	/* The module base is symbol-independent, so equate it to a
	   unique UNSPEC rather than to ADDR.  */
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	emit_insn (gen_tldo_hix22 (temp1, addr));
	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
	else
	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* Load the symbol's TP offset from the GOT (tie_ld), then add
	   it to the thread pointer in reg 7 (%g7).  */
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	emit_insn (gen_tie_hi22 (temp1, addr));
	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	else
	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	/* Sun TLS needs the explicit tie_add pattern; otherwise a
	   plain PLUS with the thread pointer suffices.  */
        if (TARGET_SUN_TLS)
	  {
	    ret = gen_reg_rtx (Pmode);
	    if (TARGET_ARCH32)
	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	    else
	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* The TP offset is a link-time constant: materialize it with
	   the tle_hix22/tle_lox10 pair and add the thread pointer.  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
	  }
	else
	  {
	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
	  }
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      /* (const (plus tls-symbol offset)): legitimize the symbol and
	 re-add the offset, forcing large offsets into a register.  */
      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return ret;
}
3410
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  bool gotdata_op = false;

  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
	{
	  /* New pseudos cannot be created once reload has started.  */
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = ((reload_in_progress || reload_completed)
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  /* -fPIC: load through the pic_gotdata_op patterns.  */
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg, pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg, pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* -fpic: a single load from the GOT at PIC register + ADDRESS.  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already in the PIC register + offset form: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both operands of the PLUS separately, reusing REG
	 for the second only when the first did not claim it.  */
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
			 	       base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}
3524
/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */

static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  rtx orig_x = x;

  /* Force MULT and nested-PLUS subexpressions of a PLUS into
     registers, so the address collapses to REG+REG.  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  /* If those rewrites already produced a valid address, we are done.  */
  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  /* Otherwise dispatch on the kind of illegitimacy: TLS references,
     PIC references, or constant parts that must live in registers.  */
  if (sparc_tls_referenced_p (x))
    x = legitimize_tls_address (x);
  else if (flag_pic)
    x = legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}
3574
3575#ifdef HAVE_GAS_HIDDEN
3576# define USE_HIDDEN_LINKONCE 1
3577#else
3578# define USE_HIDDEN_LINKONCE 0
3579#endif
3580
3581static void
3582get_pc_thunk_name (char name[32], unsigned int regno)
3583{
3584  const char *reg_name = reg_names[regno];
3585
3586  /* Skip the leading '%' as that cannot be used in a
3587     symbol name.  */
3588  reg_name += 1;
3589
3590  if (USE_HIDDEN_LINKONCE)
3591    sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
3592  else
3593    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
3594}
3595
3596/* Wrapper around the load_pcrel_sym{si,di} patterns.  */
3597
3598static rtx
3599gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
3600{
3601  int orig_flag_pic = flag_pic;
3602  rtx insn;
3603
3604  /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
3605  flag_pic = 0;
3606  if (TARGET_ARCH64)
3607    insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
3608  else
3609    insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
3610  flag_pic = orig_flag_pic;
3611
3612  return insn;
3613}
3614
/* Emit code to load the GOT register.  */

static void
load_got_register (void)
{
  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
  if (!global_offset_table_rtx)
    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

  if (TARGET_VXWORKS_RTP)
    /* VxWorks RTP has a dedicated GOT load pattern.  */
    emit_insn (gen_vxworks_load_got ());
  else
    {
      /* The GOT symbol is subject to a PC-relative relocation so we need a
	 helper function to add the PC value and thus get the final value.
	 The helper symbol is created lazily and cached in got_helper_rtx.  */
      if (!got_helper_rtx)
	{
	  char name[32];
	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
	}

      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
				     got_helper_rtx,
				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_use (global_offset_table_rtx);
}
3648
3649/* Emit a call instruction with the pattern given by PAT.  ADDR is the
3650   address of the call target.  */
3651
3652void
3653sparc_emit_call_insn (rtx pat, rtx addr)
3654{
3655  rtx insn;
3656
3657  insn = emit_call_insn (pat);
3658
3659  /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
3660  if (TARGET_VXWORKS_RTP
3661      && flag_pic
3662      && GET_CODE (addr) == SYMBOL_REF
3663      && (SYMBOL_REF_DECL (addr)
3664	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3665	  : !SYMBOL_REF_LOCAL_P (addr)))
3666    {
3667      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3668      crtl->uses_pic_offset_table = 1;
3669    }
3670}
3671
/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  */

int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously...  */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* Frame or stack pointer based: remove the stack bias
	     before testing the offset's alignment.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
3751
3752
3753/* Vectors to keep interesting information about registers where it can easily
3754   be got.  We used to use the actual mode value as the bit number, but there
3755   are more than 32 modes now.  Instead we use two tables: one indexed by
3756   hard register number, and one indexed by mode.  */
3757
3758/* The purpose of sparc_mode_class is to shrink the range of modes so that
3759   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
3760   mapped into one sparc_mode_class mode.  */
3761
enum sparc_mode_class {
  /* Integer classes by size: single, double, tetra (quad), octa word.  */
  S_MODE, D_MODE, T_MODE, O_MODE,
  /* Floating-point-register classes of the same sizes.  */
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  /* Integer and floating-point condition codes.  */
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (D_MODES)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
3806
3807/* Value is 1 if register/mode pair is acceptable on sparc.
3808   The funny mixture of D and T modes is because integer operations
3809   do not specially operate on tetra quantities, so non-quad-aligned
3810   registers can hold quadword quantities (except %o4 and %i4 because
3811   they cross fixed registers).  */
3812
/* This points to either the 32 bit or the 64 bit version; selected by
   sparc_init_modes according to TARGET_ARCH64.  */
const int *hard_regno_mode_classes;

/* Mode-class masks indexed by hard register number: the first 32
   entries are the integer registers, then f0-f31, then f32-f63,
   then %fcc0-3 and %icc.  */
static const int hard_32bit_mode_classes[] = {
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

/* Same layout as above, but the integer registers can hold
   double-word (and for some, wider) quantities on 64-bit.  */
static const int hard_64bit_mode_classes[] = {
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

/* Mode-class bit for each machine mode; filled in by sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Register class for each hard regno; filled in by sparc_init_modes.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3869
3870static void
3871sparc_init_modes (void)
3872{
3873  int i;
3874
3875  for (i = 0; i < NUM_MACHINE_MODES; i++)
3876    {
3877      switch (GET_MODE_CLASS (i))
3878	{
3879	case MODE_INT:
3880	case MODE_PARTIAL_INT:
3881	case MODE_COMPLEX_INT:
3882	  if (GET_MODE_SIZE (i) <= 4)
3883	    sparc_mode_class[i] = 1 << (int) S_MODE;
3884	  else if (GET_MODE_SIZE (i) == 8)
3885	    sparc_mode_class[i] = 1 << (int) D_MODE;
3886	  else if (GET_MODE_SIZE (i) == 16)
3887	    sparc_mode_class[i] = 1 << (int) T_MODE;
3888	  else if (GET_MODE_SIZE (i) == 32)
3889	    sparc_mode_class[i] = 1 << (int) O_MODE;
3890	  else
3891	    sparc_mode_class[i] = 0;
3892	  break;
3893	case MODE_VECTOR_INT:
3894	  if (GET_MODE_SIZE (i) <= 4)
3895	    sparc_mode_class[i] = 1 << (int)SF_MODE;
3896	  else if (GET_MODE_SIZE (i) == 8)
3897	    sparc_mode_class[i] = 1 << (int)DF_MODE;
3898	  break;
3899	case MODE_FLOAT:
3900	case MODE_COMPLEX_FLOAT:
3901	  if (GET_MODE_SIZE (i) <= 4)
3902	    sparc_mode_class[i] = 1 << (int) SF_MODE;
3903	  else if (GET_MODE_SIZE (i) == 8)
3904	    sparc_mode_class[i] = 1 << (int) DF_MODE;
3905	  else if (GET_MODE_SIZE (i) == 16)
3906	    sparc_mode_class[i] = 1 << (int) TF_MODE;
3907	  else if (GET_MODE_SIZE (i) == 32)
3908	    sparc_mode_class[i] = 1 << (int) OF_MODE;
3909	  else
3910	    sparc_mode_class[i] = 0;
3911	  break;
3912	case MODE_CC:
3913	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
3914	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3915	  else
3916	    sparc_mode_class[i] = 1 << (int) CC_MODE;
3917	  break;
3918	default:
3919	  sparc_mode_class[i] = 0;
3920	  break;
3921	}
3922    }
3923
3924  if (TARGET_ARCH64)
3925    hard_regno_mode_classes = hard_64bit_mode_classes;
3926  else
3927    hard_regno_mode_classes = hard_32bit_mode_classes;
3928
3929  /* Initialize the array used by REGNO_REG_CLASS.  */
3930  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3931    {
3932      if (i < 16 && TARGET_V8PLUS)
3933	sparc_regno_reg_class[i] = I64_REGS;
3934      else if (i < 32 || i == FRAME_POINTER_REGNUM)
3935	sparc_regno_reg_class[i] = GENERAL_REGS;
3936      else if (i < 64)
3937	sparc_regno_reg_class[i] = FP_REGS;
3938      else if (i < 96)
3939	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3940      else if (i < 100)
3941	sparc_regno_reg_class[i] = FPCC_REGS;
3942      else
3943	sparc_regno_reg_class[i] = NO_REGS;
3944    }
3945}
3946
3947/* Compute the frame size required by the function.  This function is called
3948   during the reload pass and also by sparc_expand_prologue.  */
3949
3950HOST_WIDE_INT
3951sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3952{
3953  int outgoing_args_size = (crtl->outgoing_args_size
3954			    + REG_PARM_STACK_SPACE (current_function_decl));
3955  int n_regs = 0;  /* N_REGS is the number of 4-byte regs saved thus far.  */
3956  int i;
3957
3958  if (TARGET_ARCH64)
3959    {
3960      for (i = 0; i < 8; i++)
3961	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3962	  n_regs += 2;
3963    }
3964  else
3965    {
3966      for (i = 0; i < 8; i += 2)
3967	if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3968	    || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3969	  n_regs += 2;
3970    }
3971
3972  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3973    if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
3974	|| (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
3975      n_regs += 2;
3976
3977  /* Set up values for use in prologue and epilogue.  */
3978  num_gfregs = n_regs;
3979
3980  if (leaf_function_p
3981      && n_regs == 0
3982      && size == 0
3983      && crtl->outgoing_args_size == 0)
3984    actual_fsize = apparent_fsize = 0;
3985  else
3986    {
3987      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
3988      apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
3989      apparent_fsize += n_regs * 4;
3990      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3991    }
3992
3993  /* Make sure nothing can clobber our register windows.
3994     If a SAVE must be done, or there is a stack-local variable,
3995     the register window area must be allocated.  */
3996  if (! leaf_function_p || size > 0)
3997    actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
3998
3999  return SPARC_STACK_ALIGN (actual_fsize);
4000}
4001
4002/* Output any necessary .register pseudo-ops.  */
4003
4004void
4005sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4006{
4007#ifdef HAVE_AS_REGISTER_PSEUDO_OP
4008  int i;
4009
4010  if (TARGET_ARCH32)
4011    return;
4012
4013  /* Check if %g[2367] were used without
4014     .register being printed for them already.  */
4015  for (i = 2; i < 8; i++)
4016    {
4017      if (df_regs_ever_live_p (i)
4018	  && ! sparc_hard_reg_printed [i])
4019	{
4020	  sparc_hard_reg_printed [i] = 1;
4021	  /* %g7 is used as TLS base register, use #ignore
4022	     for it instead of #scratch.  */
4023	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4024		   i == 7 ? "ignore" : "scratch");
4025	}
4026      if (i == 3) i = 5;
4027    }
4028#endif
4029}
4030
4031/* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
4032   as needed.  LOW should be double-word aligned for 32-bit registers.
4033   Return the new OFFSET.  */
4034
4035#define SORR_SAVE    0
4036#define SORR_RESTORE 1
4037
static int
save_or_restore_regs (int low, int high, rtx base, int offset, int action)
{
  rtx mem, insn;
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      /* 64-bit integer registers: each live call-saved register gets its
	 own 8-byte DImode slot.  */
      for (i = low; i < high; i++)
	{
	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
	    {
	      mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
	      set_mem_alias_set (mem, sparc_sr_alias_set);
	      if (action == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  /* Saves must be flagged so the unwinder knows about
		     them.  */
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action == SORR_RESTORE */
		emit_move_insn (gen_rtx_REG (DImode, i), mem);
	      offset += 8;
	    }
	}
    }
  else
    {
      /* 32-bit integer registers and all FP registers are handled as
	 even/odd pairs so that double-word moves can be used when both
	 members of a pair are live.  */
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
	  bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
	  enum machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      /* Both live: one double-word move.  Registers below 32 are
		 integer registers, the others floating-point.  */
	      mode = i < 32 ? DImode : DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      /* Only the even register: single-word move in the low
		 (first) half of the slot.  */
	      mode = i < 32 ? SImode : SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      /* Only the odd register: single-word move in the high
		 (second) half of the slot, hence the extra 4 bytes.  */
	      mode = i < 32 ? SImode : SFmode;
	      regno = i + 1;
	      offset += 4;
	    }
	  else
	    continue;

	  mem = gen_rtx_MEM (mode, plus_constant (base, offset));
	  set_mem_alias_set (mem, sparc_sr_alias_set);
	  if (action == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  else  /* action == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Always preserve double-word alignment.  */
	  offset = (offset + 8) & -8;
	}
    }

  return offset;
}
4108
/* Emit code to save or restore the call-saved registers, as selected by
   ACTION (SORR_SAVE or SORR_RESTORE).  */

static void
emit_save_or_restore_regs (int action)
{
  HOST_WIDE_INT offset;
  rtx base;

  offset = frame_base_offset - apparent_fsize;

  /* The whole save area must be reachable with a 13-bit signed
     displacement, i.e. offsets in [-4096, 4095]; otherwise materialize
     the base address in %g1 first.  */
  if (offset < -4096 || offset + num_gfregs * 4 > 4095)
    {
      /* ??? This might be optimized a little as %g1 might already have a
	 value close enough that a single add insn will do.  */
      /* ??? Although, all of this is probably only a temporary fix
	 because if %g1 can hold a function result, then
	 sparc_expand_epilogue will lose (the result will be
	 clobbered).  */
      base = gen_rtx_REG (Pmode, 1);
      emit_move_insn (base, GEN_INT (offset));
      emit_insn (gen_rtx_SET (VOIDmode,
			      base,
			      gen_rtx_PLUS (Pmode, frame_base_reg, base)));
      offset = 0;
    }
  else
    base = frame_base_reg;

  /* Handle the global registers first, then the FP registers; the first
     call returns the offset where the FP save area starts.  */
  offset = save_or_restore_regs (0, 8, base, offset, action);
  save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
}
4140
4141/* Generate a save_register_window insn.  */
4142
4143static rtx
4144gen_save_register_window (rtx increment)
4145{
4146  if (TARGET_ARCH64)
4147    return gen_save_register_windowdi (increment);
4148  else
4149    return gen_save_register_windowsi (increment);
4150}
4151
4152/* Generate an increment for the stack pointer.  */
4153
4154static rtx
4155gen_stack_pointer_inc (rtx increment)
4156{
4157  return gen_rtx_SET (VOIDmode,
4158		      stack_pointer_rtx,
4159		      gen_rtx_PLUS (Pmode,
4160				    stack_pointer_rtx,
4161				    increment));
4162}
4163
4164/* Generate a decrement for the stack pointer.  */
4165
4166static rtx
4167gen_stack_pointer_dec (rtx decrement)
4168{
4169  return gen_rtx_SET (VOIDmode,
4170		      stack_pointer_rtx,
4171		      gen_rtx_MINUS (Pmode,
4172				     stack_pointer_rtx,
4173				     decrement));
4174}
4175
/* Expand the function prologue.  The prologue is responsible for reserving
   storage for the frame, saving the call-saved registers and loading the
   GOT register if needed.  */

void
sparc_expand_prologue (void)
{
  rtx insn;
  int i;

  /* Compute a snapshot of current_function_uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && current_function_is_leaf && only_leaf_regs_used ();

  /* Need to use actual_fsize, since we are also allocating
     space for our callee (and our own register save area).  */
  actual_fsize
    = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;

  /* A leaf function addresses its frame off %sp directly; a non-leaf
     function gets a register window and addresses it off %fp.  */
  if (sparc_leaf_function_p)
    {
      frame_base_reg = stack_pointer_rtx;
      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
    }
  else
    {
      frame_base_reg = hard_frame_pointer_rtx;
      frame_base_offset = SPARC_STACK_BIAS;
    }

  if (actual_fsize == 0)
    /* do nothing.  */ ;
  else if (sparc_leaf_function_p)
    {
      /* Adjustments up to 4096 fit the immediate field of a single
	 insn; up to 8192 we split them into two steps; beyond that a
	 scratch register (%g1) is needed.  */
      if (actual_fsize <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
      else if (actual_fsize <= 8192)
	{
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn
	    = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
	}
      else
	{
	  rtx reg = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (reg, GEN_INT (-actual_fsize));
	  insn = emit_insn (gen_stack_pointer_inc (reg));
	  /* Describe the net stack adjustment for the unwinder, since
	     the use of the scratch register obscures it.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Non-leaf: allocate the frame with a 'save' instruction, using
	 the same three size tiers as above.  */
      if (actual_fsize <= 4096)
	insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
      else if (actual_fsize <= 8192)
	{
	  insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));

	  /* Make sure no %fp-based store is issued until after the frame is
	     established.  The offset between the frame pointer and the stack
	     pointer is calculated relative to the value of the stack pointer
	     at the end of the function prologue, and moving instructions that
	     access the stack via the frame pointer between the instructions
	     that decrement the stack pointer could result in accessing the
	     register window save area, which is volatile.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  rtx reg = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (reg, GEN_INT (-actual_fsize));
	  insn = emit_insn (gen_save_register_window (reg));
	}

      /* The save_register_window pattern is a PARALLEL; flag each of
	 its elements as frame-related for the unwind info.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      for (i=0; i < XVECLEN (PATTERN (insn), 0); i++)
        RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
    }

  if (num_gfregs)
    emit_save_or_restore_regs (SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();
}
4295
/* This function generates the assembly code for function entry, which boils
   down to emitting the necessary .register directives.  */

static void
sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* Check that the assumption we made in sparc_expand_prologue is valid.
     The leaf-ness snapshot taken there must match the final verdict,
     otherwise the emitted prologue is wrong for this function.  */
  gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);

  sparc_output_scratch_registers (file);
}
4307
4308/* Expand the function epilogue, either normal or part of a sibcall.
4309   We emit all the instructions except the return or the call.  */
4310
4311void
4312sparc_expand_epilogue (void)
4313{
4314  if (num_gfregs)
4315    emit_save_or_restore_regs (SORR_RESTORE);
4316
4317  if (actual_fsize == 0)
4318    /* do nothing.  */ ;
4319  else if (sparc_leaf_function_p)
4320    {
4321      if (actual_fsize <= 4096)
4322	emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
4323      else if (actual_fsize <= 8192)
4324	{
4325	  emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4326	  emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4327	}
4328      else
4329	{
4330	  rtx reg = gen_rtx_REG (Pmode, 1);
4331	  emit_move_insn (reg, GEN_INT (-actual_fsize));
4332	  emit_insn (gen_stack_pointer_dec (reg));
4333	}
4334    }
4335}
4336
4337/* Return true if it is appropriate to emit `return' instructions in the
4338   body of a function.  */
4339
4340bool
4341sparc_can_use_return_insn_p (void)
4342{
4343  return sparc_prologue_data_valid_p
4344	 && num_gfregs == 0
4345	 && (actual_fsize == 0 || !sparc_leaf_function_p);
4346}
4347
/* This function generates the assembly code for function exit.  */

static void
sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* If code does not drop into the epilogue, we have to still output
     a dummy nop for the sake of sane backtraces.  Otherwise, if the
     last two instructions of a function were "call foo; dslot;" this
     can make the return PC of foo (i.e. address of call instruction
     plus 8) point to the first instruction in the next function.  */

  rtx insn, last_real_insn;

  insn = get_last_insn ();

  /* If the last real insn is a filled delay-slot SEQUENCE, look at the
     branch/call itself, which is its first element.  */
  last_real_insn = prev_real_insn (insn);
  if (last_real_insn
      && GET_CODE (last_real_insn) == INSN
      && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
    last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);

  if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN)
    fputs("\tnop\n", file);

  sparc_output_deferred_case_vectors ();
}
4374
/* Output a 'restore' instruction.  PAT is either NULL (plain restore) or
   the SET performed by the delay-slot insn that is being folded into the
   restore.  */

static void
output_restore (rtx pat)
{
  rtx operands[3];

  if (! pat)
    {
      /* Nothing to combine: emit a bare restore.  */
      fputs ("\t restore\n", asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (pat) == SET);

  operands[0] = SET_DEST (pat);
  pat = SET_SRC (pat);

  switch (GET_CODE (pat))
    {
      case PLUS:
	/* dest = a + b  ->  restore a, b, dest  */
	operands[1] = XEXP (pat, 0);
	operands[2] = XEXP (pat, 1);
	output_asm_insn (" restore %r1, %2, %Y0", operands);
	break;
      case LO_SUM:
	/* dest = a + %lo(sym)  ->  restore a, %lo(sym), dest  */
	operands[1] = XEXP (pat, 0);
	operands[2] = XEXP (pat, 1);
	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
	break;
      case ASHIFT:
	/* dest = a << 1, i.e. a * 2  ->  restore a, a, dest  */
	operands[1] = XEXP (pat, 0);
	gcc_assert (XEXP (pat, 1) == const1_rtx);
	output_asm_insn (" restore %r1, %r1, %Y0", operands);
	break;
      default:
	/* Plain move  ->  restore %g0, src, dest  */
	operands[1] = pat;
	output_asm_insn (" restore %%g0, %1, %Y0", operands);
	break;
    }
}
4416
/* Output a return.  INSN is the return insn itself, used to locate the
   delay-slot insn, if any.  */

const char *
output_return (rtx insn)
{
  if (sparc_leaf_function_p)
    {
      /* This is a leaf function so we don't have to bother restoring the
	 register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).  */

      gcc_assert (! crtl->calls_eh_return);

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (crtl->calls_eh_return)
	{
	  /* If the function uses __builtin_eh_return, the eh_return
	     machinery occupies the delay slot.  */
	  gcc_assert (! final_sequence);

          if (flag_delayed_branch)
	    {
	      /* The stack adjustment by %g1 (the EH stack adjustment)
		 sits in the delay slot of the return/jmp.  */
	      if (TARGET_V9)
		fputs ("\treturn\t%i7+8\n", asm_out_file);
	      else
		fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);

	      fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	    }
	  else
	    {
	      fputs ("\trestore\n\tadd\t%sp, %g1, %sp\n", asm_out_file);
	      fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	    }
	}
      else if (final_sequence)
	{
	  rtx delay, pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* First try (second argument 1) whether the delay-slot insn
	     can be renumbered to stay valid after the window restore.  */
	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      /* It can: renumber it for real and keep it in the delay
		 slot of a 'return'.  */
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      /* It cannot: fold it into the 'restore' instead.  The slot
		 is consumed, so neutralize the delay insn.  */
	      output_asm_insn ("jmp\t%%i7+%)", NULL);
	      output_restore (pat);
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	    }
	}
      else
        {
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}
4497
/* Output a sibling call.  INSN is the call insn, CALL_OPERAND the callee
   address.  */

const char *
output_sibcall (rtx insn, rtx call_operand)
{
  rtx operands[1];

  gcc_assert (flag_delayed_branch);

  operands[0] = call_operand;

  if (sparc_leaf_function_p)
    {
      /* This is a leaf function so we don't have to bother restoring the
	 register window.  We simply output the jump to the function and
	 the insn in the delay slot (if any).  */

      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));

      if (final_sequence)
	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
			 operands);
      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into branch if possible.  */
	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
			 operands);
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be combined
	 with the 'restore' instruction.  */

      output_asm_insn ("call\t%a0, 0", operands);

      if (final_sequence)
	{
	  rtx delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  /* Fold the delay-slot insn into the restore, then neutralize
	     the consumed slot.  */
	  output_restore (PATTERN (delay));

	  PATTERN (delay) = gen_blockage ();
	  INSN_CODE (delay) = -1;
	}
      else
	output_restore (NULL_RTX);
    }

  return "";
}
4550
4551/* Functions for handling argument passing.
4552
4553   For 32-bit, the first 6 args are normally in registers and the rest are
4554   pushed.  Any arg that starts within the first 6 words is at least
4555   partially passed in a register unless its data type forbids.
4556
4557   For 64-bit, the argument registers are laid out as an array of 16 elements
4558   and arguments are added sequentially.  The first 6 int args and up to the
4559   first 16 fp args (depending on size) are passed in regs.
4560
4561   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
4562   ----    -----   --------   -----   ------------------   ------   -----------
4563    15   [SP+248]              %f31       %f30,%f31         %d30
4564    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
4565    13   [SP+232]              %f27       %f26,%f27         %d26
4566    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
4567    11   [SP+216]              %f23       %f22,%f23         %d22
4568    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
4569     9   [SP+200]              %f19       %f18,%f19         %d18
4570     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
4571     7   [SP+184]              %f15       %f14,%f15         %d14
4572     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
4573     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
4574     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
4575     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
4576     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
4577     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
4578     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
4579
4580   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4581
4582   Integral arguments are always passed as 64-bit quantities appropriately
4583   extended.
4584
4585   Passing of floating point values is handled as follows.
4586   If a prototype is in scope:
4587     If the value is in a named argument (i.e. not a stdarg function or a
4588     value not part of the `...') then the value is passed in the appropriate
4589     fp reg.
4590     If the value is part of the `...' and is passed in one of the first 6
4591     slots then the value is passed in the appropriate int reg.
4592     If the value is part of the `...' and is not passed in one of the first 6
4593     slots then the value is passed in memory.
4594   If a prototype is not in scope:
4595     If the value is one of the first 6 arguments the value is passed in the
4596     appropriate integer reg and the appropriate fp reg.
4597     If the value is not one of the first 6 arguments the value is passed in
4598     the appropriate fp reg and in memory.
4599
4600
4601   Summary of the calling conventions implemented by GCC on the SPARC:
4602
4603   32-bit ABI:
4604                                size      argument     return value
4605
4606      small integer              <4       int. reg.      int. reg.
4607      word                        4       int. reg.      int. reg.
4608      double word                 8       int. reg.      int. reg.
4609
4610      _Complex small integer     <8       int. reg.      int. reg.
4611      _Complex word               8       int. reg.      int. reg.
4612      _Complex double word       16        memory        int. reg.
4613
4614      vector integer            <=8       int. reg.       FP reg.
4615      vector integer             >8        memory         memory
4616
4617      float                       4       int. reg.       FP reg.
4618      double                      8       int. reg.       FP reg.
4619      long double                16        memory         memory
4620
4621      _Complex float              8        memory         FP reg.
4622      _Complex double            16        memory         FP reg.
4623      _Complex long double       32        memory         FP reg.
4624
4625      vector float              any        memory         memory
4626
4627      aggregate                 any        memory         memory
4628
4629
4630
4631    64-bit ABI:
4632                                size      argument     return value
4633
4634      small integer              <8       int. reg.      int. reg.
4635      word                        8       int. reg.      int. reg.
4636      double word                16       int. reg.      int. reg.
4637
4638      _Complex small integer    <16       int. reg.      int. reg.
4639      _Complex word              16       int. reg.      int. reg.
4640      _Complex double word       32        memory        int. reg.
4641
4642      vector integer           <=16        FP reg.        FP reg.
4643      vector integer       16<s<=32        memory         FP reg.
4644      vector integer            >32        memory         memory
4645
4646      float                       4        FP reg.        FP reg.
4647      double                      8        FP reg.        FP reg.
4648      long double                16        FP reg.        FP reg.
4649
4650      _Complex float              8        FP reg.        FP reg.
4651      _Complex double            16        FP reg.        FP reg.
4652      _Complex long double       32        memory         FP reg.
4653
4654      vector float             <=16        FP reg.        FP reg.
4655      vector float         16<s<=32        memory         FP reg.
4656      vector float              >32        memory         memory
4657
4658      aggregate                <=16         reg.           reg.
4659      aggregate            16<s<=32        memory          reg.
4660      aggregate                 >32        memory         memory
4661
4662
4663
4664Note #1: complex floating-point types follow the extended SPARC ABIs as
4665implemented by the Sun compiler.
4666
4667Note #2: integral vector types follow the scalar floating-point types
4668conventions to match what is implemented by the Sun VIS SDK.
4669
4670Note #3: floating-point vector types follow the aggregate types
4671conventions.  */
4672
4673
4674/* Maximum number of int regs for args.  */
4675#define SPARC_INT_ARG_MAX 6
4676/* Maximum number of fp regs for args.  */
4677#define SPARC_FP_ARG_MAX 16
4678
4679#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4680
4681/* Handle the INIT_CUMULATIVE_ARGS macro.
4682   Initialize a variable CUM of type CUMULATIVE_ARGS
4683   for a call to a function whose data type is FNTYPE.
4684   For a library call, FNTYPE is 0.  */
4685
4686void
4687init_cumulative_args (struct sparc_args *cum, tree fntype,
4688		      rtx libname ATTRIBUTE_UNUSED,
4689		      tree fndecl ATTRIBUTE_UNUSED)
4690{
4691  cum->words = 0;
4692  cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4693  cum->libcall_p = fntype == 0;
4694}
4695
4696/* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4697   When a prototype says `char' or `short', really pass an `int'.  */
4698
4699static bool
4700sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED)
4701{
4702  return TARGET_ARCH32 ? true : false;
4703}
4704
4705/* Handle promotion of pointer and integer arguments.  */
4706
4707static enum machine_mode
4708sparc_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
4709                             enum machine_mode mode,
4710                             int *punsignedp ATTRIBUTE_UNUSED,
4711                             const_tree fntype ATTRIBUTE_UNUSED,
4712                             int for_return ATTRIBUTE_UNUSED)
4713{
4714  if (POINTER_TYPE_P (type))
4715    {
4716      *punsignedp = POINTERS_EXTEND_UNSIGNED;
4717      return Pmode;
4718    }
4719
4720  /* For TARGET_ARCH64 we need this, as we don't have instructions
4721     for arithmetic operations which do zero/sign extension at the same time,
4722     so without this we end up with a srl/sra after every assignment to an
4723     user variable,  which means very very bad code.  */
4724  if (TARGET_ARCH64
4725      && GET_MODE_CLASS (mode) == MODE_INT
4726      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4727    return word_mode;
4728
4729  return mode;
4730}
4731
4732/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
4733
4734static bool
4735sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4736{
4737  return TARGET_ARCH64 ? true : false;
4738}
4739
4740/* Scan the record type TYPE and return the following predicates:
4741    - INTREGS_P: the record contains at least one field or sub-field
4742      that is eligible for promotion in integer registers.
4743    - FP_REGS_P: the record contains at least one field or sub-field
4744      that is eligible for promotion in floating-point registers.
4745    - PACKED_P: the record contains at least one field that is packed.
4746
4747   Sub-fields are not taken into account for the PACKED_P predicate.  */
4748
4749static void
4750scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4751{
4752  tree field;
4753
4754  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4755    {
4756      if (TREE_CODE (field) == FIELD_DECL)
4757	{
4758	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4759	    scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4760	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4761		   || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4762		  && TARGET_FPU)
4763	    *fpregs_p = 1;
4764	  else
4765	    *intregs_p = 1;
4766
4767	  if (packed_p && DECL_PACKED (field))
4768	    *packed_p = 1;
4769	}
4770    }
4771}
4772
/* Compute the slot number to pass an argument in.
   Return the slot number or -1 if passing on the stack.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
   *PREGNO records the register number to use if scalar type.
   *PPADDING records the amount of padding needed in words.  */

static int
function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
		     tree type, int named, int incoming_p,
		     int *pregno, int *ppadding)
{
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  enum mode_class mclass;
  int regno;

  *ppadding = 0;

  /* Addressable types must live in memory.  */
  if (type && TREE_ADDRESSABLE (type))
    return -1;

  /* 32-bit: under-aligned BLKmode aggregates go on the stack.  */
  if (TARGET_ARCH32
      && mode == BLKmode
      && type
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  /* For SPARC64, objects requiring 16-byte alignment get it.  */
  if (TARGET_ARCH64
      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
      && (slotno & 1) != 0)
    slotno++, *ppadding = 1;

  mclass = GET_MODE_CLASS (mode);
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      /* Vector types deserve special treatment because they are
	 polymorphic wrt their mode, depending upon whether VIS
	 instructions are enabled.  */
      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	{
	  /* The SPARC port defines no floating-point vector modes.  */
	  gcc_assert (mode == BLKmode);
	}
      else
	{
	  /* Integral vector types should either have a vector
	     mode or an integral mode, because we are guaranteed
	     by pass_by_reference that their size is not greater
	     than 16 bytes and TImode is 16-byte wide.  */
	  gcc_assert (mode != BLKmode);

	  /* Vector integers are handled like floats according to
	     the Sun VIS SDK.  */
	  mclass = MODE_FLOAT;
	}
    }

  switch (mclass)
    {
    case MODE_FLOAT:
    case MODE_COMPLEX_FLOAT:
    case MODE_VECTOR_INT:
      if (TARGET_ARCH64 && TARGET_FPU && named)
	{
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;
	  /* Each 8-byte slot maps to a pair of FP registers.  */
	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
	  /* Arguments filling only one single FP register are
	     right-justified in the outer double FP register.  */
	  if (GET_MODE_SIZE (mode) <= 4)
	    regno++;
	  break;
	}
      /* fallthrough */

    case MODE_INT:
    case MODE_COMPLEX_INT:
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;
      regno = regbase + slotno;
      break;

    case MODE_RANDOM:
      if (mode == VOIDmode)
	/* MODE is VOIDmode when generating the actual call.  */
	return -1;

      gcc_assert (mode == BLKmode);

      if (TARGET_ARCH32
	  || !type
	  || (TREE_CODE (type) != VECTOR_TYPE
	      && TREE_CODE (type) != RECORD_TYPE))
	{
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else  /* TARGET_ARCH64 && type */
	{
	  int intregs_p = 0, fpregs_p = 0, packed_p = 0;

	  /* First see what kinds of registers we would need.  */
	  if (TREE_CODE (type) == VECTOR_TYPE)
	    fpregs_p = 1;
	  else
	    scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);

	  /* The ABI obviously doesn't specify how packed structures
	     are passed.  These are defined to be passed in int regs
	     if possible, otherwise memory.  */
	  if (packed_p || !named)
	    fpregs_p = 0, intregs_p = 1;

	  /* If all arg slots are filled, then must pass on stack.  */
	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  /* If there are only int args and all int arg slots are filled,
	     then must pass on stack.  */
	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
	    return -1;

	  /* Note that even if all int arg slots are filled, fp members may
	     still be passed in regs if such regs are available.
	     *PREGNO isn't set because there may be more than one, it's up
	     to the caller to compute them.  */
	  return slotno;
	}
      break;

    default :
      gcc_unreachable ();
    }

  *pregno = regno;
  return slotno;
}
4923
/* Handle recursive register counting for structure field layout.  */

struct function_arg_record_value_parms
{
  rtx ret;		/* return expression being built.  */
  int slotno;		/* slot number of the argument.  */
  int named;		/* whether the argument is named.  */
  int regbase;		/* regno of the base register.  */
  int stack;		/* 1 if part of the argument is on the stack.  */
  int intoffset;	/* offset of the first pending integer field,
			   or -1 if no integer field is pending.  */
  unsigned int nregs;	/* number of words passed in registers.  */
};

/* The record is processed in two passes over the same PARMS block:
   _1 counts the registers needed, then _2 assigns FP registers while
   _3 assigns the runs of integer bits in between.  */
static void function_arg_record_value_3
 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
static void function_arg_record_value_2
 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
static void function_arg_record_value_1
 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
static rtx function_arg_union_value (int, enum machine_mode, int, int);
4945
/* A subroutine of function_arg_record_value.  Traverse the structure
   recursively and determine how many registers will be required.

   TYPE is the RECORD_TYPE being scanned and STARTBITPOS its bit offset
   within the outermost record.  PARMS accumulates the count in
   PARMS->nregs and tracks the first pending integer field in
   PARMS->intoffset (-1 if none).  PACKED_P is true if an enclosing
   record was packed.  */

static void
function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
			     struct function_arg_record_value_parms *parms,
			     bool packed_p)
{
  tree field;

  /* We need to compute how many registers are needed so we can
     allocate the PARALLEL but before we can do that we need to know
     whether there are any packed fields.  The ABI obviously doesn't
     specify how structures are passed in this case, so they are
     defined to be passed in int regs if possible, otherwise memory,
     regardless of whether there are fp values present.  */

  if (! packed_p)
    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
      {
	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	  {
	    packed_p = true;
	    break;
	  }
      }

  /* Compute how many registers we need.  */
  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0)
	    {
	      /* Zero-sized fields occupy no registers.  */
	      if (integer_zerop (DECL_SIZE (field)))
		continue;

	      if (host_integerp (bit_position (field), 1))
		bitpos += int_bit_position (field);
	    }

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_1 (TREE_TYPE (field),
	    				 bitpos,
					 parms,
					 packed_p);
	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
		   && TARGET_FPU
		   && parms->named
		   && ! packed_p)
	    {
	      /* An FP field ends the pending run of integer bits;
		 count the integer registers that run needs first.  */
	      if (parms->intoffset != -1)
		{
		  unsigned int startbit, endbit;
		  int intslots, this_slotno;

		  startbit = parms->intoffset & -BITS_PER_WORD;
		  endbit   = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;

		  intslots = (endbit - startbit) / BITS_PER_WORD;
		  this_slotno = parms->slotno + parms->intoffset
		    / BITS_PER_WORD;

		  if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
		    {
		      intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
		      /* We need to pass this field on the stack.  */
		      parms->stack = 1;
		    }

		  parms->nregs += intslots;
		  parms->intoffset = -1;
		}

	      /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
		 If it wasn't true we wouldn't be here.  */
	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
		  && DECL_MODE (field) == BLKmode)
		parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
		parms->nregs += 2;
	      else
		parms->nregs += 1;
	    }
	  else
	    {
	      /* Integer field: remember where the pending run of
		 integer bits starts; it is counted when the run ends.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
5043
/* A subroutine of function_arg_record_value.  Assign the bits of the
   structure between parms->intoffset and bitpos to integer registers.

   BITPOS is the bit position at which the pending run of integer
   fields ends; PARMS->intoffset (reset to -1 on return) says where
   it began.  */

static void
function_arg_record_value_3 (HOST_WIDE_INT bitpos,
			     struct function_arg_record_value_parms *parms)
{
  enum machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_slotno, intslots, intoffset;
  rtx reg;

  /* Nothing to do if no integer fields are pending.  */
  if (parms->intoffset == -1)
    return;

  intoffset = parms->intoffset;
  parms->intoffset = -1;

  /* Round the run out to word boundaries and convert to slot counts.  */
  startbit = intoffset & -BITS_PER_WORD;
  endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
  intslots = (endbit - startbit) / BITS_PER_WORD;
  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;

  /* Clamp to the register slots still available; the remainder of the
     run goes on the stack.  */
  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
  if (intslots <= 0)
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
			  	   MODE_INT);
  else
    mode = word_mode;

  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = parms->regbase + this_slotno;
      reg = gen_rtx_REG (mode, regno);
      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_slotno += 1;
      /* Advance to the next word boundary.  */
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      mode = word_mode;
      parms->nregs += 1;
      intslots -= 1;
    }
  while (intslots > 0);
}
5099
/* A subroutine of function_arg_record_value.  Traverse the structure
   recursively and assign bits to floating point registers.  Track which
   bits in between need integer registers; invoke function_arg_record_value_3
   to make that happen.

   TYPE is the RECORD_TYPE being scanned and STARTBITPOS its bit offset
   within the outermost record.  PARMS carries the PARALLEL being
   filled in.  PACKED_P is true if an enclosing record was packed.
   The traversal must mirror function_arg_record_value_1 exactly so
   that the number of entries filled matches the count.  */

static void
function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
			     struct function_arg_record_value_parms *parms,
			     bool packed_p)
{
  tree field;

  /* Detect packed fields, same as in function_arg_record_value_1.  */
  if (! packed_p)
    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
      {
	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	  {
	    packed_p = true;
	    break;
	  }
      }

  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0)
	    {
	      /* Zero-sized fields occupy no registers.  */
	      if (integer_zerop (DECL_SIZE (field)))
		continue;

	      if (host_integerp (bit_position (field), 1))
		bitpos += int_bit_position (field);
	    }

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_2 (TREE_TYPE (field),
	    				 bitpos,
					 parms,
					 packed_p);
	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
		   && TARGET_FPU
		   && parms->named
		   && ! packed_p)
	    {
	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
	      int regno, nregs, pos;
	      enum machine_mode mode = DECL_MODE (field);
	      rtx reg;

	      /* First flush the pending run of integer bits into
		 integer registers.  */
	      function_arg_record_value_3 (bitpos, parms);

	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
		  && mode == BLKmode)
	        {
		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
		  nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
		}
	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
	        {
		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
		  nregs = 2;
		}
	      else
	        nregs = 1;

	      regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
	      /* Single-register FP values in the second half of a slot
		 are right-justified in the outer double FP register.  */
	      if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
		regno++;
	      reg = gen_rtx_REG (mode, regno);
	      pos = bitpos / BITS_PER_UNIT;
	      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
		= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
	      parms->nregs += 1;
	      while (--nregs > 0)
		{
		  regno += GET_MODE_SIZE (mode) / 4;
	  	  reg = gen_rtx_REG (mode, regno);
		  pos += GET_MODE_SIZE (mode);
		  XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
		    = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
		  parms->nregs += 1;
		}
	    }
	  else
	    {
	      /* Integer field: start or extend the pending run.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
5197
/* Used by function_arg and function_value to implement the complex
   conventions of the 64-bit ABI for passing and returning structures.
   Return an expression valid as a return value for the two macros
   FUNCTION_ARG and FUNCTION_VALUE.

   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   REGBASE is the regno of the base register for the parameter array.  */

static rtx
function_arg_record_value (const_tree type, enum machine_mode mode,
			   int slotno, int named, int regbase)
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  struct function_arg_record_value_parms parms;
  unsigned int nregs;

  parms.ret = NULL_RTX;
  parms.slotno = slotno;
  parms.named = named;
  parms.regbase = regbase;
  parms.stack = 0;

  /* Compute how many registers we need.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_1 (type, 0, &parms, false);

  /* Take into account pending integer fields.  */
  if (parms.intoffset != -1)
    {
      unsigned int startbit, endbit;
      int intslots, this_slotno;

      startbit = parms.intoffset & -BITS_PER_WORD;
      endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
      intslots = (endbit - startbit) / BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;

      if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
        {
	  intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
	  /* We need to pass this field on the stack.  */
	  parms.stack = 1;
        }

      parms.nregs += intslots;
    }
  nregs = parms.nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}
      else
	{
	  /* ??? C++ has structures with no fields, and yet a size.  Give up
	     for now and pass everything back in integer registers.  */
	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	}
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }
  gcc_assert (nregs != 0);

  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));

  /* If at least one field must be passed on the stack, generate
     (parallel [(expr_list (nil) ...) ...]) so that all fields will
     also be passed on the stack.  We can't do much better because the
     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
     of structures for which the fields passed exclusively in registers
     are not at the beginning of the structure.  */
  if (parms.stack)
    XVECEXP (parms.ret, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Fill in the entries.  The second pass must visit fields exactly as
     the first did, so the entry count matches NREGS.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_2 (type, 0, &parms, false);
  /* Flush any integer fields pending at the end of the record.  */
  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);

  gcc_assert (parms.nregs == nregs);

  return parms.ret;
}
5298
5299/* Used by function_arg and function_value to implement the conventions
5300   of the 64-bit ABI for passing and returning unions.
5301   Return an expression valid as a return value for the two macros
5302   FUNCTION_ARG and FUNCTION_VALUE.
5303
5304   SIZE is the size in bytes of the union.
5305   MODE is the argument's machine mode.
5306   REGNO is the hard register the union will be passed in.  */
5307
5308static rtx
5309function_arg_union_value (int size, enum machine_mode mode, int slotno,
5310			  int regno)
5311{
5312  int nwords = ROUND_ADVANCE (size), i;
5313  rtx regs;
5314
5315  /* See comment in previous function for empty structures.  */
5316  if (nwords == 0)
5317    return gen_rtx_REG (mode, regno);
5318
5319  if (slotno == SPARC_INT_ARG_MAX - 1)
5320    nwords = 1;
5321
5322  regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5323
5324  for (i = 0; i < nwords; i++)
5325    {
5326      /* Unions are passed left-justified.  */
5327      XVECEXP (regs, 0, i)
5328	= gen_rtx_EXPR_LIST (VOIDmode,
5329			     gen_rtx_REG (word_mode, regno),
5330			     GEN_INT (UNITS_PER_WORD * i));
5331      regno++;
5332    }
5333
5334  return regs;
5335}
5336
5337/* Used by function_arg and function_value to implement the conventions
5338   for passing and returning large (BLKmode) vectors.
5339   Return an expression valid as a return value for the two macros
5340   FUNCTION_ARG and FUNCTION_VALUE.
5341
5342   SIZE is the size in bytes of the vector (at least 8 bytes).
5343   REGNO is the FP hard register the vector will be passed in.  */
5344
5345static rtx
5346function_arg_vector_value (int size, int regno)
5347{
5348  int i, nregs = size / 8;
5349  rtx regs;
5350
5351  regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5352
5353  for (i = 0; i < nregs; i++)
5354    {
5355      XVECEXP (regs, 0, i)
5356	= gen_rtx_EXPR_LIST (VOIDmode,
5357			     gen_rtx_REG (DImode, regno + 2*i),
5358			     GEN_INT (i*8));
5359    }
5360
5361  return regs;
5362}
5363
/* Handle the FUNCTION_ARG macro.
   Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */

rtx
function_arg (const struct sparc_args *cum, enum machine_mode mode,
	      tree type, int named, int incoming_p)
{
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  enum mode_class mclass = GET_MODE_CLASS (mode);

  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
				&regno, &padding);
  /* Slot -1 means the argument is passed entirely on the stack.  */
  if (slotno == -1)
    return 0;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 16));

      if (mode == BLKmode)
	return function_arg_vector_value (size,
					  SPARC_FP_ARG_FIRST + 2*slotno);
      else
	mclass = MODE_FLOAT;
    }

  /* All remaining 32-bit cases are passed in a single register.  */
  if (TARGET_ARCH32)
    return gen_rtx_REG (mode, regno);

  /* Structures up to 16 bytes in size are passed in arg slots on the stack
     and are promoted to registers if possible.  */
  if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }

  /* Unions up to 16 bytes in size are passed in integer registers.  */
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_union_value (size, mode, slotno, regno);
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	   && SPARC_FP_REG_P (regno))
    {
      rtx reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	{
	  /* "* 2" because fp reg numbers are recorded in 4 byte
	     quantities.  */
#if 0
	  /* ??? This will cause the value to be passed in the fp reg and
	     in the stack.  When a prototype exists we want to pass the
	     value in the reg but reserve space on the stack.  That's an
	     optimization, and is deferred [for a bit].  */
	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
	    return gen_rtx_PARALLEL (mode,
			    gen_rtvec (2,
				       gen_rtx_EXPR_LIST (VOIDmode,
						NULL_RTX, const0_rtx),
				       gen_rtx_EXPR_LIST (VOIDmode,
						reg, const0_rtx)));
	  else
#else
	  /* ??? It seems that passing back a register even when past
	     the area declared by REG_PARM_STACK_SPACE will allocate
	     space appropriately, and will not copy the data onto the
	     stack, exactly as we desire.

	     This is due to locate_and_pad_parm being called in
	     expand_call whenever reg_parm_stack_space > 0, which
	     while beneficial to our example here, would seem to be
	     in error from what had been intended.  Ho hum...  -- r~ */
#endif
	    return reg;
	}
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming_p)
		return reg;

	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      /* Unprototyped call: pass the value in both the fp reg
		 and the corresponding int reg.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* Past the int reg slots: pass in both the fp reg and
		 memory (signalled by the leading (nil) expr_list).  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }

  /* All other aggregate types are passed in an integer register in a mode
     corresponding to the size of the type.  */
  else if (type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
    }

  return gen_rtx_REG (mode, regno);
}
5512
/* For an arg passed partly in registers and partly in memory,
   this is the number of bytes of registers used.
   For args passed entirely in registers or entirely in memory, zero.

   Any arg that starts in the first 6 regs but won't entirely fit in them
   needs partial registers on v8.  On v9, structures with integer
   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
   values that begin in the last fp reg [where "last fp reg" varies with the
   mode] will be split between that reg and memory.

   CUM, MODE, TYPE and NAMED are as for FUNCTION_ARG.  */

static int
sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 tree type, bool named)
{
  int slotno, regno, padding;

  /* We pass 0 for incoming_p here, it doesn't matter.  */
  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);

  /* Entirely on the stack: no partial registers.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* Count the bytes of the register slots that remain before the
	 argument spills past SPARC_INT_ARG_MAX.  */
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > SPARC_INT_ARG_MAX)
	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
    }
  else
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 16 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */

      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  if (size > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! (TARGET_FPU && named)))
	{
	  /* The complex types are passed as packed types.  */
	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  /* Complex fp values are split at the end of the fp regs.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return UNITS_PER_WORD;
	}
    }

  return 0;
}
5576
5577/* Handle the TARGET_PASS_BY_REFERENCE target hook.
5578   Specify whether to pass the argument by reference.  */
5579
5580static bool
5581sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5582			 enum machine_mode mode, const_tree type,
5583			 bool named ATTRIBUTE_UNUSED)
5584{
5585  if (TARGET_ARCH32)
5586    /* Original SPARC 32-bit ABI says that structures and unions,
5587       and quad-precision floats are passed by reference.  For Pascal,
5588       also pass arrays by reference.  All other base types are passed
5589       in registers.
5590
5591       Extended ABI (as implemented by the Sun compiler) says that all
5592       complex floats are passed by reference.  Pass complex integers
5593       in registers up to 8 bytes.  More generally, enforce the 2-word
5594       cap for passing arguments in registers.
5595
5596       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5597       integers are passed like floats of the same size, that is in
5598       registers up to 8 bytes.  Pass all vector floats by reference
5599       like structure and unions.  */
5600    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5601	    || mode == SCmode
5602	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
5603	    || GET_MODE_SIZE (mode) > 8
5604	    || (type
5605		&& TREE_CODE (type) == VECTOR_TYPE
5606		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5607  else
5608    /* Original SPARC 64-bit ABI says that structures and unions
5609       smaller than 16 bytes are passed in registers, as well as
5610       all other base types.
5611
5612       Extended ABI (as implemented by the Sun compiler) says that
5613       complex floats are passed in registers up to 16 bytes.  Pass
5614       all complex integers in registers up to 16 bytes.  More generally,
5615       enforce the 2-word cap for passing arguments in registers.
5616
5617       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5618       integers are passed like floats of the same size, that is in
5619       registers (up to 16 bytes).  Pass all vector floats like structure
5620       and unions.  */
5621    return ((type
5622	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5623	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5624	    /* Catch CTImode and TCmode.  */
5625	    || GET_MODE_SIZE (mode) > 16);
5626}
5627
5628/* Handle the FUNCTION_ARG_ADVANCE macro.
5629   Update the data in CUM to advance over an argument
5630   of mode MODE and data type TYPE.
5631   TYPE is null for libcalls where that information may not be available.  */
5632
5633void
5634function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5635		      tree type, int named)
5636{
5637  int regno, padding;
5638
5639  /* We pass 0 for incoming_p here, it doesn't matter.  */
5640  function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5641
5642  /* If argument requires leading padding, add it.  */
5643  cum->words += padding;
5644
5645  if (TARGET_ARCH32)
5646    {
5647      cum->words += (mode != BLKmode
5648		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5649		     : ROUND_ADVANCE (int_size_in_bytes (type)));
5650    }
5651  else
5652    {
5653      if (type && AGGREGATE_TYPE_P (type))
5654	{
5655	  int size = int_size_in_bytes (type);
5656
5657	  if (size <= 8)
5658	    ++cum->words;
5659	  else if (size <= 16)
5660	    cum->words += 2;
5661	  else /* passed by reference */
5662	    ++cum->words;
5663	}
5664      else
5665	{
5666	  cum->words += (mode != BLKmode
5667			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5668			 : ROUND_ADVANCE (int_size_in_bytes (type)));
5669	}
5670    }
5671}
5672
5673/* Handle the FUNCTION_ARG_PADDING macro.
5674   For the 64 bit ABI structs are always stored left shifted in their
5675   argument slot.  */
5676
5677enum direction
5678function_arg_padding (enum machine_mode mode, const_tree type)
5679{
5680  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5681    return upward;
5682
5683  /* Fall back to the default.  */
5684  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5685}
5686
5687/* Handle the TARGET_RETURN_IN_MEMORY target hook.
5688   Specify whether to return the return value in memory.  */
5689
5690static bool
5691sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5692{
5693  if (TARGET_ARCH32)
5694    /* Original SPARC 32-bit ABI says that structures and unions,
5695       and quad-precision floats are returned in memory.  All other
5696       base types are returned in registers.
5697
5698       Extended ABI (as implemented by the Sun compiler) says that
5699       all complex floats are returned in registers (8 FP registers
5700       at most for '_Complex long double').  Return all complex integers
5701       in registers (4 at most for '_Complex long long').
5702
5703       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5704       integers are returned like floats of the same size, that is in
5705       registers up to 8 bytes and in memory otherwise.  Return all
5706       vector floats in memory like structure and unions; note that
5707       they always have BLKmode like the latter.  */
5708    return (TYPE_MODE (type) == BLKmode
5709	    || TYPE_MODE (type) == TFmode
5710	    || (TREE_CODE (type) == VECTOR_TYPE
5711		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5712  else
5713    /* Original SPARC 64-bit ABI says that structures and unions
5714       smaller than 32 bytes are returned in registers, as well as
5715       all other base types.
5716
5717       Extended ABI (as implemented by the Sun compiler) says that all
5718       complex floats are returned in registers (8 FP registers at most
5719       for '_Complex long double').  Return all complex integers in
5720       registers (4 at most for '_Complex TItype').
5721
5722       Vector ABI (as implemented by the Sun VIS SDK) says that vector
5723       integers are returned like floats of the same size, that is in
5724       registers.  Return all vector floats like structure and unions;
5725       note that they always have BLKmode like the latter.  */
5726    return ((TYPE_MODE (type) == BLKmode
5727	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5728}
5729
/* Handle the TARGET_STRUCT_VALUE target hook.
   Return where to find the structure return value address.

   Returns 0 on 64-bit targets (no hidden struct-return pointer there);
   on 32-bit targets, a MEM at the conventional STRUCT_VALUE_OFFSET
   from the frame (incoming) or stack (outgoing) pointer.  */

static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  if (TARGET_ARCH64)
    return 0;
  else
    {
      rtx mem;

      if (incoming)
	mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
						 STRUCT_VALUE_OFFSET));
      else
	mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
						 STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
         Variable size structure returns are handled per the normal
         procedures in GCC. This is enabled by -mstd-struct-return */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is
	     optional as to whether the return object is really
	     provided.  */
	  rtx ret_rtx = gen_rtx_REG (Pmode, 31);	/* %i7 */
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx endlab = gen_label_rtx ();

	  /* Calculate the return object size */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value */
	  rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee struct-return checking
	     requirements:

	      Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (ret_rtx, 8)));
	  /* Assume the size is valid and pre-adjust */
	  emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
	  /* If the unimp field doesn't match the expected size, undo the
	     pre-adjustment and fall back to the temporary.  */
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab);
	  emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
	  /* Assign stack temp:
	     Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      set_mem_alias_set (mem, struct_value_alias_set);
      return mem;
    }
}
5793
5794/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5795   For v9, function return values are subject to the same rules as arguments,
5796   except that up to 32 bytes may be returned in registers.  */
5797
rtx
function_value (const_tree type, enum machine_mode mode, int incoming_p)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  int regbase = (incoming_p
		 ? SPARC_OUTGOING_INT_ARG_FIRST
		 : SPARC_INCOMING_INT_ARG_FIRST);
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      /* Larger vectors should have been returned in memory (see the
	 return-in-memory predicate earlier in this file).  */
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 32));

      if (mode == BLKmode)
	return function_arg_vector_value (size,
					  SPARC_FP_ARG_FIRST);
      else
	mclass = MODE_FLOAT;
    }

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* This must match sparc_promote_function_mode.
	 ??? Maybe 32-bit pointers should actually remain in Pmode?  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* Floating-point classes go in FP registers when an FPU is available;
     everything else lands in the first integer argument register.  */
  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}
5886
5887/* Do what is necessary for `va_start'.  We look at the current function
5888   to determine if stdarg or varargs is used and return the address of
5889   the first unnamed parameter.  */
5890
5891static rtx
5892sparc_builtin_saveregs (void)
5893{
5894  int first_reg = crtl->args.info.words;
5895  rtx address;
5896  int regno;
5897
5898  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
5899    emit_move_insn (gen_rtx_MEM (word_mode,
5900				 gen_rtx_PLUS (Pmode,
5901					       frame_pointer_rtx,
5902					       GEN_INT (FIRST_PARM_OFFSET (0)
5903							+ (UNITS_PER_WORD
5904							   * regno)))),
5905		    gen_rtx_REG (word_mode,
5906				 SPARC_INCOMING_INT_ARG_FIRST + regno));
5907
5908  address = gen_rtx_PLUS (Pmode,
5909			  frame_pointer_rtx,
5910			  GEN_INT (FIRST_PARM_OFFSET (0)
5911				   + UNITS_PER_WORD * first_reg));
5912
5913  return address;
5914}
5915
5916/* Implement `va_start' for stdarg.  */
5917
static void
sparc_va_start (tree valist, rtx nextarg)
{
  /* Ignore the NEXTARG computed by the generic code: spill the register
     parameters first and point the va_list at that save area instead.  */
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
5924
5925/* Implement `va_arg' for stdarg.  */
5926
static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  /* By-reference arguments occupy one pointer-sized slot; by-value
     arguments occupy their size rounded up to a whole word.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  /* Round the running va_list pointer up to ALIGN if required.  */
  incr = valist;
  if (align)
    {
      incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
			  size_int (align - 1));
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* On this big-endian target, sub-word data is right-justified in its
     slot, so step over the leading padding.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
			size_int (rsize - size));

  if (indirect)
    {
      /* The slot holds a pointer to the object; dereference once more.  */
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  /* Post-increment va_list past the consumed slot.  */
  incr
    = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}
6013
6014/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
6015   Specify whether the vector mode is supported by the hardware.  */
6016
6017static bool
6018sparc_vector_mode_supported_p (enum machine_mode mode)
6019{
6020  return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
6021}
6022
6023/* Return the string to output an unconditional branch to LABEL, which is
6024   the operand number of the label.
6025
6026   DEST is the destination insn (i.e. the label), INSN is the source.  */
6027
6028const char *
6029output_ubranch (rtx dest, int label, rtx insn)
6030{
6031  static char string[64];
6032  bool v9_form = false;
6033  char *p;
6034
6035  if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
6036    {
6037      int delta = (INSN_ADDRESSES (INSN_UID (dest))
6038		   - INSN_ADDRESSES (INSN_UID (insn)));
6039      /* Leave some instructions for "slop".  */
6040      if (delta >= -260000 && delta < 260000)
6041	v9_form = true;
6042    }
6043
6044  if (v9_form)
6045    strcpy (string, "ba%*,pt\t%%xcc, ");
6046  else
6047    strcpy (string, "b%*\t");
6048
6049  p = strchr (string, '\0');
6050  *p++ = '%';
6051  *p++ = 'l';
6052  *p++ = '0' + label;
6053  *p++ = '%';
6054  *p++ = '(';
6055  *p = '\0';
6056
6057  return string;
6058}
6059
6060/* Return the string to output a conditional branch to LABEL, which is
6061   the operand number of the label.  OP is the conditional expression.
6062   XEXP (OP, 0) is assumed to be a condition code register (integer or
6063   floating point) and its mode specifies what kind of comparison we made.
6064
6065   DEST is the destination insn (i.e. the label), INSN is the source.
6066
6067   REVERSED is nonzero if we should reverse the sense of the comparison.
6068
6069   ANNUL is nonzero if we should generate an annulling branch.  */
6070
const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
      nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
      nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  /* For a far branch we emit the opposite condition around an
     unconditional jump, so FAR flips the reversal request.  */
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;

	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  branch = "bne";
	  break;
	case EQ:
	  branch = "be";
	  break;
	case GE:
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;

	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  /* SPACES tracks how much of an 8-column opcode field remains, to
     decide between '\t' and ' ' before the operands below.  */
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      int v8 = 0;

      /* Fall back to the V8 form when the displacement is out of the
	 V9 conditional-branch range.  */
      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  static char v9_fcc_labelno[] = "%%fccX, ";
	  /* Set the char indicating the number of the fcc reg to use.  */
	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	  labelno = v9_fcc_labelno;
	  if (v8)
	    {
	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
	      labelno = "";
	    }
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	{
	  labelno = "%%xcc, ";
	  gcc_assert (! v8);
	}
      else
	{
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	}

      /* Append a ,pt/,pn prediction hint from the branch-probability
	 note; FAR inverts it since the condition was reversed above.  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
        p[3] = '6';
      p += 14;
    }
  /* Finally the label operand and the "%#" delay-slot marker.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
6308
6309/* Emit a library call comparison between floating point X and Y.
6310   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6311   Return the new operator to be used in the comparison sequence.
6312
6313   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6314   values as arguments instead of the TFmode registers themselves,
6315   that's why we cannot call emit_float_lib_cmp.  */
6316
rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  enum machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the soft-float comparison routine.  Plain comparisons have a
     dedicated predicate; unordered-aware ones go through the generic
     *_cmp routine whose result is decoded below.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The _Qp_* routines take pointers to TFmode values, so spill
	 non-memory operands to stack slots and pass their addresses.  */
      if (MEM_P (x))
	slot0 = x;
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	slot1 = y;
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode, 2,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      /* The 32-bit _Q_* routines take the TFmode values directly.  */
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode, 2,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Decode the libcall result.  For the *_cmp routines the decoding
     below implies a result encoding of 0 = equal, 1 = less, 2 = greater,
     3 = unordered (inferred from these tests — see the SPARC psABI
     soft-float routines for the authoritative definition).  */
  switch (comparison)
    {
    default:
      /* The dedicated predicates return nonzero iff the relation holds.  */
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      /* UNLT holds for results 1 (less) and 3 (unordered): test bit 0.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      /* Map results {0,3} vs {1,2} apart: (result + 1) & 2 is zero
	 exactly for equal/unordered.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  gcc_unreachable ();
}
6446
6447/* Generate an unsigned DImode to FP conversion.  This is the same code
6448   optabs would emit if we didn't have TFmode patterns.  */
6449
void
sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
{
  rtx neglab, donelab, i0, i1, f0, in, out;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  f0 = gen_reg_rtx (mode);

  /* If the sign bit is clear, a plain signed conversion is correct.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: halve the value, OR in the shifted-out low bit so the
     final rounding is still correct, convert, then double the result.  */
  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
  emit_insn (gen_anddi3 (i1, in, const1_rtx));
  emit_insn (gen_iordi3 (i0, i0, i1));
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
6479
6480/* Generate an FP to unsigned DImode conversion.  This is the same code
6481   optabs would emit if we didn't have TFmode patterns.  */
6482
void
sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
{
  rtx neglab, donelab, i0, i1, f0, in, out, limit;

  out = operands[0];
  in = force_reg (mode, operands[1]);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  limit = gen_reg_rtx (mode);
  f0 = gen_reg_rtx (mode);

  /* LIMIT = 2^63 as a float; inputs below it fit in a signed DImode.  */
  emit_move_insn (limit,
		  CONST_DOUBLE_FROM_REAL_VALUE (
		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode,
			  out,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* IN >= 2^63: convert IN - 2^63 and then set the top bit of the
     result by XORing with 1 << 63.  */
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
  emit_insn (gen_rtx_SET (VOIDmode,
			  i0,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
  emit_insn (gen_movdi (i1, const1_rtx));
  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
  emit_insn (gen_xordi3 (out, i0, i1));

  emit_label (donelab);
}
6520
6521/* Return the string to output a conditional branch to LABEL, testing
6522   register REG.  LABEL is the operand number of the label; REG is the
6523   operand number of the reg.  OP is the conditional expression.  The mode
6524   of REG says what kind of comparison we made.
6525
6526   DEST is the destination insn (i.e. the label), INSN is the source.
6527
6528   REVERSED is nonzero if we should reverse the sense of the comparison.
6529
6530   ANNUL is nonzero if we should generate an annulling branch.  */
6531
const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64 bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Prediction hint from the branch-probability note; FAR inverts it
     because the condition was reversed above.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Tab if the opcode still fits in the 8-column field, space if not.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
        p[3] = '6';
      p += 12;
      /* A very distant target needs the plain "b" form; otherwise the
	 V9 "ba,pt %xcc" form is in range.  */
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  /* Label operand and the "%#" delay-slot marker.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
6667
6668/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
6669   Such instructions cannot be used in the delay slot of return insn on v9.
6670   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
6671 */
6672
static int
epilogue_renumber (register rtx *where, int test)
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* fallthrough: a (possibly renamed) leaf-safe register is OK.  */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_DOUBLE:
      /* Leaf rtxes with no sub-expressions: nothing further to scan.  */
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
                       (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse into every sub-expression and sub-vector of this rtx.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
6744
6745/* Leaf functions and non-leaf functions have different needs.  */
6746
/* Allocation order used for leaf functions.  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Allocation order used for ordinary (non-leaf) functions.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by order_regs_for_local_alloc's flag: [0] leaf, [1] non-leaf.  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
6756
6757void
6758order_regs_for_local_alloc (void)
6759{
6760  static int last_order_nonleaf = 1;
6761
6762  if (df_regs_ever_live_p (15) != last_order_nonleaf)
6763    {
6764      last_order_nonleaf = !last_order_nonleaf;
6765      memcpy ((char *) reg_alloc_order,
6766	      (const char *) reg_alloc_orders[last_order_nonleaf],
6767	      FIRST_PSEUDO_REGISTER * sizeof (int));
6768    }
6769}
6770
6771/* Return 1 if REG and MEM are legitimate enough to allow the various
6772   mem<-->reg splits to be run.  */
6773
6774int
6775sparc_splitdi_legitimate (rtx reg, rtx mem)
6776{
6777  /* Punt if we are here by mistake.  */
6778  gcc_assert (reload_completed);
6779
6780  /* We must have an offsettable memory reference.  */
6781  if (! offsettable_memref_p (mem))
6782    return 0;
6783
6784  /* If we have legitimate args for ldd/std, we do not want
6785     the split to happen.  */
6786  if ((REGNO (reg) % 2) == 0
6787      && mem_min_alignment (mem, 8))
6788    return 0;
6789
6790  /* Success.  */
6791  return 1;
6792}
6793
6794/* Return 1 if x and y are some kind of REG and they refer to
6795   different hard registers.  This test is guaranteed to be
6796   run after reload.  */
6797
6798int
6799sparc_absnegfloat_split_legitimate (rtx x, rtx y)
6800{
6801  if (GET_CODE (x) != REG)
6802    return 0;
6803  if (GET_CODE (y) != REG)
6804    return 0;
6805  if (REGNO (x) == REGNO (y))
6806    return 0;
6807  return 1;
6808}
6809
6810/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6811   This makes them candidates for using ldd and std insns.
6812
6813   Note reg1 and reg2 *must* be hard registers.  */
6814
6815int
6816registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
6817{
6818  /* We might have been passed a SUBREG.  */
6819  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6820    return 0;
6821
6822  if (REGNO (reg1) % 2 != 0)
6823    return 0;
6824
6825  /* Integer ldd is deprecated in SPARC V9 */
6826  if (TARGET_V9 && REGNO (reg1) < 32)
6827    return 0;
6828
6829  return (REGNO (reg1) == REGNO (reg2) - 1);
6830}
6831
6832/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6833   an ldd or std insn.
6834
6835   This can only happen when addr1 and addr2, the addresses in mem1
6836   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6837   addr1 must also be aligned on a 64-bit boundary.
6838
6839   Also iff dependent_reg_rtx is not null it should not be used to
6840   compute the address for mem1, i.e. we cannot optimize a sequence
6841   like:
6842   	ld [%o0], %o0
6843	ld [%o0 + 4], %o1
6844   to
6845   	ldd [%o0], %o0
6846   nor:
6847	ld [%g3 + 4], %g3
6848	ld [%g3], %g2
6849   to
6850        ldd [%g3], %g2
6851
6852   But, note that the transformation from:
6853	ld [%g2 + 4], %g3
6854        ld [%g2], %g2
6855   to
6856	ldd [%g2], %g2
6857   is perfectly fine.  Thus, the peephole2 patterns always pass us
6858   the destination register of the first load, never the second one.
6859
6860   For stores we don't have a similar problem, so dependent_reg_rtx is
6861   NULL_RTX.  */
6862
6863int
6864mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
6865{
6866  rtx addr1, addr2;
6867  unsigned int reg1;
6868  HOST_WIDE_INT offset1;
6869
6870  /* The mems cannot be volatile.  */
6871  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6872    return 0;
6873
6874  /* MEM1 should be aligned on a 64-bit boundary.  */
6875  if (MEM_ALIGN (mem1) < 64)
6876    return 0;
6877
6878  addr1 = XEXP (mem1, 0);
6879  addr2 = XEXP (mem2, 0);
6880
6881  /* Extract a register number and offset (if used) from the first addr.  */
6882  if (GET_CODE (addr1) == PLUS)
6883    {
6884      /* If not a REG, return zero.  */
6885      if (GET_CODE (XEXP (addr1, 0)) != REG)
6886	return 0;
6887      else
6888	{
6889          reg1 = REGNO (XEXP (addr1, 0));
6890	  /* The offset must be constant!  */
6891	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6892            return 0;
6893          offset1 = INTVAL (XEXP (addr1, 1));
6894	}
6895    }
6896  else if (GET_CODE (addr1) != REG)
6897    return 0;
6898  else
6899    {
6900      reg1 = REGNO (addr1);
6901      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
6902      offset1 = 0;
6903    }
6904
6905  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
6906  if (GET_CODE (addr2) != PLUS)
6907    return 0;
6908
6909  if (GET_CODE (XEXP (addr2, 0)) != REG
6910      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6911    return 0;
6912
6913  if (reg1 != REGNO (XEXP (addr2, 0)))
6914    return 0;
6915
6916  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6917    return 0;
6918
6919  /* The first offset must be evenly divisible by 8 to ensure the
6920     address is 64 bit aligned.  */
6921  if (offset1 % 8 != 0)
6922    return 0;
6923
6924  /* The offset for the second addr must be 4 more than the first addr.  */
6925  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6926    return 0;
6927
6928  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
6929     instructions.  */
6930  return 1;
6931}
6932
6933/* Return 1 if reg is a pseudo, or is the first register in
6934   a hard register pair.  This makes it suitable for use in
6935   ldd and std insns.  */
6936
6937int
6938register_ok_for_ldd (rtx reg)
6939{
6940  /* We might have been passed a SUBREG.  */
6941  if (!REG_P (reg))
6942    return 0;
6943
6944  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6945    return (REGNO (reg) % 2 == 0);
6946
6947  return 1;
6948}
6949
6950/* Return 1 if OP is a memory whose address is known to be
6951   aligned to 8-byte boundary, or a pseudo during reload.
6952   This makes it suitable for use in ldd and std insns.  */
6953
6954int
6955memory_ok_for_ldd (rtx op)
6956{
6957  if (MEM_P (op))
6958    {
6959      /* In 64-bit mode, we assume that the address is word-aligned.  */
6960      if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
6961	return 0;
6962
6963      if ((reload_in_progress || reload_completed)
6964	  && !strict_memory_address_p (Pmode, XEXP (op, 0)))
6965	return 0;
6966    }
6967  else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
6968    {
6969      if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
6970	return 0;
6971    }
6972  else
6973    return 0;
6974
6975  return 1;
6976}
6977
6978/* Print operand X (an rtx) in assembler syntax to file FILE.
6979   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6980   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
6981
void
print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output an insn in a delay slot.  */
      if (final_sequence)
        sparc_indent_opcode = 1;
      else
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.
         Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.
         On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      else if (final_sequence)
        sparc_indent_opcode = 1;
      return;
    case ')':
      /* Output the right displacement from the saved PC on function return.
	 The caller may have placed an "unimp" insn immediately after the call
	 so we have to account for it.  This insn is used in the 32-bit ABI
	 when calling a function that returns a non zero-sized structure.  The
	 64-bit ABI doesn't have it.  Be careful to have this test be the same
	 as that for the call.  The exception is when sparc_std_struct_return
	 is enabled, the psABI is followed exactly and the adjustment is made
	 by the code in sparc_struct_value_rtx.  The call emitted is the same
	 when sparc_std_struct_return is enabled. */
     if (!TARGET_ARCH64
	 && cfun->returns_struct
	 && !sparc_std_struct_return
	 && DECL_SIZE (DECL_RESULT (current_function_decl))
	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
	     == INTEGER_CST
	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
	fputs ("12", file);
      else
        fputc ('8', file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '&':
      /* Print some local dynamic TLS name.  */
      assemble_name (file, get_some_local_dynamic_name ());
      return;

    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	/* A constant is unaffected by RESTORE; print it via the
	   common code at the bottom of this function.  */
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	/* Global registers (%g0-%g7) are unchanged by RESTORE.  */
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* Input registers (%i0-%i7) become the caller's outputs
	   (%o0-%o7) after RESTORE; shift the regno down by 16.  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* We don't handle CC[X]_NOOVmode because they're not supposed
	     to occur here.  */
	  if (GET_MODE (x) == CCmode)
	    fputs ("%icc", file);
	  else if (GET_MODE (x) == CCXmode)
	    fputs ("%xcc", file);
	  else
	    gcc_unreachable ();
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;

    case 'A':
      /* Print the logical opcode name for a 3-operand insn.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("or", file); break;
	case AND: fputs ("and", file); break;
	case XOR: fputs ("xor", file); break;
	default: output_operand_lossage ("invalid %%A operand");
	}
      return;

    case 'B':
      /* Print the negated-form logical opcode name.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("orn", file); break;
	case AND: fputs ("andn", file); break;
	case XOR: fputs ("xnor", file); break;
	default: output_operand_lossage ("invalid %%B operand");
	}
      return;

      /* These are used by the conditional move instructions.  */
    case 'c' :
    case 'C':
      {
	enum rtx_code rc = GET_CODE (x);

	/* 'c' prints the reversed condition; for FP compares the
	   unordered-aware reversal must be used.  */
	if (code == 'c')
	  {
	    enum machine_mode mode = GET_MODE (XEXP (x, 0));
	    if (mode == CCFPmode || mode == CCFPEmode)
	      rc = reverse_condition_maybe_unordered (GET_CODE (x));
	    else
	      rc = reverse_condition (GET_CODE (x));
	  }
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("ge", file); break;
	  case GT: fputs ("g", file); break;
	  case LE: fputs ("le", file); break;
	  case LT: fputs ("l", file); break;
	  case GEU: fputs ("geu", file); break;
	  case GTU: fputs ("gu", file); break;
	  case LEU: fputs ("leu", file); break;
	  case LTU: fputs ("lu", file); break;
	  case LTGT: fputs ("lg", file); break;
	  case UNORDERED: fputs ("u", file); break;
	  case ORDERED: fputs ("o", file); break;
	  case UNLT: fputs ("ul", file); break;
	  case UNLE: fputs ("ule", file); break;
	  case UNGT: fputs ("ug", file); break;
	  case UNGE: fputs ("uge", file); break;
	  case UNEQ: fputs ("ue", file); break;
	  default: output_operand_lossage (code == 'c'
					   ? "invalid %%c operand"
					   : "invalid %%C operand");
	  }
	return;
      }

      /* These are used by the movr instruction pattern.  */
    case 'd':
    case 'D':
      {
	/* 'd' prints the reversed condition, 'D' the condition as-is.  */
	enum rtx_code rc = (code == 'd'
			    ? reverse_condition (GET_CODE (x))
			    : GET_CODE (x));
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("gez", file); break;
	  case LT: fputs ("lz", file); break;
	  case LE: fputs ("lez", file); break;
	  case GT: fputs ("gz", file); break;
	  default: output_operand_lossage (code == 'd'
					   ? "invalid %%d operand"
					   : "invalid %%D operand");
	  }
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = trunc_int_for_mode (INTVAL (x), QImode);
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      /* NOTE(review): on lossage we still fall through and call
	 output_address on XEXP (x, 0) — presumably lossage makes the
	 output unusable anyway; confirm this is intentional.  */
      output_address (XEXP (x, 0));
      return;

    case 's':
      {
	/* Print a sign-extended 32-bit value.  */
	HOST_WIDE_INT i;
	if (GET_CODE(x) == CONST_INT)
	  i = INTVAL (x);
	else if (GET_CODE(x) == CONST_DOUBLE)
	  i = CONST_DOUBLE_LOW (x);
	else
	  {
	    output_operand_lossage ("invalid %%s operand");
	    return;
	  }
	i = trunc_int_for_mode (i, SImode);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
	return;
      }

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* Common output path for the cases above that 'break' instead of
     returning: print X according to its RTX code.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
	/* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == VOIDmode
	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
    {
      /* Only constants representable as a 32-bit immediate are valid:
	 either the high word is zero (small unsigned value) or the
	 value is a sign-extended negative 32-bit number.  */
      if (CONST_DOUBLE_HIGH (x) == 0)
	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
      else if (CONST_DOUBLE_HIGH (x) == -1
	       && CONST_DOUBLE_LOW (x) < 0)
	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
      else
	output_operand_lossage ("long long constant not a valid immediate operand");
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating point constant not a valid immediate operand");
  else { output_addr_const (file, x); }
}
7285
7286/* Target hook for assembling integer objects.  The sparc version has
7287   special handling for aligned DI-mode objects.  */
7288
7289static bool
7290sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7291{
7292  /* ??? We only output .xword's for symbols and only then in environments
7293     where the assembler can handle them.  */
7294  if (aligned_p && size == 8
7295      && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7296    {
7297      if (TARGET_V9)
7298	{
7299	  assemble_integer_with_op ("\t.xword\t", x);
7300	  return true;
7301	}
7302      else
7303	{
7304	  assemble_aligned_integer (4, const0_rtx);
7305	  assemble_aligned_integer (4, x);
7306	  return true;
7307	}
7308    }
7309  return default_assemble_integer (x, size, aligned_p);
7310}
7311
7312/* Return the value of a code used in the .proc pseudo-op that says
7313   what kind of result this function returns.  For non-C types, we pick
7314   the closest C type.  */
7315
7316#ifndef SHORT_TYPE_SIZE
7317#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
7318#endif
7319
7320#ifndef INT_TYPE_SIZE
7321#define INT_TYPE_SIZE BITS_PER_WORD
7322#endif
7323
7324#ifndef LONG_TYPE_SIZE
7325#define LONG_TYPE_SIZE BITS_PER_WORD
7326#endif
7327
7328#ifndef LONG_LONG_TYPE_SIZE
7329#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
7330#endif
7331
7332#ifndef FLOAT_TYPE_SIZE
7333#define FLOAT_TYPE_SIZE BITS_PER_WORD
7334#endif
7335
7336#ifndef DOUBLE_TYPE_SIZE
7337#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7338#endif
7339
7340#ifndef LONG_DOUBLE_TYPE_SIZE
7341#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
7342#endif
7343
unsigned long
sparc_type_code (register tree type)
{
  /* Encoding: the low bits hold the base-type code (returned below via
     "qualifiers | N"); each level of derivation (array, function,
     pointer) adds a 2-bit code at SHIFT, starting at bit 6.  */
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  /* Derivation code 3: array of.  Loop continues with the
	     element type.  */
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  /* Derivation code 2: function returning.  */
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  /* Derivation code 1: pointer to.  */
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows? */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:		/* ? */
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type! */
        }
    }

  /* Ran out of encoding space; return what we have so far.  */
  return qualifiers;
}
7452
7453/* Nested function support.  */
7454
7455/* Emit RTL insns to initialize the variable parts of a trampoline.
7456   FNADDR is an RTX for the address of the function's pure code.
7457   CXT is an RTX for the static chain value for the function.
7458
7459   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7460   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7461   (to store insns).  This is a bit excessive.  Perhaps a different
7462   mechanism would be better here.
7463
7464   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
7465
static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

 	sethi	%hi(fn), %g1
 	sethi	%hi(static), %g2
 	jmp	%g1+%lo(fn)
 	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: sethi %hi(fn), %g1 — fnaddr >> 10 merged into the
     SETHI opcode template 0x03000000.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr,
				 size_int (10), 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: sethi %hi(static), %g2 — cxt >> 10 merged into
     template 0x05000000.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt,
				 size_int (10), 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: jmp %g1+%lo(fn) — low 10 bits of fnaddr merged into
     template 0x81c06000.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: or %g2, %lo(static), %g2 — low 10 bits of cxt merged
     into template 0x8410a000.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2)
    /* Other CPUs flush smaller lines, so flush the second half too.  */
    emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif

}
7527
7528/* The 64-bit version is simpler because it makes more sense to load the
7529   values as "immediate" data out of the trampoline.  It's also easier since
7530   we can read the PC without clobbering a register.  */
7531
static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* Emit the four fixed instruction words of the sequence above,
     then store the static chain and function address as data so the
     ldx insns can pick them up PC-relative.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));

  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2)
    /* CPUs with shorter flush lines need a second flush for the tail.  */
    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
7569
7570/* Worker for TARGET_TRAMPOLINE_INIT.  */
7571
7572static void
7573sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
7574{
7575  rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
7576  cxt = force_reg (Pmode, cxt);
7577  if (TARGET_ARCH64)
7578    sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
7579  else
7580    sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
7581}
7582
7583/* Adjust the cost of a scheduling dependency.  Return the new cost of
7584   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7585
7586static int
7587supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7588{
7589  enum attr_type insn_type;
7590
7591  if (! recog_memoized (insn))
7592    return 0;
7593
7594  insn_type = get_attr_type (insn);
7595
7596  if (REG_NOTE_KIND (link) == 0)
7597    {
7598      /* Data dependency; DEP_INSN writes a register that INSN reads some
7599	 cycles later.  */
7600
7601      /* if a load, then the dependence must be on the memory address;
7602	 add an extra "cycle".  Note that the cost could be two cycles
7603	 if the reg was written late in an instruction group; we ca not tell
7604	 here.  */
7605      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7606	return cost + 3;
7607
7608      /* Get the delay only if the address of the store is the dependence.  */
7609      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7610	{
7611	  rtx pat = PATTERN(insn);
7612	  rtx dep_pat = PATTERN (dep_insn);
7613
7614	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7615	    return cost;  /* This should not happen!  */
7616
7617	  /* The dependency between the two instructions was on the data that
7618	     is being stored.  Assume that this implies that the address of the
7619	     store is not dependent.  */
7620	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7621	    return cost;
7622
7623	  return cost + 3;  /* An approximation.  */
7624	}
7625
7626      /* A shift instruction cannot receive its data from an instruction
7627	 in the same cycle; add a one cycle penalty.  */
7628      if (insn_type == TYPE_SHIFT)
7629	return cost + 3;   /* Split before cascade into shift.  */
7630    }
7631  else
7632    {
7633      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7634	 INSN writes some cycles later.  */
7635
7636      /* These are only significant for the fpu unit; writing a fp reg before
7637         the fpu has finished with it stalls the processor.  */
7638
7639      /* Reusing an integer register causes no problems.  */
7640      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7641	return 0;
7642    }
7643
7644  return cost;
7645}
7646
7647static int
7648hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7649{
7650  enum attr_type insn_type, dep_type;
7651  rtx pat = PATTERN(insn);
7652  rtx dep_pat = PATTERN (dep_insn);
7653
7654  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7655    return cost;
7656
7657  insn_type = get_attr_type (insn);
7658  dep_type = get_attr_type (dep_insn);
7659
7660  switch (REG_NOTE_KIND (link))
7661    {
7662    case 0:
7663      /* Data dependency; DEP_INSN writes a register that INSN reads some
7664	 cycles later.  */
7665
7666      switch (insn_type)
7667	{
7668	case TYPE_STORE:
7669	case TYPE_FPSTORE:
7670	  /* Get the delay iff the address of the store is the dependence.  */
7671	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7672	    return cost;
7673
7674	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7675	    return cost;
7676	  return cost + 3;
7677
7678	case TYPE_LOAD:
7679	case TYPE_SLOAD:
7680	case TYPE_FPLOAD:
7681	  /* If a load, then the dependence must be on the memory address.  If
7682	     the addresses aren't equal, then it might be a false dependency */
7683	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7684	    {
7685	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7686		  || GET_CODE (SET_DEST (dep_pat)) != MEM
7687		  || GET_CODE (SET_SRC (pat)) != MEM
7688		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7689				    XEXP (SET_SRC (pat), 0)))
7690		return cost + 2;
7691
7692	      return cost + 8;
7693	    }
7694	  break;
7695
7696	case TYPE_BRANCH:
7697	  /* Compare to branch latency is 0.  There is no benefit from
7698	     separating compare and branch.  */
7699	  if (dep_type == TYPE_COMPARE)
7700	    return 0;
7701	  /* Floating point compare to branch latency is less than
7702	     compare to conditional move.  */
7703	  if (dep_type == TYPE_FPCMP)
7704	    return cost - 1;
7705	  break;
7706	default:
7707	  break;
7708	}
7709	break;
7710
7711    case REG_DEP_ANTI:
7712      /* Anti-dependencies only penalize the fpu unit.  */
7713      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7714        return 0;
7715      break;
7716
7717    default:
7718      break;
7719    }
7720
7721  return cost;
7722}
7723
7724static int
7725sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
7726{
7727  switch (sparc_cpu)
7728    {
7729    case PROCESSOR_SUPERSPARC:
7730      cost = supersparc_adjust_cost (insn, link, dep, cost);
7731      break;
7732    case PROCESSOR_HYPERSPARC:
7733    case PROCESSOR_SPARCLITE86X:
7734      cost = hypersparc_adjust_cost (insn, link, dep, cost);
7735      break;
7736    default:
7737      break;
7738    }
7739  return cost;
7740}
7741
/* Scheduler init hook: no per-function scheduling state to set up
   on any SPARC variant, so this is intentionally empty.  */
static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}
7747
7748static int
7749sparc_use_sched_lookahead (void)
7750{
7751  if (sparc_cpu == PROCESSOR_NIAGARA
7752      || sparc_cpu == PROCESSOR_NIAGARA2)
7753    return 0;
7754  if (sparc_cpu == PROCESSOR_ULTRASPARC
7755      || sparc_cpu == PROCESSOR_ULTRASPARC3)
7756    return 4;
7757  if ((1 << sparc_cpu) &
7758      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7759       (1 << PROCESSOR_SPARCLITE86X)))
7760    return 3;
7761  return 0;
7762}
7763
7764static int
7765sparc_issue_rate (void)
7766{
7767  switch (sparc_cpu)
7768    {
7769    case PROCESSOR_NIAGARA:
7770    case PROCESSOR_NIAGARA2:
7771    default:
7772      return 1;
7773    case PROCESSOR_V9:
7774      /* Assume V9 processors are capable of at least dual-issue.  */
7775      return 2;
7776    case PROCESSOR_SUPERSPARC:
7777      return 3;
7778    case PROCESSOR_HYPERSPARC:
7779    case PROCESSOR_SPARCLITE86X:
7780      return 2;
7781    case PROCESSOR_ULTRASPARC:
7782    case PROCESSOR_ULTRASPARC3:
7783      return 4;
7784    }
7785}
7786
/* Classify the SET in INSN: return 1 if the source is known to
   zero-extend (upper bits clear), -1 for ASHIFTRT/SIGN_EXTEND of an
   SImode value (sign- rather than zero-extension), and 0 when nothing
   is known.  NOTE(review): the -1 arises from `- (... == SImode)`;
   confirm the caller interprets negative values this way.  */
static int
set_extends (rtx insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* ANDing with a non-negative constant clears the high bits.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise either operand being known-64-bit-clean suffices.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* For IOR/XOR both operands must be clean: a clean register
	   combined with a non-negative constant or another clean reg.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      /* A 32-bit logical right shift leaves the high bits zero.  */
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_DOUBLE:
      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
    case CONST_INT:
      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* Yields -1 for SImode (sign-extended), 0 otherwise.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* Delegate to the register tracker.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
7849
/* We _ought_ to have only one kind per function, but...  */
/* Jump tables deferred by sparc_defer_case_vector and emitted at the end
   of the function by sparc_output_deferred_case_vectors.  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;
7853
7854void
7855sparc_defer_case_vector (rtx lab, rtx vec, int diff)
7856{
7857  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7858  if (diff)
7859    sparc_addr_diff_list
7860      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7861  else
7862    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7863}
7864
/* Output one deferred absolute jump table.  VEC is an EXPR_LIST pairing
   the table's CODE_LABEL with its ADDR_VEC body: emit the case label,
   then one address word per table entry.  */

static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* Operand 0 of an ADDR_VEC holds the label refs.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
7892
/* Output one deferred label-relative jump table.  VEC is an EXPR_LIST
   pairing the table's CODE_LABEL with its ADDR_DIFF_VEC body: emit the
   case label, then one difference-from-BASE word per entry.  */

static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  /* Operand 0 of an ADDR_DIFF_VEC is the base label ref.  */
  rtx base = XEXP (XEXP (body, 0), 0);
  /* Operand 1 holds the element vector.  */
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
7924
7925static void
7926sparc_output_deferred_case_vectors (void)
7927{
7928  rtx t;
7929  int align;
7930
7931  if (sparc_addr_list == NULL_RTX
7932      && sparc_addr_diff_list == NULL_RTX)
7933    return;
7934
7935  /* Align to cache line in the function's code section.  */
7936  switch_to_section (current_function_section ());
7937
7938  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7939  if (align > 0)
7940    ASM_OUTPUT_ALIGN (asm_out_file, align);
7941
7942  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7943    sparc_output_addr_vec (XEXP (t, 0));
7944  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7945    sparc_output_addr_diff_vec (XEXP (t, 0));
7946
7947  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7948}
7949
/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  */
int
sparc_check_64 (rtx x, rtx insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  /* For a DImode register, inspect the SImode register that holds the
     low word (register number adjusted for endianness).  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  /* With dataflow info available, a unique definition lets us scan the
     whole function instead of stopping at the first unknown insn.  */
  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Scan backwards looking for the insn that sets X (or its low word).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Incoming control flow or an unanalyzed insn spoils the walk
	     unless the register has a single definition.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial clobber of Y leaves the upper bits unknown.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
8009
8010/* Returns assembly code to perform a DImode shift using
8011   a 64-bit global or out register on SPARC-V8+.  */
8012const char *
8013output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
8014{
8015  static char asm_code[60];
8016
8017  /* The scratch register is only required when the destination
8018     register is not a 64-bit global or out register.  */
8019  if (which_alternative != 2)
8020    operands[3] = operands[0];
8021
8022  /* We can only shift by constants <= 63. */
8023  if (GET_CODE (operands[2]) == CONST_INT)
8024    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
8025
8026  if (GET_CODE (operands[1]) == CONST_INT)
8027    {
8028      output_asm_insn ("mov\t%1, %3", operands);
8029    }
8030  else
8031    {
8032      output_asm_insn ("sllx\t%H1, 32, %3", operands);
8033      if (sparc_check_64 (operands[1], insn) <= 0)
8034	output_asm_insn ("srl\t%L1, 0, %L1", operands);
8035      output_asm_insn ("or\t%L1, %3, %3", operands);
8036    }
8037
8038  strcpy(asm_code, opcode);
8039
8040  if (which_alternative != 2)
8041    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
8042  else
8043    return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
8044}
8045
8046/* Output rtl to increment the profiler label LABELNO
8047   for profiling a function entry.  */
8048
8049void
8050sparc_profile_hook (int labelno)
8051{
8052  char buf[32];
8053  rtx lab, fun;
8054
8055  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
8056  if (NO_PROFILE_COUNTERS)
8057    {
8058      emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
8059    }
8060  else
8061    {
8062      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8063      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
8064      emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
8065    }
8066}
8067
8068/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
8069
8070static void
8071sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
8072				     tree decl ATTRIBUTE_UNUSED)
8073{
8074  fprintf (asm_out_file, "\t.section\t\"%s\"", name);
8075
8076  if (!(flags & SECTION_DEBUG))
8077    fputs (",#alloc", asm_out_file);
8078  if (flags & SECTION_WRITE)
8079    fputs (",#write", asm_out_file);
8080  if (flags & SECTION_TLS)
8081    fputs (",#tls", asm_out_file);
8082  if (flags & SECTION_CODE)
8083    fputs (",#execinstr", asm_out_file);
8084
8085  /* ??? Handle SECTION_BSS.  */
8086
8087  fputc ('\n', asm_out_file);
8088}
8089
8090/* We do not allow indirect calls to be optimized into sibling calls.
8091
8092   We cannot use sibling calls when delayed branches are disabled
8093   because they will likely require the call delay slot to be filled.
8094
8095   Also, on SPARC 32-bit we cannot emit a sibling call when the
8096   current function returns a structure.  This is because the "unimp
8097   after call" convention would cause the callee to return to the
8098   wrong place.  The generic code already disallows cases where the
8099   function being called returns a structure.
8100
8101   It may seem strange how this last case could occur.  Usually there
8102   is code after the call which jumps to epilogue code which dumps the
8103   return value into the struct return area.  That ought to invalidate
8104   the sibling call right?  Well, in the C++ case we can end up passing
8105   the pointer to the struct return area to a constructor (which returns
8106   void) and then nothing else happens.  Such a sibling call would look
8107   valid without the added check here.
8108
8109   VxWorks PIC PLT entries require the global pointer to be initialized
8110   on entry.  We therefore can't emit sibling calls to them.  */
8111static bool
8112sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8113{
8114  return (decl
8115	  && flag_delayed_branch
8116	  && (TARGET_ARCH64 || ! cfun->returns_struct)
8117	  && !(TARGET_VXWORKS_RTP
8118	       && flag_pic
8119	       && !targetm.binds_local_p (decl)));
8120}
8121
8122/* libfunc renaming.  */
8123#include "config/gofast.h"
8124
/* Register the library-call names for operations the SPARC ports do not
   open-code.  Implements TARGET_INIT_LIBFUNCS.  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler. .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      /* TFmode comparisons.  */
      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      /* Conversions between TFmode and the narrower float modes.  */
      set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");

      /* Conversions between TFmode and 32-bit integers.  */
      set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
         hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }

  gofast_maybe_init_libfuncs ();
}
8220
/* Register a machine-specific builtin named NAME, expanded by the insn
   with code CODE and having function type TYPE.  */
#define def_builtin(NAME, CODE, TYPE) \
  add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
                       NULL_TREE)
8224
8225/* Implement the TARGET_INIT_BUILTINS target hook.
8226   Create builtin functions for special SPARC instructions.  */
8227
8228static void
8229sparc_init_builtins (void)
8230{
8231  if (TARGET_VIS)
8232    sparc_vis_init_builtins ();
8233}
8234
/* Create builtin functions for VIS 1.0 instructions.  */

static void
sparc_vis_init_builtins (void)
{
  /* Vector types operated on by the VIS builtins.  */
  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
  tree v4hi = build_vector_type (intHI_type_node, 4);
  tree v2hi = build_vector_type (intHI_type_node, 2);
  tree v2si = build_vector_type (intSI_type_node, 2);

  /* Function prototypes shared by the builtins below.  */
  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
							 v8qi, v8qi,
							 intDI_type_node, 0);
  tree di_ftype_di_di = build_function_type_list (intDI_type_node,
						  intDI_type_node,
						  intDI_type_node, 0);
  tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
		        			    ptr_type_node,
					            intSI_type_node, 0);
  tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
		        			    ptr_type_node,
					            intDI_type_node, 0);

  /* Packing and expanding vectors.  */
  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
	       v8qi_ftype_v2si_v8qi);
  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
	       v2hi_ftype_v2si);
  def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
  def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
	       v8qi_ftype_v4qi_v4qi);

  /* Multiplications.  */
  def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
	       v4hi_ftype_v4qi_v4hi);
  def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
	       v4hi_ftype_v4qi_v2hi);
  def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
	       v4hi_ftype_v4qi_v2hi);
  def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
	       v4hi_ftype_v8qi_v4hi);
  def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
	       v4hi_ftype_v8qi_v4hi);
  def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
	       v2si_ftype_v4qi_v2hi);
  def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
	       v2si_ftype_v4qi_v2hi);

  /* Data aligning.  */
  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
	       v4hi_ftype_v4hi_v4hi);
  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
	       v8qi_ftype_v8qi_v8qi);
  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
	       v2si_ftype_v2si_v2si);
  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
               di_ftype_di_di);
  /* alignaddr takes a pointer plus an offset whose width matches the ABI.  */
  if (TARGET_ARCH64)
    def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
	         ptr_ftype_ptr_di);
  else
    def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
	         ptr_ftype_ptr_si);

  /* Pixel distance.  */
  def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
	       di_ftype_v8qi_v8qi_di);
}
8317
8318/* Handle TARGET_EXPAND_BUILTIN target hook.
8319   Expand builtin functions for sparc intrinsics.  */
8320
8321static rtx
8322sparc_expand_builtin (tree exp, rtx target,
8323		      rtx subtarget ATTRIBUTE_UNUSED,
8324		      enum machine_mode tmode ATTRIBUTE_UNUSED,
8325		      int ignore ATTRIBUTE_UNUSED)
8326{
8327  tree arg;
8328  call_expr_arg_iterator iter;
8329  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8330  unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8331  rtx pat, op[4];
8332  enum machine_mode mode[4];
8333  int arg_count = 0;
8334
8335  mode[0] = insn_data[icode].operand[0].mode;
8336  if (!target
8337      || GET_MODE (target) != mode[0]
8338      || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8339    op[0] = gen_reg_rtx (mode[0]);
8340  else
8341    op[0] = target;
8342
8343  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8344    {
8345      arg_count++;
8346      mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8347      op[arg_count] = expand_normal (arg);
8348
8349      if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8350							      mode[arg_count]))
8351	op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8352    }
8353
8354  switch (arg_count)
8355    {
8356    case 1:
8357      pat = GEN_FCN (icode) (op[0], op[1]);
8358      break;
8359    case 2:
8360      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8361      break;
8362    case 3:
8363      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8364      break;
8365    default:
8366      gcc_unreachable ();
8367    }
8368
8369  if (!pat)
8370    return NULL_RTX;
8371
8372  emit_insn (pat);
8373
8374  return op[0];
8375}
8376
/* Compute one element of a VIS fmul8x16-style multiply: scale the
   product of an 8-bit and a 16-bit element by 1/256, rounding to
   nearest.  */

static int
sparc_vis_mul8x16 (int e8, int e16)
{
  int product = e8 * e16;

  /* Add half the divisor before the truncating division to round.  */
  return (product + 128) / 256;
}
8382
/* Multiply the vector elements in ELTS0 to the elements in ELTS1 as specified
   by FNCODE.  All of the elements in ELTS0 and ELTS1 lists must be integer
   constants.  A tree list with the results of the multiplications is returned,
   and each element in the list is of INNER_TYPE.  */

static tree
sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
{
  tree n_elts = NULL_TREE;
  int scale;

  switch (fncode)
    {
    case CODE_FOR_fmul8x16_vis:
      /* Elementwise product of corresponding elements.  */
      for (; elts0 && elts1;
	   elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
	{
	  int val
	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
				 TREE_INT_CST_LOW (TREE_VALUE (elts1)));
	  n_elts = tree_cons (NULL_TREE,
			      build_int_cst (inner_type, val),
			      n_elts);
	}
      break;

    case CODE_FOR_fmul8x16au_vis:
      /* Every element of ELTS0 is scaled by the first element of ELTS1.  */
      scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));

      for (; elts0; elts0 = TREE_CHAIN (elts0))
	{
	  int val
	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
				 scale);
	  n_elts = tree_cons (NULL_TREE,
			      build_int_cst (inner_type, val),
			      n_elts);
	}
      break;

    case CODE_FOR_fmul8x16al_vis:
      /* Every element of ELTS0 is scaled by the second element of ELTS1.  */
      scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));

      for (; elts0; elts0 = TREE_CHAIN (elts0))
	{
	  int val
	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
				 scale);
	  n_elts = tree_cons (NULL_TREE,
			      build_int_cst (inner_type, val),
			      n_elts);
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* The list was consed front-to-back; restore source order.  */
  return nreverse (n_elts);

}
/* Handle TARGET_FOLD_BUILTIN target hook.
   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
   result of the function call is ignored.  NULL_TREE is returned if the
   function could not be folded.  */

static tree
sparc_fold_builtin (tree fndecl, tree arglist, bool ignore)
{
  tree arg0, arg1, arg2;
  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
  enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);

  /* An ignored result folds to zero, except for alignaddr — presumably
     because it has a side effect beyond its return value; confirm
     against the VIS instruction semantics.  */
  if (ignore
      && icode != CODE_FOR_alignaddrsi_vis
      && icode != CODE_FOR_alignaddrdi_vis)
    return fold_convert (rtype, integer_zero_node);

  switch (icode)
    {
    case CODE_FOR_fexpand_vis:
      arg0 = TREE_VALUE (arglist);
      STRIP_NOPS (arg0);

      if (TREE_CODE (arg0) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  tree elts = TREE_VECTOR_CST_ELTS (arg0);
	  tree n_elts = NULL_TREE;

	  /* fexpand shifts each 8-bit element left by 4 into 16 bits.  */
	  for (; elts; elts = TREE_CHAIN (elts))
	    {
	      unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
	      n_elts = tree_cons (NULL_TREE,
				  build_int_cst (inner_type, val),
				  n_elts);
	    }
	  return build_vector (rtype, nreverse (n_elts));
	}
      break;

    case CODE_FOR_fmul8x16_vis:
    case CODE_FOR_fmul8x16au_vis:
    case CODE_FOR_fmul8x16al_vis:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
	  tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
						  elts1);

	  return build_vector (rtype, n_elts);
	}
      break;

    case CODE_FOR_fpmerge_vis:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
	  tree n_elts = NULL_TREE;

	  /* Interleave the elements of the two input vectors.  */
	  for (; elts0 && elts1;
	       elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
	    {
	      n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
	      n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
	    }

	  return build_vector (rtype, nreverse (n_elts));
	}
      break;

    case CODE_FOR_pdist_vis:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
      STRIP_NOPS (arg2);

      if (TREE_CODE (arg0) == VECTOR_CST
	  && TREE_CODE (arg1) == VECTOR_CST
	  && TREE_CODE (arg2) == INTEGER_CST)
	{
	  int overflow = 0;
	  unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
	  HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
	  tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
	  tree elts1 = TREE_VECTOR_CST_ELTS (arg1);

	  /* Accumulate |elt0 - elt1| over all element pairs into the
	     double-word (low, high) starting from ARG2.  */
	  for (; elts0 && elts1;
	       elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
	    {
	      unsigned HOST_WIDE_INT
		low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
		low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
	      HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
	      HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));

	      unsigned HOST_WIDE_INT l;
	      HOST_WIDE_INT h;

	      /* l:h = elt0 - elt1, then take the absolute value.  */
	      overflow |= neg_double (low1, high1, &l, &h);
	      overflow |= add_double (low0, high0, l, h, &l, &h);
	      if (h < 0)
		overflow |= neg_double (l, h, &l, &h);

	      overflow |= add_double (low, high, l, h, &low, &high);
	    }

	  gcc_assert (overflow == 0);

	  return build_int_cst_wide (rtype, low, high);
	}

      /* FALLTHRU */
    default:
      break;
    }

  return NULL_TREE;
}
8576
/* ??? This duplicates information provided to the compiler by the
   ??? scheduler description.  Some day, teach genautomata to output
   ??? the latencies and then CSE will just use that.  */

/* Implement TARGET_RTX_COSTS.  Set *TOTAL to the cost of rtx X, whose
   top-level rtx code is CODE and which appears in context OUTER_CODE.
   Return true when *TOTAL is final, false to let the caller also cost
   the operands.  Per-processor costs come from the sparc_costs table.  */

static bool
sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
		 bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);

  switch (code)
    {
    case CONST_INT:
      /* Values in [-0x1000, 0x1000) fit the 13-bit signed immediate
	 field and are free.  */
      if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
	{
	  *total = 0;
	  return true;
	}
      /* FALLTHRU */

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      /* A VOIDmode double-word constant that fits the 13-bit signed
	 immediate range is free as well.  */
      if (GET_MODE (x) == VOIDmode
	  && ((CONST_DOUBLE_HIGH (x) == 0
	       && CONST_DOUBLE_LOW (x) < 0x1000)
	      || (CONST_DOUBLE_HIGH (x) == -1
		  && CONST_DOUBLE_LOW (x) < 0
		  && CONST_DOUBLE_LOW (x) >= -0x1000)))
	*total = 0;
      else
	*total = 8;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost
	 of COSTS_N_INSNS (1) was already added in.  This is
	 why we are subtracting it back out.  */
      if (outer_code == ZERO_EXTEND)
	{
	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
	}
      else if (outer_code == SIGN_EXTEND)
	{
	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
	}
      else if (float_mode_p)
	{
	  *total = sparc_costs->float_load;
	}
      else
	{
	  *total = sparc_costs->int_load;
	}

      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = sparc_costs->float_plusminus;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      if (float_mode_p)
	*total = sparc_costs->float_mul;
      else if (! TARGET_HARD_MUL)
	/* No hardware multiply: assume a lengthy emulated sequence.  */
	*total = COSTS_N_INSNS (25);
      else
	{
	  int bit_cost;

	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      int nbits;

	      /* Count the set bits of a constant multiplier (popcount
		 via the n &= n - 1 trick) to model early-out multiply
		 hardware.  */
	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
		{
		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
		  for (nbits = 0; value != 0; value &= value - 1)
		    nbits++;
		}
	      else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
		       && GET_MODE (XEXP (x, 1)) == VOIDmode)
		{
		  rtx x1 = XEXP (x, 1);
		  unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
		  unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);

		  for (nbits = 0; value1 != 0; value1 &= value1 - 1)
		    nbits++;
		  for (; value2 != 0; value2 &= value2 - 1)
		    nbits++;
		}
	      else
		/* Non-constant multiplier: assume an average bit count.  */
		nbits = 7;

	      if (nbits < 3)
		nbits = 3;
	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
	      bit_cost = COSTS_N_INSNS (bit_cost);
	    }

	  if (mode == DImode)
	    *total = sparc_costs->int_mulX + bit_cost;
	  else
	    *total = sparc_costs->int_mul + bit_cost;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (float_mode_p)
	{
	  if (mode == DFmode)
	    *total = sparc_costs->float_div_df;
	  else
	    *total = sparc_costs->float_div_sf;
	}
      else
	{
	  if (mode == DImode)
	    *total = sparc_costs->int_divX;
	  else
	    *total = sparc_costs->int_div;
	}
      return false;

    case NEG:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = sparc_costs->float_move;
      return false;

    case SQRT:
      if (mode == DFmode)
	*total = sparc_costs->float_sqrt_df;
      else
	*total = sparc_costs->float_sqrt_sf;
      return false;

    case COMPARE:
      if (float_mode_p)
	*total = sparc_costs->float_cmp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = sparc_costs->float_cmove;
      else
	*total = sparc_costs->int_cmove;
      return false;

    case IOR:
      /* Handle the NAND vector patterns.  */
      if (sparc_vector_mode_supported_p (GET_MODE (x))
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
        return false;

    default:
      return false;
    }
}
8780
/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
   This is achieved by means of a manual dynamic stack space allocation in
   the current frame.  We make the assumption that SEQ doesn't contain any
   function calls, with the possible exception of calls to the GOT helper.  */

static void
emit_and_preserve (rtx seq, rtx reg, rtx reg2)
{
  /* We must preserve the lowest 16 words for the register save area.  */
  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
  /* We really need only 2 words of fresh stack space.  */
  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);

  /* Spill slot just above the register save area (past the stack bias).  */
  rtx slot
    = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
					     SPARC_STACK_BIAS + offset));

  /* Allocate the scratch area and save REG (and REG2) into it.  */
  emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
  emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
  if (reg2)
    emit_insn (gen_rtx_SET (VOIDmode,
			    adjust_address (slot, word_mode, UNITS_PER_WORD),
			    reg2));
  emit_insn (seq);
  /* Restore the registers and release the stack space.  */
  if (reg2)
    emit_insn (gen_rtx_SET (VOIDmode,
			    reg2,
			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
  emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
}
8812
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
   (*THIS + VCALL_OFFSET) should be additionally added to THIS.

   The thunk is emitted by generating RTL by hand and running just
   enough of the final pass over it; no optimization passes are run.  */

static void
sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  rtx this_rtx, insn, funexp;
  unsigned int int_arg_first;

  /* Pretend that reload and the epilogue pass have already run, since we
     emit hard-register RTL directly and invoke final by hand below.
     Both flags are reset at the end of this function.  */
  reload_completed = 1;
  epilogue_completed = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  if (flag_delayed_branch)
    {
      /* We will emit a regular sibcall below, so we need to instruct
	 output_sibcall that we are in a leaf function.  */
      sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;

      /* This will cause final.c to invoke leaf_renumber_regs so we
	 must behave as if we were in a not-yet-leafified function.  */
      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
    }
  else
    {
      /* We will emit the sibcall manually below, so we will need to
	 manually spill non-leaf registers.  */
      sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;

      /* We really are in a leaf function.  */
      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }

  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
     returns a structure, the structure return pointer is there instead.  */
  if (TARGET_ARCH64
      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, int_arg_first);

  /* Add DELTA.  When possible use a plain add, otherwise load it into
     a register first.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (! SPARC_SIMM13_P (delta))
	{
	  /* DELTA doesn't fit in a 13-bit signed immediate; go through
	     the fixed scratch register %g1.  */
	  rtx scratch = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (scratch, delta_rtx);
	  delta_rtx = scratch;
	}

      /* THIS_RTX += DELTA.  */
      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
    }

  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx scratch = gen_rtx_REG (Pmode, 1);

      gcc_assert (vcall_offset < 0);

      /* SCRATCH = *THIS_RTX.  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));

      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
	 may not have any available scratch register at this point.  */
      if (SPARC_SIMM13_P (vcall_offset))
	;
      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
      else if (! fixed_regs[5]
	       /* The below sequence is made up of at least 2 insns,
		  while the default method may need only one.  */
	       && vcall_offset < -8192)
	{
	  /* Load the offset into %g5 and add it in one go.  */
	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
	  emit_move_insn (scratch2, vcall_offset_rtx);
	  vcall_offset_rtx = scratch2;
	}
      else
	{
	  rtx increment = GEN_INT (-4096);

	  /* VCALL_OFFSET is a negative number whose typical range can be
	     estimated as -32768..0 in 32-bit mode.  In almost all cases
	     it is therefore cheaper to emit multiple add insns than
	     spilling and loading the constant into a register (at least
	     6 insns).  */
	  while (! SPARC_SIMM13_P (vcall_offset))
	    {
	      emit_insn (gen_add2_insn (scratch, increment));
	      vcall_offset += 4096;
	    }
	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
	}

      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
					    gen_rtx_PLUS (Pmode,
							  scratch,
							  vcall_offset_rtx)));

      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
      emit_insn (gen_add2_insn (this_rtx, scratch));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      /* Make sure the target symbol is emitted as external before
	 referencing it; do it only once.  */
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);

  if (flag_delayed_branch)
    {
      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
      insn = emit_call_insn (gen_sibcall (funexp));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      /* The hoops we have to jump through in order to generate a sibcall
	 without using delay slots...  */
      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);

      if (flag_pic)
        {
	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
	  start_sequence ();
	  /* Delay emitting the GOT helper function because it needs to
	     change the section and we are emitting assembly code.  */
	  load_got_register ();  /* clobbers %o7 */
	  scratch = legitimize_pic_address (funexp, scratch);
	  seq = get_insns ();
	  end_sequence ();
	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
	}
      else if (TARGET_ARCH32)
	{
	  /* Classic sethi/or pair to materialize the 32-bit address.  */
	  emit_insn (gen_rtx_SET (VOIDmode,
				  scratch,
				  gen_rtx_HIGH (SImode, funexp)));
	  emit_insn (gen_rtx_SET (VOIDmode,
				  scratch,
				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
	}
      else  /* TARGET_ARCH64 */
        {
	  switch (sparc_cmodel)
	    {
	    case CM_MEDLOW:
	    case CM_MEDMID:
	      /* The destination can serve as a temporary.  */
	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
	      break;

	    case CM_MEDANY:
	    case CM_EMBMEDANY:
	      /* The destination cannot serve as a temporary.  */
	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
	      start_sequence ();
	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
	      seq = get_insns ();
	      end_sequence ();
	      emit_and_preserve (seq, spill_reg, 0);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      emit_jump_insn (gen_indirect_jump (scratch));
    }

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  insn_locators_alloc ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Undo the fake reload/epilogue state set at the top.  */
  reload_completed = 0;
  epilogue_completed = 0;
}
9015
9016/* Return true if sparc_output_mi_thunk would be able to output the
9017   assembler code for the thunk function specified by the arguments
9018   it is passed, and false otherwise.  */
9019static bool
9020sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
9021			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
9022			   HOST_WIDE_INT vcall_offset,
9023			   const_tree function ATTRIBUTE_UNUSED)
9024{
9025  /* Bound the loop used in the default method above.  */
9026  return (vcall_offset >= -32768 || ! fixed_regs[5]);
9027}
9028
9029/* How to allocate a 'struct machine_function'.  */
9030
9031static struct machine_function *
9032sparc_init_machine_status (void)
9033{
9034  return GGC_CNEW (struct machine_function);
9035}
9036
9037/* Locate some local-dynamic symbol still in use by this function
9038   so that we can print its name in local-dynamic base patterns.  */
9039
9040static const char *
9041get_some_local_dynamic_name (void)
9042{
9043  rtx insn;
9044
9045  if (cfun->machine->some_ld_name)
9046    return cfun->machine->some_ld_name;
9047
9048  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9049    if (INSN_P (insn)
9050	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9051      return cfun->machine->some_ld_name;
9052
9053  gcc_unreachable ();
9054}
9055
9056static int
9057get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9058{
9059  rtx x = *px;
9060
9061  if (x
9062      && GET_CODE (x) == SYMBOL_REF
9063      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9064    {
9065      cfun->machine->some_ld_name = XSTR (x, 0);
9066      return 1;
9067    }
9068
9069  return 0;
9070}
9071
/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
static void
sparc_dwarf_handle_frame_unspec (const char *label,
				 rtx pattern ATTRIBUTE_UNUSED,
				 int index ATTRIBUTE_UNUSED)
{
  /* The only frame-related unspec expected here is the register window
     save.  INDEX carries ATTRIBUTE_UNUSED because gcc_assert expands to
     nothing when internal checking is disabled.  */
  gcc_assert (index == UNSPECV_SAVEW);
  dwarf2out_window_save (label);
}
9083
9084/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9085   We need to emit DTP-relative relocations.  */
9086
9087static void
9088sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
9089{
9090  switch (size)
9091    {
9092    case 4:
9093      fputs ("\t.word\t%r_tls_dtpoff32(", file);
9094      break;
9095    case 8:
9096      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9097      break;
9098    default:
9099      gcc_unreachable ();
9100    }
9101  output_addr_const (file, x);
9102  fputs (")", file);
9103}
9104
/* Do whatever processing is required at the end of a file.  In particular,
   emit the deferred GOT helper function and, if needed, the executable
   stack marker.  */

static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_rtx)
    {
      const char *name = XSTR (got_helper_rtx, 0);
      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the helper as a public, hidden, one-only function so
	     that the linker merges duplicate copies across objects.  */
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type (void_type_node,
						       void_list_node));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  /* Set up a minimal function context so assemble_start_function
	     can be called outside normal compilation.  */
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  /* Otherwise just emit a plain local label in the text section,
	     aligned to the function boundary.  */
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      /* Bracket the helper with CFI directives when the assembler is
	 emitting unwind info for us.  */
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_startproc\n");
#endif
      /* The helper returns via 'jmp %o7+8' and adds %o7 (the address of
	 the call site) into the GOT register; with delayed branches the
	 add sits in the delay slot, otherwise it precedes the jump.  */
      if (flag_delayed_branch)
	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
		 reg_name, reg_name);
      else
	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
		 reg_name, reg_name);
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_endproc\n");
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
9168
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  In 32-bit mode, the 128-bit long double
   is mangled as "g"; everything else gets the normal C++ mangling.  */

static const char *
sparc_mangle_type (const_tree type)
{
  if (TARGET_64BIT)
    return NULL;

  if (TYPE_MAIN_VARIANT (type) != long_double_type_node)
    return NULL;

  if (! TARGET_LONG_DOUBLE_128)
    return NULL;

  return "g";
}
#endif
9184
/* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
   compare and swap on the word containing the byte or half-word.  The
   sub-word value is shifted into position within the containing word, and
   the CAS is retried in a loop as long as only bits outside the field of
   interest have changed.  */

void
sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, end_label, loop_label, cc;

  /* ADDR = the containing word, i.e. ADDR1 rounded down to a multiple
     of 4.  */
  emit_insn (gen_rtx_SET (VOIDmode, addr,
			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  /* OFF = the byte offset of the sub-word within that word.  */
  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  /* VAL = the current contents of the containing word.  */
  val = force_reg (SImode, memsi);

  /* Convert the byte offset into a left-shift count measured from the
     most significant end of the word (SPARC memory is big-endian): the
     XOR with 3 (bytes) or 2 (half-words) reverses the offset.  */
  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  /* Scale the byte offset to a bit count.  */
  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  /* MASK = the field of interest, shifted into position.  */
  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_ASHIFT (SImode, mask, off)));

  /* VAL = the containing word with the field of interest cleared.  */
  emit_insn (gen_rtx_SET (VOIDmode, val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  /* OLDV/NEWV = the old and new sub-word values shifted into position
     and masked to the field of interest.  */
  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (VOIDmode, oldv,
			  gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (VOIDmode, newv,
			  gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (VOIDmode, oldv,
			  gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, newv,
			  gen_rtx_AND (SImode, newv, mask)));

  end_label = gen_label_rtx ();
  loop_label = gen_label_rtx ();
  emit_label (loop_label);

  /* Combine the (possibly updated) background bits in VAL with the
     shifted old and new field values to form full-word operands.  */
  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
			  gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
			  gen_rtx_IOR (SImode, newv, val)));

  emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));

  /* If the CAS succeeded, RES equals OLDVALUE and we are done.  */
  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  /* Otherwise check whether only the background bits changed: RESV is
     the observed word with the field of interest cleared.  */
  emit_insn (gen_rtx_SET (VOIDmode, resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  /* Refresh VAL with the observed background bits and retry if they
     differed from the previous iteration's.  */
  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (VOIDmode, val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  /* Extract the field of interest from the observed word and shift it
     back down to bit 0 for the result.  */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
9285
9286/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
9287
9288bool
9289sparc_frame_pointer_required (void)
9290{
9291  return !(current_function_is_leaf && only_leaf_regs_used ());
9292}
9293
9294/* The way this is structured, we can't eliminate SFP in favor of SP
9295   if the frame pointer is required: we want to use the SFP->HFP elimination
9296   in that case.  But the test in update_eliminables doesn't know we are
9297   assuming below that we only do the former elimination.  */
9298
9299bool
9300sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
9301{
9302  return (to == HARD_FRAME_POINTER_REGNUM
9303          || !targetm.frame_pointer_required ());
9304}
9305
9306#include "gt-sparc.h"
9307