/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2016 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed and
     not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

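/* An illustrative reading of the variable multiply cost above: with
   int_mul = COSTS_N_INSNS (4) and int_mul_bit_factor = 2 (the
   UltraSPARC values below), an operand whose highest set bit is at
   position 9 gives cost = int_mul + (9 - 3) / 2, i.e. 3 extra units
   on top of the base multiply cost.  */
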
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL,        0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

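/* For instance, given (zero_extend:DI (mem:SI (reg:DI %o0))), mem_ref
   returns the inner (mem:SI ...); for a source containing no MEM at
   the top level or under an extension, it returns NULL_RTX.  */
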
/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		         dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

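/* As an illustration of the AT697F case [1-4] above, an (arbitrary)
   sequence such as

       ld    [%o0], %f5
       faddd %f4, %f2, %f4

   gets a nop inserted between the load and the FP operation by the
   pass above.  */
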
namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

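/* A sketch of the registration expected by the comment above, using
   the generic pass manager API from tree-pass.h; the reference pass
   ("dbr", delayed-branch scheduling) is an assumption made here so
   that the errata pass sees the (essentially) final insn stream:

     register_pass_info insert_pass_work_around_errata =
       { make_pass_work_around_errata (g), "dbr", 1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_pass_work_around_errata);  */
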
/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7",		MASK_ISA, 0 },
    { "cypress",	MASK_ISA, 0 },
    { "v8",		MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc",	MASK_ISA, MASK_V8 },
    { "hypersparc",	MASK_ISA, MASK_V8|MASK_FPU },
    { "leon",		MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3",		MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7",	MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite",	MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930",		MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934",		MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x",	MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet",	MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",		MASK_ISA, MASK_SPARCLET },
    { "v9",		MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc",	MASK_ISA,
    /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3",	MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara",	MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_VIS4|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
        call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~MASK_VIS4
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
		      | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0)
    {
      if (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4)
	align_functions = 32;
      else if (sparc_cpu == PROCESSOR_NIAGARA7)
	align_functions = 64;
    }

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

1620  /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1621     can run at the same time.  More important, it is the threshold
1622     defining when additional prefetches will be dropped by the
1623     hardware.
1624
1625     The UltraSPARC-III features a documented prefetch queue with a
1626     size of 8.  Additional prefetches issued in the cpu are
1627     dropped.
1628
1629     Niagara processors are different.  In these processors prefetches
1630     are handled much like regular loads.  The L1 miss buffer is 32
1631     entries, but prefetches start getting affected when 30 entries
1632     become occupied.  That occupation could be a mix of regular loads
1633     and prefetches though.  And that buffer is shared by all threads.
1634     Once the threshold is reached, if the core is running a single
1635     thread the prefetch will retry.  If more than one thread is
1636     running, the prefetch will be dropped.
1637
1638     All this makes it very difficult to determine how many
1639     prefetches can be issued simultaneously, even in a
1640     single-threaded program.  Experimental results show that setting
1641     this parameter to 32 works well when the number of threads is not
1642     high.  */
1643  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1644			 ((sparc_cpu == PROCESSOR_ULTRASPARC
1645			   || sparc_cpu == PROCESSOR_NIAGARA
1646			   || sparc_cpu == PROCESSOR_NIAGARA2
1647			   || sparc_cpu == PROCESSOR_NIAGARA3
1648			   || sparc_cpu == PROCESSOR_NIAGARA4)
1649			  ? 2
1650			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
1651			     ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
1652				    ? 32 : 3))),
1653			 global_options.x_param_values,
1654			 global_options_set.x_param_values);
1655
1656  /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
1657     params.def), so no maybe_set_param_value is needed.
1658
1659     The Oracle SPARC Architecture (previously the UltraSPARC
1660     Architecture) specification states that when a PREFETCH[A]
1661     instruction is executed an implementation-specific amount of data
1662     is prefetched, and that it is at least 64 bytes long (aligned to
1663     at least 64 bytes).
1664
1665     However, this is not correct.  The M7 (and implementations prior
1666     to that) does not guarantee a 64B prefetch into a cache if the
1667     line size is smaller.  A single cache line is all that is ever
1668     prefetched.  So for the M7, where the L1D$ has 32B lines and the
1669     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1670     L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1671     is a read_n prefetch, which is the only type which allocates to
1672     the L1.)  */
1673
1674  /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1675     Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
1676     Niagara processors feature an L1D$ of 16KB.  */
1677  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1678			 ((sparc_cpu == PROCESSOR_ULTRASPARC
1679			   || sparc_cpu == PROCESSOR_ULTRASPARC3
1680			   || sparc_cpu == PROCESSOR_NIAGARA
1681			   || sparc_cpu == PROCESSOR_NIAGARA2
1682			   || sparc_cpu == PROCESSOR_NIAGARA3
1683			   || sparc_cpu == PROCESSOR_NIAGARA4
1684			   || sparc_cpu == PROCESSOR_NIAGARA7)
1685			  ? 16 : 64),
1686			 global_options.x_param_values,
1687			 global_options_set.x_param_values);
1688
1689
1690  /* PARAM_L2_CACHE_SIZE is the size of the L2 cache in kilobytes.  Note
1691     that 512 is the default in params.def.  */
1692  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1693			 (sparc_cpu == PROCESSOR_NIAGARA4
1694			  ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1695				   ? 256 : 512)),
1696			 global_options.x_param_values,
1697			 global_options_set.x_param_values);
1698
1699
1700  /* Disable save slot sharing for call-clobbered registers by default.
1701     The IRA sharing algorithm works on single registers only and this
1702     pessimizes for double floating-point registers.  */
1703  if (!global_options_set.x_flag_ira_share_save_slots)
1704    flag_ira_share_save_slots = 0;
1705
1706  /* We register a machine specific pass to work around errata, if any.
1707     The pass must be scheduled as late as possible so that we have the
1708     (essentially) final form of the insn stream to work on.
1709     Registering the pass must be done at startup.  It's convenient to
1710     do it here.  */
1711  opt_pass *errata_pass = make_pass_work_around_errata (g);
1712  struct register_pass_info insert_pass_work_around_errata =
1713    {
1714      errata_pass,		/* pass */
1715      "dbr",			/* reference_pass_name */
1716      1,			/* ref_pass_instance_number */
1717      PASS_POS_INSERT_AFTER	/* pos_op */
1718    };
1719  register_pass (&insert_pass_work_around_errata);
1720}
1721
1722/* Miscellaneous utilities.  */
1723
1724/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1725   or branch on register contents instructions.  */
1726
1727int
1728v9_regcmp_p (enum rtx_code code)
1729{
1730  return (code == EQ || code == NE || code == GE || code == LT
1731	  || code == LE || code == GT);
1732}
1733
1734/* Nonzero if OP is a floating point constant which can
1735   be loaded into an integer register using a single
1736   sethi instruction.  */
1737
1738int
1739fp_sethi_p (rtx op)
1740{
1741  if (GET_CODE (op) == CONST_DOUBLE)
1742    {
1743      long i;
1744
1745      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1746      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1747    }
1748
1749  return 0;
1750}
1751
1752/* Nonzero if OP is a floating point constant which can
1753   be loaded into an integer register using a single
1754   mov instruction.  */
1755
1756int
1757fp_mov_p (rtx op)
1758{
1759  if (GET_CODE (op) == CONST_DOUBLE)
1760    {
1761      long i;
1762
1763      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1764      return SPARC_SIMM13_P (i);
1765    }
1766
1767  return 0;
1768}
1769
1770/* Nonzero if OP is a floating point constant which can
1771   be loaded into an integer register using a high/losum
1772   instruction sequence.  */
1773
1774int
1775fp_high_losum_p (rtx op)
1776{
1777  /* The constraints calling this should only be in
1778     SFmode move insns, so any constant which cannot
1779     be moved using a single insn will do.  */
1780  if (GET_CODE (op) == CONST_DOUBLE)
1781    {
1782      long i;
1783
1784      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1785      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1786    }
1787
1788  return 0;
1789}
1790
1791/* Return true if the address of LABEL can be loaded by means of the
1792   mov{si,di}_pic_label_ref patterns in PIC mode.  */
1793
1794static bool
1795can_use_mov_pic_label_ref (rtx label)
1796{
1797  /* VxWorks does not impose a fixed gap between segments; the run-time
1798     gap can be different from the object-file gap.  We therefore can't
1799     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1800     are absolutely sure that X is in the same segment as the GOT.
1801     Unfortunately, the flexibility of linker scripts means that we
1802     can't be sure of that in general, so assume that GOT-relative
1803     accesses are never valid on VxWorks.  */
1804  if (TARGET_VXWORKS_RTP)
1805    return false;
1806
1807  /* Similarly, if the label is non-local, it might end up being placed
1808     in a different section than the current one, but mov_pic_label_ref
1809     requires the label and the code to be in the same section.  */
1810  if (LABEL_REF_NONLOCAL_P (label))
1811    return false;
1812
1813  /* Finally, if we are reordering basic blocks and partitioning into hot
1814     and cold sections, this might happen for any label.  */
1815  if (flag_reorder_blocks_and_partition)
1816    return false;
1817
1818  return true;
1819}
1820
1821/* Expand a move instruction.  Return true if all work is done.  */
1822
1823bool
1824sparc_expand_move (machine_mode mode, rtx *operands)
1825{
1826  /* Handle sets of MEM first.  */
1827  if (GET_CODE (operands[0]) == MEM)
1828    {
1829      /* The constant 0 is a register (%g0, or a pair of registers) on SPARC.  */
1830      if (register_or_zero_operand (operands[1], mode))
1831	return false;
1832
1833      if (!reload_in_progress)
1834	{
1835	  operands[0] = validize_mem (operands[0]);
1836	  operands[1] = force_reg (mode, operands[1]);
1837	}
1838    }
1839
1840  /* Fix up TLS cases.  */
1841  if (TARGET_HAVE_TLS
1842      && CONSTANT_P (operands[1])
1843      && sparc_tls_referenced_p (operands [1]))
1844    {
1845      operands[1] = sparc_legitimize_tls_address (operands[1]);
1846      return false;
1847    }
1848
1849  /* Fix up PIC cases.  */
1850  if (flag_pic && CONSTANT_P (operands[1]))
1851    {
1852      if (pic_address_needs_scratch (operands[1]))
1853	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1854
1855      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
1856      if ((GET_CODE (operands[1]) == LABEL_REF
1857	   && can_use_mov_pic_label_ref (operands[1]))
1858	  || (GET_CODE (operands[1]) == CONST
1859	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
1860	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
1861	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
1862	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
1863	{
1864	  if (mode == SImode)
1865	    {
1866	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1867	      return true;
1868	    }
1869
1870	  if (mode == DImode)
1871	    {
1872	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1873	      return true;
1874	    }
1875	}
1876
1877      if (symbolic_operand (operands[1], mode))
1878	{
1879	  operands[1]
1880	    = sparc_legitimize_pic_address (operands[1],
1881					    reload_in_progress
1882					    ? operands[0] : NULL_RTX);
1883	  return false;
1884	}
1885    }
1886
1887  /* If we are trying to toss an integer constant into FP registers,
1888     or loading a FP or vector constant, force it into memory.  */
1889  if (CONSTANT_P (operands[1])
1890      && REG_P (operands[0])
1891      && (SPARC_FP_REG_P (REGNO (operands[0]))
1892	  || SCALAR_FLOAT_MODE_P (mode)
1893	  || VECTOR_MODE_P (mode)))
1894    {
1895      /* emit_group_store will send such bogosity to us when it is
1896         not storing directly into memory.  So fix this up to avoid
1897         crashes in output_constant_pool.  */
1898      if (operands [1] == const0_rtx)
1899	operands[1] = CONST0_RTX (mode);
1900
1901      /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
1902	 other registers can always be cleared or set this way.  */
1903      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1904	  && (const_zero_operand (operands[1], mode)
1905	      || const_all_ones_operand (operands[1], mode)))
1906	return false;
1907
1908      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1909	  /* We are able to build any SF constant in integer registers
1910	     with at most 2 instructions.  */
1911	  && (mode == SFmode
1912	      /* And any DF constant in integer registers.  */
1913	      || (mode == DFmode
1914		  && ! can_create_pseudo_p ())))
1915	return false;
1916
1917      operands[1] = force_const_mem (mode, operands[1]);
1918      if (!reload_in_progress)
1919	operands[1] = validize_mem (operands[1]);
1920      return false;
1921    }
1922
1923  /* Accept non-constants and valid constants unmodified.  */
1924  if (!CONSTANT_P (operands[1])
1925      || GET_CODE (operands[1]) == HIGH
1926      || input_operand (operands[1], mode))
1927    return false;
1928
1929  switch (mode)
1930    {
1931    case QImode:
1932      /* All QImode constants require only one insn, so proceed.  */
1933      break;
1934
1935    case HImode:
1936    case SImode:
1937      sparc_emit_set_const32 (operands[0], operands[1]);
1938      return true;
1939
1940    case DImode:
1941      /* input_operand should have filtered out 32-bit mode.  */
1942      sparc_emit_set_const64 (operands[0], operands[1]);
1943      return true;
1944
1945    case TImode:
1946      {
1947	rtx high, low;
1948	/* TImode isn't available in 32-bit mode.  */
1949	split_double (operands[1], &high, &low);
1950	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1951			      high));
1952	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1953			      low));
1954      }
1955      return true;
1956
1957    default:
1958      gcc_unreachable ();
1959    }
1960
1961  return false;
1962}
1963
1964/* Load OP1, a 32-bit constant, into OP0, a register.
1965   We know it can't be done in one insn when we get
1966   here; the move expander guarantees this.  */
1967
1968static void
1969sparc_emit_set_const32 (rtx op0, rtx op1)
1970{
1971  machine_mode mode = GET_MODE (op0);
1972  rtx temp = op0;
1973
1974  if (can_create_pseudo_p ())
1975    temp = gen_reg_rtx (mode);
1976
1977  if (GET_CODE (op1) == CONST_INT)
1978    {
1979      gcc_assert (!small_int_operand (op1, mode)
1980		  && !const_high_operand (op1, mode));
1981
1982      /* Emit them as real moves instead of a HIGH/LO_SUM,
1983	 this way CSE can see everything and reuse intermediate
1984	 values if it wants.  */
1985      emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1986					     & ~(HOST_WIDE_INT) 0x3ff)));
1987
1988      emit_insn (gen_rtx_SET (op0,
1989			      gen_rtx_IOR (mode, temp,
1990					   GEN_INT (INTVAL (op1) & 0x3ff))));
1991    }
1992  else
1993    {
1994      /* A symbol, emit in the traditional way.  */
1995      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1996      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1997    }
1998}
1999
2000/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2001   If TEMP is nonzero, we are forbidden to use any other scratch
2002   registers.  Otherwise, we are allowed to generate them as needed.
2003
2004   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2005   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
2006
2007void
2008sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2009{
2010  rtx temp1, temp2, temp3, temp4, temp5;
2011  rtx ti_temp = 0;
2012
2013  if (temp && GET_MODE (temp) == TImode)
2014    {
2015      ti_temp = temp;
2016      temp = gen_rtx_REG (DImode, REGNO (temp));
2017    }
2018
2019  /* SPARC-V9 code-model support.  */
2020  switch (sparc_cmodel)
2021    {
2022    case CM_MEDLOW:
2023      /* The range spanned by all instructions in the object is less
2024	 than 2^31 bytes (2GB) and the distance from any instruction
2025	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2026	 than 2^31 bytes (2GB).
2027
2028	 The executable must be in the low 4GB of the virtual address
2029	 space.
2030
2031	 sethi	%hi(symbol), %temp1
2032	 or	%temp1, %lo(symbol), %reg  */
2033      if (temp)
2034	temp1 = temp;  /* op0 is allowed.  */
2035      else
2036	temp1 = gen_reg_rtx (DImode);
2037
2038      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2039      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2040      break;
2041
2042    case CM_MEDMID:
2043      /* The range spanned by all instructions in the object is less
2044	 than 2^31 bytes (2GB) and the distance from any instruction
2045	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2046	 than 2^31 bytes (2GB).
2047
2048	 The executable must be in the low 16TB of the virtual address
2049	 space.
2050
2051	 sethi	%h44(symbol), %temp1
2052	 or	%temp1, %m44(symbol), %temp2
2053	 sllx	%temp2, 12, %temp3
2054	 or	%temp3, %l44(symbol), %reg  */
2055      if (temp)
2056	{
2057	  temp1 = op0;
2058	  temp2 = op0;
2059	  temp3 = temp;  /* op0 is allowed.  */
2060	}
2061      else
2062	{
2063	  temp1 = gen_reg_rtx (DImode);
2064	  temp2 = gen_reg_rtx (DImode);
2065	  temp3 = gen_reg_rtx (DImode);
2066	}
2067
2068      emit_insn (gen_seth44 (temp1, op1));
2069      emit_insn (gen_setm44 (temp2, temp1, op1));
2070      emit_insn (gen_rtx_SET (temp3,
2071			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2072      emit_insn (gen_setl44 (op0, temp3, op1));
2073      break;
2074
2075    case CM_MEDANY:
2076      /* The range spanned by all instructions in the object is less
2077	 than 2^31 bytes (2GB) and the distance from any instruction
2078	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2079	 than 2^31 bytes (2GB).
2080
2081	 The executable can be placed anywhere in the virtual address
2082	 space.
2083
2084	 sethi	%hh(symbol), %temp1
2085	 sethi	%lm(symbol), %temp2
2086	 or	%temp1, %hm(symbol), %temp3
2087	 sllx	%temp3, 32, %temp4
2088	 or	%temp4, %temp2, %temp5
2089	 or	%temp5, %lo(symbol), %reg  */
2090      if (temp)
2091	{
2092	  /* It is possible that one of the registers we got for operands[2]
2093	     might coincide with that of operands[0] (which is why we made
2094	     it TImode).  Pick the other one to use as our scratch.  */
2095	  if (rtx_equal_p (temp, op0))
2096	    {
2097	      gcc_assert (ti_temp);
2098	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2099	    }
2100	  temp1 = op0;
2101	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2102	  temp3 = op0;
2103	  temp4 = op0;
2104	  temp5 = op0;
2105	}
2106      else
2107	{
2108	  temp1 = gen_reg_rtx (DImode);
2109	  temp2 = gen_reg_rtx (DImode);
2110	  temp3 = gen_reg_rtx (DImode);
2111	  temp4 = gen_reg_rtx (DImode);
2112	  temp5 = gen_reg_rtx (DImode);
2113	}
2114
2115      emit_insn (gen_sethh (temp1, op1));
2116      emit_insn (gen_setlm (temp2, op1));
2117      emit_insn (gen_sethm (temp3, temp1, op1));
2118      emit_insn (gen_rtx_SET (temp4,
2119			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2120      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2121      emit_insn (gen_setlo (op0, temp5, op1));
2122      break;
2123
2124    case CM_EMBMEDANY:
2125      /* Old old old backwards-compatibility cruft here.
2126	 Essentially it is MEDLOW with a fixed 64-bit
2127	 virtual base added to all data segment addresses.
2128	 Text-segment stuff is computed like MEDANY; we can't
2129	 reuse the code above because the relocation knobs
2130	 look different.
2131
2132	 Data segment:	sethi	%hi(symbol), %temp1
2133			add	%temp1, EMBMEDANY_BASE_REG, %temp2
2134			or	%temp2, %lo(symbol), %reg  */
2135      if (data_segment_operand (op1, GET_MODE (op1)))
2136	{
2137	  if (temp)
2138	    {
2139	      temp1 = temp;  /* op0 is allowed.  */
2140	      temp2 = op0;
2141	    }
2142	  else
2143	    {
2144	      temp1 = gen_reg_rtx (DImode);
2145	      temp2 = gen_reg_rtx (DImode);
2146	    }
2147
2148	  emit_insn (gen_embmedany_sethi (temp1, op1));
2149	  emit_insn (gen_embmedany_brsum (temp2, temp1));
2150	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
2151	}
2152
2153      /* Text segment:	sethi	%uhi(symbol), %temp1
2154			sethi	%hi(symbol), %temp2
2155			or	%temp1, %ulo(symbol), %temp3
2156			sllx	%temp3, 32, %temp4
2157			or	%temp4, %temp2, %temp5
2158			or	%temp5, %lo(symbol), %reg  */
2159      else
2160	{
2161	  if (temp)
2162	    {
2163	      /* It is possible that one of the registers we got for operands[2]
2164		 might coincide with that of operands[0] (which is why we made
2165		 it TImode).  Pick the other one to use as our scratch.  */
2166	      if (rtx_equal_p (temp, op0))
2167		{
2168		  gcc_assert (ti_temp);
2169		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2170		}
2171	      temp1 = op0;
2172	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2173	      temp3 = op0;
2174	      temp4 = op0;
2175	      temp5 = op0;
2176	    }
2177	  else
2178	    {
2179	      temp1 = gen_reg_rtx (DImode);
2180	      temp2 = gen_reg_rtx (DImode);
2181	      temp3 = gen_reg_rtx (DImode);
2182	      temp4 = gen_reg_rtx (DImode);
2183	      temp5 = gen_reg_rtx (DImode);
2184	    }
2185
2186	  emit_insn (gen_embmedany_textuhi (temp1, op1));
2187	  emit_insn (gen_embmedany_texthi  (temp2, op1));
2188	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2189	  emit_insn (gen_rtx_SET (temp4,
2190				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2191	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2192	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
2193	}
2194      break;
2195
2196    default:
2197      gcc_unreachable ();
2198    }
2199}
2200
2201/* These avoid problems when cross compiling.  If we do not
2202   go through all this hair then the optimizer will see
2203   invalid REG_EQUAL notes or in some cases none at all.  */
2204static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2205static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2206static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2207static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2208
2209/* The optimizer is not to assume anything about exactly
2210   which bits are set for a HIGH; they are unspecified.
2211   Unfortunately this leads to many missed optimizations
2212   during CSE.  We mask out the non-HIGH bits so that the result
2213   matches a plain movdi, alleviating this problem.  */
2214static rtx
2215gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2216{
2217  return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2218}
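
/* For example (illustrative): gen_safe_HIGH64 (dest, 0x12345678)
   emits (set dest (const_int 0x12345400)) instead of a HIGH rtx.
   The insn still assembles to a single sethi, but CSE now knows the
   exact value of every bit of DEST.  */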
2219
2220static rtx
2221gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2222{
2223  return gen_rtx_SET (dest, GEN_INT (val));
2224}
2225
2226static rtx
2227gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2228{
2229  return gen_rtx_IOR (DImode, src, GEN_INT (val));
2230}
2231
2232static rtx
2233gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2234{
2235  return gen_rtx_XOR (DImode, src, GEN_INT (val));
2236}
2237
2238/* Worker routines for 64-bit constant formation on arch64.
2239   A key goal in these emissions is to create as many
2240   temp REGs as possible.  This makes it possible for
2241   half-built constants to be reused when similar values
2242   are required later on.
2243   Without doing this, the optimizer cannot see such
2244   opportunities.  */
2245
2246static void sparc_emit_set_const64_quick1 (rtx, rtx,
2247					   unsigned HOST_WIDE_INT, int);
2248
2249static void
2250sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2251			       unsigned HOST_WIDE_INT low_bits, int is_neg)
2252{
2253  unsigned HOST_WIDE_INT high_bits;
2254
2255  if (is_neg)
2256    high_bits = (~low_bits) & 0xffffffff;
2257  else
2258    high_bits = low_bits;
2259
2260  emit_insn (gen_safe_HIGH64 (temp, high_bits));
2261  if (!is_neg)
2262    {
2263      emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2264    }
2265  else
2266    {
2267      /* If we are XOR'ing with -1, then we should emit a one's complement
2268	 instead.  This way the combiner will notice logical operations
2269	 such as ANDN later on and substitute.  */
2270      if ((low_bits & 0x3ff) == 0x3ff)
2271	{
2272	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2273	}
2274      else
2275	{
2276	  emit_insn (gen_rtx_SET (op0,
2277				  gen_safe_XOR64 (temp,
2278						  (-(HOST_WIDE_INT)0x400
2279						   | (low_bits & 0x3ff)))));
2280	}
2281    }
2282}
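
/* An illustrative trace of the is_neg case: to load the constant
   0xffffffffffff8765, low_bits is 0xffff8765 and high_bits is its
   complement 0x0000789a, so we emit

	sethi	%hi(0x789a), %temp	! temp = 0x7800
	xor	%temp, -155, %op0	! -155 = -0x400 | 0x365

   where the sign-extending xor supplies all the high one bits:
   0x7800 ^ 0xffffffffffffff65 = 0xffffffffffff8765.  */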
2283
2284static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2285					   unsigned HOST_WIDE_INT, int);
2286
2287static void
2288sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2289			       unsigned HOST_WIDE_INT high_bits,
2290			       unsigned HOST_WIDE_INT low_immediate,
2291			       int shift_count)
2292{
2293  rtx temp2 = op0;
2294
2295  if ((high_bits & 0xfffffc00) != 0)
2296    {
2297      emit_insn (gen_safe_HIGH64 (temp, high_bits));
2298      if ((high_bits & ~0xfffffc00) != 0)
2299	emit_insn (gen_rtx_SET (op0,
2300				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2301      else
2302	temp2 = temp;
2303    }
2304  else
2305    {
2306      emit_insn (gen_safe_SET64 (temp, high_bits));
2307      temp2 = temp;
2308    }
2309
2310  /* Now shift it up into place.  */
2311  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2312					       GEN_INT (shift_count))));
2313
2314  /* If there is a low immediate part piece, finish up by
2315     putting that in as well.  */
2316  if (low_immediate != 0)
2317    emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2318}
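
/* For instance (illustrative), high_bits = 0x12345678,
   low_immediate = 0x2ab and shift_count = 32 produce

	sethi	%hi(0x12345400), %temp
	or	%temp, 0x278, %op0
	sllx	%op0, 32, %op0
	or	%op0, 0x2ab, %op0

   i.e. the 64-bit constant 0x12345678000002ab in four insns.  */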
2319
2320static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2321					    unsigned HOST_WIDE_INT);
2322
2323/* Full 64-bit constant decomposition.  Even though this is the
2324   'worst' case, we still optimize a few things away.  */
2325static void
2326sparc_emit_set_const64_longway (rtx op0, rtx temp,
2327				unsigned HOST_WIDE_INT high_bits,
2328				unsigned HOST_WIDE_INT low_bits)
2329{
2330  rtx sub_temp = op0;
2331
2332  if (can_create_pseudo_p ())
2333    sub_temp = gen_reg_rtx (DImode);
2334
2335  if ((high_bits & 0xfffffc00) != 0)
2336    {
2337      emit_insn (gen_safe_HIGH64 (temp, high_bits));
2338      if ((high_bits & ~0xfffffc00) != 0)
2339	emit_insn (gen_rtx_SET (sub_temp,
2340				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2341      else
2342	sub_temp = temp;
2343    }
2344  else
2345    {
2346      emit_insn (gen_safe_SET64 (temp, high_bits));
2347      sub_temp = temp;
2348    }
2349
2350  if (can_create_pseudo_p ())
2351    {
2352      rtx temp2 = gen_reg_rtx (DImode);
2353      rtx temp3 = gen_reg_rtx (DImode);
2354      rtx temp4 = gen_reg_rtx (DImode);
2355
2356      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2357						     GEN_INT (32))));
2358
2359      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2360      if ((low_bits & ~0xfffffc00) != 0)
2361	{
2362	  emit_insn (gen_rtx_SET (temp3,
2363				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2364	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2365	}
2366      else
2367	{
2368	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2369	}
2370    }
2371  else
2372    {
2373      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
2374      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
2375      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2376      int to_shift = 12;
2377
2378      /* We are in the middle of reload, so this is really
2379	 painful.  However, we still make an attempt to
2380	 avoid emitting truly stupid code.  */
2381      if (low1 != const0_rtx)
2382	{
2383	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2384						       GEN_INT (to_shift))));
2385	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2386	  sub_temp = op0;
2387	  to_shift = 12;
2388	}
2389      else
2390	{
2391	  to_shift += 12;
2392	}
2393      if (low2 != const0_rtx)
2394	{
2395	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2396						       GEN_INT (to_shift))));
2397	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2398	  sub_temp = op0;
2399	  to_shift = 8;
2400	}
2401      else
2402	{
2403	  to_shift += 8;
2404	}
2405      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2406						   GEN_INT (to_shift))));
2407      if (low3 != const0_rtx)
2408	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2409      /* phew...  */
2410    }
2411}
2412
2413/* Analyze a 64-bit constant for certain properties.  */
2414static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2415				    unsigned HOST_WIDE_INT,
2416				    int *, int *, int *);
2417
2418static void
2419analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2420			unsigned HOST_WIDE_INT low_bits,
2421			int *hbsp, int *lbsp, int *abbasp)
2422{
2423  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2424  int i;
2425
2426  lowest_bit_set = highest_bit_set = -1;
2427  i = 0;
2428  do
2429    {
2430      if ((lowest_bit_set == -1)
2431	  && ((low_bits >> i) & 1))
2432	lowest_bit_set = i;
2433      if ((highest_bit_set == -1)
2434	  && ((high_bits >> (32 - i - 1)) & 1))
2435	highest_bit_set = (64 - i - 1);
2436    }
2437  while (++i < 32
2438	 && ((highest_bit_set == -1)
2439	     || (lowest_bit_set == -1)));
2440  if (i == 32)
2441    {
2442      i = 0;
2443      do
2444	{
2445	  if ((lowest_bit_set == -1)
2446	      && ((high_bits >> i) & 1))
2447	    lowest_bit_set = i + 32;
2448	  if ((highest_bit_set == -1)
2449	      && ((low_bits >> (32 - i - 1)) & 1))
2450	    highest_bit_set = 32 - i - 1;
2451	}
2452      while (++i < 32
2453	     && ((highest_bit_set == -1)
2454		 || (lowest_bit_set == -1)));
2455    }
2456  /* If there are no bits set, this should have gone out
2457     as one instruction!  */
2458  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2459  all_bits_between_are_set = 1;
2460  for (i = lowest_bit_set; i <= highest_bit_set; i++)
2461    {
2462      if (i < 32)
2463	{
2464	  if ((low_bits & (1 << i)) != 0)
2465	    continue;
2466	}
2467      else
2468	{
2469	  if ((high_bits & (1 << (i - 32))) != 0)
2470	    continue;
2471	}
2472      all_bits_between_are_set = 0;
2473      break;
2474    }
2475  *hbsp = highest_bit_set;
2476  *lbsp = lowest_bit_set;
2477  *abbasp = all_bits_between_are_set;
2478}
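
/* Example (illustrative): for the constant 0x00000003ffffc000,
   high_bits = 0x00000003 and low_bits = 0xffffc000.  The scans above
   find lowest_bit_set = 14 and highest_bit_set = 33, and since every
   bit in between is set, *abbasp is 1.  */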
2479
2480static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2481
2482static int
2483const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2484		   unsigned HOST_WIDE_INT low_bits)
2485{
2486  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2487
2488  if (high_bits == 0
2489      || high_bits == 0xffffffff)
2490    return 1;
2491
2492  analyze_64bit_constant (high_bits, low_bits,
2493			  &highest_bit_set, &lowest_bit_set,
2494			  &all_bits_between_are_set);
2495
2496  if ((highest_bit_set == 63
2497       || lowest_bit_set == 0)
2498      && all_bits_between_are_set != 0)
2499    return 1;
2500
2501  if ((highest_bit_set - lowest_bit_set) < 21)
2502    return 1;
2503
2504  return 0;
2505}
2506
2507static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2508							unsigned HOST_WIDE_INT,
2509							int, int);
2510
2511static unsigned HOST_WIDE_INT
2512create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2513			  unsigned HOST_WIDE_INT low_bits,
2514			  int lowest_bit_set, int shift)
2515{
2516  HOST_WIDE_INT hi, lo;
2517
2518  if (lowest_bit_set < 32)
2519    {
2520      lo = (low_bits >> lowest_bit_set) << shift;
2521      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2522    }
2523  else
2524    {
2525      lo = 0;
2526      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2527    }
2528  gcc_assert (! (hi & lo));
2529  return (hi | lo);
2530}
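
/* Continuing the example above (illustrative): with lowest_bit_set
   = 14 and shift = 10, create_simple_focus_bits turns
   0x00000003ffffc000 into the sethi-able value 0x3ffffc00, and the
   caller below then emits

	sethi	%hi(0x3ffffc00), %temp
	sllx	%temp, 4, %op0		! 14 - 10 = 4

   rebuilding the constant in two insns.  */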
2531
2532/* Here we are sure to be arch64 and this is an integer constant
2533   being loaded into a register.  Emit the most efficient
2534   insn sequence possible.  Detection of all the 1-insn cases
2535   has been done already.  */
2536static void
2537sparc_emit_set_const64 (rtx op0, rtx op1)
2538{
2539  unsigned HOST_WIDE_INT high_bits, low_bits;
2540  int lowest_bit_set, highest_bit_set;
2541  int all_bits_between_are_set;
2542  rtx temp = 0;
2543
2544  /* Sanity check that we know what we are working with.  */
2545  gcc_assert (TARGET_ARCH64
2546	      && (GET_CODE (op0) == SUBREG
2547		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2548
2549  if (! can_create_pseudo_p ())
2550    temp = op0;
2551
2552  if (GET_CODE (op1) != CONST_INT)
2553    {
2554      sparc_emit_set_symbolic_const64 (op0, op1, temp);
2555      return;
2556    }
2557
2558  if (! temp)
2559    temp = gen_reg_rtx (DImode);
2560
2561  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2562  low_bits = (INTVAL (op1) & 0xffffffff);
2563
2564  /* low_bits	bits 0  --> 31
2565     high_bits	bits 32 --> 63  */
2566
2567  analyze_64bit_constant (high_bits, low_bits,
2568			  &highest_bit_set, &lowest_bit_set,
2569			  &all_bits_between_are_set);
2570
2571  /* First try for a 2-insn sequence.  */
2572
2573  /* These situations are preferred because the optimizer can
2574   * do more things with them:
2575   * 1) mov	-1, %reg
2576   *    sllx	%reg, shift, %reg
2577   * 2) mov	-1, %reg
2578   *    srlx	%reg, shift, %reg
2579   * 3) mov	some_small_const, %reg
2580   *    sllx	%reg, shift, %reg
2581   */
2582  if (((highest_bit_set == 63
2583	|| lowest_bit_set == 0)
2584       && all_bits_between_are_set != 0)
2585      || ((highest_bit_set - lowest_bit_set) < 12))
2586    {
2587      HOST_WIDE_INT the_const = -1;
2588      int shift = lowest_bit_set;
2589
2590      if ((highest_bit_set != 63
2591	   && lowest_bit_set != 0)
2592	  || all_bits_between_are_set == 0)
2593	{
2594	  the_const =
2595	    create_simple_focus_bits (high_bits, low_bits,
2596				      lowest_bit_set, 0);
2597	}
2598      else if (lowest_bit_set == 0)
2599	shift = -(63 - highest_bit_set);
2600
2601      gcc_assert (SPARC_SIMM13_P (the_const));
2602      gcc_assert (shift != 0);
2603
2604      emit_insn (gen_safe_SET64 (temp, the_const));
2605      if (shift > 0)
2606	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2607						     GEN_INT (shift))));
2608      else if (shift < 0)
2609	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2610						       GEN_INT (-shift))));
2611      return;
2612    }
2613
2614  /* Now a range of 22 or fewer bits set somewhere.
2615   * 1) sethi	%hi(focus_bits), %reg
2616   *    sllx	%reg, shift, %reg
2617   * 2) sethi	%hi(focus_bits), %reg
2618   *    srlx	%reg, shift, %reg
2619   */
2620  if ((highest_bit_set - lowest_bit_set) < 21)
2621    {
2622      unsigned HOST_WIDE_INT focus_bits =
2623	create_simple_focus_bits (high_bits, low_bits,
2624				  lowest_bit_set, 10);
2625
2626      gcc_assert (SPARC_SETHI_P (focus_bits));
2627      gcc_assert (lowest_bit_set != 10);
2628
2629      emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2630
2631      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
2632      if (lowest_bit_set < 10)
2633	emit_insn (gen_rtx_SET (op0,
2634				gen_rtx_LSHIFTRT (DImode, temp,
2635						  GEN_INT (10 - lowest_bit_set))));
2636      else if (lowest_bit_set > 10)
2637	emit_insn (gen_rtx_SET (op0,
2638				gen_rtx_ASHIFT (DImode, temp,
2639						GEN_INT (lowest_bit_set - 10))));
2640      return;
2641    }
2642
2643  /* 1) sethi	%hi(low_bits), %reg
2644   *    or	%reg, %lo(low_bits), %reg
2645   * 2) sethi	%hi(~low_bits), %reg
2646   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2647   */
2648  if (high_bits == 0
2649      || high_bits == 0xffffffff)
2650    {
2651      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2652				     (high_bits == 0xffffffff));
2653      return;
2654    }
2655
2656  /* Now, try 3-insn sequences.  */
2657
2658  /* 1) sethi	%hi(high_bits), %reg
2659   *    or	%reg, %lo(high_bits), %reg
2660   *    sllx	%reg, 32, %reg
2661   */
2662  if (low_bits == 0)
2663    {
2664      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2665      return;
2666    }
2667
2668  /* We may be able to do something quick
2669     when the constant is negated, so try that.  */
2670  if (const64_is_2insns ((~high_bits) & 0xffffffff,
2671			 (~low_bits) & 0xfffffc00))
2672    {
2673      /* NOTE: The trailing bits get XOR'd so we need the
2674	 non-negated bits, not the negated ones.  */
2675      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2676
2677      if ((((~high_bits) & 0xffffffff) == 0
2678	   && ((~low_bits) & 0x80000000) == 0)
2679	  || (((~high_bits) & 0xffffffff) == 0xffffffff
2680	      && ((~low_bits) & 0x80000000) != 0))
2681	{
2682	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2683
2684	  if ((SPARC_SETHI_P (fast_int)
2685	       && (~high_bits & 0xffffffff) == 0)
2686	      || SPARC_SIMM13_P (fast_int))
2687	    emit_insn (gen_safe_SET64 (temp, fast_int));
2688	  else
2689	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2690	}
2691      else
2692	{
2693	  rtx negated_const;
2694	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2695				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2696	  sparc_emit_set_const64 (temp, negated_const);
2697	}
2698
2699      /* If we are XOR'ing with -1, then we should emit a one's complement
2700	 instead.  This way the combiner will notice logical operations
2701	 such as ANDN later on and substitute.  */
2702      if (trailing_bits == 0x3ff)
2703	{
2704	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2705	}
2706      else
2707	{
2708	  emit_insn (gen_rtx_SET (op0,
2709				  gen_safe_XOR64 (temp,
2710						  (-0x400 | trailing_bits))));
2711	}
2712      return;
2713    }
2714
2715  /* 1) sethi	%hi(xxx), %reg
2716   *    or	%reg, %lo(xxx), %reg
2717   *	sllx	%reg, yyy, %reg
2718   *
2719   * ??? This is just a generalized version of the low_bits==0
2720   * thing above, FIXME...
2721   */
2722  if ((highest_bit_set - lowest_bit_set) < 32)
2723    {
2724      unsigned HOST_WIDE_INT focus_bits =
2725	create_simple_focus_bits (high_bits, low_bits,
2726				  lowest_bit_set, 0);
2727
2728      /* We can't get here in this state.  */
2729      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2730
2731      /* So what we know is that the set bits straddle the
2732	 middle of the 64-bit word.  */
2733      sparc_emit_set_const64_quick2 (op0, temp,
2734				     focus_bits, 0,
2735				     lowest_bit_set);
2736      return;
2737    }
2738
2739  /* 1) sethi	%hi(high_bits), %reg
2740   *    or	%reg, %lo(high_bits), %reg
2741   *    sllx	%reg, 32, %reg
2742   *	or	%reg, low_bits, %reg
2743   */
2744  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2745    {
2746      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2747      return;
2748    }
2749
2750  /* The easiest way when all else fails, is full decomposition.  */
2751  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2752}
2753
2754/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2755   return the mode to be used for the comparison.  For floating-point,
2756   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
2757   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
2758   processing is needed.  */
2759
2760machine_mode
2761select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2762{
2763  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2764    {
2765      switch (op)
2766	{
2767	case EQ:
2768	case NE:
2769	case UNORDERED:
2770	case ORDERED:
2771	case UNLT:
2772	case UNLE:
2773	case UNGT:
2774	case UNGE:
2775	case UNEQ:
2776	case LTGT:
2777	  return CCFPmode;
2778
2779	case LT:
2780	case LE:
2781	case GT:
2782	case GE:
2783	  return CCFPEmode;
2784
2785	default:
2786	  gcc_unreachable ();
2787	}
2788    }
2789  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2790	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2791    {
2792      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2793	return CCX_NOOVmode;
2794      else
2795	return CC_NOOVmode;
2796    }
2797  else
2798    {
2799      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2800	return CCXmode;
2801      else
2802	return CCmode;
2803    }
2804}
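
/* For example (illustrative): a comparison of (plus:SI a b) against
   zero yields CC_NOOVmode (CCX_NOOVmode for DImode on 64-bit),
   because the V bit computed by addcc does not describe such a
   comparison, so only conditions that ignore it may be tested; a
   plain register comparison yields CCmode or CCXmode.  */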
2805
2806/* Emit the compare insn and return the CC reg for a CODE comparison
2807   with operands X and Y.  */
2808
2809static rtx
2810gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2811{
2812  machine_mode mode;
2813  rtx cc_reg;
2814
2815  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2816    return x;
2817
2818  mode = SELECT_CC_MODE (code, x, y);
2819
2820  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2821     fcc regs (cse can't tell they're really call clobbered regs and will
2822     remove a duplicate comparison even if there is an intervening function
2823     call - it will then try to reload the cc reg via an int reg which is why
2824     we need the movcc patterns).  It is possible to provide the movcc
2825     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2826     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2827     to tell cse that CCFPE mode registers (even pseudos) are call
2828     clobbered.  */
2829
2830  /* ??? This is an experiment.  Rather than making changes to cse which may
2831     or may not be easy/clean, we do our own cse.  This is possible because
2832     we will generate hard registers.  Cse knows they're call clobbered (it
2833     doesn't know the same thing about pseudos). If we guess wrong, no big
2834     deal, but if we win, great!  */
2835
2836  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2837#if 1 /* experiment */
2838    {
2839      int reg;
2840      /* We cycle through the registers to ensure they're all exercised.  */
2841      static int next_fcc_reg = 0;
2842      /* Previous x,y for each fcc reg.  */
2843      static rtx prev_args[4][2];
2844
2845      /* Scan prev_args for x,y.  */
2846      for (reg = 0; reg < 4; reg++)
2847	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2848	  break;
2849      if (reg == 4)
2850	{
2851	  reg = next_fcc_reg;
2852	  prev_args[reg][0] = x;
2853	  prev_args[reg][1] = y;
2854	  next_fcc_reg = (next_fcc_reg + 1) & 3;
2855	}
2856      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2857    }
2858#else
2859    cc_reg = gen_reg_rtx (mode);
2860#endif /* ! experiment */
2861  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2862    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2863  else
2864    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2865
2866  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
2867     will only result in an unrecognizable insn, so there is no point in asserting.  */
2868  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2869
2870  return cc_reg;
2871}
2872
2873
2874/* Emit the compare insn and return the CC reg for the comparison in CMP.  */
2875
2876rtx
2877gen_compare_reg (rtx cmp)
2878{
2879  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2880}
2881
2882/* This function is used for v9 only.
2883   DEST is the target of the Scc insn.
2884   CODE is the code for an Scc's comparison.
2885   X and Y are the values we compare.
2886
2887   This function is needed to turn
2888
2889	   (set (reg:SI 110)
2890	       (gt (reg:CCX 100 %icc)
2891	           (const_int 0)))
2892   into
2893	   (set (reg:SI 110)
2894	       (gt:DI (reg:CCX 100 %icc)
2895	           (const_int 0)))
2896
2897   I.e., the instruction recognizer needs to see the mode of the comparison to
2898   find the right instruction. We could use "gt:DI" right in the
2899   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2900
2901static int
2902gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2903{
2904  if (! TARGET_ARCH64
2905      && (GET_MODE (x) == DImode
2906	  || GET_MODE (dest) == DImode))
2907    return 0;
2908
2909  /* Try to use the movrCC insns.  */
2910  if (TARGET_ARCH64
2911      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2912      && y == const0_rtx
2913      && v9_regcmp_p (compare_code))
2914    {
2915      rtx op0 = x;
2916      rtx temp;
2917
2918      /* Special case for op0 != 0.  This can be done with one instruction if
2919	 dest == x.  */
2920
2921      if (compare_code == NE
2922	  && GET_MODE (dest) == DImode
2923	  && rtx_equal_p (op0, dest))
2924	{
2925	  emit_insn (gen_rtx_SET (dest,
2926			      gen_rtx_IF_THEN_ELSE (DImode,
2927				       gen_rtx_fmt_ee (compare_code, DImode,
2928						       op0, const0_rtx),
2929				       const1_rtx,
2930				       dest)));
2931	  return 1;
2932	}
2933
2934      if (reg_overlap_mentioned_p (dest, op0))
2935	{
2936	  /* Handle the case where dest == x.
2937	     We "early clobber" the result.  */
2938	  op0 = gen_reg_rtx (GET_MODE (x));
2939	  emit_move_insn (op0, x);
2940	}
2941
2942      emit_insn (gen_rtx_SET (dest, const0_rtx));
2943      if (GET_MODE (op0) != DImode)
2944	{
2945	  temp = gen_reg_rtx (DImode);
2946	  convert_move (temp, op0, 0);
2947	}
2948      else
2949	temp = op0;
2950      emit_insn (gen_rtx_SET (dest,
2951			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2952				   gen_rtx_fmt_ee (compare_code, DImode,
2953						   temp, const0_rtx),
2954				   const1_rtx,
2955				   dest)));
2956      return 1;
2957    }
2958  else
2959    {
2960      x = gen_compare_reg_1 (compare_code, x, y);
2961      y = const0_rtx;
2962
2963      gcc_assert (GET_MODE (x) != CC_NOOVmode
2964		  && GET_MODE (x) != CCX_NOOVmode);
2965
2966      emit_insn (gen_rtx_SET (dest, const0_rtx));
2967      emit_insn (gen_rtx_SET (dest,
2968			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2969				   gen_rtx_fmt_ee (compare_code,
2970						   GET_MODE (x), x, y),
2971				    const1_rtx, dest)));
2972      return 1;
2973    }
2974}
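
/* As an illustration (not from the original sources): for
   "dest = (dest != 0)" in DImode, the special case above emits the
   single insn

	movrnz	%dest, 1, %dest

   whereas the general path must first zero DEST and then emit a
   movr/movcc that conditionally sets it to 1.  */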
2975
2976
2977/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
2978   without jumps using the addx/subx instructions.  */
2979
2980bool
2981emit_scc_insn (rtx operands[])
2982{
2983  rtx tem;
2984  rtx x;
2985  rtx y;
2986  enum rtx_code code;
2987
2988  /* The quad-word fp compare library routines all return nonzero to indicate
2989     true, which is different from the equivalent libgcc routines, so we must
2990     handle them specially here.  */
2991  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2992    {
2993      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2994					      GET_CODE (operands[1]));
2995      operands[2] = XEXP (operands[1], 0);
2996      operands[3] = XEXP (operands[1], 1);
2997    }
2998
2999  code = GET_CODE (operands[1]);
3000  x = operands[2];
3001  y = operands[3];
3002
3003  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3004     more applications).  The exception to this is "reg != 0" which can
3005     be done in one instruction on v9 (so we do it).  */
3006  if (code == EQ)
3007    {
3008      if (GET_MODE (x) == SImode)
3009        {
3010	  rtx pat;
3011	  if (TARGET_ARCH64)
3012	    pat = gen_seqsidi_special (operands[0], x, y);
3013	  else
3014	    pat = gen_seqsisi_special (operands[0], x, y);
3015          emit_insn (pat);
3016          return true;
3017        }
3018      else if (GET_MODE (x) == DImode)
3019        {
3020	  rtx pat = gen_seqdi_special (operands[0], x, y);
3021          emit_insn (pat);
3022          return true;
3023        }
3024    }
3025
3026  if (code == NE)
3027    {
3028      if (GET_MODE (x) == SImode)
3029        {
3030          rtx pat;
3031	  if (TARGET_ARCH64)
3032	    pat = gen_snesidi_special (operands[0], x, y);
3033	  else
3034	    pat = gen_snesisi_special (operands[0], x, y);
3035          emit_insn (pat);
3036          return true;
3037        }
3038      else if (GET_MODE (x) == DImode)
3039        {
3040	  rtx pat;
3041	  if (TARGET_VIS3)
3042	    pat = gen_snedi_special_vis3 (operands[0], x, y);
3043	  else
3044	    pat = gen_snedi_special (operands[0], x, y);
3045          emit_insn (pat);
3046          return true;
3047        }
3048    }
3049
3050  if (TARGET_V9
3051      && TARGET_ARCH64
3052      && GET_MODE (x) == DImode
3053      && !(TARGET_VIS3
3054	   && (code == GTU || code == LTU))
3055      && gen_v9_scc (operands[0], code, x, y))
3056    return true;
3057
3058  /* We can do LTU and GEU using the addx/subx instructions too.  And
3059     for GTU/LEU, if both operands are registers, swap them and fall
3060     back to the easy case.  */
3061  if (code == GTU || code == LEU)
3062    {
3063      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3064          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3065        {
3066          tem = x;
3067          x = y;
3068          y = tem;
3069          code = swap_condition (code);
3070        }
3071    }
3072
3073  if (code == LTU
3074      || (!TARGET_VIS3 && code == GEU))
3075    {
3076      emit_insn (gen_rtx_SET (operands[0],
3077			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3078					      gen_compare_reg_1 (code, x, y),
3079					      const0_rtx)));
3080      return true;
3081    }
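
  /* Illustrative expansion (assumed, not from the original sources):
     for "dest = (x <u y)" the sequence is

	subcc	%x, %y, %g0	! set carry on unsigned borrow
	addx	%g0, 0, %dest	! dest = carry, i.e. x <u y

     and GEU uses subx to compute 1 - carry instead.  */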
3082
3083  /* All the possibilities to use addx/subx-based sequences have been
3084     exhausted; try for a 3-instruction sequence using v9 conditional
3085     moves.  */
3086  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3087    return true;
3088
3089  /* Nope, do branches.  */
3090  return false;
3091}
3092
3093/* Emit a conditional jump insn for the v9 architecture using comparison code
3094   CODE and jump target LABEL.
3095   This function exists to take advantage of the v9 brxx insns.  */
3096
3097static void
3098emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3099{
3100  emit_jump_insn (gen_rtx_SET (pc_rtx,
3101			   gen_rtx_IF_THEN_ELSE (VOIDmode,
3102				    gen_rtx_fmt_ee (code, GET_MODE (op0),
3103						    op0, const0_rtx),
3104				    gen_rtx_LABEL_REF (VOIDmode, label),
3105				    pc_rtx)));
3106}
3107
3108/* Emit a conditional jump insn for the UA2011 architecture using
3109   comparison code CODE and jump target LABEL.  This function exists
3110   to take advantage of the UA2011 Compare and Branch insns.  */
3111
3112static void
3113emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3114{
3115  rtx if_then_else;
3116
3117  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3118				       gen_rtx_fmt_ee(code, GET_MODE(op0),
3119						      op0, op1),
3120				       gen_rtx_LABEL_REF (VOIDmode, label),
3121				       pc_rtx);
3122
3123  emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3124}
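
/* For example (illustrative): "if (x != 5) goto label" with x in
   SImode becomes the single compare-and-branch

	cwbne	%x, 5, label

   which has no delay slot; the immediate must fit in the signed
   5-bit range -16..15, which SPARC_SIMM5_P checks in the caller
   below.  */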
3125
3126void
3127emit_conditional_branch_insn (rtx operands[])
3128{
3129  /* The quad-word fp compare library routines all return nonzero to indicate
3130     true, which is different from the equivalent libgcc routines, so we must
3131     handle them specially here.  */
3132  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3133    {
3134      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3135					      GET_CODE (operands[0]));
3136      operands[1] = XEXP (operands[0], 0);
3137      operands[2] = XEXP (operands[0], 1);
3138    }
3139
3140  /* If we can tell early on that the comparison is against a constant
3141     that won't fit in the 5-bit signed immediate field of a cbcond,
3142     use one of the other v9 conditional branch sequences.  */
3143  if (TARGET_CBCOND
3144      && GET_CODE (operands[1]) == REG
3145      && (GET_MODE (operands[1]) == SImode
3146	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3147      && (GET_CODE (operands[2]) != CONST_INT
3148	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
3149    {
3150      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3151      return;
3152    }
3153
3154  if (TARGET_ARCH64 && operands[2] == const0_rtx
3155      && GET_CODE (operands[1]) == REG
3156      && GET_MODE (operands[1]) == DImode)
3157    {
3158      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3159      return;
3160    }
3161
3162  operands[1] = gen_compare_reg (operands[0]);
3163  operands[2] = const0_rtx;
3164  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3165				operands[1], operands[2]);
3166  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3167				  operands[3]));
3168}
3169
3170
3171/* Generate a DFmode part of a hard TFmode register.
3172   REG is the TFmode hard register, LOW is 1 for the
3173   low 64 bits of the register and 0 otherwise.  */
3175rtx
3176gen_df_reg (rtx reg, int low)
3177{
3178  int regno = REGNO (reg);
3179
3180  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3181    regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3182  return gen_rtx_REG (DFmode, regno);
3183}
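
/* Example (illustrative): for a TFmode value in %f4-%f7 on
   big-endian SPARC, gen_df_reg (reg, 0) returns %f4 (the high
   double) and gen_df_reg (reg, 1) returns %f6 (the low double).
   For integer registers on 64-bit, each register already holds 64
   bits, hence the offset of 1 instead of 2.  */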
3184
3185/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
3186   Unlike normal calls, TFmode operands are passed by reference.  It is
3187   assumed that no more than 3 operands are required.  */
3188
3189static void
3190emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3191{
3192  rtx ret_slot = NULL, arg[3], func_sym;
3193  int i;
3194
3195  /* We only expect to be called for conversions, unary, and binary ops.  */
3196  gcc_assert (nargs == 2 || nargs == 3);
3197
3198  for (i = 0; i < nargs; ++i)
3199    {
3200      rtx this_arg = operands[i];
3201      rtx this_slot;
3202
3203      /* TFmode arguments and return values are passed by reference.  */
3204      if (GET_MODE (this_arg) == TFmode)
3205	{
3206	  int force_stack_temp;
3207
3208	  force_stack_temp = 0;
3209	  if (TARGET_BUGGY_QP_LIB && i == 0)
3210	    force_stack_temp = 1;
3211
3212	  if (GET_CODE (this_arg) == MEM
3213	      && ! force_stack_temp)
3214	    {
3215	      tree expr = MEM_EXPR (this_arg);
3216	      if (expr)
3217		mark_addressable (expr);
3218	      this_arg = XEXP (this_arg, 0);
3219	    }
3220	  else if (CONSTANT_P (this_arg)
3221		   && ! force_stack_temp)
3222	    {
3223	      this_slot = force_const_mem (TFmode, this_arg);
3224	      this_arg = XEXP (this_slot, 0);
3225	    }
3226	  else
3227	    {
3228	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3229
3230	      /* Operand 0 is the return value.  We'll copy it out later.  */
3231	      if (i > 0)
3232		emit_move_insn (this_slot, this_arg);
3233	      else
3234		ret_slot = this_slot;
3235
3236	      this_arg = XEXP (this_slot, 0);
3237	    }
3238	}
3239
3240      arg[i] = this_arg;
3241    }
3242
3243  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3244
3245  if (GET_MODE (operands[0]) == TFmode)
3246    {
3247      if (nargs == 2)
3248	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3249			   arg[0], GET_MODE (arg[0]),
3250			   arg[1], GET_MODE (arg[1]));
3251      else
3252	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3253			   arg[0], GET_MODE (arg[0]),
3254			   arg[1], GET_MODE (arg[1]),
3255			   arg[2], GET_MODE (arg[2]));
3256
3257      if (ret_slot)
3258	emit_move_insn (operands[0], ret_slot);
3259    }
3260  else
3261    {
3262      rtx ret;
3263
3264      gcc_assert (nargs == 2);
3265
3266      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3267				     GET_MODE (operands[0]), 1,
3268				     arg[1], GET_MODE (arg[1]));
3269
3270      if (ret != operands[0])
3271	emit_move_insn (operands[0], ret);
3272    }
3273}
3274
3275/* Expand soft-float TFmode calls to sparc abi routines.  */
3276
3277static void
3278emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3279{
3280  const char *func;
3281
3282  switch (code)
3283    {
3284    case PLUS:
3285      func = "_Qp_add";
3286      break;
3287    case MINUS:
3288      func = "_Qp_sub";
3289      break;
3290    case MULT:
3291      func = "_Qp_mul";
3292      break;
3293    case DIV:
3294      func = "_Qp_div";
3295      break;
3296    default:
3297      gcc_unreachable ();
3298    }
3299
3300  emit_soft_tfmode_libcall (func, 3, operands);
3301}
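
/* For instance (assuming the standard SPARC V9 quad ABI), a TFmode
   addition "c = a + b" expands into the equivalent of

	_Qp_add (&c, &a, &b);

   with all three TFmode values passed by reference, which is what
   emit_soft_tfmode_libcall arranges above.  */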
3302
3303static void
3304emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3305{
3306  const char *func;
3307
3308  gcc_assert (code == SQRT);
3309  func = "_Qp_sqrt";
3310
3311  emit_soft_tfmode_libcall (func, 2, operands);
3312}
3313
3314static void
3315emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3316{
3317  const char *func;
3318
3319  switch (code)
3320    {
3321    case FLOAT_EXTEND:
3322      switch (GET_MODE (operands[1]))
3323	{
3324	case SFmode:
3325	  func = "_Qp_stoq";
3326	  break;
3327	case DFmode:
3328	  func = "_Qp_dtoq";
3329	  break;
3330	default:
3331	  gcc_unreachable ();
3332	}
3333      break;
3334
3335    case FLOAT_TRUNCATE:
3336      switch (GET_MODE (operands[0]))
3337	{
3338	case SFmode:
3339	  func = "_Qp_qtos";
3340	  break;
3341	case DFmode:
3342	  func = "_Qp_qtod";
3343	  break;
3344	default:
3345	  gcc_unreachable ();
3346	}
3347      break;
3348
3349    case FLOAT:
3350      switch (GET_MODE (operands[1]))
3351	{
3352	case SImode:
3353	  func = "_Qp_itoq";
3354	  if (TARGET_ARCH64)
3355	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3356	  break;
3357	case DImode:
3358	  func = "_Qp_xtoq";
3359	  break;
3360	default:
3361	  gcc_unreachable ();
3362	}
3363      break;
3364
3365    case UNSIGNED_FLOAT:
3366      switch (GET_MODE (operands[1]))
3367	{
3368	case SImode:
3369	  func = "_Qp_uitoq";
3370	  if (TARGET_ARCH64)
3371	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3372	  break;
3373	case DImode:
3374	  func = "_Qp_uxtoq";
3375	  break;
3376	default:
3377	  gcc_unreachable ();
3378	}
3379      break;
3380
3381    case FIX:
3382      switch (GET_MODE (operands[0]))
3383	{
3384	case SImode:
3385	  func = "_Qp_qtoi";
3386	  break;
3387	case DImode:
3388	  func = "_Qp_qtox";
3389	  break;
3390	default:
3391	  gcc_unreachable ();
3392	}
3393      break;
3394
3395    case UNSIGNED_FIX:
3396      switch (GET_MODE (operands[0]))
3397	{
3398	case SImode:
3399	  func = "_Qp_qtoui";
3400	  break;
3401	case DImode:
3402	  func = "_Qp_qtoux";
3403	  break;
3404	default:
3405	  gcc_unreachable ();
3406	}
3407      break;
3408
3409    default:
3410      gcc_unreachable ();
3411    }
3412
3413  emit_soft_tfmode_libcall (func, 2, operands);
3414}
3415
3416/* Expand a hard-float tfmode operation.  All arguments must be in
3417   registers.  */
3418
3419static void
3420emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3421{
3422  rtx op, dest;
3423
3424  if (GET_RTX_CLASS (code) == RTX_UNARY)
3425    {
3426      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3427      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3428    }
3429  else
3430    {
3431      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3432      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3433      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3434			   operands[1], operands[2]);
3435    }
3436
3437  if (register_operand (operands[0], VOIDmode))
3438    dest = operands[0];
3439  else
3440    dest = gen_reg_rtx (GET_MODE (operands[0]));
3441
3442  emit_insn (gen_rtx_SET (dest, op));
3443
3444  if (dest != operands[0])
3445    emit_move_insn (operands[0], dest);
3446}
3447
3448void
3449emit_tfmode_binop (enum rtx_code code, rtx *operands)
3450{
3451  if (TARGET_HARD_QUAD)
3452    emit_hard_tfmode_operation (code, operands);
3453  else
3454    emit_soft_tfmode_binop (code, operands);
3455}
3456
3457void
3458emit_tfmode_unop (enum rtx_code code, rtx *operands)
3459{
3460  if (TARGET_HARD_QUAD)
3461    emit_hard_tfmode_operation (code, operands);
3462  else
3463    emit_soft_tfmode_unop (code, operands);
3464}
3465
3466void
3467emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3468{
3469  if (TARGET_HARD_QUAD)
3470    emit_hard_tfmode_operation (code, operands);
3471  else
3472    emit_soft_tfmode_cvt (code, operands);
3473}
3474
3475/* Return nonzero if a branch/jump/call instruction will be emitting a
3476   nop into its delay slot.  */
3477
3478int
3479empty_delay_slot (rtx_insn *insn)
3480{
3481  rtx seq;
3482
3483  /* If there is no previous instruction (should not happen), return true.  */
3484  if (PREV_INSN (insn) == NULL)
3485    return 1;
3486
3487  seq = NEXT_INSN (PREV_INSN (insn));
3488  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3489    return 0;
3490
3491  return 1;
3492}
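
/* Illustrative example for the check above: when reorg has filled the delay
   slot, the branch and its delay insn are wrapped together in a SEQUENCE,
   e.g.

     call	foo
      mov	1, %o0		! filled delay slot

   whereas an empty slot means that final must emit the nop explicitly:

     call	foo
      nop  */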
3493
3494/* Return nonzero if we should emit a nop after a cbcond instruction.
3495   The cbcond instruction does not have a delay slot; however, there is
3496   a severe performance penalty if a control transfer appears right
3497   after a cbcond.  Therefore we emit a nop when we detect this
3498   situation.  */
3499
3500int
3501emit_cbcond_nop (rtx insn)
3502{
3503  rtx next = next_active_insn (insn);
3504
3505  if (!next)
3506    return 1;
3507
3508  if (NONJUMP_INSN_P (next)
3509      && GET_CODE (PATTERN (next)) == SEQUENCE)
3510    next = XVECEXP (PATTERN (next), 0, 0);
3511  else if (CALL_P (next)
3512	   && GET_CODE (PATTERN (next)) == PARALLEL)
3513    {
3514      rtx delay = XVECEXP (PATTERN (next), 0, 1);
3515
3516      if (GET_CODE (delay) == RETURN)
3517	{
3518	  /* It's a sibling call.  Do not emit the nop if we're going
3519	     to emit something other than the jump itself as the first
3520	     instruction of the sibcall sequence.  */
3521	  if (sparc_leaf_function_p || TARGET_FLAT)
3522	    return 0;
3523	}
3524    }
3525
3526  if (NONJUMP_INSN_P (next))
3527    return 0;
3528
3529  return 1;
3530}
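
/* Illustrative example of the situation described above:

     cwbne	%o0, %o1, .L1	! cbcond instruction
     nop			! emitted to avoid the penalty
     call	foo		! control transfer that triggered the nop  */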
3531
3532/* Return nonzero if TRIAL can go into the call delay slot.  */
3533
3534int
3535eligible_for_call_delay (rtx_insn *trial)
3536{
3537  rtx pat;
3538
3539  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3540    return 0;
3541
3542  /* Binutils allows
3543       call __tls_get_addr, %tgd_call (foo)
3544        add %l7, %o0, %o0, %tgd_add (foo)
3545     while Sun as/ld does not.  */
3546  if (TARGET_GNU_TLS || !TARGET_TLS)
3547    return 1;
3548
3549  pat = PATTERN (trial);
3550
3551  /* We must reject tgd_add{32|64}, i.e.
3552       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3553     and tldm_add{32|64}, i.e.
3554       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3555     for Sun as/ld.  */
3556  if (GET_CODE (pat) == SET
3557      && GET_CODE (SET_SRC (pat)) == PLUS)
3558    {
3559      rtx unspec = XEXP (SET_SRC (pat), 1);
3560
3561      if (GET_CODE (unspec) == UNSPEC
3562	  && (XINT (unspec, 1) == UNSPEC_TLSGD
3563	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
3564	return 0;
3565    }
3566
3567  return 1;
3568}
3569
3570/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3571   instruction.  RETURN_P is true if the v9 variant 'return' is to be
3572   considered in the test too.
3573
3574   TRIAL must be a SET whose destination is a REG appropriate for the
3575   'restore' instruction or, if RETURN_P is true, for the 'return'
3576   instruction.  */
3577
3578static int
3579eligible_for_restore_insn (rtx trial, bool return_p)
3580{
3581  rtx pat = PATTERN (trial);
3582  rtx src = SET_SRC (pat);
3583  bool src_is_freg = false;
3584  rtx src_reg;
3585
3586  /* Since we now can do moves between float and integer registers when
3587     VIS3 is enabled, we have to catch this case.  We can allow such
3588     moves when doing a 'return' however.  */
3589  src_reg = src;
3590  if (GET_CODE (src_reg) == SUBREG)
3591    src_reg = SUBREG_REG (src_reg);
3592  if (GET_CODE (src_reg) == REG
3593      && SPARC_FP_REG_P (REGNO (src_reg)))
3594    src_is_freg = true;
3595
3596  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
3597  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3598      && arith_operand (src, GET_MODE (src))
3599      && ! src_is_freg)
3600    {
3601      if (TARGET_ARCH64)
3602        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3603      else
3604        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3605    }
3606
3607  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
3608  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3609	   && arith_double_operand (src, GET_MODE (src))
3610	   && ! src_is_freg)
3611    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3612
3613  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
3614  else if (! TARGET_FPU && register_operand (src, SFmode))
3615    return 1;
3616
3617  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
3618  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3619    return 1;
3620
3621  /* If we have the 'return' instruction, anything that does not use
3622     local or output registers and can go into a delay slot wins.  */
3623  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3624    return 1;
3625
3626  /* The 'restore src1,src2,dest' pattern for SImode.  */
3627  else if (GET_CODE (src) == PLUS
3628	   && register_operand (XEXP (src, 0), SImode)
3629	   && arith_operand (XEXP (src, 1), SImode))
3630    return 1;
3631
3632  /* The 'restore src1,src2,dest' pattern for DImode.  */
3633  else if (GET_CODE (src) == PLUS
3634	   && register_operand (XEXP (src, 0), DImode)
3635	   && arith_double_operand (XEXP (src, 1), DImode))
3636    return 1;
3637
3638  /* The 'restore src1,%lo(src2),dest' pattern.  */
3639  else if (GET_CODE (src) == LO_SUM
3640	   && ! TARGET_CM_MEDMID
3641	   && ((register_operand (XEXP (src, 0), SImode)
3642	        && immediate_operand (XEXP (src, 1), SImode))
3643	       || (TARGET_ARCH64
3644		   && register_operand (XEXP (src, 0), DImode)
3645		   && immediate_operand (XEXP (src, 1), DImode))))
3646    return 1;
3647
3648  /* The 'restore src,src,dest' pattern.  */
3649  else if (GET_CODE (src) == ASHIFT
3650	   && (register_operand (XEXP (src, 0), SImode)
3651	       || register_operand (XEXP (src, 0), DImode))
3652	   && XEXP (src, 1) == const1_rtx)
3653    return 1;
3654
3655  return 0;
3656}
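
/* Illustrative note on the patterns above: 'restore src1,src2,dest' computes
   dest = src1 + src2 with the sources read in the current window and dest
   written in the caller's window, so e.g. an epilogue returning x + y can be

     ret
      restore	%o1, %o2, %o0

   The ASHIFT-by-1 form qualifies because x << 1 == x + x.  */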
3657
3658/* Return nonzero if TRIAL can go into the function return's delay slot.  */
3659
3660int
3661eligible_for_return_delay (rtx_insn *trial)
3662{
3663  int regno;
3664  rtx pat;
3665
3666  /* If the function uses __builtin_eh_return, the eh_return machinery
3667     occupies the delay slot.  */
3668  if (crtl->calls_eh_return)
3669    return 0;
3670
3671  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3672    return 0;
3673
3674  /* In the case of a leaf or flat function, anything can go into the slot.  */
3675  if (sparc_leaf_function_p || TARGET_FLAT)
3676    return 1;
3677
3678  if (!NONJUMP_INSN_P (trial))
3679    return 0;
3680
3681  pat = PATTERN (trial);
3682  if (GET_CODE (pat) == PARALLEL)
3683    {
3684      int i;
3685
3686      if (! TARGET_V9)
3687	return 0;
3688      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3689	{
3690	  rtx expr = XVECEXP (pat, 0, i);
3691	  if (GET_CODE (expr) != SET)
3692	    return 0;
3693	  if (GET_CODE (SET_DEST (expr)) != REG)
3694	    return 0;
3695	  regno = REGNO (SET_DEST (expr));
3696	  if (regno >= 8 && regno < 24)
3697	    return 0;
3698	}
3699      return !epilogue_renumber (&pat, 1);
3700    }
3701
3702  if (GET_CODE (pat) != SET)
3703    return 0;
3704
3705  if (GET_CODE (SET_DEST (pat)) != REG)
3706    return 0;
3707
3708  regno = REGNO (SET_DEST (pat));
3709
3710  /* Otherwise, only operations which can be done in tandem with
3711     a `restore' or `return' insn can go into the delay slot.  */
3712  if (regno >= 8 && regno < 24)
3713    return 0;
3714
3715  /* If this instruction sets up a floating-point register and we have a
3716     'return' instruction, it can probably go in.  But 'restore' will not
3717     work with FP_REGS.  */
3718  if (! SPARC_INT_REG_P (regno))
3719    return TARGET_V9 && !epilogue_renumber (&pat, 1);
3720
3721  return eligible_for_restore_insn (trial, true);
3722}
3723
3724/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */
3725
3726int
3727eligible_for_sibcall_delay (rtx_insn *trial)
3728{
3729  rtx pat;
3730
3731  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3732    return 0;
3733
3734  if (!NONJUMP_INSN_P (trial))
3735    return 0;
3736
3737  pat = PATTERN (trial);
3738
3739  if (sparc_leaf_function_p || TARGET_FLAT)
3740    {
3741      /* If the tail call is done using the call instruction,
3742	 we have to restore %o7 in the delay slot.  */
3743      if (LEAF_SIBCALL_SLOT_RESERVED_P)
3744	return 0;
3745
3746      /* %g1 is used to build the function address.  */
3747      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3748	return 0;
3749
3750      return 1;
3751    }
3752
3753  if (GET_CODE (pat) != SET)
3754    return 0;
3755
3756  /* Otherwise, only operations which can be done in tandem with
3757     a `restore' insn can go into the delay slot.  */
3758  if (GET_CODE (SET_DEST (pat)) != REG
3759      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3760      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3761    return 0;
3762
3763  /* If it mentions %o7, it can't go in, because sibcall will clobber it
3764     in most cases.  */
3765  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3766    return 0;
3767
3768  return eligible_for_restore_insn (trial, false);
3769}
3770
3771/* Determine if it's legal to put X into the constant pool.  This
3772   is not possible if X contains the address of a symbol that is
3773   not constant (TLS) or not known at final link time (PIC).  */
3774
3775static bool
3776sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3777{
3778  switch (GET_CODE (x))
3779    {
3780    case CONST_INT:
3781    case CONST_WIDE_INT:
3782    case CONST_DOUBLE:
3783    case CONST_VECTOR:
3784      /* Accept all non-symbolic constants.  */
3785      return false;
3786
3787    case LABEL_REF:
3788      /* Labels are OK iff we are non-PIC.  */
3789      return flag_pic != 0;
3790
3791    case SYMBOL_REF:
3792      /* 'Naked' TLS symbol references are never OK,
3793	 non-TLS symbols are OK iff we are non-PIC.  */
3794      if (SYMBOL_REF_TLS_MODEL (x))
3795	return true;
3796      else
3797	return flag_pic != 0;
3798
3799    case CONST:
3800      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3801    case PLUS:
3802    case MINUS:
3803      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3804         || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3805    case UNSPEC:
3806      return true;
3807    default:
3808      gcc_unreachable ();
3809    }
3810}
3811
3812/* Global Offset Table support.  */
3813static GTY(()) rtx got_helper_rtx = NULL_RTX;
3814static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3815
3816/* Return the SYMBOL_REF for the Global Offset Table.  */
3817
3818static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3819
3820static rtx
3821sparc_got (void)
3822{
3823  if (!sparc_got_symbol)
3824    sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3825
3826  return sparc_got_symbol;
3827}
3828
3829/* Ensure that we are not using patterns that are not OK with PIC.  */
3830
3831int
3832check_pic (int i)
3833{
3834  rtx op;
3835
3836  switch (flag_pic)
3837    {
3838    case 1:
3839      op = recog_data.operand[i];
3840      gcc_assert (GET_CODE (op) != SYMBOL_REF
3841	  	  && (GET_CODE (op) != CONST
3842		      || (GET_CODE (XEXP (op, 0)) == MINUS
3843			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
3844			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
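      /* Fall through.  */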
3845    case 2:
3846    default:
3847      return 1;
3848    }
3849}
3850
3851/* Return true if X is an address which needs a temporary register when
3852   reloaded while generating PIC code.  */
3853
3854int
3855pic_address_needs_scratch (rtx x)
3856{
3857  /* A symbolic address plus a non-SMALL_INT offset needs a temp reg.  */
3858  if (GET_CODE (x) == CONST
3859      && GET_CODE (XEXP (x, 0)) == PLUS
3860      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3861      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3862      && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
3863    return 1;
3864
3865  return 0;
3866}
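
/* For example (illustrative), "sym + 0x2000" is out of simm13 range, so the
   offset cannot be folded into a single addition against %l7 and must first
   be built in a scratch register.  */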
3867
3868/* Determine if a given RTX is a valid constant.  We already know this
3869   satisfies CONSTANT_P.  */
3870
3871static bool
3872sparc_legitimate_constant_p (machine_mode mode, rtx x)
3873{
3874  switch (GET_CODE (x))
3875    {
3876    case CONST:
3877    case SYMBOL_REF:
3878      if (sparc_tls_referenced_p (x))
3879	return false;
3880      break;
3881
3882    case CONST_DOUBLE:
3883      /* Floating point constants are generally not ok.
3884	 The only exception is 0.0 and all-ones in VIS.  */
3885      if (TARGET_VIS
3886	  && SCALAR_FLOAT_MODE_P (mode)
3887	  && (const_zero_operand (x, mode)
3888	      || const_all_ones_operand (x, mode)))
3889	return true;
3890
3891      return false;
3892
3893    case CONST_VECTOR:
3894      /* Vector constants are generally not ok.
3895	 The only exception is 0 or -1 in VIS.  */
3896      if (TARGET_VIS
3897	  && (const_zero_operand (x, mode)
3898	      || const_all_ones_operand (x, mode)))
3899	return true;
3900
3901      return false;
3902
3903    default:
3904      break;
3905    }
3906
3907  return true;
3908}
3909
3910/* Determine if a given RTX is a valid constant address.  */
3911
3912bool
3913constant_address_p (rtx x)
3914{
3915  switch (GET_CODE (x))
3916    {
3917    case LABEL_REF:
3918    case CONST_INT:
3919    case HIGH:
3920      return true;
3921
3922    case CONST:
3923      if (flag_pic && pic_address_needs_scratch (x))
3924	return false;
3925      return sparc_legitimate_constant_p (Pmode, x);
3926
3927    case SYMBOL_REF:
3928      return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3929
3930    default:
3931      return false;
3932    }
3933}
3934
3935/* Nonzero if the constant value X is a legitimate general operand
3936   when generating PIC code.  It is given that flag_pic is on and
3937   that X satisfies CONSTANT_P.  */
3938
3939bool
3940legitimate_pic_operand_p (rtx x)
3941{
3942  if (pic_address_needs_scratch (x))
3943    return false;
3944  if (sparc_tls_referenced_p (x))
3945    return false;
3946  return true;
3947}
3948
3949#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
3950  (CONST_INT_P (X)					\
3951   && INTVAL (X) >= -0x1000				\
3952   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3953
3954#define RTX_OK_FOR_OLO10_P(X, MODE)			\
3955  (CONST_INT_P (X)					\
3956   && INTVAL (X) >= -0x1000				\
3957   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
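
/* A worked check (illustrative): the simm13 field covers [-0x1000, 0xfff],
   so an 8-byte access is acceptable up to offset 0xff8, which keeps its
   last byte at 0xfff.  For OLO10, the %lo() part of the address can already
   contribute up to 0x3ff, leaving only 0xc00 of headroom for the offset.  */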
3958
3959/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3960
3961   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3962   ordinarily.  This changes a bit when generating PIC.  */
3963
3964static bool
3965sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3966{
3967  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3968
3969  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3970    rs1 = addr;
3971  else if (GET_CODE (addr) == PLUS)
3972    {
3973      rs1 = XEXP (addr, 0);
3974      rs2 = XEXP (addr, 1);
3975
3976      /* Canonicalize.  REG comes first, if there are no regs,
3977	 LO_SUM comes first.  */
3978      if (!REG_P (rs1)
3979	  && GET_CODE (rs1) != SUBREG
3980	  && (REG_P (rs2)
3981	      || GET_CODE (rs2) == SUBREG
3982	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3983	{
3984	  rs1 = XEXP (addr, 1);
3985	  rs2 = XEXP (addr, 0);
3986	}
3987
3988      if ((flag_pic == 1
3989	   && rs1 == pic_offset_table_rtx
3990	   && !REG_P (rs2)
3991	   && GET_CODE (rs2) != SUBREG
3992	   && GET_CODE (rs2) != LO_SUM
3993	   && GET_CODE (rs2) != MEM
3994	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3995	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3996	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3997	  || ((REG_P (rs1)
3998	       || GET_CODE (rs1) == SUBREG)
3999	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4000	{
4001	  imm1 = rs2;
4002	  rs2 = NULL;
4003	}
4004      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4005	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4006	{
4007	  /* We prohibit REG + REG for TFmode when there are no quad move insns
4008	     and we consequently need to split.  We do this because REG+REG
4009	     is not an offsettable address.  If we get the situation in reload
4010	     where source and destination of a movtf pattern are both MEMs with
4011	     REG+REG address, then only one of them gets converted to an
4012	     offsettable address.  */
4013	  if (mode == TFmode
4014	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4015	    return 0;
4016
4017	  /* Likewise for TImode, but in all cases.  */
4018	  if (mode == TImode)
4019	    return 0;
4020
4021	  /* We prohibit REG + REG on ARCH32 if not optimizing for
4022	     DFmode/DImode because then mem_min_alignment is likely to be zero
4023	     after reload and the forced split would lack a matching splitter
4024	     pattern.  */
4025	  if (TARGET_ARCH32 && !optimize
4026	      && (mode == DFmode || mode == DImode))
4027	    return 0;
4028	}
4029      else if (USE_AS_OFFSETABLE_LO10
4030	       && GET_CODE (rs1) == LO_SUM
4031	       && TARGET_ARCH64
4032	       && ! TARGET_CM_MEDMID
4033	       && RTX_OK_FOR_OLO10_P (rs2, mode))
4034	{
4035	  rs2 = NULL;
4036	  imm1 = XEXP (rs1, 1);
4037	  rs1 = XEXP (rs1, 0);
4038	  if (!CONSTANT_P (imm1)
4039	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4040	    return 0;
4041	}
4042    }
4043  else if (GET_CODE (addr) == LO_SUM)
4044    {
4045      rs1 = XEXP (addr, 0);
4046      imm1 = XEXP (addr, 1);
4047
4048      if (!CONSTANT_P (imm1)
4049	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4050	return 0;
4051
4052      /* We can't allow TFmode in 32-bit mode, because an offset greater
4053	 than the alignment (8) may cause the LO_SUM to overflow.  */
4054      if (mode == TFmode && TARGET_ARCH32)
4055	return 0;
4056    }
4057  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4058    return 1;
4059  else
4060    return 0;
4061
4062  if (GET_CODE (rs1) == SUBREG)
4063    rs1 = SUBREG_REG (rs1);
4064  if (!REG_P (rs1))
4065    return 0;
4066
4067  if (rs2)
4068    {
4069      if (GET_CODE (rs2) == SUBREG)
4070	rs2 = SUBREG_REG (rs2);
4071      if (!REG_P (rs2))
4072	return 0;
4073    }
4074
4075  if (strict)
4076    {
4077      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4078	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4079	return 0;
4080    }
4081  else
4082    {
4083      if ((! SPARC_INT_REG_P (REGNO (rs1))
4084	   && REGNO (rs1) != FRAME_POINTER_REGNUM
4085	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4086	  || (rs2
4087	      && (! SPARC_INT_REG_P (REGNO (rs2))
4088		  && REGNO (rs2) != FRAME_POINTER_REGNUM
4089		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4090	return 0;
4091    }
4092  return 1;
4093}
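
/* Illustrative examples of addresses accepted above:

     ld	[%l1 + %l2], %o0	! REG + REG
     ld	[%l1 + 64], %o0		! REG + simm13 offset
     ld	[%l1 + %lo(sym)], %o0	! LO_SUM immediate  */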
4094
4095/* Return the SYMBOL_REF for the tls_get_addr function.  */
4096
4097static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4098
4099static rtx
4100sparc_tls_get_addr (void)
4101{
4102  if (!sparc_tls_symbol)
4103    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4104
4105  return sparc_tls_symbol;
4106}
4107
4108/* Return the Global Offset Table to be used in TLS mode.  */
4109
4110static rtx
4111sparc_tls_got (void)
4112{
4113  /* In PIC mode, this is just the PIC offset table.  */
4114  if (flag_pic)
4115    {
4116      crtl->uses_pic_offset_table = 1;
4117      return pic_offset_table_rtx;
4118    }
4119
4120  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4121     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
4122  if (TARGET_SUN_TLS && TARGET_ARCH32)
4123    {
4124      load_got_register ();
4125      return global_offset_table_rtx;
4126    }
4127
4128  /* In all other cases, we load a new pseudo with the GOT symbol.  */
4129  return copy_to_reg (sparc_got ());
4130}
4131
4132/* Return true if X contains a thread-local symbol.  */
4133
4134static bool
4135sparc_tls_referenced_p (rtx x)
4136{
4137  if (!TARGET_HAVE_TLS)
4138    return false;
4139
4140  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4141    x = XEXP (XEXP (x, 0), 0);
4142
4143  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4144    return true;
4145
4146  /* That's all we handle in sparc_legitimize_tls_address for now.  */
4147  return false;
4148}
4149
4150/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
4151   this (thread-local) address.  */
4152
4153static rtx
4154sparc_legitimize_tls_address (rtx addr)
4155{
4156  rtx temp1, temp2, temp3, ret, o0, got;
4157  rtx_insn *insn;
4158
4159  gcc_assert (can_create_pseudo_p ());
4160
4161  if (GET_CODE (addr) == SYMBOL_REF)
4162    switch (SYMBOL_REF_TLS_MODEL (addr))
4163      {
4164      case TLS_MODEL_GLOBAL_DYNAMIC:
4165	start_sequence ();
4166	temp1 = gen_reg_rtx (SImode);
4167	temp2 = gen_reg_rtx (SImode);
4168	ret = gen_reg_rtx (Pmode);
4169	o0 = gen_rtx_REG (Pmode, 8);
4170	got = sparc_tls_got ();
4171	emit_insn (gen_tgd_hi22 (temp1, addr));
4172	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4173	if (TARGET_ARCH32)
4174	  {
4175	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4176	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4177						   addr, const1_rtx));
4178	  }
4179	else
4180	  {
4181	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4182	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4183						   addr, const1_rtx));
4184	  }
4185	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4186	insn = get_insns ();
4187	end_sequence ();
4188	emit_libcall_block (insn, ret, o0, addr);
4189	break;
4190
4191      case TLS_MODEL_LOCAL_DYNAMIC:
4192	start_sequence ();
4193	temp1 = gen_reg_rtx (SImode);
4194	temp2 = gen_reg_rtx (SImode);
4195	temp3 = gen_reg_rtx (Pmode);
4196	ret = gen_reg_rtx (Pmode);
4197	o0 = gen_rtx_REG (Pmode, 8);
4198	got = sparc_tls_got ();
4199	emit_insn (gen_tldm_hi22 (temp1));
4200	emit_insn (gen_tldm_lo10 (temp2, temp1));
4201	if (TARGET_ARCH32)
4202	  {
4203	    emit_insn (gen_tldm_add32 (o0, got, temp2));
4204	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4205						    const1_rtx));
4206	  }
4207	else
4208	  {
4209	    emit_insn (gen_tldm_add64 (o0, got, temp2));
4210	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4211						    const1_rtx));
4212	  }
4213	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4214	insn = get_insns ();
4215	end_sequence ();
4216	emit_libcall_block (insn, temp3, o0,
4217			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4218					    UNSPEC_TLSLD_BASE));
4219	temp1 = gen_reg_rtx (SImode);
4220	temp2 = gen_reg_rtx (SImode);
4221	emit_insn (gen_tldo_hix22 (temp1, addr));
4222	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4223	if (TARGET_ARCH32)
4224	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4225	else
4226	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4227	break;
4228
4229      case TLS_MODEL_INITIAL_EXEC:
4230	temp1 = gen_reg_rtx (SImode);
4231	temp2 = gen_reg_rtx (SImode);
4232	temp3 = gen_reg_rtx (Pmode);
4233	got = sparc_tls_got ();
4234	emit_insn (gen_tie_hi22 (temp1, addr));
4235	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4236	if (TARGET_ARCH32)
4237	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4238	else
4239	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4240        if (TARGET_SUN_TLS)
4241	  {
4242	    ret = gen_reg_rtx (Pmode);
4243	    if (TARGET_ARCH32)
4244	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4245					temp3, addr));
4246	    else
4247	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4248					temp3, addr));
4249	  }
4250	else
4251	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4252	break;
4253
4254      case TLS_MODEL_LOCAL_EXEC:
4255	temp1 = gen_reg_rtx (Pmode);
4256	temp2 = gen_reg_rtx (Pmode);
4257	if (TARGET_ARCH32)
4258	  {
4259	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4260	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4261	  }
4262	else
4263	  {
4264	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4265	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4266	  }
4267	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4268	break;
4269
4270      default:
4271	gcc_unreachable ();
4272      }
4273
4274  else if (GET_CODE (addr) == CONST)
4275    {
4276      rtx base, offset;
4277
4278      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4279
4280      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4281      offset = XEXP (XEXP (addr, 0), 1);
4282
4283      base = force_operand (base, NULL_RTX);
4284      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4285	offset = force_reg (Pmode, offset);
4286      ret = gen_rtx_PLUS (Pmode, base, offset);
4287    }
4288
4289  else
4290    gcc_unreachable ();  /* for now ... */
4291
4292  return ret;
4293}
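
/* For reference (illustrative), the global-dynamic case above expands to
   the standard SPARC TLS sequence:

     sethi	%tgd_hi22(sym), %t1
     add	%t1, %tgd_lo10(sym), %t2
     add	%l7, %t2, %o0, %tgd_add(sym)
     call	__tls_get_addr, %tgd_call(sym)
      nop  */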
4294
4295/* Legitimize PIC addresses.  If the address is already position-independent,
4296   we return ORIG.  Newly generated position-independent addresses go into a
4297   reg.  This is REG if nonzero, otherwise we allocate register(s) as
4298   necessary.  */
4299
4300static rtx
4301sparc_legitimize_pic_address (rtx orig, rtx reg)
4302{
4303  if (GET_CODE (orig) == SYMBOL_REF
4304      /* See the comment in sparc_expand_move.  */
4305      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4306    {
4307      bool gotdata_op = false;
4308      rtx pic_ref, address;
4309      rtx_insn *insn;
4310
4311      if (!reg)
4312	{
4313	  gcc_assert (can_create_pseudo_p ());
4314	  reg = gen_reg_rtx (Pmode);
4315	}
4316
4317      if (flag_pic == 2)
4318	{
4319	  /* If not during reload, allocate another temp reg here for loading
4320	     in the address, so that these instructions can be optimized
4321	     properly.  */
4322	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4323
4324	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4325	     won't get confused into thinking that these two instructions
4326	     are loading in the true address of the symbol.  If in the
4327	     future a PIC rtx exists, that should be used instead.  */
4328	  if (TARGET_ARCH64)
4329	    {
4330	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
4331	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4332	    }
4333	  else
4334	    {
4335	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
4336	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4337	    }
4338
4339	  address = temp_reg;
4340	  gotdata_op = true;
4341	}
4342      else
4343	address = orig;
4344
4345      crtl->uses_pic_offset_table = 1;
4346      if (gotdata_op)
4347	{
4348	  if (TARGET_ARCH64)
4349	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4350							pic_offset_table_rtx,
4351							address, orig));
4352	  else
4353	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4354							pic_offset_table_rtx,
4355							address, orig));
4356	}
4357      else
4358	{
4359	  pic_ref
4360	    = gen_const_mem (Pmode,
4361			     gen_rtx_PLUS (Pmode,
4362					   pic_offset_table_rtx, address));
4363	  insn = emit_move_insn (reg, pic_ref);
4364	}
4365
4366      /* Put a REG_EQUAL note on this insn, so that it can be optimized
4367	 by the loop pass.  */
4368      set_unique_reg_note (insn, REG_EQUAL, orig);
4369      return reg;
4370    }
4371  else if (GET_CODE (orig) == CONST)
4372    {
4373      rtx base, offset;
4374
4375      if (GET_CODE (XEXP (orig, 0)) == PLUS
4376	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4377	return orig;
4378
4379      if (!reg)
4380	{
4381	  gcc_assert (can_create_pseudo_p ());
4382	  reg = gen_reg_rtx (Pmode);
4383	}
4384
4385      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4386      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4387      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4388			 		     base == reg ? NULL_RTX : reg);
4389
4390      if (GET_CODE (offset) == CONST_INT)
4391	{
4392	  if (SMALL_INT (offset))
4393	    return plus_constant (Pmode, base, INTVAL (offset));
4394	  else if (can_create_pseudo_p ())
4395	    offset = force_reg (Pmode, offset);
4396	  else
4397	    /* If we reach here, then something is seriously wrong.  */
4398	    gcc_unreachable ();
4399	}
4400      return gen_rtx_PLUS (Pmode, base, offset);
4401    }
4402  else if (GET_CODE (orig) == LABEL_REF)
4403    /* ??? We ought to be checking that the register is live instead, in case
4404       it is eliminated.  */
4405    crtl->uses_pic_offset_table = 1;
4406
4407  return orig;
4408}
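
/* Illustrative sketch of the flag_pic == 2 path above: the GOT slot address
   is materialized in a temporary and the entry is then loaded through %l7,
   along the lines of

     sethi	%hi(slot), %t
     or	%t, %lo(slot), %t
     ld	[%l7 + %t], %reg

   with GOT-relative relocations on the individual pieces.  */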
4409
4410/* Try machine-dependent ways of modifying an illegitimate address X
4411   to be legitimate.  If we find one, return the new, valid address.
4412
4413   OLDX is the address as it was before break_out_memory_refs was called.
4414   In some cases it is useful to look at this to decide what needs to be done.
4415
4416   MODE is the mode of the operand pointed to by X.
4417
4418   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
4419
4420static rtx
4421sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4422			  machine_mode mode)
4423{
4424  rtx orig_x = x;
4425
4426  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4427    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4428		      force_operand (XEXP (x, 0), NULL_RTX));
4429  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4430    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4431		      force_operand (XEXP (x, 1), NULL_RTX));
4432  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4433    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4434		      XEXP (x, 1));
4435  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4436    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4437		      force_operand (XEXP (x, 1), NULL_RTX));
4438
4439  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4440    return x;
4441
4442  if (sparc_tls_referenced_p (x))
4443    x = sparc_legitimize_tls_address (x);
4444  else if (flag_pic)
4445    x = sparc_legitimize_pic_address (x, NULL_RTX);
4446  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4447    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4448		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
4449  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4450    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4451		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
4452  else if (GET_CODE (x) == SYMBOL_REF
4453	   || GET_CODE (x) == CONST
4454	   || GET_CODE (x) == LABEL_REF)
4455    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4456
4457  return x;
4458}
4459
4460/* Delegitimize an address that was legitimized by the above function.  */
4461
4462static rtx
4463sparc_delegitimize_address (rtx x)
4464{
4465  x = delegitimize_mem_from_attrs (x);
4466
4467  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4468    switch (XINT (XEXP (x, 1), 1))
4469      {
4470      case UNSPEC_MOVE_PIC:
4471      case UNSPEC_TLSLE:
4472	x = XVECEXP (XEXP (x, 1), 0, 0);
4473	gcc_assert (GET_CODE (x) == SYMBOL_REF);
4474	break;
4475      default:
4476	break;
4477      }
4478
4479  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
4480  if (GET_CODE (x) == MINUS
4481      && REG_P (XEXP (x, 0))
4482      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4483      && GET_CODE (XEXP (x, 1)) == LO_SUM
4484      && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4485      && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4486    {
4487      x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4488      gcc_assert (GET_CODE (x) == LABEL_REF
4489		  || (GET_CODE (x) == CONST
4490		      && GET_CODE (XEXP (x, 0)) == PLUS
4491		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4492		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
4493    }
4494
4495  return x;
4496}
4497
4498/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
4499   replace the input X, or the original X if no replacement is called for.
4500   The output parameter *WIN is 1 if the calling macro should goto WIN,
4501   0 if it should not.
4502
4503   For SPARC, we wish to handle addresses by splitting them into
4504   HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4505   This cuts the number of extra insns by one.
4506
4507   Do nothing when generating PIC code and the address is a symbolic
4508   operand or requires a scratch register.  */
4509
4510rtx
4511sparc_legitimize_reload_address (rtx x, machine_mode mode,
4512				 int opnum, int type,
4513				 int ind_levels ATTRIBUTE_UNUSED, int *win)
4514{
4515  /* Decompose SImode constants into HIGH+LO_SUM.  */
4516  if (CONSTANT_P (x)
4517      && (mode != TFmode || TARGET_ARCH64)
4518      && GET_MODE (x) == SImode
4519      && GET_CODE (x) != LO_SUM
4520      && GET_CODE (x) != HIGH
4521      && sparc_cmodel <= CM_MEDLOW
4522      && !(flag_pic
4523	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4524    {
4525      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4526      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4527		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4528		   opnum, (enum reload_type)type);
4529      *win = 1;
4530      return x;
4531    }
4532
4533  /* We have to recognize what we have already generated above.  */
4534  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4535    {
4536      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4537		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4538		   opnum, (enum reload_type)type);
4539      *win = 1;
4540      return x;
4541    }
4542
4543  *win = 0;
4544  return x;
4545}
4546
4547/* Return true if ADDR (a legitimate address expression)
4548   has an effect that depends on the machine mode it is used for.
4549
4550   In PIC mode,
4551
4552      (mem:HI [%l7+a])
4553
4554   is not equivalent to
4555
4556      (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4557
4558   because [%l7+a+1] is interpreted as the address of (a+1).  */
4559
4561static bool
4562sparc_mode_dependent_address_p (const_rtx addr,
4563				addr_space_t as ATTRIBUTE_UNUSED)
4564{
4565  if (flag_pic && GET_CODE (addr) == PLUS)
4566    {
4567      rtx op0 = XEXP (addr, 0);
4568      rtx op1 = XEXP (addr, 1);
4569      if (op0 == pic_offset_table_rtx
4570	  && symbolic_operand (op1, VOIDmode))
4571	return true;
4572    }
4573
4574  return false;
4575}
4576
4577#ifdef HAVE_GAS_HIDDEN
4578# define USE_HIDDEN_LINKONCE 1
4579#else
4580# define USE_HIDDEN_LINKONCE 0
4581#endif
4582
4583static void
4584get_pc_thunk_name (char name[32], unsigned int regno)
4585{
4586  const char *reg_name = reg_names[regno];
4587
4588  /* Skip the leading '%' as that cannot be used in a
4589     symbol name.  */
4590  reg_name += 1;
4591
4592  if (USE_HIDDEN_LINKONCE)
4593    sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4594  else
4595    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4596}
4597
4598/* Wrapper around the load_pcrel_sym{si,di} patterns.  */
4599
4600static rtx
4601gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4602{
4603  int orig_flag_pic = flag_pic;
4604  rtx insn;
4605
4606  /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
4607  flag_pic = 0;
4608  if (TARGET_ARCH64)
4609    insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4610  else
4611    insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4612  flag_pic = orig_flag_pic;
4613
4614  return insn;
4615}
4616
4617/* Emit code to load the GOT register.  */
4618
4619void
4620load_got_register (void)
4621{
4622  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
4623  if (!global_offset_table_rtx)
4624    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4625
4626  if (TARGET_VXWORKS_RTP)
4627    emit_insn (gen_vxworks_load_got ());
4628  else
4629    {
4630      /* The GOT symbol is subject to a PC-relative relocation so we need a
4631	 helper function to add the PC value and thus get the final value.  */
4632      if (!got_helper_rtx)
4633	{
4634	  char name[32];
4635	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4636	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4637	}
4638
4639      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4640				     got_helper_rtx,
4641				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4642    }
4643
4644  /* Need to emit this whether or not we obey regdecls,
4645     since setjmp/longjmp can cause life info to screw up.
4646     ??? In the case where we don't obey regdecls, this is not sufficient
4647     since we may not fall out the bottom.  */
4648  emit_use (global_offset_table_rtx);
4649}
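
/* Illustrative: on ELF targets the helper-based path above typically emits

     sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call	__sparc_get_pc_thunk.l7
      add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk folds the PC of the call (%o7) into %l7.  */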
4650
4651/* Emit a call instruction with the pattern given by PAT.  ADDR is the
4652   address of the call target.  */
4653
4654void
4655sparc_emit_call_insn (rtx pat, rtx addr)
4656{
4657  rtx_insn *insn;
4658
4659  insn = emit_call_insn (pat);
4660
4661  /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
4662  if (TARGET_VXWORKS_RTP
4663      && flag_pic
4664      && GET_CODE (addr) == SYMBOL_REF
4665      && (SYMBOL_REF_DECL (addr)
4666	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4667	  : !SYMBOL_REF_LOCAL_P (addr)))
4668    {
4669      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4670      crtl->uses_pic_offset_table = 1;
4671    }
4672}
4673
4674/* Return 1 if RTX is a MEM which is known to be aligned to at
4675   least a DESIRED byte boundary.  */
4676
4677int
4678mem_min_alignment (rtx mem, int desired)
4679{
4680  rtx addr, base, offset;
4681
4682  /* If it's not a MEM we can't accept it.  */
4683  if (GET_CODE (mem) != MEM)
4684    return 0;
4685
4686  /* Obviously...  */
4687  if (!TARGET_UNALIGNED_DOUBLES
4688      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4689    return 1;
4690
4691  /* ??? The rest of the function predates MEM_ALIGN so
4692     there is probably a bit of redundancy.  */
4693  addr = XEXP (mem, 0);
4694  base = offset = NULL_RTX;
4695  if (GET_CODE (addr) == PLUS)
4696    {
4697      if (GET_CODE (XEXP (addr, 0)) == REG)
4698	{
4699	  base = XEXP (addr, 0);
4700
4701	  /* What we are saying here is that if the base
4702	     REG is aligned properly, the compiler will make
4703	     sure any REG-based index upon it will be so
4704	     as well.  */
4705	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4706	    offset = XEXP (addr, 1);
4707	  else
4708	    offset = const0_rtx;
4709	}
4710    }
4711  else if (GET_CODE (addr) == REG)
4712    {
4713      base = addr;
4714      offset = const0_rtx;
4715    }
4716
4717  if (base != NULL_RTX)
4718    {
4719      int regno = REGNO (base);
4720
4721      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4722	{
4723	  /* Check if the compiler has recorded some information
4724	     about the alignment of the base REG.  If reload has
4725	     completed, we already matched with proper alignments.
4726	     If not running global_alloc, reload might give us
4727	     an unaligned pointer to the local stack, though.  */
4728	  if (((cfun != 0
4729		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4730	       || (optimize && reload_completed))
4731	      && (INTVAL (offset) & (desired - 1)) == 0)
4732	    return 1;
4733	}
4734      else
4735	{
4736	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4737	    return 1;
4738	}
4739    }
4740  else if (! TARGET_UNALIGNED_DOUBLES
4741	   || CONSTANT_P (addr)
4742	   || GET_CODE (addr) == LO_SUM)
4743    {
4744      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4745	 is true, in which case we can only assume that an access is aligned if
4746	 it is to a constant address, or the address involves a LO_SUM.  */
4747      return 1;
4748    }
4749
4750  /* An obviously unaligned address.  */
4751  return 0;
4752}
4753
4754
4755/* Vectors to keep interesting information about registers where it can easily
4756   be found.  We used to use the actual mode value as the bit number, but there
4757   are more than 32 modes now.  Instead we use two tables: one indexed by
4758   hard register number, and one indexed by mode.  */
4759
4760/* The purpose of sparc_mode_class is to shrink the range of modes so that
4761   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
4762   mapped into one sparc_mode_class mode.  */
4763
4764enum sparc_mode_class {
4765  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4766  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4767  CC_MODE, CCFP_MODE
4768};
4769
4770/* Modes for single-word and smaller quantities.  */
4771#define S_MODES \
4772  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4773
4774/* Modes for double-word and smaller quantities.  */
4775#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4776
4777/* Modes for quad-word and smaller quantities.  */
4778#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4779
4780/* Modes for 8-word and smaller quantities.  */
4781#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4782
4783/* Modes for single-float quantities.  */
4784#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4785
4786/* Modes for double-float and smaller quantities.  */
4787#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4788
4789/* Modes for quad-float and smaller quantities.  */
4790#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4791
4792/* Modes for quad-float pairs and smaller quantities.  */
4793#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4794
4795/* Modes for double-float only quantities.  */
4796#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4797
4798/* Modes for quad-float and double-float only quantities.  */
4799#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4800
4801/* Modes for quad-float pairs and double-float only quantities.  */
4802#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4803
4804/* Modes for condition codes.  */
4805#define CC_MODES (1 << (int) CC_MODE)
4806#define CCFP_MODES (1 << (int) CCFP_MODE)
4807
4808/* Value is 1 if register/mode pair is acceptable on sparc.
4809
4810   The funny mixture of D and T modes is because integer operations
4811   do not specially operate on tetra quantities, so non-quad-aligned
4812   registers can hold quadword quantities (except %o4 and %i4 because
4813   they cross fixed registers).
4814
4815   ??? Note that, despite the settings, non-double-aligned parameter
4816   registers can hold double-word quantities in 32-bit mode.  */
4817
4818/* This points to either the 32-bit or the 64-bit version.  */
4819const int *hard_regno_mode_classes;
4820
4821static const int hard_32bit_mode_classes[] = {
4822  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4823  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4824  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4825  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4826
4827  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4828  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4829  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4830  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4831
4832  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
4833     and none can hold SFmode/SImode values.  */
4834  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4835  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4836  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4837  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4838
4839  /* %fcc[0123] */
4840  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4841
4842  /* %icc, %sfp, %gsr */
4843  CC_MODES, 0, D_MODES
4844};
4845
4846static const int hard_64bit_mode_classes[] = {
4847  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4848  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4849  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4850  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4851
4852  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4853  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4854  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4855  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4856
4857  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
4858     and none can hold SFmode/SImode values.  */
4859  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4860  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4861  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4862  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4863
4864  /* %fcc[0123] */
4865  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4866
4867  /* %icc, %sfp, %gsr */
4868  CC_MODES, 0, D_MODES
4869};
4870
4871int sparc_mode_class [NUM_MACHINE_MODES];
4872
4873enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4874
4875static void
4876sparc_init_modes (void)
4877{
4878  int i;
4879
4880  for (i = 0; i < NUM_MACHINE_MODES; i++)
4881    {
4882      machine_mode m = (machine_mode) i;
4883      unsigned int size = GET_MODE_SIZE (m);
4884
4885      switch (GET_MODE_CLASS (m))
4886	{
4887	case MODE_INT:
4888	case MODE_PARTIAL_INT:
4889	case MODE_COMPLEX_INT:
4890	  if (size < 4)
4891	    sparc_mode_class[i] = 1 << (int) H_MODE;
4892	  else if (size == 4)
4893	    sparc_mode_class[i] = 1 << (int) S_MODE;
4894	  else if (size == 8)
4895	    sparc_mode_class[i] = 1 << (int) D_MODE;
4896	  else if (size == 16)
4897	    sparc_mode_class[i] = 1 << (int) T_MODE;
4898	  else if (size == 32)
4899	    sparc_mode_class[i] = 1 << (int) O_MODE;
4900	  else
4901	    sparc_mode_class[i] = 0;
4902	  break;
4903	case MODE_VECTOR_INT:
4904	  if (size == 4)
4905	    sparc_mode_class[i] = 1 << (int) SF_MODE;
4906	  else if (size == 8)
4907	    sparc_mode_class[i] = 1 << (int) DF_MODE;
4908	  else
4909	    sparc_mode_class[i] = 0;
4910	  break;
4911	case MODE_FLOAT:
4912	case MODE_COMPLEX_FLOAT:
4913	  if (size == 4)
4914	    sparc_mode_class[i] = 1 << (int) SF_MODE;
4915	  else if (size == 8)
4916	    sparc_mode_class[i] = 1 << (int) DF_MODE;
4917	  else if (size == 16)
4918	    sparc_mode_class[i] = 1 << (int) TF_MODE;
4919	  else if (size == 32)
4920	    sparc_mode_class[i] = 1 << (int) OF_MODE;
4921	  else
4922	    sparc_mode_class[i] = 0;
4923	  break;
4924	case MODE_CC:
4925	  if (m == CCFPmode || m == CCFPEmode)
4926	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4927	  else
4928	    sparc_mode_class[i] = 1 << (int) CC_MODE;
4929	  break;
4930	default:
4931	  sparc_mode_class[i] = 0;
4932	  break;
4933	}
4934    }
4935
4936  if (TARGET_ARCH64)
4937    hard_regno_mode_classes = hard_64bit_mode_classes;
4938  else
4939    hard_regno_mode_classes = hard_32bit_mode_classes;
4940
4941  /* Initialize the array used by REGNO_REG_CLASS.  */
4942  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4943    {
4944      if (i < 16 && TARGET_V8PLUS)
4945	sparc_regno_reg_class[i] = I64_REGS;
4946      else if (i < 32 || i == FRAME_POINTER_REGNUM)
4947	sparc_regno_reg_class[i] = GENERAL_REGS;
4948      else if (i < 64)
4949	sparc_regno_reg_class[i] = FP_REGS;
4950      else if (i < 96)
4951	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4952      else if (i < 100)
4953	sparc_regno_reg_class[i] = FPCC_REGS;
4954      else
4955	sparc_regno_reg_class[i] = NO_REGS;
4956    }
4957}
4958
4959/* Return whether REGNO, a global or FP register, must be saved/restored.  */
4960
4961static inline bool
4962save_global_or_fp_reg_p (unsigned int regno,
4963			 int leaf_function ATTRIBUTE_UNUSED)
4964{
4965  return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4966}
4967
4968/* Return whether the return address register (%i7) is needed.  */
4969
4970static inline bool
4971return_addr_reg_needed_p (int leaf_function)
4972{
4973  /* If it is live, for example because of __builtin_return_address (0).  */
4974  if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4975    return true;
4976
4977  /* Otherwise, it is needed as save register if %o7 is clobbered.  */
4978  if (!leaf_function
4979      /* Loading the GOT register clobbers %o7.  */
4980      || crtl->uses_pic_offset_table
4981      || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4982    return true;
4983
4984  return false;
4985}
4986
4987/* Return whether REGNO, a local or in register, must be saved/restored.  */
4988
4989static bool
4990save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4991{
4992  /* General case: call-saved registers live at some point.  */
4993  if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4994    return true;
4995
4996  /* Frame pointer register (%fp) if needed.  */
4997  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4998    return true;
4999
5000  /* Return address register (%i7) if needed.  */
5001  if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5002    return true;
5003
5004  /* GOT register (%l7) if needed.  */
5005  if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5006    return true;
5007
5008  /* If the function accesses prior frames, the frame pointer and the return
5009     address of the previous frame must be saved on the stack.  */
5010  if (crtl->accesses_prior_frames
5011      && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5012    return true;
5013
5014  return false;
5015}
5016
5017/* Compute the frame size required by the function.  This function is called
5018   during the reload pass and also by sparc_expand_prologue.  */
5019
5020HOST_WIDE_INT
5021sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5022{
5023  HOST_WIDE_INT frame_size, apparent_frame_size;
5024  int args_size, n_global_fp_regs = 0;
5025  bool save_local_in_regs_p = false;
5026  unsigned int i;
5027
5028  /* If the function allocates dynamic stack space, the dynamic offset is
5029     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
5030  if (leaf_function && !cfun->calls_alloca)
5031    args_size = 0;
5032  else
5033    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5034
5035  /* Calculate space needed for global registers.  */
5036  if (TARGET_ARCH64)
5037    {
5038      for (i = 0; i < 8; i++)
5039	if (save_global_or_fp_reg_p (i, 0))
5040	  n_global_fp_regs += 2;
5041    }
5042  else
5043    {
5044      for (i = 0; i < 8; i += 2)
5045	if (save_global_or_fp_reg_p (i, 0)
5046	    || save_global_or_fp_reg_p (i + 1, 0))
5047	  n_global_fp_regs += 2;
5048    }
5049
5050  /* In the flat window model, find out which local and in registers need to
5051     be saved.  We don't reserve space in the current frame for them as they
5052     will be spilled into the register window save area of the caller's frame.
5053     However, as soon as we use this register window save area, we must create
5054     that of the current frame to make it the live one.  */
5055  if (TARGET_FLAT)
5056    for (i = 16; i < 32; i++)
5057      if (save_local_or_in_reg_p (i, leaf_function))
5058	{
5059	  save_local_in_regs_p = true;
5060	  break;
5061	}
5062
5063  /* Calculate space needed for FP registers.  */
5064  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5065    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5066      n_global_fp_regs += 2;
5067
5068  if (size == 0
5069      && n_global_fp_regs == 0
5070      && args_size == 0
5071      && !save_local_in_regs_p)
5072    frame_size = apparent_frame_size = 0;
5073  else
5074    {
5075      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
5076      apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5077      apparent_frame_size += n_global_fp_regs * 4;
5078
5079      /* We need to add the size of the outgoing argument area.  */
5080      frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5081
5082      /* And that of the register window save area.  */
5083      frame_size += FIRST_PARM_OFFSET (cfun->decl);
5084
5085      /* Finally, bump to the appropriate alignment.  */
5086      frame_size = SPARC_STACK_ALIGN (frame_size);
5087    }
5088
5089  /* Set up values for use in prologue and epilogue.  */
5090  sparc_frame_size = frame_size;
5091  sparc_apparent_frame_size = apparent_frame_size;
5092  sparc_n_global_fp_regs = n_global_fp_regs;
5093  sparc_save_local_in_regs_p = save_local_in_regs_p;
5094
5095  return frame_size;
5096}
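
/* Worked example (illustrative, ignoring STARTING_FRAME_OFFSET): with 40
   bytes of locals, two saved register pairs (n_global_fp_regs == 4) and 16
   bytes of outgoing arguments, apparent_frame_size is
   ROUND_UP (40, 8) + 4*4 = 56, and frame_size adds the 16 bytes of
   arguments plus the register window save area before the final
   SPARC_STACK_ALIGN rounding.  */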
5097
5098/* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */
5099
5100int
5101sparc_initial_elimination_offset (int to)
5102{
5103  int offset;
5104
5105  if (to == STACK_POINTER_REGNUM)
5106    offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5107  else
5108    offset = 0;
5109
5110  offset += SPARC_STACK_BIAS;
5111  return offset;
5112}
5113
5114/* Output any necessary .register pseudo-ops.  */
5115
5116void
5117sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5118{
5119#ifdef HAVE_AS_REGISTER_PSEUDO_OP
5120  int i;
5121
5122  if (TARGET_ARCH32)
5123    return;
5124
5125  /* Check if %g[2367] were used without
5126     .register being printed for them already.  */
5127  for (i = 2; i < 8; i++)
5128    {
5129      if (df_regs_ever_live_p (i)
5130	  && ! sparc_hard_reg_printed [i])
5131	{
5132	  sparc_hard_reg_printed [i] = 1;
5133	  /* %g7 is used as TLS base register, use #ignore
5134	     for it instead of #scratch.  */
5135	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5136		   i == 7 ? "ignore" : "scratch");
5137	}
5138      if (i == 3) i = 5;
5139    }
5140#endif
5141}
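
/* For example, a 64-bit function in which %g2 and %g7 are live would get:

	.register	%g2, #scratch
	.register	%g7, #ignore  */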
5142
5143#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5144
5145#if PROBE_INTERVAL > 4096
5146#error Cannot use indexed addressing mode for stack probing
5147#endif
5148
5149/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5150   inclusive.  These are offsets from the current stack pointer.
5151
5152   Note that we don't use the REG+REG addressing mode for the probes because
5153   of the stack bias in 64-bit mode.  And it doesn't really buy us anything
5154   so the advantage of having a single code path wins here.  */
5155
5156static void
5157sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5158{
5159  rtx g1 = gen_rtx_REG (Pmode, 1);
5160
5161  /* See if we have a constant small number of probes to generate.  If so,
5162     that's the easy case.  */
5163  if (size <= PROBE_INTERVAL)
5164    {
5165      emit_move_insn (g1, GEN_INT (first));
5166      emit_insn (gen_rtx_SET (g1,
5167			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5168      emit_stack_probe (plus_constant (Pmode, g1, -size));
5169    }
5170
5171  /* The run-time loop is made up of 9 insns in the generic case while the
5172     compile-time sequence is unrolled into 4+2*(n-2) insns for n intervals.  */
5173  else if (size <= 4 * PROBE_INTERVAL)
5174    {
5175      HOST_WIDE_INT i;
5176
5177      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5178      emit_insn (gen_rtx_SET (g1,
5179			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5180      emit_stack_probe (g1);
5181
5182      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5183	 it exceeds SIZE.  If only two probes are needed, this will not
5184	 generate any code.  Then probe at FIRST + SIZE.  */
5185      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5186	{
5187	  emit_insn (gen_rtx_SET (g1,
5188				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5189	  emit_stack_probe (g1);
5190	}
5191
5192      emit_stack_probe (plus_constant (Pmode, g1,
5193				       (i - PROBE_INTERVAL) - size));
5194    }
5195
5196  /* Otherwise, do the same as above, but in a loop.  Note that we must be
5197     extra careful with variables wrapping around because we might be at
5198     the very top (or the very bottom) of the address space and we have
5199     to be able to handle this case properly; in particular, we use an
5200     equality test for the loop condition.  */
5201  else
5202    {
5203      HOST_WIDE_INT rounded_size;
5204      rtx g4 = gen_rtx_REG (Pmode, 4);
5205
5206      emit_move_insn (g1, GEN_INT (first));
5207
5208
5209      /* Step 1: round SIZE to the previous multiple of the interval.  */
5210
5211      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5212      emit_move_insn (g4, GEN_INT (rounded_size));
5213
5214
5215      /* Step 2: compute initial and final value of the loop counter.  */
5216
5217      /* TEST_ADDR = SP + FIRST.  */
5218      emit_insn (gen_rtx_SET (g1,
5219			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5220
5221      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
5222      emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5223
5224
5225      /* Step 3: the loop
5226
5227	 while (TEST_ADDR != LAST_ADDR)
5228	   {
5229	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5230	     probe at TEST_ADDR
5231	   }
5232
5233	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5234	 until it is equal to ROUNDED_SIZE.  */
5235
5236      if (TARGET_ARCH64)
5237	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5238      else
5239	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5240
5241
5242      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5243	 that SIZE is equal to ROUNDED_SIZE.  */
5244
5245      if (size != rounded_size)
5246	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5247    }
5248
5249  /* Make sure nothing is scheduled before we are done.  */
5250  emit_insn (gen_blockage ());
5251}
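
/* Worked example for the unrolled case above, assuming the usual 4096-byte
   PROBE_INTERVAL: with FIRST = 16384 and SIZE = 10000 (10000 <= 4 * 4096),
   probes are emitted at SP - 20480, SP - 24576 and finally SP - 26384,
   i.e. at FIRST + PROBE_INTERVAL, FIRST + 2 * PROBE_INTERVAL and
   FIRST + SIZE below the incoming stack pointer.  */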
5252
5253/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
5254   absolute addresses.  */
5255
5256const char *
5257output_probe_stack_range (rtx reg1, rtx reg2)
5258{
5259  static int labelno = 0;
5260  char loop_lab[32];
5261  rtx xops[2];
5262
5263  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5264
5265  /* Loop.  */
5266  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5267
5268  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
5269  xops[0] = reg1;
5270  xops[1] = GEN_INT (-PROBE_INTERVAL);
5271  output_asm_insn ("add\t%0, %1, %0", xops);
5272
5273  /* Test if TEST_ADDR == LAST_ADDR.  */
5274  xops[1] = reg2;
5275  output_asm_insn ("cmp\t%0, %1", xops);
5276
5277  /* Probe at TEST_ADDR and branch.  */
5278  if (TARGET_ARCH64)
5279    fputs ("\tbne,pt\t%xcc,", asm_out_file);
5280  else
5281    fputs ("\tbne\t", asm_out_file);
5282  assemble_name_raw (asm_out_file, loop_lab);
5283  fputc ('\n', asm_out_file);
5284  xops[1] = GEN_INT (SPARC_STACK_BIAS);
5285  output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5286
5287  return "";
5288}
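
/* Sketch of the assembly emitted by the function above for 64-bit code,
   with an illustrative label and the usual %g1/%g4 register pair
   (SPARC_STACK_BIAS is 2047 there):

	.LPSRL0:
	add	%g1, -4096, %g1		! TEST_ADDR -= PROBE_INTERVAL
	cmp	%g1, %g4		! reached LAST_ADDR?
	bne,pt	%xcc, .LPSRL0
	 st	%g0, [%g1+2047]		! probe, done in the delay slot

   The store sits in the delay slot of the branch, so every interval is
   probed exactly once, including the last one.  */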
5289
5290/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5291   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
5292   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
5293   is the action to be performed if SAVE_P returns true and ACTION_FALSE
5294   the action to be performed if it returns false.  Return the new offset.  */
5295
5296typedef bool (*sorr_pred_t) (unsigned int, int);
5297typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5298
5299static int
5300emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5301			   int offset, int leaf_function, sorr_pred_t save_p,
5302			   sorr_act_t action_true, sorr_act_t action_false)
5303{
5304  unsigned int i;
5305  rtx mem;
5306  rtx_insn *insn;
5307
5308  if (TARGET_ARCH64 && high <= 32)
5309    {
5310      int fp_offset = -1;
5311
5312      for (i = low; i < high; i++)
5313	{
5314	  if (save_p (i, leaf_function))
5315	    {
5316	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
5317							  base, offset));
5318	      if (action_true == SORR_SAVE)
5319		{
5320		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5321		  RTX_FRAME_RELATED_P (insn) = 1;
5322		}
5323	      else  /* action_true == SORR_RESTORE */
5324		{
5325		  /* The frame pointer must be restored last since its old
5326		     value may be used as the base address for the frame.  This
5327		     is problematic in 64-bit mode only because of the lack
5328		     of a double-word load instruction.  */
5329		  if (i == HARD_FRAME_POINTER_REGNUM)
5330		    fp_offset = offset;
5331		  else
5332		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
5333		}
5334	      offset += 8;
5335	    }
5336	  else if (action_false == SORR_ADVANCE)
5337	    offset += 8;
5338	}
5339
5340      if (fp_offset >= 0)
5341	{
5342	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5343	  emit_move_insn (hard_frame_pointer_rtx, mem);
5344	}
5345    }
5346  else
5347    {
5348      for (i = low; i < high; i += 2)
5349	{
5350	  bool reg0 = save_p (i, leaf_function);
5351	  bool reg1 = save_p (i + 1, leaf_function);
5352	  machine_mode mode;
5353	  int regno;
5354
5355	  if (reg0 && reg1)
5356	    {
5357	      mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5358	      regno = i;
5359	    }
5360	  else if (reg0)
5361	    {
5362	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5363	      regno = i;
5364	    }
5365	  else if (reg1)
5366	    {
5367	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5368	      regno = i + 1;
5369	      offset += 4;
5370	    }
5371	  else
5372	    {
5373	      if (action_false == SORR_ADVANCE)
5374		offset += 8;
5375	      continue;
5376	    }
5377
5378	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5379	  if (action_true == SORR_SAVE)
5380	    {
5381	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5382	      RTX_FRAME_RELATED_P (insn) = 1;
5383	      if (mode == DImode)
5384		{
5385		  rtx set1, set2;
5386		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5387							      offset));
5388		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5389		  RTX_FRAME_RELATED_P (set1) = 1;
5390		  mem
5391		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
5392							    offset + 4));
5393		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5394		  RTX_FRAME_RELATED_P (set2) = 1;
5395		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5396				gen_rtx_PARALLEL (VOIDmode,
5397						  gen_rtvec (2, set1, set2)));
5398		}
5399	    }
5400	  else  /* action_true == SORR_RESTORE */
5401	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
5402
5403	  /* Bump and round down to double word
5404	     in case we already bumped by 4.  */
5405	  offset = ROUND_DOWN (offset + 8, 8);
5406	}
5407    }
5408
5409  return offset;
5410}
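
/* Example of the pairing logic above (a sketch, not a specification):
   if %f30 and %f31 both need saving, they are accessed as a single
   DFmode slot; if only %f31 does, one SFmode access is made at
   OFFSET + 4; in both cases the running offset is then bumped by 8
   and rounded down to a double-word boundary.  */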
5411
5412/* Emit code to adjust BASE to OFFSET.  Return the new base.  */
5413
5414static rtx
5415emit_adjust_base_to_offset (rtx base, int offset)
5416{
5417  /* ??? This might be optimized a little as %g1 might already have a
5418     value close enough that a single add insn will do.  */
5419  /* ??? Although, all of this is probably only a temporary fix because
5420     if %g1 can hold a function result, then sparc_expand_epilogue will
5421     lose (the result will be clobbered).  */
5422  rtx new_base = gen_rtx_REG (Pmode, 1);
5423  emit_move_insn (new_base, GEN_INT (offset));
5424  emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5425  return new_base;
5426}
5427
5428/* Emit code to save/restore call-saved global and FP registers.  */
5429
5430static void
5431emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5432{
5433  if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5434    {
5435      base = emit_adjust_base_to_offset (base, offset);
5436      offset = 0;
5437    }
5438
5439  offset
5440    = emit_save_or_restore_regs (0, 8, base, offset, 0,
5441				 save_global_or_fp_reg_p, action, SORR_NONE);
5442  emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5443			     save_global_or_fp_reg_p, action, SORR_NONE);
5444}
5445
5446/* Emit code to save/restore call-saved local and in registers.  */
5447
5448static void
5449emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5450{
5451  if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5452    {
5453      base = emit_adjust_base_to_offset (base, offset);
5454      offset = 0;
5455    }
5456
5457  emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5458			     save_local_or_in_reg_p, action, SORR_ADVANCE);
5459}
5460
5461/* Emit a window_save insn.  */
5462
5463static rtx_insn *
5464emit_window_save (rtx increment)
5465{
5466  rtx_insn *insn = emit_insn (gen_window_save (increment));
5467  RTX_FRAME_RELATED_P (insn) = 1;
5468
5469  /* The incoming return address (%o7) is saved in %i7.  */
5470  add_reg_note (insn, REG_CFA_REGISTER,
5471		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5472			     gen_rtx_REG (Pmode,
5473					  INCOMING_RETURN_ADDR_REGNUM)));
5474
5475  /* The window save event.  */
5476  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5477
5478  /* The CFA is %fp, the hard frame pointer.  */
5479  add_reg_note (insn, REG_CFA_DEF_CFA,
5480		plus_constant (Pmode, hard_frame_pointer_rtx,
5481			       INCOMING_FRAME_SP_OFFSET));
5482
5483  return insn;
5484}
5485
5486/* Generate an increment for the stack pointer.  */
5487
5488static rtx
5489gen_stack_pointer_inc (rtx increment)
5490{
5491  return gen_rtx_SET (stack_pointer_rtx,
5492		      gen_rtx_PLUS (Pmode,
5493				    stack_pointer_rtx,
5494				    increment));
5495}
5496
5497/* Expand the function prologue.  The prologue is responsible for reserving
5498   storage for the frame, saving the call-saved registers and loading the
5499   GOT register if needed.  */
5500
5501void
5502sparc_expand_prologue (void)
5503{
5504  HOST_WIDE_INT size;
5505  rtx_insn *insn;
5506
5507  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
5508     on the final value of the flag means deferring the prologue/epilogue
5509     expansion until just before the second scheduling pass, which is too
5510     late to emit multiple epilogues or return insns.
5511
5512     Of course we are making the assumption that the value of the flag
5513     will not change between now and its final value.  Of the three parts
5514     of the formula, only the last one can reasonably vary.  Let's take a
5515     closer look, after assuming that the first two are set to true
5516     (otherwise the last value is effectively silenced).
5517
5518     If only_leaf_regs_used returns false, the global predicate will also
5519     be false so the actual frame size calculated below will be positive.
5520     As a consequence, the save_register_window insn will be emitted in
5521     the instruction stream; now this insn explicitly references %fp
5522     which is not a leaf register so only_leaf_regs_used will always
5523     return false subsequently.
5524
5525     If only_leaf_regs_used returns true, we hope that the subsequent
5526     optimization passes won't cause non-leaf registers to pop up.  For
5527     example, the regrename pass has special provisions to not rename to
5528     non-leaf registers in a leaf function.  */
5529  sparc_leaf_function_p
5530    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5531
5532  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5533
5534  if (flag_stack_usage_info)
5535    current_function_static_stack_size = size;
5536
5537  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5538    {
5539      if (crtl->is_leaf && !cfun->calls_alloca)
5540	{
5541	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5542	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5543					  size - STACK_CHECK_PROTECT);
5544	}
5545      else if (size > 0)
5546	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5547    }
5548
5549  if (size == 0)
5550    ; /* do nothing.  */
5551  else if (sparc_leaf_function_p)
5552    {
5553      rtx size_int_rtx = GEN_INT (-size);
5554
5555      if (size <= 4096)
5556	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5557      else if (size <= 8192)
5558	{
5559	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5560	  RTX_FRAME_RELATED_P (insn) = 1;
5561
5562	  /* %sp is still the CFA register.  */
5563	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5564	}
5565      else
5566	{
5567	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
5568	  emit_move_insn (size_rtx, size_int_rtx);
5569	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5570	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5571			gen_stack_pointer_inc (size_int_rtx));
5572	}
5573
5574      RTX_FRAME_RELATED_P (insn) = 1;
5575    }
5576  else
5577    {
5578      rtx size_int_rtx = GEN_INT (-size);
5579
5580      if (size <= 4096)
5581	emit_window_save (size_int_rtx);
5582      else if (size <= 8192)
5583	{
5584	  emit_window_save (GEN_INT (-4096));
5585
5586	  /* %sp is not the CFA register anymore.  */
5587	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5588
5589	  /* Make sure no %fp-based store is issued until after the frame is
5590	     established.  The offset between the frame pointer and the stack
5591	     pointer is calculated relative to the value of the stack pointer
5592	     at the end of the function prologue, and moving instructions that
5593	     access the stack via the frame pointer between the instructions
5594	     that decrement the stack pointer could result in accessing the
5595	     register window save area, which is volatile.  */
5596	  emit_insn (gen_frame_blockage ());
5597	}
5598      else
5599	{
5600	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
5601	  emit_move_insn (size_rtx, size_int_rtx);
5602	  emit_window_save (size_rtx);
5603	}
5604    }
5605
5606  if (sparc_leaf_function_p)
5607    {
5608      sparc_frame_base_reg = stack_pointer_rtx;
5609      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5610    }
5611  else
5612    {
5613      sparc_frame_base_reg = hard_frame_pointer_rtx;
5614      sparc_frame_base_offset = SPARC_STACK_BIAS;
5615    }
5616
5617  if (sparc_n_global_fp_regs > 0)
5618    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5619				         sparc_frame_base_offset
5620					   - sparc_apparent_frame_size,
5621					 SORR_SAVE);
5622
5623  /* Load the GOT register if needed.  */
5624  if (crtl->uses_pic_offset_table)
5625    load_got_register ();
5626
5627  /* Advertise that the data calculated just above are now valid.  */
5628  sparc_prologue_data_valid_p = true;
5629}
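
/* Rough picture of what the above emits for a small frame of SIZE bytes
   (SIZE <= 4096; mnemonics illustrative):

	add	%sp, -SIZE, %sp		! leaf function
	save	%sp, -SIZE, %sp		! regular function

   The leaf variant keeps running in the caller's register window, while
   the save instruction both allocates the frame and switches to a fresh
   window.  */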
5630
5631/* Expand the function prologue for the flat register window model.  The
5632   prologue is responsible for reserving storage for the frame, saving the
5633   call-saved registers and loading the GOT register if needed.  */
5634
5635void
5636sparc_flat_expand_prologue (void)
5637{
5638  HOST_WIDE_INT size;
5639  rtx_insn *insn;
5640
5641  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5642
5643  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5644
5645  if (flag_stack_usage_info)
5646    current_function_static_stack_size = size;
5647
5648  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5649    {
5650      if (crtl->is_leaf && !cfun->calls_alloca)
5651	{
5652	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5653	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5654					  size - STACK_CHECK_PROTECT);
5655	}
5656      else if (size > 0)
5657	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5658    }
5659
5660  if (sparc_save_local_in_regs_p)
5661    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5662					SORR_SAVE);
5663
5664  if (size == 0)
5665    ; /* do nothing.  */
5666  else
5667    {
5668      rtx size_int_rtx, size_rtx;
5669
5670      size_rtx = size_int_rtx = GEN_INT (-size);
5671
5672      /* We establish the frame (i.e. decrement the stack pointer) first, even
5673	 if we use a frame pointer, because we cannot clobber any call-saved
5674	 registers, including the frame pointer, if we haven't created a new
5675	 register save area, for the sake of compatibility with the ABI.  */
5676      if (size <= 4096)
5677	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5678      else if (size <= 8192 && !frame_pointer_needed)
5679	{
5680	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5681	  RTX_FRAME_RELATED_P (insn) = 1;
5682	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5683	}
5684      else
5685	{
5686	  size_rtx = gen_rtx_REG (Pmode, 1);
5687	  emit_move_insn (size_rtx, size_int_rtx);
5688	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5689	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
5690			gen_stack_pointer_inc (size_int_rtx));
5691	}
5692      RTX_FRAME_RELATED_P (insn) = 1;
5693
5694      /* Ensure nothing is scheduled until after the frame is established.  */
5695      emit_insn (gen_blockage ());
5696
5697      if (frame_pointer_needed)
5698	{
5699	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5700					 gen_rtx_MINUS (Pmode,
5701							stack_pointer_rtx,
5702							size_rtx)));
5703	  RTX_FRAME_RELATED_P (insn) = 1;
5704
5705	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
5706			gen_rtx_SET (hard_frame_pointer_rtx,
5707				     plus_constant (Pmode, stack_pointer_rtx,
5708						    size)));
5709	}
5710
5711      if (return_addr_reg_needed_p (sparc_leaf_function_p))
5712	{
5713	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5714	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5715
5716	  insn = emit_move_insn (i7, o7);
5717	  RTX_FRAME_RELATED_P (insn) = 1;
5718
5719	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5720
5721	  /* Prevent this instruction from ever being considered dead,
5722	     even if this function has no epilogue.  */
5723	  emit_use (i7);
5724	}
5725    }
5726
5727  if (frame_pointer_needed)
5728    {
5729      sparc_frame_base_reg = hard_frame_pointer_rtx;
5730      sparc_frame_base_offset = SPARC_STACK_BIAS;
5731    }
5732  else
5733    {
5734      sparc_frame_base_reg = stack_pointer_rtx;
5735      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5736    }
5737
5738  if (sparc_n_global_fp_regs > 0)
5739    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5740				         sparc_frame_base_offset
5741					   - sparc_apparent_frame_size,
5742					 SORR_SAVE);
5743
5744  /* Load the GOT register if needed.  */
5745  if (crtl->uses_pic_offset_table)
5746    load_got_register ();
5747
5748  /* Advertise that the data calculated just above are now valid.  */
5749  sparc_prologue_data_valid_p = true;
5750}
5751
5752/* This function generates the assembly code for function entry, which boils
5753   down to emitting the necessary .register directives.  */
5754
5755static void
5756sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5757{
5758  /* Check that the assumption we made in sparc_expand_prologue is valid.  */
5759  if (!TARGET_FLAT)
5760    gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5761
5762  sparc_output_scratch_registers (file);
5763}
5764
5765/* Expand the function epilogue, either normal or part of a sibcall.
5766   We emit all the instructions except the return or the call.  */
5767
5768void
5769sparc_expand_epilogue (bool for_eh)
5770{
5771  HOST_WIDE_INT size = sparc_frame_size;
5772
5773  if (cfun->calls_alloca)
5774    emit_insn (gen_frame_blockage ());
5775
5776  if (sparc_n_global_fp_regs > 0)
5777    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5778				         sparc_frame_base_offset
5779					   - sparc_apparent_frame_size,
5780					 SORR_RESTORE);
5781
5782  if (size == 0 || for_eh)
5783    ; /* do nothing.  */
5784  else if (sparc_leaf_function_p)
5785    {
5786      if (size <= 4096)
5787	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5788      else if (size <= 8192)
5789	{
5790	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5791	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5792	}
5793      else
5794	{
5795	  rtx reg = gen_rtx_REG (Pmode, 1);
5796	  emit_move_insn (reg, GEN_INT (size));
5797	  emit_insn (gen_stack_pointer_inc (reg));
5798	}
5799    }
5800}
5801
5802/* Expand the function epilogue for the flat model, either normal or part of
5803   a sibcall.  We emit all the instructions except the return or the call.  */
5804
5805void
5806sparc_flat_expand_epilogue (bool for_eh)
5807{
5808  HOST_WIDE_INT size = sparc_frame_size;
5809
5810  if (sparc_n_global_fp_regs > 0)
5811    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5812				         sparc_frame_base_offset
5813					   - sparc_apparent_frame_size,
5814					 SORR_RESTORE);
5815
5816  /* If we have a frame pointer, we'll need both to restore it before the
5817     frame is destroyed and to use its current value in destroying the frame.
5818     Since we don't have an atomic way to do that in the flat window model,
5819     we save the current value into a temporary register (%g1).  */
5820  if (frame_pointer_needed && !for_eh)
5821    emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5822
5823  if (return_addr_reg_needed_p (sparc_leaf_function_p))
5824    emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5825		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5826
5827  if (sparc_save_local_in_regs_p)
5828    emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5829					sparc_frame_base_offset,
5830					SORR_RESTORE);
5831
5832  if (size == 0 || for_eh)
5833    ; /* do nothing.  */
5834  else if (frame_pointer_needed)
5835    {
5836      /* Make sure the frame is destroyed after everything else is done.  */
5837      emit_insn (gen_blockage ());
5838
5839      emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5840    }
5841  else
5842    {
5843      /* Likewise.  */
5844      emit_insn (gen_blockage ());
5845
5846      if (size <= 4096)
5847	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5848      else if (size <= 8192)
5849	{
5850	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5851	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5852	}
5853      else
5854	{
5855	  rtx reg = gen_rtx_REG (Pmode, 1);
5856	  emit_move_insn (reg, GEN_INT (size));
5857	  emit_insn (gen_stack_pointer_inc (reg));
5858	}
5859    }
5860}
5861
5862/* Return true if it is appropriate to emit `return' instructions in the
5863   body of a function.  */
5864
5865bool
5866sparc_can_use_return_insn_p (void)
5867{
5868  return sparc_prologue_data_valid_p
5869	 && sparc_n_global_fp_regs == 0
5870	 && (TARGET_FLAT
5871	     ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5872	     : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5873}
5874
5875/* This function generates the assembly code for function exit.  */
5876
5877static void
5878sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5879{
5880  /* If the last two instructions of a function are "call foo; dslot;"
5881     the return address might point to the first instruction in the next
5882     function and we have to output a dummy nop for the sake of sane
5883     backtraces in such cases.  This is pointless for sibling calls since
5884     the return address is explicitly adjusted.  */
5885
5886  rtx insn, last_real_insn;
5887
5888  insn = get_last_insn ();
5889
5890  last_real_insn = prev_real_insn (insn);
5891  if (last_real_insn
5892      && NONJUMP_INSN_P (last_real_insn)
5893      && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5894    last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5895
5896  if (last_real_insn
5897      && CALL_P (last_real_insn)
5898      && !SIBLING_CALL_P (last_real_insn))
5899    fputs("\tnop\n", file);
5900
5901  sparc_output_deferred_case_vectors ();
5902}
5903
5904/* Output a 'restore' instruction.  */
5905
5906static void
5907output_restore (rtx pat)
5908{
5909  rtx operands[3];
5910
5911  if (! pat)
5912    {
5913      fputs ("\t restore\n", asm_out_file);
5914      return;
5915    }
5916
5917  gcc_assert (GET_CODE (pat) == SET);
5918
5919  operands[0] = SET_DEST (pat);
5920  pat = SET_SRC (pat);
5921
5922  switch (GET_CODE (pat))
5923    {
5924      case PLUS:
5925	operands[1] = XEXP (pat, 0);
5926	operands[2] = XEXP (pat, 1);
5927	output_asm_insn (" restore %r1, %2, %Y0", operands);
5928	break;
5929      case LO_SUM:
5930	operands[1] = XEXP (pat, 0);
5931	operands[2] = XEXP (pat, 1);
5932	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5933	break;
5934      case ASHIFT:
5935	operands[1] = XEXP (pat, 0);
5936	gcc_assert (XEXP (pat, 1) == const1_rtx);
5937	output_asm_insn (" restore %r1, %r1, %Y0", operands);
5938	break;
5939      default:
5940	operands[1] = pat;
5941	output_asm_insn (" restore %%g0, %1, %Y0", operands);
5942	break;
5943    }
5944}
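
/* For example (a sketch; the exact operand printing depends on the %r and
   %Y output modifiers): a delay-slot insn computing the sum of two
   registers into the return-value register may be folded into

	restore	%i0, %i1, %o0

   performing the addition and the window switch in a single
   instruction.  */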
5945
5946/* Output a return.  */
5947
5948const char *
5949output_return (rtx_insn *insn)
5950{
5951  if (crtl->calls_eh_return)
5952    {
5953      /* If the function uses __builtin_eh_return, the eh_return
5954	 machinery occupies the delay slot.  */
5955      gcc_assert (!final_sequence);
5956
5957      if (flag_delayed_branch)
5958	{
5959	  if (!TARGET_FLAT && TARGET_V9)
5960	    fputs ("\treturn\t%i7+8\n", asm_out_file);
5961	  else
5962	    {
5963	      if (!TARGET_FLAT)
5964		fputs ("\trestore\n", asm_out_file);
5965
5966	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
5967	    }
5968
5969	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5970	}
5971      else
5972	{
5973	  if (!TARGET_FLAT)
5974	    fputs ("\trestore\n", asm_out_file);
5975
5976	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5977	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5978	}
5979    }
5980  else if (sparc_leaf_function_p || TARGET_FLAT)
5981    {
5982      /* This is a leaf or flat function so we don't have to bother restoring
5983	 the register window, which frees us from dealing with the convoluted
5984	 semantics of restore/return.  We simply output the jump to the
5985	 return address and the insn in the delay slot (if any).  */
5986
5987      return "jmp\t%%o7+%)%#";
5988    }
5989  else
5990    {
5991      /* This is a regular function so we have to restore the register window.
5992	 We may have a pending insn for the delay slot, which will be either
5993	 combined with the 'restore' instruction or put in the delay slot of
5994	 the 'return' instruction.  */
5995
5996      if (final_sequence)
5997	{
5998	  rtx delay, pat;
5999
6000	  delay = NEXT_INSN (insn);
6001	  gcc_assert (delay);
6002
6003	  pat = PATTERN (delay);
6004
6005	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6006	    {
6007	      epilogue_renumber (&pat, 0);
6008	      return "return\t%%i7+%)%#";
6009	    }
6010	  else
6011	    {
6012	      output_asm_insn ("jmp\t%%i7+%)", NULL);
6013	      output_restore (pat);
6014	      PATTERN (delay) = gen_blockage ();
6015	      INSN_CODE (delay) = -1;
6016	    }
6017	}
6018      else
6019        {
6020	  /* The delay slot is empty.  */
6021	  if (TARGET_V9)
6022	    return "return\t%%i7+%)\n\t nop";
6023	  else if (flag_delayed_branch)
6024	    return "jmp\t%%i7+%)\n\t restore";
6025	  else
6026	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
6027	}
6028    }
6029
6030  return "";
6031}
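
/* Typical sequences produced above (sketches; the %) modifier prints the
   return-address offset, normally 8): "return %i7+8" with a filled or nop
   delay slot on V9, "jmp %i7+8; restore" with delayed branches on pre-V9,
   and "restore; jmp %o7+8; nop" without delayed branches.  */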
6032
6033/* Output a sibling call.  */
6034
6035const char *
6036output_sibcall (rtx_insn *insn, rtx call_operand)
6037{
6038  rtx operands[1];
6039
6040  gcc_assert (flag_delayed_branch);
6041
6042  operands[0] = call_operand;
6043
6044  if (sparc_leaf_function_p || TARGET_FLAT)
6045    {
6046      /* This is a leaf or flat function so we don't have to bother restoring
6047	 the register window.  We simply output the jump to the function and
6048	 the insn in the delay slot (if any).  */
6049
6050      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6051
6052      if (final_sequence)
6053	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6054			 operands);
6055      else
6056	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6057	   it into a branch if possible.  */
6058	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6059			 operands);
6060    }
6061  else
6062    {
6063      /* This is a regular function so we have to restore the register window.
6064	 We may have a pending insn for the delay slot, which will be combined
6065	 with the 'restore' instruction.  */
6066
6067      output_asm_insn ("call\t%a0, 0", operands);
6068
6069      if (final_sequence)
6070	{
6071	  rtx_insn *delay = NEXT_INSN (insn);
6072	  gcc_assert (delay);
6073
6074	  output_restore (PATTERN (delay));
6075
6076	  PATTERN (delay) = gen_blockage ();
6077	  INSN_CODE (delay) = -1;
6078	}
6079      else
6080	output_restore (NULL_RTX);
6081    }
6082
6083  return "";
6084}
6085
6086/* Functions for handling argument passing.
6087
6088   For 32-bit, the first 6 args are normally in registers and the rest are
6089   pushed.  Any arg that starts within the first 6 words is at least
6090   partially passed in a register unless its data type forbids.
6091   partially passed in a register unless its data type forbids it.
6092   For 64-bit, the argument registers are laid out as an array of 16 elements
6093   and arguments are added sequentially.  The first 6 int args and up to the
6094   first 16 fp args (depending on size) are passed in regs.
6095
6096   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
6097   ----    -----   --------   -----   ------------------   ------   -----------
6098    15   [SP+248]              %f31       %f30,%f31         %d30
6099    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
6100    13   [SP+232]              %f27       %f26,%f27         %d26
6101    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
6102    11   [SP+216]              %f23       %f22,%f23         %d22
6103    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
6104     9   [SP+200]              %f19       %f18,%f19         %d18
6105     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
6106     7   [SP+184]              %f15       %f14,%f15         %d14
6107     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
6108     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
6109     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
6110     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
6111     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
6112     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
6113     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
6114
6115   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6116
6117   Integral arguments are always passed as 64-bit quantities appropriately
6118   extended.
6119
6120   Passing of floating point values is handled as follows.
6121   If a prototype is in scope:
6122     If the value is a named argument (i.e. not part of the `...' of a
6123     stdarg function) then the value is passed in the appropriate
6124     fp reg.
6125     If the value is part of the `...' and is passed in one of the first 6
6126     slots then the value is passed in the appropriate int reg.
6127     If the value is part of the `...' and is not passed in one of the first 6
6128     slots then the value is passed in memory.
6129   If a prototype is not in scope:
6130     If the value is one of the first 6 arguments the value is passed in the
6131     appropriate integer reg and the appropriate fp reg.
6132     If the value is not one of the first 6 arguments the value is passed in
6133     the appropriate fp reg and in memory.
6134
6135
6136   Summary of the calling conventions implemented by GCC on the SPARC:
6137
6138   32-bit ABI:
6139                                size      argument     return value
6140
6141      small integer              <4       int. reg.      int. reg.
6142      word                        4       int. reg.      int. reg.
6143      double word                 8       int. reg.      int. reg.
6144
6145      _Complex small integer     <8       int. reg.      int. reg.
6146      _Complex word               8       int. reg.      int. reg.
6147      _Complex double word       16        memory        int. reg.
6148
6149      vector integer            <=8       int. reg.       FP reg.
6150      vector integer             >8        memory         memory
6151
6152      float                       4       int. reg.       FP reg.
6153      double                      8       int. reg.       FP reg.
6154      long double                16        memory         memory
6155
6156      _Complex float              8        memory         FP reg.
6157      _Complex double            16        memory         FP reg.
6158      _Complex long double       32        memory         FP reg.
6159
6160      vector float              any        memory         memory
6161
6162      aggregate                 any        memory         memory
6163
6164
6165
6166    64-bit ABI:
6167                                size      argument     return value
6168
6169      small integer              <8       int. reg.      int. reg.
6170      word                        8       int. reg.      int. reg.
6171      double word                16       int. reg.      int. reg.
6172
6173      _Complex small integer    <16       int. reg.      int. reg.
6174      _Complex word              16       int. reg.      int. reg.
6175      _Complex double word       32        memory        int. reg.
6176
6177      vector integer           <=16        FP reg.        FP reg.
6178      vector integer       16<s<=32        memory         FP reg.
6179      vector integer            >32        memory         memory
6180
6181      float                       4        FP reg.        FP reg.
6182      double                      8        FP reg.        FP reg.
6183      long double                16        FP reg.        FP reg.
6184
6185      _Complex float              8        FP reg.        FP reg.
6186      _Complex double            16        FP reg.        FP reg.
6187      _Complex long double       32        memory         FP reg.
6188
6189      vector float             <=16        FP reg.        FP reg.
6190      vector float         16<s<=32        memory         FP reg.
6191      vector float              >32        memory         memory
6192
6193      aggregate                <=16         reg.           reg.
6194      aggregate            16<s<=32        memory          reg.
6195      aggregate                 >32        memory         memory
6196
6197
6198
6199Note #1: complex floating-point types follow the extended SPARC ABIs as
6200implemented by the Sun compiler.
6201
6202Note #2: integral vector types follow the scalar floating-point types
6203conventions to match what is implemented by the Sun VIS SDK.
6204
6205Note #3: floating-point vector types follow the aggregate types
6206conventions.  */
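
/* As a concrete illustration of the 64-bit scheme (a sketch; the function
   name is hypothetical), a prototyped call to

     double f (long a, double b, struct { float x; float y; } s);

   passes A in %o0 (slot 0), B in %d2 (slot 1) and the two floats of S in
   %f4/%f5 (slot 2), as per the table above.  */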
6207
6208
6209/* Maximum number of int regs for args.  */
6210#define SPARC_INT_ARG_MAX 6
6211/* Maximum number of fp regs for args.  */
6212#define SPARC_FP_ARG_MAX 16
6213/* Number of words (partially) occupied for a given size in units.  */
6214#define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6215
6216/* Handle the INIT_CUMULATIVE_ARGS macro.
6217   Initialize a variable CUM of type CUMULATIVE_ARGS
6218   for a call to a function whose data type is FNTYPE.
6219   For a library call, FNTYPE is 0.  */
6220
6221void
6222init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6223{
6224  cum->words = 0;
6225  cum->prototype_p = fntype && prototype_p (fntype);
6226  cum->libcall_p = !fntype;
6227}
6228
6229/* Handle promotion of pointer and integer arguments.  */
6230
6231static machine_mode
6232sparc_promote_function_mode (const_tree type, machine_mode mode,
6233			     int *punsignedp, const_tree, int)
6234{
6235  if (type && POINTER_TYPE_P (type))
6236    {
6237      *punsignedp = POINTERS_EXTEND_UNSIGNED;
6238      return Pmode;
6239    }
6240
6241  /* Integral arguments are passed as full words, as per the ABI.  */
6242  if (GET_MODE_CLASS (mode) == MODE_INT
6243      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6244    return word_mode;
6245
6246  return mode;
6247}
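
/* E.g. a `short' argument is widened to a full word (SImode in 32-bit
   mode, DImode in 64-bit mode) and a pointer is extended to Pmode with
   the signedness given by POINTERS_EXTEND_UNSIGNED.  */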
6248
6249/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
6250
6251static bool
6252sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6253{
6254  return TARGET_ARCH64;
6255}
6256
6257/* Traverse the record TYPE recursively and call FUNC on its fields.
6258   NAMED is true if this is for a named parameter.  DATA is passed
6259   to FUNC for each field.  OFFSET is the starting position and
6260   PACKED is true if we are inside a packed record.  */
6261
6262template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6263static void
6264traverse_record_type (const_tree type, bool named, T *data,
6265		      HOST_WIDE_INT offset = 0, bool packed = false)
6266{
6267  /* The ABI obviously doesn't specify how packed structures are passed.
6268     These are passed in integer regs if possible, otherwise memory.  */
6269  if (!packed)
6270    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6271      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6272	{
6273	  packed = true;
6274	  break;
6275	}
6276
6277  /* Walk the real fields, but skip those with no size or a zero size.
6278     ??? Fields with variable offset are handled as having zero offset.  */
6279  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6280    if (TREE_CODE (field) == FIELD_DECL)
6281      {
6282	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6283	  continue;
6284
6285	HOST_WIDE_INT bitpos = offset;
6286	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6287	  bitpos += int_bit_position (field);
6288
6289	tree field_type = TREE_TYPE (field);
6290	if (TREE_CODE (field_type) == RECORD_TYPE)
6291	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
6292					 packed);
6293	else
6294	  {
6295	    const bool fp_type
6296	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6297	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6298		  data);
6299	  }
6300      }
6301}
6302
6303/* Handle recursive register classifying for structure layout.  */
6304
6305typedef struct
6306{
6307  bool fp_regs;		/* true if field eligible to FP registers.  */
6308  bool fp_regs_in_first_word;	/* true if such field in first word.  */
6309} classify_data_t;
6310
6311/* A subroutine of function_arg_slotno.  Classify the field.  */
6312
6313inline void
6314classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6315		    classify_data_t *data)
6316{
6317  if (fp)
6318    {
6319      data->fp_regs = true;
6320      if (bitpos < BITS_PER_WORD)
6321	data->fp_regs_in_first_word = true;
6322    }
6323}
6324
6325/* Compute the slot number to pass an argument in.
6326   Return the slot number or -1 if passing on the stack.
6327
6328   CUM is a variable of type CUMULATIVE_ARGS which gives info about
6329    the preceding args and about the function being called.
6330   MODE is the argument's machine mode.
6331   TYPE is the data type of the argument (as a tree).
6332    This is null for libcalls where that information may
6333    not be available.
6334   NAMED is nonzero if this argument is a named parameter
6335    (otherwise it is an extra parameter matching an ellipsis).
6336   INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6337   *PREGNO records the register number to use if scalar type.
6338   *PPADDING records the amount of padding needed in words.  */
6339
6340static int
6341function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6342		     const_tree type, bool named, bool incoming,
6343		     int *pregno, int *ppadding)
6344{
6345  int regbase = (incoming
6346		 ? SPARC_INCOMING_INT_ARG_FIRST
6347		 : SPARC_OUTGOING_INT_ARG_FIRST);
6348  int slotno = cum->words;
6349  enum mode_class mclass;
6350  int regno;
6351
6352  *ppadding = 0;
6353
6354  if (type && TREE_ADDRESSABLE (type))
6355    return -1;
6356
6357  if (TARGET_ARCH32
6358      && mode == BLKmode
6359      && type
6360      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6361    return -1;
6362
6363  /* For SPARC64, objects requiring 16-byte alignment get it.  */
6364  if (TARGET_ARCH64
6365      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6366      && (slotno & 1) != 0)
6367    slotno++, *ppadding = 1;
6368
6369  mclass = GET_MODE_CLASS (mode);
6370  if (type && TREE_CODE (type) == VECTOR_TYPE)
6371    {
6372      /* Vector types deserve special treatment because they are
6373	 polymorphic wrt their mode, depending upon whether VIS
6374	 instructions are enabled.  */
6375      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6376	{
6377	  /* The SPARC port defines no floating-point vector modes.  */
6378	  gcc_assert (mode == BLKmode);
6379	}
6380      else
6381	{
6382	  /* Integral vector types should either have a vector
6383	     mode or an integral mode, because we are guaranteed
6384	     by pass_by_reference that their size is not greater
6385	     than 16 bytes and TImode is 16-byte wide.  */
6386	  gcc_assert (mode != BLKmode);
6387
6388	  /* Vector integers are handled like floats according to
6389	     the Sun VIS SDK.  */
6390	  mclass = MODE_FLOAT;
6391	}
6392    }
6393
6394  switch (mclass)
6395    {
6396    case MODE_FLOAT:
6397    case MODE_COMPLEX_FLOAT:
6398    case MODE_VECTOR_INT:
6399      if (TARGET_ARCH64 && TARGET_FPU && named)
6400	{
6401	  /* If all arg slots are filled, then must pass on stack.  */
6402	  if (slotno >= SPARC_FP_ARG_MAX)
6403	    return -1;
6404
6405	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
6406	  /* An argument filling only a single FP register is
6407	     right-justified within its enclosing double FP register.  */
6408	  if (GET_MODE_SIZE (mode) <= 4)
6409	    regno++;
6410	  break;
6411	}
6412      /* fallthrough */
6413
6414    case MODE_INT:
6415    case MODE_COMPLEX_INT:
6416      /* If all arg slots are filled, then must pass on stack.  */
6417      if (slotno >= SPARC_INT_ARG_MAX)
6418	return -1;
6419
6420      regno = regbase + slotno;
6421      break;
6422
6423    case MODE_RANDOM:
6424      if (mode == VOIDmode)
6425	/* MODE is VOIDmode when generating the actual call.  */
6426	return -1;
6427
6428      gcc_assert (mode == BLKmode);
6429
6430      if (TARGET_ARCH32
6431	  || !type
6432	  || (TREE_CODE (type) != RECORD_TYPE
6433	      && TREE_CODE (type) != VECTOR_TYPE))
6434	{
6435	  /* If all arg slots are filled, then must pass on stack.  */
6436	  if (slotno >= SPARC_INT_ARG_MAX)
6437	    return -1;
6438
6439	  regno = regbase + slotno;
6440	}
6441      else  /* TARGET_ARCH64 && type */
6442	{
6443	  /* If all arg slots are filled, then must pass on stack.  */
6444	  if (slotno >= SPARC_FP_ARG_MAX)
6445	    return -1;
6446
6447	  if (TREE_CODE (type) == RECORD_TYPE)
6448	    {
6449	      classify_data_t data = { false, false };
6450	      traverse_record_type<classify_data_t, classify_registers>
6451		(type, named, &data);
6452
6453	      if (data.fp_regs)
6454		{
6455		  /* If all FP slots are filled except for the last one and
6456		     there is no FP field in the first word, then must pass
6457		     on stack.  */
6458		  if (slotno >= SPARC_FP_ARG_MAX - 1
6459		      && !data.fp_regs_in_first_word)
6460		    return -1;
6461		}
6462	      else
6463		{
6464		  /* If all int slots are filled, then must pass on stack.  */
6465		  if (slotno >= SPARC_INT_ARG_MAX)
6466		    return -1;
6467		}
6468	    }
6469
6470	  /* PREGNO isn't set since both int and FP regs can be used.  */
6471	  return slotno;
6472	}
6473      break;
6474
6475    default:
6476      gcc_unreachable ();
6477    }
6478
6479  *pregno = regno;
6480  return slotno;
6481}
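
/* Example of the 16-byte alignment rule above (a sketch): in 64-bit code,
   after a single `int' argument in slot 0, a `long double' argument must
   be 16-byte aligned, so slot 1 is skipped (*PPADDING is set to 1) and
   the value lands in slots 2-3, i.e. %q4 in the table further up.  */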
6482
6483/* Handle recursive register counting/assigning for structure layout.  */
6484
6485typedef struct
6486{
6487  int slotno;		/* slot number of the argument.  */
6488  int regbase;		/* regno of the base register.  */
6489  int intoffset;	/* offset of the first pending integer field.  */
6490  int nregs;		/* number of words passed in registers.  */
6491  bool stack;		/* true if part of the argument is on the stack.  */
6492  rtx ret;		/* return expression being built.  */
6493} assign_data_t;
6494
6495/* A subroutine of function_arg_record_value.  Compute the number of integer
6496   registers to be assigned between PARMS->intoffset and BITPOS.  Return
6497   true if at least one integer register is assigned or false otherwise.  */
6498
6499static bool
6500compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6501{
6502  if (data->intoffset < 0)
6503    return false;
6504
6505  const int intoffset = data->intoffset;
6506  data->intoffset = -1;
6507
6508  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6509  const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6510  const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6511  int nregs = (endbit - startbit) / BITS_PER_WORD;
6512
6513  if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6514    {
6515      nregs = SPARC_INT_ARG_MAX - this_slotno;
6516
6517      /* We need to pass this field (partly) on the stack.  */
6518      data->stack = 1;
6519    }
6520
6521  if (nregs <= 0)
6522    return false;
6523
6524  *pnregs = nregs;
6525  return true;
6526}
6527
6528/* A subroutine of function_arg_record_value.  Compute the number and the mode
6529   of the FP registers to be assigned for FIELD.  Return true if at least one
6530   FP register is assigned or false otherwise.  */
6531
6532static bool
6533compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6534		   assign_data_t *data,
6535		   int *pnregs, machine_mode *pmode)
6536{
6537  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6538  machine_mode mode = DECL_MODE (field);
6539  int nregs, nslots;
6540
6541  /* Slots are counted as words while regs are counted as having the size of
6542     the (inner) mode.  */
6543  if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6544    {
6545      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6546      nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6547    }
6548  else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6549    {
6550      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6551      nregs = 2;
6552    }
6553  else
6554    nregs = 1;
6555
6556  nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6557
6558  if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6559    {
6560      nslots = SPARC_FP_ARG_MAX - this_slotno;
6561      nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6562
6563      /* We need to pass this field (partly) on the stack.  */
6564      data->stack = 1;
6565
6566      if (nregs <= 0)
6567	return false;
6568    }
6569
6570  *pnregs = nregs;
6571  *pmode = mode;
6572  return true;
6573}
6574
6575/* A subroutine of function_arg_record_value.  Count the number of registers
6576   to be assigned for FIELD and between PARMS->intoffset and BITPOS.  */
6577
6578inline void
6579count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6580		 assign_data_t *data)
6581{
6582  if (fp)
6583    {
6584      int nregs;
6585      machine_mode mode;
6586
6587      if (compute_int_layout (bitpos, data, &nregs))
6588	data->nregs += nregs;
6589
6590      if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6591	data->nregs += nregs;
6592    }
6593  else
6594    {
6595      if (data->intoffset < 0)
6596	data->intoffset = bitpos;
6597    }
6598}
6599
6600/* A subroutine of function_arg_record_value.  Assign the bits of the
6601   structure between PARMS->intoffset and BITPOS to integer registers.  */
6602
6603static void
6604assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6605{
6606  int intoffset = data->intoffset;
6607  machine_mode mode;
6608  int nregs;
6609
6610  if (!compute_int_layout (bitpos, data, &nregs))
6611    return;
6612
6613  /* If this is the trailing part of a word, only load that much into
6614     the register.  Otherwise load the whole register.  Note that in
6615     the latter case we may pick up unwanted bits.  It's not a problem
6616     at the moment, but we may wish to revisit this.  */
6617  if (intoffset % BITS_PER_WORD != 0)
6618    mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6619				   MODE_INT);
6620  else
6621    mode = word_mode;
6622
6623  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6624  unsigned int regno = data->regbase + this_slotno;
6625  intoffset /= BITS_PER_UNIT;
6626
6627  do
6628    {
6629      rtx reg = gen_rtx_REG (mode, regno);
6630      XVECEXP (data->ret, 0, data->stack + data->nregs)
6631	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6632      data->nregs += 1;
6633      mode = word_mode;
6634      regno += 1;
6635      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6636    }
6637  while (--nregs > 0);
6638}
6639
6640/* A subroutine of function_arg_record_value.  Assign FIELD at position
6641   BITPOS to FP registers.  */
6642
6643static void
6644assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6645			     assign_data_t *data)
6646{
6647  int nregs;
6648  machine_mode mode;
6649
6650  if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6651    return;
6652
6653  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6654  int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6655  if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6656    regno++;
6657  int pos = bitpos / BITS_PER_UNIT;
6658
6659  do
6660    {
6661      rtx reg = gen_rtx_REG (mode, regno);
6662      XVECEXP (data->ret, 0, data->stack + data->nregs)
6663	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6664      data->nregs += 1;
6665      regno += GET_MODE_SIZE (mode) / 4;
6666      pos += GET_MODE_SIZE (mode);
6667    }
6668  while (--nregs > 0);
6669}
6670
6671/* A subroutine of function_arg_record_value.  Assign FIELD and the bits of
6672   the structure between PARMS->intoffset and BITPOS to registers.  */
6673
6674inline void
6675assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6676		  assign_data_t *data)
6677{
6678  if (fp)
6679    {
6680      assign_int_registers (bitpos, data);
6681
6682      assign_fp_registers (field, bitpos, data);
6683    }
6684  else
6685    {
6686      if (data->intoffset < 0)
6687	data->intoffset = bitpos;
6688    }
6689}
6690
6691/* Used by function_arg and sparc_function_value_1 to implement the complex
6692   conventions of the 64-bit ABI for passing and returning structures.
6693   Return an expression valid as a return value for the FUNCTION_ARG
6694   and TARGET_FUNCTION_VALUE.
6695
6696   TYPE is the data type of the argument (as a tree).
6697    This is null for libcalls where that information may
6698    not be available.
6699   MODE is the argument's machine mode.
6700   SLOTNO is the index number of the argument's slot in the parameter array.
6701   NAMED is true if this argument is a named parameter
6702    (otherwise it is an extra parameter matching an ellipsis).
6703   REGBASE is the regno of the base register for the parameter array.  */
6704
6705static rtx
6706function_arg_record_value (const_tree type, machine_mode mode,
6707			   int slotno, bool named, int regbase)
6708{
6709  HOST_WIDE_INT typesize = int_size_in_bytes (type);
6710  assign_data_t data;
6711  int nregs;
6712
6713  data.slotno = slotno;
6714  data.regbase = regbase;
6715
6716  /* Count how many registers we need.  */
6717  data.nregs = 0;
6718  data.intoffset = 0;
6719  data.stack = false;
6720  traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6721
6722  /* Take into account pending integer fields.  */
6723  if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6724    data.nregs += nregs;
6725
6726  /* Allocate the vector and handle some annoying special cases.  */
6727  nregs = data.nregs;
6728
6729  if (nregs == 0)
6730    {
6731      /* ??? Empty structure has no value?  Duh?  */
6732      if (typesize <= 0)
6733	{
6734	  /* Though there's nothing really to store, return a word register
6735	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
6736	     leads to breakage due to the fact that there are zero bytes to
6737	     load.  */
6738	  return gen_rtx_REG (mode, regbase);
6739	}
6740
6741      /* ??? C++ has structures with no fields, and yet a size.  Give up
6742	 for now and pass everything back in integer registers.  */
6743      nregs = CEIL_NWORDS (typesize);
6744      if (nregs + slotno > SPARC_INT_ARG_MAX)
6745	nregs = SPARC_INT_ARG_MAX - slotno;
6746    }
6747
6748  gcc_assert (nregs > 0);
6749
6750  data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6751
6752  /* If at least one field must be passed on the stack, generate
6753     (parallel [(expr_list (nil) ...) ...]) so that all fields will
6754     also be passed on the stack.  We can't do much better because the
6755     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6756     of structures for which the fields passed exclusively in registers
6757     are not at the beginning of the structure.  */
6758  if (data.stack)
6759    XVECEXP (data.ret, 0, 0)
6760      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6761
6762  /* Assign the registers.  */
6763  data.nregs = 0;
6764  data.intoffset = 0;
6765  traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6766
6767  /* Assign pending integer fields.  */
6768  assign_int_registers (typesize * BITS_PER_UNIT, &data);
6769
6770  gcc_assert (data.nregs == nregs);
6771
6772  return data.ret;
6773}
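
/* Sketch of a resulting PARALLEL, assuming a prototyped 64-bit call
   passing struct { long l; double d; } in slots 0-1:

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
		(expr_list (reg:DF %f2) (const_int 8))])

   i.e. the integer field travels in the slot-0 integer register and the
   FP field in the slot-1 FP register (%d2 in the table above).  */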
6774
6775/* Used by function_arg and sparc_function_value_1 to implement the conventions
6776   of the 64-bit ABI for passing and returning unions.
6777   Return an expression valid as a return value for the FUNCTION_ARG
6778   and TARGET_FUNCTION_VALUE.
6779
6780   SIZE is the size in bytes of the union.
6781   MODE is the argument's machine mode.
6782   REGNO is the hard register the union will be passed in.  */
6783
6784static rtx
6785function_arg_union_value (int size, machine_mode mode, int slotno,
6786			  int regno)
6787{
6788  int nwords = CEIL_NWORDS (size), i;
6789  rtx regs;
6790
6791  /* See comment in previous function for empty structures.  */
6792  if (nwords == 0)
6793    return gen_rtx_REG (mode, regno);
6794
6795  if (slotno == SPARC_INT_ARG_MAX - 1)
6796    nwords = 1;
6797
6798  regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6799
6800  for (i = 0; i < nwords; i++)
6801    {
6802      /* Unions are passed left-justified.  */
6803      XVECEXP (regs, 0, i)
6804	= gen_rtx_EXPR_LIST (VOIDmode,
6805			     gen_rtx_REG (word_mode, regno),
6806			     GEN_INT (UNITS_PER_WORD * i));
6807      regno++;
6808    }
6809
6810  return regs;
6811}
6812
6813/* Used by function_arg and sparc_function_value_1 to implement the conventions
6814   for passing and returning BLKmode vectors.
6815   Return an expression valid as a return value for the FUNCTION_ARG
6816   and TARGET_FUNCTION_VALUE.
6817
6818   SIZE is the size in bytes of the vector.
6819   REGNO is the FP hard register the vector will be passed in.  */
6820
6821static rtx
6822function_arg_vector_value (int size, int regno)
6823{
6824  const int nregs = MAX (1, size / 8);
6825  rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6826
6827  if (size < 8)
6828    XVECEXP (regs, 0, 0)
6829      = gen_rtx_EXPR_LIST (VOIDmode,
6830			   gen_rtx_REG (SImode, regno),
6831			   const0_rtx);
6832  else
6833    for (int i = 0; i < nregs; i++)
6834      XVECEXP (regs, 0, i)
6835	= gen_rtx_EXPR_LIST (VOIDmode,
6836			     gen_rtx_REG (DImode, regno + 2*i),
6837			     GEN_INT (i*8));
6838
6839  return regs;
6840}
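
/* E.g. a 16-byte vector starting at %f0 is described as

     (parallel [(expr_list (reg:DI %f0) (const_int 0))
                (expr_list (reg:DI %f2) (const_int 8))])

   while a vector smaller than 8 bytes is a single SImode piece in the
   given register.  */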
6841
6842/* Determine where to put an argument to a function.
6843   Value is zero to push the argument on the stack,
6844   or a hard register in which to store the argument.
6845
6846   CUM is a variable of type CUMULATIVE_ARGS which gives info about
6847    the preceding args and about the function being called.
6848   MODE is the argument's machine mode.
6849   TYPE is the data type of the argument (as a tree).
6850    This is null for libcalls where that information may
6851    not be available.
6852   NAMED is true if this argument is a named parameter
6853    (otherwise it is an extra parameter matching an ellipsis).
6854   INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6855    TARGET_FUNCTION_INCOMING_ARG.  */
6856
6857static rtx
6858sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6859		      const_tree type, bool named, bool incoming)
6860{
6861  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6862
6863  int regbase = (incoming
6864		 ? SPARC_INCOMING_INT_ARG_FIRST
6865		 : SPARC_OUTGOING_INT_ARG_FIRST);
6866  int slotno, regno, padding;
6867  enum mode_class mclass = GET_MODE_CLASS (mode);
6868
6869  slotno = function_arg_slotno (cum, mode, type, named, incoming,
6870				&regno, &padding);
6871  if (slotno == -1)
6872    return 0;
6873
6874  /* Vector types deserve special treatment because they are polymorphic wrt
6875     their mode, depending upon whether VIS instructions are enabled.  */
6876  if (type && TREE_CODE (type) == VECTOR_TYPE)
6877    {
6878      HOST_WIDE_INT size = int_size_in_bytes (type);
6879      gcc_assert ((TARGET_ARCH32 && size <= 8)
6880		  || (TARGET_ARCH64 && size <= 16));
6881
6882      if (mode == BLKmode)
6883	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6884
6885      mclass = MODE_FLOAT;
6886    }
6887
6888  if (TARGET_ARCH32)
6889    return gen_rtx_REG (mode, regno);
6890
6891  /* Structures up to 16 bytes in size are passed in arg slots on the stack
6892     and are promoted to registers if possible.  */
6893  if (type && TREE_CODE (type) == RECORD_TYPE)
6894    {
6895      HOST_WIDE_INT size = int_size_in_bytes (type);
6896      gcc_assert (size <= 16);
6897
6898      return function_arg_record_value (type, mode, slotno, named, regbase);
6899    }
6900
6901  /* Unions up to 16 bytes in size are passed in integer registers.  */
6902  else if (type && TREE_CODE (type) == UNION_TYPE)
6903    {
6904      HOST_WIDE_INT size = int_size_in_bytes (type);
6905      gcc_assert (size <= 16);
6906
6907      return function_arg_union_value (size, mode, slotno, regno);
6908    }
6909
6910  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6911     but also have the slot allocated for them.
6912     If no prototype is in scope fp values in register slots get passed
6913     in two places, either fp regs and int regs or fp regs and memory.  */
6914  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6915	   && SPARC_FP_REG_P (regno))
6916    {
6917      rtx reg = gen_rtx_REG (mode, regno);
6918      if (cum->prototype_p || cum->libcall_p)
6919	return reg;
6920      else
6921	{
6922	  rtx v0, v1;
6923
6924	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6925	    {
6926	      int intreg;
6927
6928	      /* On incoming, we don't need to know that the value
6929		 is passed in %f0 and %i0, and it confuses other parts
6930		 causing needless spillage even on the simplest cases.  */
6931	      if (incoming)
6932		return reg;
6933
6934	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6935			+ (regno - SPARC_FP_ARG_FIRST) / 2);
6936
6937	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6938	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6939				      const0_rtx);
6940	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6941	    }
6942	  else
6943	    {
6944	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6945	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6946	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6947	    }
6948	}
6949    }
6950
6951  /* All other aggregate types are passed in an integer register in a mode
6952     corresponding to the size of the type.  */
6953  else if (type && AGGREGATE_TYPE_P (type))
6954    {
6955      HOST_WIDE_INT size = int_size_in_bytes (type);
6956      gcc_assert (size <= 16);
6957
6958      mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6959    }
6960
6961  return gen_rtx_REG (mode, regno);
6962}
6963
6964/* Handle the TARGET_FUNCTION_ARG target hook.  */
6965
6966static rtx
6967sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6968		    const_tree type, bool named)
6969{
6970  return sparc_function_arg_1 (cum, mode, type, named, false);
6971}
6972
6973/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook.  */
6974
6975static rtx
6976sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6977			     const_tree type, bool named)
6978{
6979  return sparc_function_arg_1 (cum, mode, type, named, true);
6980}
6981
/* For sparc64, objects requiring 16-byte alignment are passed that way.  */
6983
6984static unsigned int
6985sparc_function_arg_boundary (machine_mode mode, const_tree type)
6986{
6987  return ((TARGET_ARCH64
6988	   && (GET_MODE_ALIGNMENT (mode) == 128
6989	       || (type && TYPE_ALIGN (type) == 128)))
6990	  ? 128
6991	  : PARM_BOUNDARY);
6992}
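
/* For instance, on ARCH64 a 'long double' (TFmode) or a struct whose
   TYPE_ALIGN is 128 gets a 16-byte boundary; everything else gets the
   default PARM_BOUNDARY.  */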
6993
6994/* For an arg passed partly in registers and partly in memory,
6995   this is the number of bytes of registers used.
6996   For args passed entirely in registers or entirely in memory, zero.
6997
6998   Any arg that starts in the first 6 regs but won't entirely fit in them
6999   needs partial registers on v8.  On v9, structures with integer
7000   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7001   values that begin in the last fp reg [where "last fp reg" varies with the
7002   mode] will be split between that reg and memory.  */
7003
7004static int
7005sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7006			 tree type, bool named)
7007{
7008  int slotno, regno, padding;
7009
  /* We pass false for incoming here; it doesn't matter.  */
7011  slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7012				false, &regno, &padding);
7013
7014  if (slotno == -1)
7015    return 0;
7016
7017  if (TARGET_ARCH32)
7018    {
7019      if ((slotno + (mode == BLKmode
7020		     ? CEIL_NWORDS (int_size_in_bytes (type))
7021		     : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7022	  > SPARC_INT_ARG_MAX)
7023	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7024    }
7025  else
7026    {
7027      /* We are guaranteed by pass_by_reference that the size of the
7028	 argument is not greater than 16 bytes, so we only need to return
7029	 one word if the argument is partially passed in registers.  */
7030
7031      if (type && AGGREGATE_TYPE_P (type))
7032	{
7033	  int size = int_size_in_bytes (type);
7034
7035	  if (size > UNITS_PER_WORD
7036	      && (slotno == SPARC_INT_ARG_MAX - 1
7037		  || slotno == SPARC_FP_ARG_MAX - 1))
7038	    return UNITS_PER_WORD;
7039	}
7040      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7041	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7042		   && ! (TARGET_FPU && named)))
7043	{
7044	  /* The complex types are passed as packed types.  */
7045	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7046	      && slotno == SPARC_INT_ARG_MAX - 1)
7047	    return UNITS_PER_WORD;
7048	}
7049      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7050	{
7051	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7052	      > SPARC_FP_ARG_MAX)
7053	    return UNITS_PER_WORD;
7054	}
7055    }
7056
7057  return 0;
7058}
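
/* For instance, a 16-byte struct that lands in slot 5 (the last
   64-bit integer slot) has its first word passed in %o5 and its
   second word in memory, so UNITS_PER_WORD is returned for it; on
   ARCH32 a double starting in slot 5 similarly gets 4 bytes of
   registers and 4 bytes of memory.  */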
7059
7060/* Handle the TARGET_PASS_BY_REFERENCE target hook.
7061   Specify whether to pass the argument by reference.  */
7062
7063static bool
7064sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7065			 machine_mode mode, const_tree type,
7066			 bool named ATTRIBUTE_UNUSED)
7067{
7068  if (TARGET_ARCH32)
7069    /* Original SPARC 32-bit ABI says that structures and unions,
7070       and quad-precision floats are passed by reference.  For Pascal,
7071       also pass arrays by reference.  All other base types are passed
7072       in registers.
7073
7074       Extended ABI (as implemented by the Sun compiler) says that all
7075       complex floats are passed by reference.  Pass complex integers
7076       in registers up to 8 bytes.  More generally, enforce the 2-word
7077       cap for passing arguments in registers.
7078
7079       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7080       integers are passed like floats of the same size, that is in
7081       registers up to 8 bytes.  Pass all vector floats by reference
7082       like structure and unions.  */
7083    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7084	    || mode == SCmode
7085	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
7086	    || GET_MODE_SIZE (mode) > 8
7087	    || (type
7088		&& TREE_CODE (type) == VECTOR_TYPE
7089		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7090  else
7091    /* Original SPARC 64-bit ABI says that structures and unions
7092       smaller than 16 bytes are passed in registers, as well as
7093       all other base types.
7094
7095       Extended ABI (as implemented by the Sun compiler) says that
7096       complex floats are passed in registers up to 16 bytes.  Pass
7097       all complex integers in registers up to 16 bytes.  More generally,
7098       enforce the 2-word cap for passing arguments in registers.
7099
7100       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7101       integers are passed like floats of the same size, that is in
7102       registers (up to 16 bytes).  Pass all vector floats like structure
7103       and unions.  */
7104    return ((type
7105	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7106	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7107	    /* Catch CTImode and TCmode.  */
7108	    || GET_MODE_SIZE (mode) > 16);
7109}
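
/* Concretely: on ARCH32 a 'long double' (TFmode), a '_Complex double'
   (DCmode) and all aggregates go by reference, while on ARCH64 they
   are passed by value and only objects larger than 16 bytes, e.g. a
   32-byte struct or a '_Complex long double' (TCmode), go by
   reference.  */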
7110
7111/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7112   Update the data in CUM to advance over an argument
7113   of mode MODE and data type TYPE.
7114   TYPE is null for libcalls where that information may not be available.  */
7115
7116static void
7117sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7118			    const_tree type, bool named)
7119{
7120  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7121  int regno, padding;
7122
  /* We pass false for incoming here; it doesn't matter.  */
7124  function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7125
7126  /* If argument requires leading padding, add it.  */
7127  cum->words += padding;
7128
7129  if (TARGET_ARCH32)
7130    cum->words += (mode == BLKmode
7131		   ? CEIL_NWORDS (int_size_in_bytes (type))
7132		   : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7133  else
7134    {
7135      if (type && AGGREGATE_TYPE_P (type))
7136	{
7137	  int size = int_size_in_bytes (type);
7138
7139	  if (size <= 8)
7140	    ++cum->words;
7141	  else if (size <= 16)
7142	    cum->words += 2;
7143	  else /* passed by reference */
7144	    ++cum->words;
7145	}
7146      else
7147	cum->words += (mode == BLKmode
7148		       ? CEIL_NWORDS (int_size_in_bytes (type))
7149		       : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7150    }
7151}
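
/* For example, on ARCH64 a 12-byte struct advances cum->words by 2
   (two slots), whereas a 24-byte struct -- passed by reference -- only
   advances it by 1, the word holding the pointer.  */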
7152
/* Handle the FUNCTION_ARG_PADDING macro.
   For the 64-bit ABI, structs are always stored left-justified in their
   argument slot.  */
7156
7157enum direction
7158function_arg_padding (machine_mode mode, const_tree type)
7159{
7160  if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7161    return upward;
7162
7163  /* Fall back to the default.  */
7164  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7165}
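
/* Thus a 5-byte struct occupies the first (most significant, as we
   are big-endian) 5 bytes of its 8-byte slot and is padded at the
   end.  */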
7166
7167/* Handle the TARGET_RETURN_IN_MEMORY target hook.
7168   Specify whether to return the return value in memory.  */
7169
7170static bool
7171sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7172{
7173  if (TARGET_ARCH32)
7174    /* Original SPARC 32-bit ABI says that structures and unions,
7175       and quad-precision floats are returned in memory.  All other
7176       base types are returned in registers.
7177
7178       Extended ABI (as implemented by the Sun compiler) says that
7179       all complex floats are returned in registers (8 FP registers
7180       at most for '_Complex long double').  Return all complex integers
7181       in registers (4 at most for '_Complex long long').
7182
7183       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7184       integers are returned like floats of the same size, that is in
7185       registers up to 8 bytes and in memory otherwise.  Return all
7186       vector floats in memory like structure and unions; note that
7187       they always have BLKmode like the latter.  */
7188    return (TYPE_MODE (type) == BLKmode
7189	    || TYPE_MODE (type) == TFmode
7190	    || (TREE_CODE (type) == VECTOR_TYPE
7191		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7192  else
7193    /* Original SPARC 64-bit ABI says that structures and unions
7194       smaller than 32 bytes are returned in registers, as well as
7195       all other base types.
7196
7197       Extended ABI (as implemented by the Sun compiler) says that all
7198       complex floats are returned in registers (8 FP registers at most
7199       for '_Complex long double').  Return all complex integers in
7200       registers (4 at most for '_Complex TItype').
7201
7202       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7203       integers are returned like floats of the same size, that is in
7204       registers.  Return all vector floats like structure and unions;
7205       note that they always have BLKmode like the latter.  */
7206    return (TYPE_MODE (type) == BLKmode
7207	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7208}
7209
7210/* Handle the TARGET_STRUCT_VALUE target hook.
7211   Return where to find the structure return value address.  */
7212
7213static rtx
7214sparc_struct_value_rtx (tree fndecl, int incoming)
7215{
7216  if (TARGET_ARCH64)
7217    return 0;
7218  else
7219    {
7220      rtx mem;
7221
7222      if (incoming)
7223	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7224						   STRUCT_VALUE_OFFSET));
7225      else
7226	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7227						   STRUCT_VALUE_OFFSET));
7228
      /* Only follow the SPARC ABI for fixed-size structure returns;
         variable-size structure returns are handled per the normal
         procedures in GCC.  This is enabled by -mstd-struct-return.  */
7232      if (incoming == 2
7233	  && sparc_std_struct_return
7234	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7235	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7236	{
7237	  /* We must check and adjust the return address, as it is optional
7238	     as to whether the return object is really provided.  */
7239	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7240	  rtx scratch = gen_reg_rtx (SImode);
7241	  rtx_code_label *endlab = gen_label_rtx ();
7242
7243	  /* Calculate the return object size.  */
7244	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7245	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7246	  /* Construct a temporary return value.  */
7247	  rtx temp_val
7248	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7249
7250	  /* Implement SPARC 32-bit psABI callee return struct checking:
7251
7252	     Fetch the instruction where we will return to and see if
7253	     it's an unimp instruction (the most significant 10 bits
7254	     will be zero).  */
7255	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
7256						plus_constant (Pmode,
7257							       ret_reg, 8)));
7258	  /* Assume the size is valid and pre-adjust.  */
7259	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7260	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7261				   0, endlab);
7262	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7263	  /* Write the address of the memory pointed to by temp_val into
7264	     the memory pointed to by mem.  */
7265	  emit_move_insn (mem, XEXP (temp_val, 0));
7266	  emit_label (endlab);
7267	}
7268
7269      return mem;
7270    }
7271}
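
/* The caller-side sequence checked above looks roughly like

	call	foo
	 nop
	unimp	<size>		! low 12 bits hold the struct size

   If the word fetched at [ret+8] matches the expected size, the callee
   skips the unimp insn by returning to ret+12; otherwise the
   pre-adjustment is undone and the return value is redirected into a
   local temporary so that a missing return slot cannot be clobbered.  */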
7272
7273/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7274   For v9, function return values are subject to the same rules as arguments,
7275   except that up to 32 bytes may be returned in registers.  */
7276
7277static rtx
7278sparc_function_value_1 (const_tree type, machine_mode mode,
7279			bool outgoing)
7280{
7281  /* Beware that the two values are swapped here wrt function_arg.  */
7282  int regbase = (outgoing
7283		 ? SPARC_INCOMING_INT_ARG_FIRST
7284		 : SPARC_OUTGOING_INT_ARG_FIRST);
7285  enum mode_class mclass = GET_MODE_CLASS (mode);
7286  int regno;
7287
7288  /* Vector types deserve special treatment because they are polymorphic wrt
7289     their mode, depending upon whether VIS instructions are enabled.  */
7290  if (type && TREE_CODE (type) == VECTOR_TYPE)
7291    {
7292      HOST_WIDE_INT size = int_size_in_bytes (type);
7293      gcc_assert ((TARGET_ARCH32 && size <= 8)
7294		  || (TARGET_ARCH64 && size <= 32));
7295
7296      if (mode == BLKmode)
7297	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7298
7299      mclass = MODE_FLOAT;
7300    }
7301
7302  if (TARGET_ARCH64 && type)
7303    {
7304      /* Structures up to 32 bytes in size are returned in registers.  */
7305      if (TREE_CODE (type) == RECORD_TYPE)
7306	{
7307	  HOST_WIDE_INT size = int_size_in_bytes (type);
7308	  gcc_assert (size <= 32);
7309
7310	  return function_arg_record_value (type, mode, 0, 1, regbase);
7311	}
7312
7313      /* Unions up to 32 bytes in size are returned in integer registers.  */
7314      else if (TREE_CODE (type) == UNION_TYPE)
7315	{
7316	  HOST_WIDE_INT size = int_size_in_bytes (type);
7317	  gcc_assert (size <= 32);
7318
7319	  return function_arg_union_value (size, mode, 0, regbase);
7320	}
7321
7322      /* Objects that require it are returned in FP registers.  */
7323      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7324	;
7325
7326      /* All other aggregate types are returned in an integer register in a
7327	 mode corresponding to the size of the type.  */
7328      else if (AGGREGATE_TYPE_P (type))
7329	{
7330	  /* All other aggregate types are passed in an integer register
7331	     in a mode corresponding to the size of the type.  */
7332	  HOST_WIDE_INT size = int_size_in_bytes (type);
7333	  gcc_assert (size <= 32);
7334
7335	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7336
7337	  /* ??? We probably should have made the same ABI change in
7338	     3.4.0 as the one we made for unions.   The latter was
7339	     required by the SCD though, while the former is not
7340	     specified, so we favored compatibility and efficiency.
7341
7342	     Now we're stuck for aggregates larger than 16 bytes,
7343	     because OImode vanished in the meantime.  Let's not
7344	     try to be unduly clever, and simply follow the ABI
7345	     for unions in that case.  */
7346	  if (mode == BLKmode)
7347	    return function_arg_union_value (size, mode, 0, regbase);
7348	  else
7349	    mclass = MODE_INT;
7350	}
7351
7352      /* We should only have pointer and integer types at this point.  This
7353	 must match sparc_promote_function_mode.  */
7354      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7355	mode = word_mode;
7356    }
7357
7358  /* We should only have pointer and integer types at this point, except with
7359     -freg-struct-return.  This must match sparc_promote_function_mode.  */
7360  else if (TARGET_ARCH32
7361	   && !(type && AGGREGATE_TYPE_P (type))
7362	   && mclass == MODE_INT
7363	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7364    mode = word_mode;
7365
7366  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7367    regno = SPARC_FP_ARG_FIRST;
7368  else
7369    regno = regbase;
7370
7371  return gen_rtx_REG (mode, regno);
7372}
7373
7374/* Handle TARGET_FUNCTION_VALUE.
7375   On the SPARC, the value is found in the first "output" register, but the
7376   called function leaves it in the first "input" register.  */
7377
7378static rtx
7379sparc_function_value (const_tree valtype,
7380		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7381		      bool outgoing)
7382{
7383  return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7384}
7385
7386/* Handle TARGET_LIBCALL_VALUE.  */
7387
7388static rtx
7389sparc_libcall_value (machine_mode mode,
7390		     const_rtx fun ATTRIBUTE_UNUSED)
7391{
7392  return sparc_function_value_1 (NULL_TREE, mode, false);
7393}
7394
7395/* Handle FUNCTION_VALUE_REGNO_P.
7396   On the SPARC, the first "output" reg is used for integer values, and the
7397   first floating point register is used for floating point values.  */
7398
7399static bool
7400sparc_function_value_regno_p (const unsigned int regno)
7401{
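  /* Register 8 is %o0 and register 32 is %f0.  */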
7402  return (regno == 8 || (TARGET_FPU && regno == 32));
7403}
7404
7405/* Do what is necessary for `va_start'.  We look at the current function
7406   to determine if stdarg or varargs is used and return the address of
7407   the first unnamed parameter.  */
7408
7409static rtx
7410sparc_builtin_saveregs (void)
7411{
7412  int first_reg = crtl->args.info.words;
7413  rtx address;
7414  int regno;
7415
7416  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7417    emit_move_insn (gen_rtx_MEM (word_mode,
7418				 gen_rtx_PLUS (Pmode,
7419					       frame_pointer_rtx,
7420					       GEN_INT (FIRST_PARM_OFFSET (0)
7421							+ (UNITS_PER_WORD
7422							   * regno)))),
7423		    gen_rtx_REG (word_mode,
7424				 SPARC_INCOMING_INT_ARG_FIRST + regno));
7425
7426  address = gen_rtx_PLUS (Pmode,
7427			  frame_pointer_rtx,
7428			  GEN_INT (FIRST_PARM_OFFSET (0)
7429				   + UNITS_PER_WORD * first_reg));
7430
7431  return address;
7432}
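
/* E.g. for 'void f (int n, ...)' only one argument word is named, so
   the loop above dumps %i1 through %i5 into their reserved stack
   slots and the address of the %i1 slot is returned.  */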
7433
7434/* Implement `va_start' for stdarg.  */
7435
7436static void
7437sparc_va_start (tree valist, rtx nextarg)
7438{
7439  nextarg = expand_builtin_saveregs ();
7440  std_expand_builtin_va_start (valist, nextarg);
7441}
7442
7443/* Implement `va_arg' for stdarg.  */
7444
7445static tree
7446sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7447		       gimple_seq *post_p)
7448{
7449  HOST_WIDE_INT size, rsize, align;
7450  tree addr, incr;
7451  bool indirect;
7452  tree ptrtype = build_pointer_type (type);
7453
7454  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7455    {
7456      indirect = true;
7457      size = rsize = UNITS_PER_WORD;
7458      align = 0;
7459    }
7460  else
7461    {
7462      indirect = false;
7463      size = int_size_in_bytes (type);
7464      rsize = ROUND_UP (size, UNITS_PER_WORD);
7465      align = 0;
7466
7467      if (TARGET_ARCH64)
7468	{
7469	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
7470	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7471	    align = 2 * UNITS_PER_WORD;
7472
7473	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
7474	     are left-justified in their slots.  */
7475	  if (AGGREGATE_TYPE_P (type))
7476	    {
7477	      if (size == 0)
7478		size = rsize = UNITS_PER_WORD;
7479	      else
7480		size = rsize;
7481	    }
7482	}
7483    }
7484
7485  incr = valist;
7486  if (align)
7487    {
7488      incr = fold_build_pointer_plus_hwi (incr, align - 1);
7489      incr = fold_convert (sizetype, incr);
7490      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7491			  size_int (-align));
7492      incr = fold_convert (ptr_type_node, incr);
7493    }
7494
7495  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7496  addr = incr;
7497
7498  if (BYTES_BIG_ENDIAN && size < rsize)
7499    addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7500
7501  if (indirect)
7502    {
7503      addr = fold_convert (build_pointer_type (ptrtype), addr);
7504      addr = build_va_arg_indirect_ref (addr);
7505    }
7506
7507  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient; usually we can do this in registers.  */
7509  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7510    {
7511      tree tmp = create_tmp_var (type, "va_arg_tmp");
7512      tree dest_addr = build_fold_addr_expr (tmp);
7513      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7514				   3, dest_addr, addr, size_int (rsize));
7515      TREE_ADDRESSABLE (tmp) = 1;
7516      gimplify_and_add (copy, pre_p);
7517      addr = dest_addr;
7518    }
7519
7520  else
7521    addr = fold_convert (ptrtype, addr);
7522
7523  incr = fold_build_pointer_plus_hwi (incr, rsize);
7524  gimplify_assign (valist, incr, post_p);
7525
7526  return build_va_arg_indirect_ref (addr);
7527}
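
/* Sketch of the ARCH64 case: fetching a 'long double' first rounds
   the pointer up to 16 with the BIT_AND_EXPR above, reads 16 bytes
   and advances the pointer by 16; an aggregate passed by reference
   instead reads a single word and dereferences it.  */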
7528
7529/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7530   Specify whether the vector mode is supported by the hardware.  */
7531
7532static bool
7533sparc_vector_mode_supported_p (machine_mode mode)
7534{
  return TARGET_VIS && VECTOR_MODE_P (mode);
7536}
7537
7538/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook.  */
7539
7540static machine_mode
7541sparc_preferred_simd_mode (machine_mode mode)
7542{
7543  if (TARGET_VIS)
7544    switch (mode)
7545      {
7546      case SImode:
7547	return V2SImode;
7548      case HImode:
7549	return V4HImode;
7550      case QImode:
7551	return V8QImode;
7552
7553      default:;
7554      }
7555
7556  return word_mode;
7557}
7558
7559/* Return the string to output an unconditional branch to LABEL, which is
7560   the operand number of the label.
7561
7562   DEST is the destination insn (i.e. the label), INSN is the source.  */
7563
7564const char *
7565output_ubranch (rtx dest, rtx_insn *insn)
7566{
7567  static char string[64];
7568  bool v9_form = false;
7569  int delta;
7570  char *p;
7571
7572  /* Even if we are trying to use cbcond for this, evaluate
7573     whether we can use V9 branches as our backup plan.  */
7574
7575  delta = 5000000;
7576  if (INSN_ADDRESSES_SET_P ())
7577    delta = (INSN_ADDRESSES (INSN_UID (dest))
7578	     - INSN_ADDRESSES (INSN_UID (insn)));
7579
7580  /* Leave some instructions for "slop".  */
7581  if (TARGET_V9 && delta >= -260000 && delta < 260000)
7582    v9_form = true;
7583
7584  if (TARGET_CBCOND)
7585    {
7586      bool emit_nop = emit_cbcond_nop (insn);
7587      bool far = false;
7588      const char *rval;
7589
7590      if (delta < -500 || delta > 500)
7591	far = true;
7592
7593      if (far)
7594	{
7595	  if (v9_form)
7596	    rval = "ba,a,pt\t%%xcc, %l0";
7597	  else
7598	    rval = "b,a\t%l0";
7599	}
7600      else
7601	{
7602	  if (emit_nop)
7603	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7604	  else
7605	    rval = "cwbe\t%%g0, %%g0, %l0";
7606	}
7607      return rval;
7608    }
7609
7610  if (v9_form)
7611    strcpy (string, "ba%*,pt\t%%xcc, ");
7612  else
7613    strcpy (string, "b%*\t");
7614
7615  p = strchr (string, '\0');
7616  *p++ = '%';
7617  *p++ = 'l';
7618  *p++ = '0';
7619  *p++ = '%';
7620  *p++ = '(';
7621  *p = '\0';
7622
7623  return string;
7624}
7625
7626/* Return the string to output a conditional branch to LABEL, which is
7627   the operand number of the label.  OP is the conditional expression.
7628   XEXP (OP, 0) is assumed to be a condition code register (integer or
7629   floating point) and its mode specifies what kind of comparison we made.
7630
7631   DEST is the destination insn (i.e. the label), INSN is the source.
7632
7633   REVERSED is nonzero if we should reverse the sense of the comparison.
7634
7635   ANNUL is nonzero if we should generate an annulling branch.  */
7636
7637const char *
7638output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7639		rtx_insn *insn)
7640{
7641  static char string[64];
7642  enum rtx_code code = GET_CODE (op);
7643  rtx cc_reg = XEXP (op, 0);
7644  machine_mode mode = GET_MODE (cc_reg);
7645  const char *labelno, *branch;
7646  int spaces = 8, far;
7647  char *p;
7648
7649  /* v9 branches are limited to +-1MB.  If it is too far away,
7650     change
7651
7652     bne,pt %xcc, .LC30
7653
7654     to
7655
7656     be,pn %xcc, .+12
7657      nop
7658     ba .LC30
7659
7660     and
7661
7662     fbne,a,pn %fcc2, .LC29
7663
7664     to
7665
7666     fbe,pt %fcc2, .+16
7667      nop
7668     ba .LC29  */
7669
7670  far = TARGET_V9 && (get_attr_length (insn) >= 3);
7671  if (reversed ^ far)
7672    {
7673      /* Reversal of FP compares takes care -- an ordered compare
7674	 becomes an unordered compare and vice versa.  */
7675      if (mode == CCFPmode || mode == CCFPEmode)
7676	code = reverse_condition_maybe_unordered (code);
7677      else
7678	code = reverse_condition (code);
7679    }
7680
7681  /* Start by writing the branch condition.  */
7682  if (mode == CCFPmode || mode == CCFPEmode)
7683    {
7684      switch (code)
7685	{
7686	case NE:
7687	  branch = "fbne";
7688	  break;
7689	case EQ:
7690	  branch = "fbe";
7691	  break;
7692	case GE:
7693	  branch = "fbge";
7694	  break;
7695	case GT:
7696	  branch = "fbg";
7697	  break;
7698	case LE:
7699	  branch = "fble";
7700	  break;
7701	case LT:
7702	  branch = "fbl";
7703	  break;
7704	case UNORDERED:
7705	  branch = "fbu";
7706	  break;
7707	case ORDERED:
7708	  branch = "fbo";
7709	  break;
7710	case UNGT:
7711	  branch = "fbug";
7712	  break;
7713	case UNLT:
7714	  branch = "fbul";
7715	  break;
7716	case UNEQ:
7717	  branch = "fbue";
7718	  break;
7719	case UNGE:
7720	  branch = "fbuge";
7721	  break;
7722	case UNLE:
7723	  branch = "fbule";
7724	  break;
7725	case LTGT:
7726	  branch = "fblg";
7727	  break;
7728
7729	default:
7730	  gcc_unreachable ();
7731	}
7732
7733      /* ??? !v9: FP branches cannot be preceded by another floating point
7734	 insn.  Because there is currently no concept of pre-delay slots,
7735	 we can fix this only by always emitting a nop before a floating
7736	 point branch.  */
7737
7738      string[0] = '\0';
7739      if (! TARGET_V9)
7740	strcpy (string, "nop\n\t");
7741      strcat (string, branch);
7742    }
7743  else
7744    {
7745      switch (code)
7746	{
7747	case NE:
7748	  branch = "bne";
7749	  break;
7750	case EQ:
7751	  branch = "be";
7752	  break;
7753	case GE:
7754	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7755	    branch = "bpos";
7756	  else
7757	    branch = "bge";
7758	  break;
7759	case GT:
7760	  branch = "bg";
7761	  break;
7762	case LE:
7763	  branch = "ble";
7764	  break;
7765	case LT:
7766	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7767	    branch = "bneg";
7768	  else
7769	    branch = "bl";
7770	  break;
7771	case GEU:
7772	  branch = "bgeu";
7773	  break;
7774	case GTU:
7775	  branch = "bgu";
7776	  break;
7777	case LEU:
7778	  branch = "bleu";
7779	  break;
7780	case LTU:
7781	  branch = "blu";
7782	  break;
7783
7784	default:
7785	  gcc_unreachable ();
7786	}
7787      strcpy (string, branch);
7788    }
7789  spaces -= strlen (branch);
7790  p = strchr (string, '\0');
7791
  /* Now add the annulling, the label, and a possible nop.  */
7793  if (annul && ! far)
7794    {
7795      strcpy (p, ",a");
7796      p += 2;
7797      spaces -= 2;
7798    }
7799
7800  if (TARGET_V9)
7801    {
7802      rtx note;
7803      int v8 = 0;
7804
7805      if (! far && insn && INSN_ADDRESSES_SET_P ())
7806	{
7807	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
7808		       - INSN_ADDRESSES (INSN_UID (insn)));
7809	  /* Leave some instructions for "slop".  */
7810	  if (delta < -260000 || delta >= 260000)
7811	    v8 = 1;
7812	}
7813
7814      if (mode == CCFPmode || mode == CCFPEmode)
7815	{
7816	  static char v9_fcc_labelno[] = "%%fccX, ";
7817	  /* Set the char indicating the number of the fcc reg to use.  */
7818	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7819	  labelno = v9_fcc_labelno;
7820	  if (v8)
7821	    {
7822	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7823	      labelno = "";
7824	    }
7825	}
7826      else if (mode == CCXmode || mode == CCX_NOOVmode)
7827	{
7828	  labelno = "%%xcc, ";
7829	  gcc_assert (! v8);
7830	}
7831      else
7832	{
7833	  labelno = "%%icc, ";
7834	  if (v8)
7835	    labelno = "";
7836	}
7837
7838      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7839	{
7840	  strcpy (p,
7841		  ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7842		  ? ",pt" : ",pn");
7843	  p += 3;
7844	  spaces -= 3;
7845	}
7846    }
7847  else
7848    labelno = "";
7849
7850  if (spaces > 0)
7851    *p++ = '\t';
7852  else
7853    *p++ = ' ';
7854  strcpy (p, labelno);
7855  p = strchr (p, '\0');
7856  if (far)
7857    {
7858      strcpy (p, ".+12\n\t nop\n\tb\t");
7859      /* Skip the next insn if requested or
7860	 if we know that it will be a nop.  */
7861      if (annul || ! final_sequence)
7862        p[3] = '6';
7863      p += 14;
7864    }
7865  *p++ = '%';
7866  *p++ = 'l';
7867  *p++ = label + '0';
7868  *p++ = '%';
7869  *p++ = '#';
7870  *p = '\0';
7871
7872  return string;
7873}
7874
7875/* Emit a library call comparison between floating point X and Y.
7876   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7877   Return the new operator to be used in the comparison sequence.
7878
7879   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7880   values as arguments instead of the TFmode registers themselves,
7881   that's why we cannot call emit_float_lib_cmp.  */
7882
7883rtx
7884sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7885{
7886  const char *qpfunc;
7887  rtx slot0, slot1, result, tem, tem2, libfunc;
7888  machine_mode mode;
7889  enum rtx_code new_comparison;
7890
7891  switch (comparison)
7892    {
7893    case EQ:
7894      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7895      break;
7896
7897    case NE:
7898      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7899      break;
7900
7901    case GT:
7902      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7903      break;
7904
7905    case GE:
7906      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7907      break;
7908
7909    case LT:
7910      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7911      break;
7912
7913    case LE:
7914      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7915      break;
7916
7917    case ORDERED:
7918    case UNORDERED:
7919    case UNGT:
7920    case UNLT:
7921    case UNEQ:
7922    case UNGE:
7923    case UNLE:
7924    case LTGT:
7925      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7926      break;
7927
7928    default:
7929      gcc_unreachable ();
7930    }
7931
7932  if (TARGET_ARCH64)
7933    {
7934      if (MEM_P (x))
7935	{
7936	  tree expr = MEM_EXPR (x);
7937	  if (expr)
7938	    mark_addressable (expr);
7939	  slot0 = x;
7940	}
7941      else
7942	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7944	  emit_move_insn (slot0, x);
7945	}
7946
7947      if (MEM_P (y))
7948	{
7949	  tree expr = MEM_EXPR (y);
7950	  if (expr)
7951	    mark_addressable (expr);
7952	  slot1 = y;
7953	}
7954      else
7955	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7957	  emit_move_insn (slot1, y);
7958	}
7959
7960      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7961      emit_library_call (libfunc, LCT_NORMAL,
7962			 DImode, 2,
7963			 XEXP (slot0, 0), Pmode,
7964			 XEXP (slot1, 0), Pmode);
7965      mode = DImode;
7966    }
7967  else
7968    {
7969      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7970      emit_library_call (libfunc, LCT_NORMAL,
7971			 SImode, 2,
7972			 x, TFmode, y, TFmode);
7973      mode = SImode;
7974    }
7975
7976
7977  /* Immediately move the result of the libcall into a pseudo
7978     register so reload doesn't clobber the value if it needs
7979     the return register for a spill reg.  */
7980  result = gen_reg_rtx (mode);
7981  emit_move_insn (result, hard_libcall_value (mode, libfunc));
7982
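  /* The *_cmp libcalls return 0 = equal, 1 = less, 2 = greater and
     3 = unordered, which the switch below decodes (e.g. UNLT tests
     the low bit: less or unordered).  The f* predicates handled by
     the default case simply return nonzero for true.  */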
7983  switch (comparison)
7984    {
7985    default:
7986      return gen_rtx_NE (VOIDmode, result, const0_rtx);
7987    case ORDERED:
7988    case UNORDERED:
7989      new_comparison = (comparison == UNORDERED ? EQ : NE);
7990      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7991    case UNGT:
7992    case UNGE:
7993      new_comparison = (comparison == UNGT ? GT : NE);
7994      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7995    case UNLE:
7996      return gen_rtx_NE (VOIDmode, result, const2_rtx);
7997    case UNLT:
7998      tem = gen_reg_rtx (mode);
7999      if (TARGET_ARCH32)
8000	emit_insn (gen_andsi3 (tem, result, const1_rtx));
8001      else
8002	emit_insn (gen_anddi3 (tem, result, const1_rtx));
8003      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8004    case UNEQ:
8005    case LTGT:
8006      tem = gen_reg_rtx (mode);
8007      if (TARGET_ARCH32)
8008	emit_insn (gen_addsi3 (tem, result, const1_rtx));
8009      else
8010	emit_insn (gen_adddi3 (tem, result, const1_rtx));
8011      tem2 = gen_reg_rtx (mode);
8012      if (TARGET_ARCH32)
8013	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8014      else
8015	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8016      new_comparison = (comparison == UNEQ ? EQ : NE);
8017      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8018    }
8019
8020  gcc_unreachable ();
8021}
8022
8023/* Generate an unsigned DImode to FP conversion.  This is the same code
8024   optabs would emit if we didn't have TFmode patterns.  */
8025
8026void
8027sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8028{
8029  rtx i0, i1, f0, in, out;
8030
8031  out = operands[0];
8032  in = force_reg (DImode, operands[1]);
8033  rtx_code_label *neglab = gen_label_rtx ();
8034  rtx_code_label *donelab = gen_label_rtx ();
8035  i0 = gen_reg_rtx (DImode);
8036  i1 = gen_reg_rtx (DImode);
8037  f0 = gen_reg_rtx (mode);
8038
8039  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8040
8041  emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8042  emit_jump_insn (gen_jump (donelab));
8043  emit_barrier ();
8044
8045  emit_label (neglab);
8046
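  /* Halve the value with the low bit kept sticky -- (in >> 1) | (in & 1)
     -- convert, then double the result; the sticky bit preserves
     correct rounding over the full unsigned range.  */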
8047  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8048  emit_insn (gen_anddi3 (i1, in, const1_rtx));
8049  emit_insn (gen_iordi3 (i0, i0, i1));
8050  emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8051  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8052
8053  emit_label (donelab);
8054}
8055
8056/* Generate an FP to unsigned DImode conversion.  This is the same code
8057   optabs would emit if we didn't have TFmode patterns.  */
8058
8059void
8060sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8061{
8062  rtx i0, i1, f0, in, out, limit;
8063
8064  out = operands[0];
8065  in = force_reg (mode, operands[1]);
8066  rtx_code_label *neglab = gen_label_rtx ();
8067  rtx_code_label *donelab = gen_label_rtx ();
8068  i0 = gen_reg_rtx (DImode);
8069  i1 = gen_reg_rtx (DImode);
8070  limit = gen_reg_rtx (mode);
8071  f0 = gen_reg_rtx (mode);
8072
8073  emit_move_insn (limit,
8074		  const_double_from_real_value (
8075		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8076  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8077
8078  emit_insn (gen_rtx_SET (out,
8079			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8080  emit_jump_insn (gen_jump (donelab));
8081  emit_barrier ();
8082
8083  emit_label (neglab);
8084
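  /* The operand is 2^63 or more: subtract 2^63, convert, and put the
     top bit back by XORing the result with 1 << 63.  */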
8085  emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8086  emit_insn (gen_rtx_SET (i0,
8087			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8088  emit_insn (gen_movdi (i1, const1_rtx));
8089  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8090  emit_insn (gen_xordi3 (out, i0, i1));
8091
8092  emit_label (donelab);
8093}
8094
8095/* Return the string to output a compare and branch instruction to DEST.
8096   DEST is the destination insn (i.e. the label), INSN is the source,
8097   and OP is the conditional expression.  */
8098
8099const char *
8100output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8101{
8102  machine_mode mode = GET_MODE (XEXP (op, 0));
8103  enum rtx_code code = GET_CODE (op);
8104  const char *cond_str, *tmpl;
8105  int far, emit_nop, len;
8106  static char string[64];
8107  char size_char;
8108
8109  /* Compare and Branch is limited to +-2KB.  If it is too far away,
8110     change
8111
8112     cxbne X, Y, .LC30
8113
8114     to
8115
8116     cxbe X, Y, .+16
8117     nop
8118     ba,pt xcc, .LC30
8119      nop  */
8120
8121  len = get_attr_length (insn);
8122
8123  far = len == 4;
8124  emit_nop = len == 2;
8125
8126  if (far)
8127    code = reverse_condition (code);
8128
8129  size_char = ((mode == SImode) ? 'w' : 'x');
8130
8131  switch (code)
8132    {
8133    case NE:
8134      cond_str = "ne";
8135      break;
8136
8137    case EQ:
8138      cond_str = "e";
8139      break;
8140
8141    case GE:
8142      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8143	cond_str = "pos";
8144      else
8145	cond_str = "ge";
8146      break;
8147
8148    case GT:
8149      cond_str = "g";
8150      break;
8151
8152    case LE:
8153      cond_str = "le";
8154      break;
8155
8156    case LT:
8157      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8158	cond_str = "neg";
8159      else
8160	cond_str = "l";
8161      break;
8162
8163    case GEU:
8164      cond_str = "cc";
8165      break;
8166
8167    case GTU:
8168      cond_str = "gu";
8169      break;
8170
8171    case LEU:
8172      cond_str = "leu";
8173      break;
8174
8175    case LTU:
8176      cond_str = "cs";
8177      break;
8178
8179    default:
8180      gcc_unreachable ();
8181    }
8182
8183  if (far)
8184    {
8185      int veryfar = 1, delta;
8186
8187      if (INSN_ADDRESSES_SET_P ())
8188	{
8189	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8190		   - INSN_ADDRESSES (INSN_UID (insn)));
8191	  /* Leave some instructions for "slop".  */
8192	  if (delta >= -260000 && delta < 260000)
8193	    veryfar = 0;
8194	}
8195
8196      if (veryfar)
8197	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8198      else
8199	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8200    }
8201  else
8202    {
8203      if (emit_nop)
8204	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8205      else
8206	tmpl = "c%cb%s\t%%1, %%2, %%3";
8207    }
8208
  snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8210
8211  return string;
8212}
8213
8214/* Return the string to output a conditional branch to LABEL, testing
8215   register REG.  LABEL is the operand number of the label; REG is the
8216   operand number of the reg.  OP is the conditional expression.  The mode
8217   of REG says what kind of comparison we made.
8218
8219   DEST is the destination insn (i.e. the label), INSN is the source.
8220
8221   REVERSED is nonzero if we should reverse the sense of the comparison.
8222
8223   ANNUL is nonzero if we should generate an annulling branch.  */
8224
8225const char *
8226output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8227		 int annul, rtx_insn *insn)
8228{
8229  static char string[64];
8230  enum rtx_code code = GET_CODE (op);
8231  machine_mode mode = GET_MODE (XEXP (op, 0));
8232  rtx note;
8233  int far;
8234  char *p;
8235
  /* Branch on register insns are limited to +-128KB.  If it is too far away,
8237     change
8238
8239     brnz,pt %g1, .LC30
8240
8241     to
8242
8243     brz,pn %g1, .+12
8244      nop
8245     ba,pt %xcc, .LC30
8246
8247     and
8248
8249     brgez,a,pn %o1, .LC29
8250
8251     to
8252
8253     brlz,pt %o1, .+16
8254      nop
8255     ba,pt %xcc, .LC29  */
8256
8257  far = get_attr_length (insn) >= 3;
8258
8259  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
8260  if (reversed ^ far)
8261    code = reverse_condition (code);
8262
8263  /* Only 64 bit versions of these instructions exist.  */
8264  gcc_assert (mode == DImode);
8265
8266  /* Start by writing the branch condition.  */
8267
8268  switch (code)
8269    {
8270    case NE:
8271      strcpy (string, "brnz");
8272      break;
8273
8274    case EQ:
8275      strcpy (string, "brz");
8276      break;
8277
8278    case GE:
8279      strcpy (string, "brgez");
8280      break;
8281
8282    case LT:
8283      strcpy (string, "brlz");
8284      break;
8285
8286    case LE:
8287      strcpy (string, "brlez");
8288      break;
8289
8290    case GT:
8291      strcpy (string, "brgz");
8292      break;
8293
8294    default:
8295      gcc_unreachable ();
8296    }
8297
8298  p = strchr (string, '\0');
8299
8300  /* Now add the annulling, reg, label, and nop.  */
8301  if (annul && ! far)
8302    {
8303      strcpy (p, ",a");
8304      p += 2;
8305    }
8306
8307  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8308    {
8309      strcpy (p,
8310	      ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8311	      ? ",pt" : ",pn");
8312      p += 3;
8313    }
8314
8315  *p = p < string + 8 ? '\t' : ' ';
8316  p++;
8317  *p++ = '%';
8318  *p++ = '0' + reg;
8319  *p++ = ',';
8320  *p++ = ' ';
8321  if (far)
8322    {
8323      int veryfar = 1, delta;
8324
8325      if (INSN_ADDRESSES_SET_P ())
8326	{
8327	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8328		   - INSN_ADDRESSES (INSN_UID (insn)));
8329	  /* Leave some instructions for "slop".  */
8330	  if (delta >= -260000 && delta < 260000)
8331	    veryfar = 0;
8332	}
8333
8334      strcpy (p, ".+12\n\t nop\n\t");
8335      /* Skip the next insn if requested or
8336	 if we know that it will be a nop.  */
8337      if (annul || ! final_sequence)
8338        p[3] = '6';
8339      p += 12;
8340      if (veryfar)
8341	{
8342	  strcpy (p, "b\t");
8343	  p += 2;
8344	}
8345      else
8346	{
8347	  strcpy (p, "ba,pt\t%%xcc, ");
8348	  p += 13;
8349	}
8350    }
8351  *p++ = '%';
8352  *p++ = 'l';
8353  *p++ = '0' + label;
8354  *p++ = '%';
8355  *p++ = '#';
8356  *p = '\0';
8357
8358  return string;
8359}
8360
/* Return 1 if any of the registers of the instruction are %l[0-7] or
   %o[0-7].  Such instructions cannot be used in the delay slot of a
   return insn on v9.  If TEST is 0, also rename all %i[0-7] registers
   to their %o[0-7] counterparts.  */
8365
8366static int
8367epilogue_renumber (register rtx *where, int test)
8368{
8369  register const char *fmt;
8370  register int i;
8371  register enum rtx_code code;
8372
8373  if (*where == 0)
8374    return 0;
8375
8376  code = GET_CODE (*where);
8377
8378  switch (code)
8379    {
8380    case REG:
8381      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
8382	return 1;
8383      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8384	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
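      /* Fall through.  */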
8385    case SCRATCH:
8386    case CC0:
8387    case PC:
8388    case CONST_INT:
8389    case CONST_WIDE_INT:
8390    case CONST_DOUBLE:
8391      return 0;
8392
8393      /* Do not replace the frame pointer with the stack pointer because
8394	 it can cause the delayed instruction to load below the stack.
8395	 This occurs when instructions like:
8396
8397	 (set (reg/i:SI 24 %i0)
8398	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8399                       (const_int -20 [0xffffffec])) 0))
8400
8401	 are in the return delayed slot.  */
8402    case PLUS:
8403      if (GET_CODE (XEXP (*where, 0)) == REG
8404	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8405	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8406	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8407	return 1;
8408      break;
8409
8410    case MEM:
8411      if (SPARC_STACK_BIAS
8412	  && GET_CODE (XEXP (*where, 0)) == REG
8413	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8414	return 1;
8415      break;
8416
8417    default:
8418      break;
8419    }
8420
8421  fmt = GET_RTX_FORMAT (code);
8422
8423  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8424    {
8425      if (fmt[i] == 'E')
8426	{
8427	  register int j;
8428	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8429	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8430	      return 1;
8431	}
8432      else if (fmt[i] == 'e'
8433	       && epilogue_renumber (&(XEXP (*where, i)), test))
8434	return 1;
8435    }
8436  return 0;
8437}
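
/* For example, with TEST == 0

     (set (reg:SI %i0) (plus:SI (reg:SI %i1) (const_int 1)))

   is rewritten to use %o0 and %o1, making it valid in the delay slot
   of a v9 'return' insn, which executes after the register window has
   been restored.  */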
8438
8439/* Leaf functions and non-leaf functions have different needs.  */
8440
8441static const int
8442reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8443
8444static const int
8445reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8446
8447static const int *const reg_alloc_orders[] = {
8448  reg_leaf_alloc_order,
8449  reg_nonleaf_alloc_order};
8450
8451void
8452order_regs_for_local_alloc (void)
8453{
8454  static int last_order_nonleaf = 1;
8455
8456  if (df_regs_ever_live_p (15) != last_order_nonleaf)
8457    {
8458      last_order_nonleaf = !last_order_nonleaf;
8459      memcpy ((char *) reg_alloc_order,
8460	      (const char *) reg_alloc_orders[last_order_nonleaf],
8461	      FIRST_PSEUDO_REGISTER * sizeof (int));
8462    }
8463}
8464
8465/* Return 1 if REG and MEM are legitimate enough to allow the various
8466   mem<-->reg splits to be run.  */
8467
8468int
8469sparc_splitdi_legitimate (rtx reg, rtx mem)
8470{
8471  /* Punt if we are here by mistake.  */
8472  gcc_assert (reload_completed);
8473
8474  /* We must have an offsettable memory reference.  */
8475  if (! offsettable_memref_p (mem))
8476    return 0;
8477
8478  /* If we have legitimate args for ldd/std, we do not want
8479     the split to happen.  */
8480  if ((REGNO (reg) % 2) == 0
8481      && mem_min_alignment (mem, 8))
8482    return 0;
8483
8484  /* Success.  */
8485  return 1;
8486}
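
/* E.g. a DImode access through an even-numbered register pair and an
   8-byte aligned address is left alone, since ldd/std can handle it,
   whereas one through an odd-numbered first register must be split
   into two SImode moves.  */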
8487
8488/* Like sparc_splitdi_legitimate but for REG <--> REG moves.  */
8489
8490int
8491sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8492{
8493  int regno1, regno2;
8494
8495  if (GET_CODE (reg1) == SUBREG)
8496    reg1 = SUBREG_REG (reg1);
8497  if (GET_CODE (reg1) != REG)
8498    return 0;
8499  regno1 = REGNO (reg1);
8500
8501  if (GET_CODE (reg2) == SUBREG)
8502    reg2 = SUBREG_REG (reg2);
8503  if (GET_CODE (reg2) != REG)
8504    return 0;
8505  regno2 = REGNO (reg2);
8506
8507  if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8508    return 1;
8509
8510  if (TARGET_VIS3)
8511    {
8512      if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8513	  || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8514	return 1;
8515    }
8516
8517  return 0;
8518}
8519
8520/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8521   This makes them candidates for using ldd and std insns.
8522
8523   Note reg1 and reg2 *must* be hard registers.  */
8524
8525int
8526registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8527{
8528  /* We might have been passed a SUBREG.  */
8529  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8530    return 0;
8531
8532  if (REGNO (reg1) % 2 != 0)
8533    return 0;
8534
  /* Integer ldd is deprecated in SPARC V9.  */
8536  if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8537    return 0;
8538
8539  return (REGNO (reg1) == REGNO (reg2) - 1);
8540}
8541
8542/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8543   an ldd or std insn.
8544
8545   This can only happen when addr1 and addr2, the addresses in mem1
8546   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8547   addr1 must also be aligned on a 64-bit boundary.
8548
   Also, if dependent_reg_rtx is not NULL_RTX, it should not be used to
   compute the address for mem1, i.e. we cannot optimize a sequence
8551   like:
8552   	ld [%o0], %o0
8553	ld [%o0 + 4], %o1
8554   to
8555   	ldd [%o0], %o0
8556   nor:
8557	ld [%g3 + 4], %g3
8558	ld [%g3], %g2
8559   to
8560        ldd [%g3], %g2
8561
8562   But, note that the transformation from:
8563	ld [%g2 + 4], %g3
8564        ld [%g2], %g2
8565   to
8566	ldd [%g2], %g2
8567   is perfectly fine.  Thus, the peephole2 patterns always pass us
8568   the destination register of the first load, never the second one.
8569
8570   For stores we don't have a similar problem, so dependent_reg_rtx is
8571   NULL_RTX.  */
8572
8573int
8574mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8575{
8576  rtx addr1, addr2;
8577  unsigned int reg1;
8578  HOST_WIDE_INT offset1;
8579
8580  /* The mems cannot be volatile.  */
8581  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8582    return 0;
8583
8584  /* MEM1 should be aligned on a 64-bit boundary.  */
8585  if (MEM_ALIGN (mem1) < 64)
8586    return 0;
8587
8588  addr1 = XEXP (mem1, 0);
8589  addr2 = XEXP (mem2, 0);
8590
8591  /* Extract a register number and offset (if used) from the first addr.  */
8592  if (GET_CODE (addr1) == PLUS)
8593    {
8594      /* If not a REG, return zero.  */
8595      if (GET_CODE (XEXP (addr1, 0)) != REG)
8596	return 0;
8597      else
8598	{
8599          reg1 = REGNO (XEXP (addr1, 0));
8600	  /* The offset must be constant!  */
8601	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8602            return 0;
8603          offset1 = INTVAL (XEXP (addr1, 1));
8604	}
8605    }
8606  else if (GET_CODE (addr1) != REG)
8607    return 0;
8608  else
8609    {
8610      reg1 = REGNO (addr1);
8611      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
8612      offset1 = 0;
8613    }
8614
  /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
8616  if (GET_CODE (addr2) != PLUS)
8617    return 0;
8618
8619  if (GET_CODE (XEXP (addr2, 0)) != REG
8620      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8621    return 0;
8622
8623  if (reg1 != REGNO (XEXP (addr2, 0)))
8624    return 0;
8625
8626  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8627    return 0;
8628
8629  /* The first offset must be evenly divisible by 8 to ensure the
8630     address is 64 bit aligned.  */
8631  if (offset1 % 8 != 0)
8632    return 0;
8633
8634  /* The offset for the second addr must be 4 more than the first addr.  */
8635  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8636    return 0;
8637
8638  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
8639     instructions.  */
8640  return 1;
8641}
8642
8643/* Return the widened memory access made of MEM1 and MEM2 in MODE.  */
8644
8645rtx
8646widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8647{
8648  rtx x = widen_memory_access (mem1, mode, 0);
8649  MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8650  return x;
8651}
8652
8653/* Return 1 if reg is a pseudo, or is the first register in
8654   a hard register pair.  This makes it suitable for use in
8655   ldd and std insns.  */
8656
8657int
8658register_ok_for_ldd (rtx reg)
8659{
8660  /* We might have been passed a SUBREG.  */
8661  if (!REG_P (reg))
8662    return 0;
8663
8664  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8665    return (REGNO (reg) % 2 == 0);
8666
8667  return 1;
8668}
8669
8670/* Return 1 if OP, a MEM, has an address which is known to be
8671   aligned to an 8-byte boundary.  */
8672
8673int
8674memory_ok_for_ldd (rtx op)
8675{
8676  /* In 64-bit mode, we assume that the address is word-aligned.  */
8677  if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8678    return 0;
8679
8680  if (! can_create_pseudo_p ()
8681      && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8682    return 0;
8683
8684  return 1;
8685}
8686
8687/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
8688
8689static bool
8690sparc_print_operand_punct_valid_p (unsigned char code)
8691{
8692  if (code == '#'
8693      || code == '*'
8694      || code == '('
8695      || code == ')'
8696      || code == '_'
8697      || code == '&')
8698    return true;
8699
8700  return false;
8701}
8702
8703/* Implement TARGET_PRINT_OPERAND.
8704   Print operand X (an rtx) in assembler syntax to file FILE.
8705   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8706   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
8707
8708static void
8709sparc_print_operand (FILE *file, rtx x, int code)
8710{
8711  switch (code)
8712    {
8713    case '#':
8714      /* Output an insn in a delay slot.  */
8715      if (final_sequence)
8716        sparc_indent_opcode = 1;
8717      else
8718	fputs ("\n\t nop", file);
8719      return;
8720    case '*':
8721      /* Output an annul flag if there's nothing for the delay slot and we
8722	 are optimizing.  This is always used with '(' below.
8723         Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8724	 this is a dbx bug, so we only annul when optimizing.
8725         On UltraSPARC, a branch in a delay slot causes a pipeline flush,
8726	 so we always emit a nop in case the next instruction is a branch.  */
8727      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8728	fputs (",a", file);
8729      return;
8730    case '(':
8731      /* Output a 'nop' if there's nothing for the delay slot and we are
8732	 not optimizing.  This is always used with '*' above.  */
8733      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8734	fputs ("\n\t nop", file);
8735      else if (final_sequence)
8736        sparc_indent_opcode = 1;
8737      return;
8738    case ')':
8739      /* Output the right displacement from the saved PC on function return.
8740	 The caller may have placed an "unimp" insn immediately after the call
8741	 so we have to account for it.  This insn is used in the 32-bit ABI
8742	 when calling a function that returns a non-zero-sized structure.  The
8743	 64-bit ABI doesn't have it.  Be careful to have this test be the same
8744	 as that for the call.  The exception is when sparc_std_struct_return
8745	 is enabled, the psABI is followed exactly and the adjustment is made
8746	 by the code in sparc_struct_value_rtx.  The call emitted is the same
8747	 when sparc_std_struct_return is enabled. */
8748      if (!TARGET_ARCH64
8749	 && cfun->returns_struct
8750	 && !sparc_std_struct_return
8751	 && DECL_SIZE (DECL_RESULT (current_function_decl))
8752	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8753	     == INTEGER_CST
8754	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8755	fputs ("12", file);
8756      else
8757        fputc ('8', file);
8758      return;
8759    case '_':
8760      /* Output the Embedded Medium/Anywhere code model base register.  */
8761      fputs (EMBMEDANY_BASE_REG, file);
8762      return;
8763    case '&':
8764      /* Print some local dynamic TLS name.  */
8765      if (const char *name = get_some_local_dynamic_name ())
8766	assemble_name (file, name);
8767      else
8768	output_operand_lossage ("'%%&' used without any "
8769				"local dynamic TLS references");
8770      return;
8771
8772    case 'Y':
8773      /* Adjust the operand to take into account a RESTORE operation.  */
8774      if (GET_CODE (x) == CONST_INT)
8775	break;
8776      else if (GET_CODE (x) != REG)
8777	output_operand_lossage ("invalid %%Y operand");
8778      else if (REGNO (x) < 8)
8779	fputs (reg_names[REGNO (x)], file);
8780      else if (REGNO (x) >= 24 && REGNO (x) < 32)
8781	fputs (reg_names[REGNO (x)-16], file);
8782      else
8783	output_operand_lossage ("invalid %%Y operand");
8784      return;
8785    case 'L':
8786      /* Print out the low order register name of a register pair.  */
8787      if (WORDS_BIG_ENDIAN)
8788	fputs (reg_names[REGNO (x)+1], file);
8789      else
8790	fputs (reg_names[REGNO (x)], file);
8791      return;
8792    case 'H':
8793      /* Print out the high order register name of a register pair.  */
8794      if (WORDS_BIG_ENDIAN)
8795	fputs (reg_names[REGNO (x)], file);
8796      else
8797	fputs (reg_names[REGNO (x)+1], file);
8798      return;
8799    case 'R':
8800      /* Print out the second register name of a register pair or quad.
8801	 I.e., R (%o0) => %o1.  */
8802      fputs (reg_names[REGNO (x)+1], file);
8803      return;
8804    case 'S':
8805      /* Print out the third register name of a register quad.
8806	 I.e., S (%o0) => %o2.  */
8807      fputs (reg_names[REGNO (x)+2], file);
8808      return;
8809    case 'T':
8810      /* Print out the fourth register name of a register quad.
8811	 I.e., T (%o0) => %o3.  */
8812      fputs (reg_names[REGNO (x)+3], file);
8813      return;
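      /* Illustration for the pair/quad codes above: for a 64-bit value
	 held in the %o0/%o1 pair on this big-endian port, %H prints %o0
	 and %L prints %o1; %R of %f0 is %f1, and %S/%T of a quad based
	 at %f0 are %f2/%f3.  */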
8814    case 'x':
8815      /* Print a condition code register.  */
8816      if (REGNO (x) == SPARC_ICC_REG)
8817	{
8818	  /* We don't handle CC[X]_NOOVmode because they're not supposed
8819	     to occur here.  */
8820	  if (GET_MODE (x) == CCmode)
8821	    fputs ("%icc", file);
8822	  else if (GET_MODE (x) == CCXmode)
8823	    fputs ("%xcc", file);
8824	  else
8825	    gcc_unreachable ();
8826	}
8827      else
8828	/* %fccN register */
8829	fputs (reg_names[REGNO (x)], file);
8830      return;
8831    case 'm':
8832      /* Print the operand's address only.  */
8833      output_address (GET_MODE (x), XEXP (x, 0));
8834      return;
8835    case 'r':
8836      /* In this case we need a register.  Use %g0 if the
8837	 operand is const0_rtx.  */
8838      if (x == const0_rtx
8839	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8840	{
8841	  fputs ("%g0", file);
8842	  return;
8843	}
8844      else
8845	break;
8846
8847    case 'A':
8848      switch (GET_CODE (x))
8849	{
8850	case IOR: fputs ("or", file); break;
8851	case AND: fputs ("and", file); break;
8852	case XOR: fputs ("xor", file); break;
8853	default: output_operand_lossage ("invalid %%A operand");
8854	}
8855      return;
8856
8857    case 'B':
8858      switch (GET_CODE (x))
8859	{
8860	case IOR: fputs ("orn", file); break;
8861	case AND: fputs ("andn", file); break;
8862	case XOR: fputs ("xnor", file); break;
8863	default: output_operand_lossage ("invalid %%B operand");
8864	}
8865      return;
8866
8867      /* This is used by the conditional move instructions.  */
8868    case 'C':
8869      {
8870	enum rtx_code rc = GET_CODE (x);
8871
8872	switch (rc)
8873	  {
8874	  case NE: fputs ("ne", file); break;
8875	  case EQ: fputs ("e", file); break;
8876	  case GE: fputs ("ge", file); break;
8877	  case GT: fputs ("g", file); break;
8878	  case LE: fputs ("le", file); break;
8879	  case LT: fputs ("l", file); break;
8880	  case GEU: fputs ("geu", file); break;
8881	  case GTU: fputs ("gu", file); break;
8882	  case LEU: fputs ("leu", file); break;
8883	  case LTU: fputs ("lu", file); break;
8884	  case LTGT: fputs ("lg", file); break;
8885	  case UNORDERED: fputs ("u", file); break;
8886	  case ORDERED: fputs ("o", file); break;
8887	  case UNLT: fputs ("ul", file); break;
8888	  case UNLE: fputs ("ule", file); break;
8889	  case UNGT: fputs ("ug", file); break;
8890	  case UNGE: fputs ("uge", file); break;
8891	  case UNEQ: fputs ("ue", file); break;
8892	  default: output_operand_lossage ("invalid %%C operand");
8893	  }
8894	return;
8895      }
8896
8897      /* This is used by the movr instruction pattern.  */
8898    case 'D':
8899      {
8900	enum rtx_code rc = GET_CODE (x);
8901	switch (rc)
8902	  {
8903	  case NE: fputs ("ne", file); break;
8904	  case EQ: fputs ("e", file); break;
8905	  case GE: fputs ("gez", file); break;
8906	  case LT: fputs ("lz", file); break;
8907	  case LE: fputs ("lez", file); break;
8908	  case GT: fputs ("gz", file); break;
8909	  default: output_operand_lossage ("invalid %%D operand");
8910	  }
8911	return;
8912      }
8913
8914    case 'b':
8915      {
8916	/* Print a sign-extended character.  */
8917	int i = trunc_int_for_mode (INTVAL (x), QImode);
8918	fprintf (file, "%d", i);
8919	return;
8920      }
8921
8922    case 'f':
8923      /* Operand must be a MEM; write its address.  */
8924      if (GET_CODE (x) != MEM)
8925	output_operand_lossage ("invalid %%f operand");
8926      output_address (GET_MODE (x), XEXP (x, 0));
8927      return;
8928
8929    case 's':
8930      {
8931	/* Print a sign-extended 32-bit value.  */
8932	HOST_WIDE_INT i;
8933	if (GET_CODE (x) == CONST_INT)
8934	  i = INTVAL (x);
8935	else
8936	  {
8937	    output_operand_lossage ("invalid %%s operand");
8938	    return;
8939	  }
8940	i = trunc_int_for_mode (i, SImode);
8941	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8942	return;
8943      }
8944
8945    case 0:
8946      /* Do nothing special.  */
8947      break;
8948
8949    default:
8950      /* Undocumented flag.  */
8951      output_operand_lossage ("invalid operand output code");
8952    }
8953
8954  if (GET_CODE (x) == REG)
8955    fputs (reg_names[REGNO (x)], file);
8956  else if (GET_CODE (x) == MEM)
8957    {
8958      fputc ('[', file);
8959      /* Poor Sun assembler doesn't understand absolute addressing.  */
8960      if (CONSTANT_P (XEXP (x, 0)))
8961	fputs ("%g0+", file);
8962      output_address (GET_MODE (x), XEXP (x, 0));
8963      fputc (']', file);
8964    }
8965  else if (GET_CODE (x) == HIGH)
8966    {
8967      fputs ("%hi(", file);
8968      output_addr_const (file, XEXP (x, 0));
8969      fputc (')', file);
8970    }
8971  else if (GET_CODE (x) == LO_SUM)
8972    {
8973      sparc_print_operand (file, XEXP (x, 0), 0);
8974      if (TARGET_CM_MEDMID)
8975	fputs ("+%l44(", file);
8976      else
8977	fputs ("+%lo(", file);
8978      output_addr_const (file, XEXP (x, 1));
8979      fputc (')', file);
8980    }
8981  else if (GET_CODE (x) == CONST_DOUBLE)
8982    output_operand_lossage ("floating-point constant not a valid immediate operand");
8983  else
8984    output_addr_const (file, x);
8985}
8986
8987/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
8988
8989static void
8990sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
8991{
8992  register rtx base, index = 0;
8993  int offset = 0;
8994  register rtx addr = x;
8995
8996  if (REG_P (addr))
8997    fputs (reg_names[REGNO (addr)], file);
8998  else if (GET_CODE (addr) == PLUS)
8999    {
9000      if (CONST_INT_P (XEXP (addr, 0)))
9001	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9002      else if (CONST_INT_P (XEXP (addr, 1)))
9003	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9004      else
9005	base = XEXP (addr, 0), index = XEXP (addr, 1);
9006      if (GET_CODE (base) == LO_SUM)
9007	{
9008	  gcc_assert (USE_AS_OFFSETABLE_LO10
9009		      && TARGET_ARCH64
9010		      && ! TARGET_CM_MEDMID);
9011	  output_operand (XEXP (base, 0), 0);
9012	  fputs ("+%lo(", file);
9013	  output_address (VOIDmode, XEXP (base, 1));
9014	  fprintf (file, ")+%d", offset);
9015	}
9016      else
9017	{
9018	  fputs (reg_names[REGNO (base)], file);
9019	  if (index == 0)
9020	    fprintf (file, "%+d", offset);
9021	  else if (REG_P (index))
9022	    fprintf (file, "+%s", reg_names[REGNO (index)]);
9023	  else if (GET_CODE (index) == SYMBOL_REF
9024		   || GET_CODE (index) == LABEL_REF
9025		   || GET_CODE (index) == CONST)
9026	    fputc ('+', file), output_addr_const (file, index);
9027	  else gcc_unreachable ();
9028	}
9029    }
9030  else if (GET_CODE (addr) == MINUS
9031	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9032    {
9033      output_addr_const (file, XEXP (addr, 0));
9034      fputs ("-(", file);
9035      output_addr_const (file, XEXP (addr, 1));
9036      fputs ("-.)", file);
9037    }
9038  else if (GET_CODE (addr) == LO_SUM)
9039    {
9040      output_operand (XEXP (addr, 0), 0);
9041      if (TARGET_CM_MEDMID)
9042        fputs ("+%l44(", file);
9043      else
9044        fputs ("+%lo(", file);
9045      output_address (VOIDmode, XEXP (addr, 1));
9046      fputc (')', file);
9047    }
9048  else if (flag_pic
9049	   && GET_CODE (addr) == CONST
9050	   && GET_CODE (XEXP (addr, 0)) == MINUS
9051	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9052	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9053	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9054    {
9055      addr = XEXP (addr, 0);
9056      output_addr_const (file, XEXP (addr, 0));
9057      /* Group the args of the second CONST in parenthesis.  */
9058      fputs ("-(", file);
9059      /* Skip past the second CONST--it does nothing for us.  */
9060      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9061      /* Close the parenthesis.  */
9062      fputc (')', file);
9063    }
9064  else
9065    {
9066      output_addr_const (file, addr);
9067    }
9068}
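
/* By way of example, derived from the cases above: a plain register
   prints as "%i0", register plus constant as "%i0+4" (or "%i0-4"),
   register plus register as "%i0+%i1", and the LO_SUM case as
   something like "%g1+%lo(sym)".  */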
9069
9070/* Target hook for assembling integer objects.  The sparc version has
9071   special handling for aligned DI-mode objects.  */
9072
9073static bool
9074sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9075{
9076  /* ??? We only output .xword's for symbols and only then in environments
9077     where the assembler can handle them.  */
9078  if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9079    {
9080      if (TARGET_V9)
9081	{
9082	  assemble_integer_with_op ("\t.xword\t", x);
9083	  return true;
9084	}
9085      else
9086	{
9087	  assemble_aligned_integer (4, const0_rtx);
9088	  assemble_aligned_integer (4, x);
9089	  return true;
9090	}
9091    }
9092  return default_assemble_integer (x, size, aligned_p);
9093}
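
/* Sketch of the effect: an aligned 8-byte symbolic value is emitted as
   "\t.xword\tsym" on V9 targets, while a pre-V9 target emits a zero
   4-byte datum followed by the symbol as a second 4-byte datum (the
   high half is assumed to be zero); CONST_INTs always take the default
   path.  */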
9094
9095/* Return the value of a code used in the .proc pseudo-op that says
9096   what kind of result this function returns.  For non-C types, we pick
9097   the closest C type.  */
9098
9099#ifndef SHORT_TYPE_SIZE
9100#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9101#endif
9102
9103#ifndef INT_TYPE_SIZE
9104#define INT_TYPE_SIZE BITS_PER_WORD
9105#endif
9106
9107#ifndef LONG_TYPE_SIZE
9108#define LONG_TYPE_SIZE BITS_PER_WORD
9109#endif
9110
9111#ifndef LONG_LONG_TYPE_SIZE
9112#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9113#endif
9114
9115#ifndef FLOAT_TYPE_SIZE
9116#define FLOAT_TYPE_SIZE BITS_PER_WORD
9117#endif
9118
9119#ifndef DOUBLE_TYPE_SIZE
9120#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9121#endif
9122
9123#ifndef LONG_DOUBLE_TYPE_SIZE
9124#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9125#endif
9126
9127unsigned long
9128sparc_type_code (register tree type)
9129{
9130  register unsigned long qualifiers = 0;
9131  register unsigned shift;
9132
9133  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
9134     setting more, since some assemblers will give an error for this.  Also,
9135     we must be careful to avoid shifts of 32 bits or more to avoid getting
9136     unpredictable results.  */
9137
9138  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9139    {
9140      switch (TREE_CODE (type))
9141	{
9142	case ERROR_MARK:
9143	  return qualifiers;
9144
9145	case ARRAY_TYPE:
9146	  qualifiers |= (3 << shift);
9147	  break;
9148
9149	case FUNCTION_TYPE:
9150	case METHOD_TYPE:
9151	  qualifiers |= (2 << shift);
9152	  break;
9153
9154	case POINTER_TYPE:
9155	case REFERENCE_TYPE:
9156	case OFFSET_TYPE:
9157	  qualifiers |= (1 << shift);
9158	  break;
9159
9160	case RECORD_TYPE:
9161	  return (qualifiers | 8);
9162
9163	case UNION_TYPE:
9164	case QUAL_UNION_TYPE:
9165	  return (qualifiers | 9);
9166
9167	case ENUMERAL_TYPE:
9168	  return (qualifiers | 10);
9169
9170	case VOID_TYPE:
9171	  return (qualifiers | 16);
9172
9173	case INTEGER_TYPE:
9174	  /* If this is a range type, consider it to be the underlying
9175	     type.  */
9176	  if (TREE_TYPE (type) != 0)
9177	    break;
9178
9179	  /* Carefully distinguish all the standard types of C,
9180	     without messing up if the language is not C.  We do this by
9181	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
9182	     look at both the names and the above fields, but that's redundant.
9183	     Any type whose size is between two C types will be considered
9184	     to be the wider of the two types.  Also, we do not have a
9185	     special code to use for "long long", so anything wider than
9186	     long is treated the same.  Note that we can't distinguish
9187	     between "int" and "long" in this code if they are the same
9188	     size, but that's fine, since neither can the assembler.  */
9189
9190	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9191	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9192
9193	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9194	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9195
9196	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9197	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9198
9199	  else
9200	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9201
9202	case REAL_TYPE:
9203	  /* If this is a range type, consider it to be the underlying
9204	     type.  */
9205	  if (TREE_TYPE (type) != 0)
9206	    break;
9207
9208	  /* Carefully distinguish all the standard types of C,
9209	     without messing up if the language is not C.  */
9210
9211	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9212	    return (qualifiers | 6);
9213
9214	  else
9215	    return (qualifiers | 7);
9216
9217	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
9218	  /* ??? We need to distinguish between double and float complex types,
9219	     but I don't know how yet because I can't reach this code from
9220	     existing front-ends.  */
9221	  return (qualifiers | 7);	/* Who knows? */
9222
9223	case VECTOR_TYPE:
9224	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
9225	case LANG_TYPE:
9226	case NULLPTR_TYPE:
9227	  return qualifiers;
9228
9229	default:
9230	  gcc_unreachable ();		/* Not a type! */
9231        }
9232    }
9233
9234  return qualifiers;
9235}
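
/* Worked example of the encoding above: for a C "int *", the first
   iteration sees POINTER_TYPE and sets 1 << 6, the shift advances to 8,
   and the pointed-to INTEGER_TYPE returns qualifiers | 4, i.e. 0x44
   overall; an "unsigned short *" would likewise yield 0x40 | 13 = 0x4d.  */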
9236
9237/* Nested function support.  */
9238
9239/* Emit RTL insns to initialize the variable parts of a trampoline.
9240   FNADDR is an RTX for the address of the function's pure code.
9241   CXT is an RTX for the static chain value for the function.
9242
9243   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9244   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9245   (to store insns).  This is a bit excessive.  Perhaps a different
9246   mechanism would be better here.
9247
9248   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
9249
9250static void
9251sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9252{
9253  /* SPARC 32-bit trampoline:
9254
9255 	sethi	%hi(fn), %g1
9256 	sethi	%hi(static), %g2
9257 	jmp	%g1+%lo(fn)
9258 	or	%g2, %lo(static), %g2
9259
9260    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9261    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9262   */
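
  /* As a worked example of the stores below: the word at offset 0 is
     0x03000000 | (fn >> 10), i.e. "sethi %hi(fn), %g1"; offset 4 is
     0x05000000 | (static >> 10), i.e. "sethi %hi(static), %g2"; offset 8
     is 0x81c06000 | (fn & 0x3ff), i.e. "jmp %g1+%lo(fn)"; and offset 12
     is 0x8410a000 | (static & 0x3ff), i.e. "or %g2, %lo(static), %g2".  */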
9263
9264  emit_move_insn
9265    (adjust_address (m_tramp, SImode, 0),
9266     expand_binop (SImode, ior_optab,
9267		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9268		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9269		   NULL_RTX, 1, OPTAB_DIRECT));
9270
9271  emit_move_insn
9272    (adjust_address (m_tramp, SImode, 4),
9273     expand_binop (SImode, ior_optab,
9274		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9275		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9276		   NULL_RTX, 1, OPTAB_DIRECT));
9277
9278  emit_move_insn
9279    (adjust_address (m_tramp, SImode, 8),
9280     expand_binop (SImode, ior_optab,
9281		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9282		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9283		   NULL_RTX, 1, OPTAB_DIRECT));
9284
9285  emit_move_insn
9286    (adjust_address (m_tramp, SImode, 12),
9287     expand_binop (SImode, ior_optab,
9288		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9289		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9290		   NULL_RTX, 1, OPTAB_DIRECT));
9291
9292  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
9293     aligned on a 16 byte boundary so one flush clears it all.  */
9294  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9295  if (sparc_cpu != PROCESSOR_ULTRASPARC
9296      && sparc_cpu != PROCESSOR_ULTRASPARC3
9297      && sparc_cpu != PROCESSOR_NIAGARA
9298      && sparc_cpu != PROCESSOR_NIAGARA2
9299      && sparc_cpu != PROCESSOR_NIAGARA3
9300      && sparc_cpu != PROCESSOR_NIAGARA4
9301      && sparc_cpu != PROCESSOR_NIAGARA7)
9302    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9303
9304  /* Call __enable_execute_stack after writing onto the stack to make sure
9305     the stack address is accessible.  */
9306#ifdef HAVE_ENABLE_EXECUTE_STACK
9307  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9308                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9309#endif
9310
9311}
9312
9313/* The 64-bit version is simpler because it makes more sense to load the
9314   values as "immediate" data out of the trampoline.  It's also easier since
9315   we can read the PC without clobbering a register.  */
9316
9317static void
9318sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9319{
9320  /* SPARC 64-bit trampoline:
9321
9322	rd	%pc, %g1
9323	ldx	[%g1+24], %g5
9324	jmp	%g5
9325	ldx	[%g1+16], %g5
9326	+16 bytes data
9327   */
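
  /* Mapping the constants below to the insns above: 0x83414000 is
     "rd %pc, %g1", 0xca586018 is "ldx [%g1+24], %g5", 0x81c14000 is
     "jmp %g5" and 0xca586010 is "ldx [%g1+16], %g5"; the function
     address and static chain then follow as plain data at offsets 24
     and 16 respectively.  */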
9328
9329  emit_move_insn (adjust_address (m_tramp, SImode, 0),
9330		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9331  emit_move_insn (adjust_address (m_tramp, SImode, 4),
9332		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9333  emit_move_insn (adjust_address (m_tramp, SImode, 8),
9334		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9335  emit_move_insn (adjust_address (m_tramp, SImode, 12),
9336		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9337  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9338  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9339  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9340
9341  if (sparc_cpu != PROCESSOR_ULTRASPARC
9342      && sparc_cpu != PROCESSOR_ULTRASPARC3
9343      && sparc_cpu != PROCESSOR_NIAGARA
9344      && sparc_cpu != PROCESSOR_NIAGARA2
9345      && sparc_cpu != PROCESSOR_NIAGARA3
9346      && sparc_cpu != PROCESSOR_NIAGARA4
9347      && sparc_cpu != PROCESSOR_NIAGARA7)
9348    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9349
9350  /* Call __enable_execute_stack after writing onto the stack to make sure
9351     the stack address is accessible.  */
9352#ifdef HAVE_ENABLE_EXECUTE_STACK
9353  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9354                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9355#endif
9356}
9357
9358/* Worker for TARGET_TRAMPOLINE_INIT.  */
9359
9360static void
9361sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9362{
9363  rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9364  cxt = force_reg (Pmode, cxt);
9365  if (TARGET_ARCH64)
9366    sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9367  else
9368    sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9369}
9370
9371/* Adjust the cost of a scheduling dependency.  Return the new cost of
9372   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
9373
9374static int
9375supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9376{
9377  enum attr_type insn_type;
9378
9379  if (recog_memoized (insn) < 0)
9380    return cost;
9381
9382  insn_type = get_attr_type (insn);
9383
9384  if (REG_NOTE_KIND (link) == 0)
9385    {
9386      /* Data dependency; DEP_INSN writes a register that INSN reads some
9387	 cycles later.  */
9388
9389      /* If a load, then the dependence must be on the memory address;
9390	 add an extra "cycle".  Note that the cost could be two cycles
9391	 if the reg was written late in an instruction group; we cannot
9392	 tell here.  */
9393      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9394	return cost + 3;
9395
9396      /* Get the delay only if the address of the store is the dependence.  */
9397      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9398	{
9399	  rtx pat = PATTERN (insn);
9400	  rtx dep_pat = PATTERN (dep_insn);
9401
9402	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9403	    return cost;  /* This should not happen!  */
9404
9405	  /* The dependency between the two instructions was on the data that
9406	     is being stored.  Assume that this implies that the address of the
9407	     store is not dependent.  */
9408	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9409	    return cost;
9410
9411	  return cost + 3;  /* An approximation.  */
9412	}
9413
9414      /* A shift instruction cannot receive its data from an instruction
9415	 in the same cycle; add a one cycle penalty.  */
9416      if (insn_type == TYPE_SHIFT)
9417	return cost + 3;   /* Split before cascade into shift.  */
9418    }
9419  else
9420    {
9421      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9422	 INSN writes some cycles later.  */
9423
9424      /* These are only significant for the fpu unit; writing an fp reg
9425         before the fpu has finished with it stalls the processor.  */
9426
9427      /* Reusing an integer register causes no problems.  */
9428      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9429	return 0;
9430    }
9431
9432  return cost;
9433}
9434
9435static int
9436hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9437{
9438  enum attr_type insn_type, dep_type;
9439  rtx pat = PATTERN (insn);
9440  rtx dep_pat = PATTERN (dep_insn);
9441
9442  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9443    return cost;
9444
9445  insn_type = get_attr_type (insn);
9446  dep_type = get_attr_type (dep_insn);
9447
9448  switch (REG_NOTE_KIND (link))
9449    {
9450    case 0:
9451      /* Data dependency; DEP_INSN writes a register that INSN reads some
9452	 cycles later.  */
9453
9454      switch (insn_type)
9455	{
9456	case TYPE_STORE:
9457	case TYPE_FPSTORE:
9458	  /* Get the delay iff the address of the store is the dependence.  */
9459	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9460	    return cost;
9461
9462	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9463	    return cost;
9464	  return cost + 3;
9465
9466	case TYPE_LOAD:
9467	case TYPE_SLOAD:
9468	case TYPE_FPLOAD:
9469	  /* If a load, then the dependence must be on the memory address.  If
9470	     the addresses aren't equal, then it might be a false dependency.  */
9471	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9472	    {
9473	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9474		  || GET_CODE (SET_DEST (dep_pat)) != MEM
9475		  || GET_CODE (SET_SRC (pat)) != MEM
9476		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9477				    XEXP (SET_SRC (pat), 0)))
9478		return cost + 2;
9479
9480	      return cost + 8;
9481	    }
9482	  break;
9483
9484	case TYPE_BRANCH:
9485	  /* Compare to branch latency is 0.  There is no benefit from
9486	     separating compare and branch.  */
9487	  if (dep_type == TYPE_COMPARE)
9488	    return 0;
9489	  /* Floating point compare to branch latency is less than
9490	     compare to conditional move.  */
9491	  if (dep_type == TYPE_FPCMP)
9492	    return cost - 1;
9493	  break;
9494	default:
9495	  break;
9496	}
9497	break;
9498
9499    case REG_DEP_ANTI:
9500      /* Anti-dependencies only penalize the fpu unit.  */
9501      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9502        return 0;
9503      break;
9504
9505    default:
9506      break;
9507    }
9508
9509  return cost;
9510}
9511
9512static int
9513sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9514{
9515  switch (sparc_cpu)
9516    {
9517    case PROCESSOR_SUPERSPARC:
9518      cost = supersparc_adjust_cost (insn, link, dep, cost);
9519      break;
9520    case PROCESSOR_HYPERSPARC:
9521    case PROCESSOR_SPARCLITE86X:
9522      cost = hypersparc_adjust_cost (insn, link, dep, cost);
9523      break;
9524    default:
9525      break;
9526    }
9527  return cost;
9528}
9529
9530static void
9531sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9532		  int sched_verbose ATTRIBUTE_UNUSED,
9533		  int max_ready ATTRIBUTE_UNUSED)
9534{}
9535
9536static int
9537sparc_use_sched_lookahead (void)
9538{
9539  if (sparc_cpu == PROCESSOR_NIAGARA
9540      || sparc_cpu == PROCESSOR_NIAGARA2
9541      || sparc_cpu == PROCESSOR_NIAGARA3)
9542    return 0;
9543  if (sparc_cpu == PROCESSOR_NIAGARA4
9544      || sparc_cpu == PROCESSOR_NIAGARA7)
9545    return 2;
9546  if (sparc_cpu == PROCESSOR_ULTRASPARC
9547      || sparc_cpu == PROCESSOR_ULTRASPARC3)
9548    return 4;
9549  if ((1 << sparc_cpu) &
9550      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9551       (1 << PROCESSOR_SPARCLITE86X)))
9552    return 3;
9553  return 0;
9554}
9555
9556static int
9557sparc_issue_rate (void)
9558{
9559  switch (sparc_cpu)
9560    {
9561    case PROCESSOR_NIAGARA:
9562    case PROCESSOR_NIAGARA2:
9563    case PROCESSOR_NIAGARA3:
9564    default:
9565      return 1;
9566    case PROCESSOR_NIAGARA4:
9567    case PROCESSOR_NIAGARA7:
9568    case PROCESSOR_V9:
9569      /* Assume V9 processors are capable of at least dual-issue.  */
9570      return 2;
9571    case PROCESSOR_SUPERSPARC:
9572      return 3;
9573    case PROCESSOR_HYPERSPARC:
9574    case PROCESSOR_SPARCLITE86X:
9575      return 2;
9576    case PROCESSOR_ULTRASPARC:
9577    case PROCESSOR_ULTRASPARC3:
9578      return 4;
9579    }
9580}
9581
9582static int
9583set_extends (rtx_insn *insn)
9584{
9585  register rtx pat = PATTERN (insn);
9586
9587  switch (GET_CODE (SET_SRC (pat)))
9588    {
9589      /* Load and some shift instructions zero extend.  */
9590    case MEM:
9591    case ZERO_EXTEND:
9592      /* sethi clears the high bits.  */
9593    case HIGH:
9594      /* LO_SUM is used with sethi; sethi clears the high bits and the
9595	 values used with lo_sum are positive.  */
9596    case LO_SUM:
9597      /* A store-flag insn stores only 0 or 1.  */
9598    case LT: case LTU:
9599    case GT: case GTU:
9600    case LE: case LEU:
9601    case GE: case GEU:
9602    case EQ:
9603    case NE:
9604      return 1;
9605    case AND:
9606      {
9607	rtx op0 = XEXP (SET_SRC (pat), 0);
9608	rtx op1 = XEXP (SET_SRC (pat), 1);
9609	if (GET_CODE (op1) == CONST_INT)
9610	  return INTVAL (op1) >= 0;
9611	if (GET_CODE (op0) != REG)
9612	  return 0;
9613	if (sparc_check_64 (op0, insn) == 1)
9614	  return 1;
9615	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9616      }
9617    case IOR:
9618    case XOR:
9619      {
9620	rtx op0 = XEXP (SET_SRC (pat), 0);
9621	rtx op1 = XEXP (SET_SRC (pat), 1);
9622	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9623	  return 0;
9624	if (GET_CODE (op1) == CONST_INT)
9625	  return INTVAL (op1) >= 0;
9626	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9627      }
9628    case LSHIFTRT:
9629      return GET_MODE (SET_SRC (pat)) == SImode;
9630      /* Positive integers leave the high bits zero.  */
9631    case CONST_INT:
9632      return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9633    case ASHIFTRT:
9634    case SIGN_EXTEND:
9635      return - (GET_MODE (SET_SRC (pat)) == SImode);
9636    case REG:
9637      return sparc_check_64 (SET_SRC (pat), insn);
9638    default:
9639      return 0;
9640    }
9641}
9642
9643/* We _ought_ to have only one kind per function, but...  */
9644static GTY(()) rtx sparc_addr_diff_list;
9645static GTY(()) rtx sparc_addr_list;
9646
9647void
9648sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9649{
9650  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9651  if (diff)
9652    sparc_addr_diff_list
9653      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9654  else
9655    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9656}
9657
9658static void
9659sparc_output_addr_vec (rtx vec)
9660{
9661  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9662  int idx, vlen = XVECLEN (body, 0);
9663
9664#ifdef ASM_OUTPUT_ADDR_VEC_START
9665  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9666#endif
9667
9668#ifdef ASM_OUTPUT_CASE_LABEL
9669  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9670			 NEXT_INSN (lab));
9671#else
9672  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9673#endif
9674
9675  for (idx = 0; idx < vlen; idx++)
9676    {
9677      ASM_OUTPUT_ADDR_VEC_ELT
9678	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9679    }
9680
9681#ifdef ASM_OUTPUT_ADDR_VEC_END
9682  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9683#endif
9684}
9685
9686static void
9687sparc_output_addr_diff_vec (rtx vec)
9688{
9689  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9690  rtx base = XEXP (XEXP (body, 0), 0);
9691  int idx, vlen = XVECLEN (body, 1);
9692
9693#ifdef ASM_OUTPUT_ADDR_VEC_START
9694  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9695#endif
9696
9697#ifdef ASM_OUTPUT_CASE_LABEL
9698  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9699			 NEXT_INSN (lab));
9700#else
9701  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9702#endif
9703
9704  for (idx = 0; idx < vlen; idx++)
9705    {
9706      ASM_OUTPUT_ADDR_DIFF_ELT
9707        (asm_out_file,
9708         body,
9709         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9710         CODE_LABEL_NUMBER (base));
9711    }
9712
9713#ifdef ASM_OUTPUT_ADDR_VEC_END
9714  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9715#endif
9716}
9717
9718static void
9719sparc_output_deferred_case_vectors (void)
9720{
9721  rtx t;
9722  int align;
9723
9724  if (sparc_addr_list == NULL_RTX
9725      && sparc_addr_diff_list == NULL_RTX)
9726    return;
9727
9728  /* Align to cache line in the function's code section.  */
9729  switch_to_section (current_function_section ());
9730
9731  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9732  if (align > 0)
9733    ASM_OUTPUT_ALIGN (asm_out_file, align);
9734
9735  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9736    sparc_output_addr_vec (XEXP (t, 0));
9737  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9738    sparc_output_addr_diff_vec (XEXP (t, 0));
9739
9740  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9741}
9742
9743/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9744   unknown.  Return 1 if the high bits are zero, -1 if the register is
9745   sign extended.  */
9746int
9747sparc_check_64 (rtx x, rtx_insn *insn)
9748{
9749  /* If a register is set only once it is safe to ignore insns this
9750     code does not know how to handle.  The loop will either recognize
9751     the single set and return the correct value or fail to recognize
9752     it and return 0.  */
9753  int set_once = 0;
9754  rtx y = x;
9755
9756  gcc_assert (GET_CODE (x) == REG);
9757
9758  if (GET_MODE (x) == DImode)
9759    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9760
9761  if (flag_expensive_optimizations
9762      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9763    set_once = 1;
9764
9765  if (insn == 0)
9766    {
9767      if (set_once)
9768	insn = get_last_insn_anywhere ();
9769      else
9770	return 0;
9771    }
9772
9773  while ((insn = PREV_INSN (insn)))
9774    {
9775      switch (GET_CODE (insn))
9776	{
9777	case JUMP_INSN:
9778	case NOTE:
9779	  break;
9780	case CODE_LABEL:
9781	case CALL_INSN:
9782	default:
9783	  if (! set_once)
9784	    return 0;
9785	  break;
9786	case INSN:
9787	  {
9788	    rtx pat = PATTERN (insn);
9789	    if (GET_CODE (pat) != SET)
9790	      return 0;
9791	    if (rtx_equal_p (x, SET_DEST (pat)))
9792	      return set_extends (insn);
9793	    if (y && rtx_equal_p (y, SET_DEST (pat)))
9794	      return set_extends (insn);
9795	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9796	      return 0;
9797	  }
9798	}
9799    }
9800  return 0;
9801}
9802
9803/* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
9804   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
9805
9806const char *
9807output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9808{
9809  static char asm_code[60];
9810
9811  /* The scratch register is only required when the destination
9812     register is not a 64-bit global or out register.  */
9813  if (which_alternative != 2)
9814    operands[3] = operands[0];
9815
9816  /* We can only shift by constants <= 63. */
9817  if (GET_CODE (operands[2]) == CONST_INT)
9818    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9819
9820  if (GET_CODE (operands[1]) == CONST_INT)
9821    {
9822      output_asm_insn ("mov\t%1, %3", operands);
9823    }
9824  else
9825    {
9826      output_asm_insn ("sllx\t%H1, 32, %3", operands);
9827      if (sparc_check_64 (operands[1], insn) <= 0)
9828	output_asm_insn ("srl\t%L1, 0, %L1", operands);
9829      output_asm_insn ("or\t%L1, %3, %3", operands);
9830    }
9831
9832  strcpy (asm_code, opcode);
9833
9834  if (which_alternative != 2)
9835    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9836  else
9837    return
9838      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9839}
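
/* For illustration (assuming OPCODE "sllx" and the first alternative,
   where the scratch is the destination itself), the emitted sequence
   resembles:

	sllx	%H1, 32, %0
	srl	%L1, 0, %L1	! only if the low word may be unclean
	or	%L1, %0, %0
	sllx	%0, %2, %L0
	srlx	%L0, 32, %H0
   */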
9840
9841/* Output rtl to increment the profiler label LABELNO
9842   for profiling a function entry.  */
9843
9844void
9845sparc_profile_hook (int labelno)
9846{
9847  char buf[32];
9848  rtx lab, fun;
9849
9850  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9851  if (NO_PROFILE_COUNTERS)
9852    {
9853      emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9854    }
9855  else
9856    {
9857      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9858      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9859      emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9860    }
9861}
9862
9863#ifdef TARGET_SOLARIS
9864/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
9865
9866static void
9867sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9868				     tree decl ATTRIBUTE_UNUSED)
9869{
9870  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9871    {
9872      solaris_elf_asm_comdat_section (name, flags, decl);
9873      return;
9874    }
9875
9876  fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9877
9878  if (!(flags & SECTION_DEBUG))
9879    fputs (",#alloc", asm_out_file);
9880  if (flags & SECTION_WRITE)
9881    fputs (",#write", asm_out_file);
9882  if (flags & SECTION_TLS)
9883    fputs (",#tls", asm_out_file);
9884  if (flags & SECTION_CODE)
9885    fputs (",#execinstr", asm_out_file);
9886
9887  if (flags & SECTION_NOTYPE)
9888    ;
9889  else if (flags & SECTION_BSS)
9890    fputs (",#nobits", asm_out_file);
9891  else
9892    fputs (",#progbits", asm_out_file);
9893
9894  fputc ('\n', asm_out_file);
9895}
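
/* For instance, a writable data section named "my_sec" (a made-up name)
   would be emitted by the function above as:

	.section "my_sec",#alloc,#write,#progbits
   */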
9896#endif /* TARGET_SOLARIS */
9897
9898/* We do not allow indirect calls to be optimized into sibling calls.
9899
9900   We cannot use sibling calls when delayed branches are disabled
9901   because they will likely require the call delay slot to be filled.
9902
9903   Also, on SPARC 32-bit we cannot emit a sibling call when the
9904   current function returns a structure.  This is because the "unimp
9905   after call" convention would cause the callee to return to the
9906   wrong place.  The generic code already disallows cases where the
9907   function being called returns a structure.
9908
9909   It may seem strange how this last case could occur.  Usually there
9910   is code after the call which jumps to epilogue code which dumps the
9911   return value into the struct return area.  That ought to invalidate
9912   the sibling call, right?  Well, in the C++ case we can end up passing
9913   the pointer to the struct return area to a constructor (which returns
9914   void) and then nothing else happens.  Such a sibling call would look
9915   valid without the added check here.
9916
9917   VxWorks PIC PLT entries require the global pointer to be initialized
9918   on entry.  We therefore can't emit sibling calls to them.  */
9919static bool
9920sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9921{
9922  return (decl
9923	  && flag_delayed_branch
9924	  && (TARGET_ARCH64 || ! cfun->returns_struct)
9925	  && !(TARGET_VXWORKS_RTP
9926	       && flag_pic
9927	       && !targetm.binds_local_p (decl)));
9928}
9929
9930/* libfunc renaming.  */
9931
9932static void
9933sparc_init_libfuncs (void)
9934{
9935  if (TARGET_ARCH32)
9936    {
9937      /* Use the subroutines that Sun's library provides for integer
9938	 multiply and divide.  The `*' prevents an underscore from
9939	 being prepended by the compiler. .umul is a little faster
9940	 than .mul.  */
9941      set_optab_libfunc (smul_optab, SImode, "*.umul");
9942      set_optab_libfunc (sdiv_optab, SImode, "*.div");
9943      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9944      set_optab_libfunc (smod_optab, SImode, "*.rem");
9945      set_optab_libfunc (umod_optab, SImode, "*.urem");
9946
9947      /* TFmode arithmetic.  These names are part of the SPARC 32-bit ABI.  */
9948      set_optab_libfunc (add_optab, TFmode, "_Q_add");
9949      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9950      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9951      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9952      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9953
9954      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
9955	 is because with soft-float, the SFmode and DFmode sqrt
9956	 instructions will be absent, and the compiler will notice and
9957	 try to use the TFmode sqrt instruction for calls to the
9958	 builtin function sqrt, but this fails.  */
9959      if (TARGET_FPU)
9960	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9961
9962      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9963      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9964      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9965      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9966      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9967      set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9968
9969      set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
9970      set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
9971      set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
9972      set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
9973
9974      set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
9975      set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
9976      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9977      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9978
9979      if (DITF_CONVERSION_LIBFUNCS)
9980	{
9981	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
9982	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
9983	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9984	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9985	}
9986
9987      if (SUN_CONVERSION_LIBFUNCS)
9988	{
9989	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9990	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9991	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9992	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9993	}
9994    }
9995  if (TARGET_ARCH64)
9996    {
9997      /* In the SPARC 64bit ABI, SImode multiply and divide functions
9998	 do not exist in the library.  Make sure the compiler does not
9999	 emit calls to them by accident.  (It should always use the
10000         hardware instructions.)  */
10001      set_optab_libfunc (smul_optab, SImode, 0);
10002      set_optab_libfunc (sdiv_optab, SImode, 0);
10003      set_optab_libfunc (udiv_optab, SImode, 0);
10004      set_optab_libfunc (smod_optab, SImode, 0);
10005      set_optab_libfunc (umod_optab, SImode, 0);
10006
10007      if (SUN_INTEGER_MULTIPLY_64)
10008	{
10009	  set_optab_libfunc (smul_optab, DImode, "__mul64");
10010	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
10011	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10012	  set_optab_libfunc (smod_optab, DImode, "__rem64");
10013	  set_optab_libfunc (umod_optab, DImode, "__urem64");
10014	}
10015
10016      if (SUN_CONVERSION_LIBFUNCS)
10017	{
10018	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10019	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10020	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10021	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10022	}
10023    }
10024}
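
/* Net effect, as a sketch: with -m32 a TFmode addition such as "a + b"
   on long doubles becomes a call to _Q_add, and an SImode multiply may
   become a call to .umul on processors without a hardware multiplier;
   with -m64 the SImode entries are cleared so no such library call can
   be emitted by accident.  */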
10025
10026/* SPARC builtins.  */
10027enum sparc_builtins
10028{
10029  /* FPU builtins.  */
10030  SPARC_BUILTIN_LDFSR,
10031  SPARC_BUILTIN_STFSR,
10032
10033  /* VIS 1.0 builtins.  */
10034  SPARC_BUILTIN_FPACK16,
10035  SPARC_BUILTIN_FPACK32,
10036  SPARC_BUILTIN_FPACKFIX,
10037  SPARC_BUILTIN_FEXPAND,
10038  SPARC_BUILTIN_FPMERGE,
10039  SPARC_BUILTIN_FMUL8X16,
10040  SPARC_BUILTIN_FMUL8X16AU,
10041  SPARC_BUILTIN_FMUL8X16AL,
10042  SPARC_BUILTIN_FMUL8SUX16,
10043  SPARC_BUILTIN_FMUL8ULX16,
10044  SPARC_BUILTIN_FMULD8SUX16,
10045  SPARC_BUILTIN_FMULD8ULX16,
10046  SPARC_BUILTIN_FALIGNDATAV4HI,
10047  SPARC_BUILTIN_FALIGNDATAV8QI,
10048  SPARC_BUILTIN_FALIGNDATAV2SI,
10049  SPARC_BUILTIN_FALIGNDATADI,
10050  SPARC_BUILTIN_WRGSR,
10051  SPARC_BUILTIN_RDGSR,
10052  SPARC_BUILTIN_ALIGNADDR,
10053  SPARC_BUILTIN_ALIGNADDRL,
10054  SPARC_BUILTIN_PDIST,
10055  SPARC_BUILTIN_EDGE8,
10056  SPARC_BUILTIN_EDGE8L,
10057  SPARC_BUILTIN_EDGE16,
10058  SPARC_BUILTIN_EDGE16L,
10059  SPARC_BUILTIN_EDGE32,
10060  SPARC_BUILTIN_EDGE32L,
10061  SPARC_BUILTIN_FCMPLE16,
10062  SPARC_BUILTIN_FCMPLE32,
10063  SPARC_BUILTIN_FCMPNE16,
10064  SPARC_BUILTIN_FCMPNE32,
10065  SPARC_BUILTIN_FCMPGT16,
10066  SPARC_BUILTIN_FCMPGT32,
10067  SPARC_BUILTIN_FCMPEQ16,
10068  SPARC_BUILTIN_FCMPEQ32,
10069  SPARC_BUILTIN_FPADD16,
10070  SPARC_BUILTIN_FPADD16S,
10071  SPARC_BUILTIN_FPADD32,
10072  SPARC_BUILTIN_FPADD32S,
10073  SPARC_BUILTIN_FPSUB16,
10074  SPARC_BUILTIN_FPSUB16S,
10075  SPARC_BUILTIN_FPSUB32,
10076  SPARC_BUILTIN_FPSUB32S,
10077  SPARC_BUILTIN_ARRAY8,
10078  SPARC_BUILTIN_ARRAY16,
10079  SPARC_BUILTIN_ARRAY32,
10080
10081  /* VIS 2.0 builtins.  */
10082  SPARC_BUILTIN_EDGE8N,
10083  SPARC_BUILTIN_EDGE8LN,
10084  SPARC_BUILTIN_EDGE16N,
10085  SPARC_BUILTIN_EDGE16LN,
10086  SPARC_BUILTIN_EDGE32N,
10087  SPARC_BUILTIN_EDGE32LN,
10088  SPARC_BUILTIN_BMASK,
10089  SPARC_BUILTIN_BSHUFFLEV4HI,
10090  SPARC_BUILTIN_BSHUFFLEV8QI,
10091  SPARC_BUILTIN_BSHUFFLEV2SI,
10092  SPARC_BUILTIN_BSHUFFLEDI,
10093
10094  /* VIS 3.0 builtins.  */
10095  SPARC_BUILTIN_CMASK8,
10096  SPARC_BUILTIN_CMASK16,
10097  SPARC_BUILTIN_CMASK32,
10098  SPARC_BUILTIN_FCHKSM16,
10099  SPARC_BUILTIN_FSLL16,
10100  SPARC_BUILTIN_FSLAS16,
10101  SPARC_BUILTIN_FSRL16,
10102  SPARC_BUILTIN_FSRA16,
10103  SPARC_BUILTIN_FSLL32,
10104  SPARC_BUILTIN_FSLAS32,
10105  SPARC_BUILTIN_FSRL32,
10106  SPARC_BUILTIN_FSRA32,
10107  SPARC_BUILTIN_PDISTN,
10108  SPARC_BUILTIN_FMEAN16,
10109  SPARC_BUILTIN_FPADD64,
10110  SPARC_BUILTIN_FPSUB64,
10111  SPARC_BUILTIN_FPADDS16,
10112  SPARC_BUILTIN_FPADDS16S,
10113  SPARC_BUILTIN_FPSUBS16,
10114  SPARC_BUILTIN_FPSUBS16S,
10115  SPARC_BUILTIN_FPADDS32,
10116  SPARC_BUILTIN_FPADDS32S,
10117  SPARC_BUILTIN_FPSUBS32,
10118  SPARC_BUILTIN_FPSUBS32S,
10119  SPARC_BUILTIN_FUCMPLE8,
10120  SPARC_BUILTIN_FUCMPNE8,
10121  SPARC_BUILTIN_FUCMPGT8,
10122  SPARC_BUILTIN_FUCMPEQ8,
10123  SPARC_BUILTIN_FHADDS,
10124  SPARC_BUILTIN_FHADDD,
10125  SPARC_BUILTIN_FHSUBS,
10126  SPARC_BUILTIN_FHSUBD,
10127  SPARC_BUILTIN_FNHADDS,
10128  SPARC_BUILTIN_FNHADDD,
10129  SPARC_BUILTIN_UMULXHI,
10130  SPARC_BUILTIN_XMULX,
10131  SPARC_BUILTIN_XMULXHI,
10132
10133  /* VIS 4.0 builtins.  */
10134  SPARC_BUILTIN_FPADD8,
10135  SPARC_BUILTIN_FPADDS8,
10136  SPARC_BUILTIN_FPADDUS8,
10137  SPARC_BUILTIN_FPADDUS16,
10138  SPARC_BUILTIN_FPCMPLE8,
10139  SPARC_BUILTIN_FPCMPGT8,
10140  SPARC_BUILTIN_FPCMPULE16,
10141  SPARC_BUILTIN_FPCMPUGT16,
10142  SPARC_BUILTIN_FPCMPULE32,
10143  SPARC_BUILTIN_FPCMPUGT32,
10144  SPARC_BUILTIN_FPMAX8,
10145  SPARC_BUILTIN_FPMAX16,
10146  SPARC_BUILTIN_FPMAX32,
10147  SPARC_BUILTIN_FPMAXU8,
10148  SPARC_BUILTIN_FPMAXU16,
10149  SPARC_BUILTIN_FPMAXU32,
10150  SPARC_BUILTIN_FPMIN8,
10151  SPARC_BUILTIN_FPMIN16,
10152  SPARC_BUILTIN_FPMIN32,
10153  SPARC_BUILTIN_FPMINU8,
10154  SPARC_BUILTIN_FPMINU16,
10155  SPARC_BUILTIN_FPMINU32,
10156  SPARC_BUILTIN_FPSUB8,
10157  SPARC_BUILTIN_FPSUBS8,
10158  SPARC_BUILTIN_FPSUBUS8,
10159  SPARC_BUILTIN_FPSUBUS16,
10160
10161  SPARC_BUILTIN_MAX
10162};
10163
10164static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10165static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10166
10167/* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE.  Return the
10168   function decl or NULL_TREE if the builtin was not added.  */
10169
10170static tree
10171def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10172	     tree type)
10173{
10174  tree t
10175    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10176
10177  if (t)
10178    {
10179      sparc_builtins[code] = t;
10180      sparc_builtins_icode[code] = icode;
10181    }
10182
10183  return t;
10184}
10185
10186/* Likewise, but also marks the function as "const".  */
10187
10188static tree
10189def_builtin_const (const char *name, enum insn_code icode,
10190		   enum sparc_builtins code, tree type)
10191{
10192  tree t = def_builtin (name, icode, code, type);
10193
10194  if (t)
10195    TREE_READONLY (t) = 1;
10196
10197  return t;
10198}
10199
10200/* Implement the TARGET_INIT_BUILTINS target hook.
10201   Create builtin functions for special SPARC instructions.  */
10202
10203static void
10204sparc_init_builtins (void)
10205{
10206  if (TARGET_FPU)
10207    sparc_fpu_init_builtins ();
10208
10209  if (TARGET_VIS)
10210    sparc_vis_init_builtins ();
10211}
10212
10213/* Create builtin functions for FPU instructions.  */
10214
10215static void
10216sparc_fpu_init_builtins (void)
10217{
10218  tree ftype
10219    = build_function_type_list (void_type_node,
10220				build_pointer_type (unsigned_type_node), 0);
10221  def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10222	       SPARC_BUILTIN_LDFSR, ftype);
10223  def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10224	       SPARC_BUILTIN_STFSR, ftype);
10225}
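
/* Minimal usage sketch (the variable name is hypothetical); both
   builtins take a pointer to a 32-bit FSR image:

     unsigned int fsr;
     __builtin_store_fsr (&fsr);	(stfsr: save %fsr to memory)
     __builtin_load_fsr (&fsr);	(ldfsr: reload %fsr from memory)
   */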
10226
10227/* Create builtin functions for VIS instructions.  */
10228
10229static void
10230sparc_vis_init_builtins (void)
10231{
10232  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10233  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10234  tree v4hi = build_vector_type (intHI_type_node, 4);
10235  tree v2hi = build_vector_type (intHI_type_node, 2);
10236  tree v2si = build_vector_type (intSI_type_node, 2);
10237  tree v1si = build_vector_type (intSI_type_node, 1);
10238
10239  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10240  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10241  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10242  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10243  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10244  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10245  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10246  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10247  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10248  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10249  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10250  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10251  tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10252  tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10253  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10254							 v8qi, v8qi,
10255							 intDI_type_node, 0);
10256  tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10257						      v8qi, v8qi, 0);
10258  tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10259						      v8qi, v8qi, 0);
10260  tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10261						  intDI_type_node,
10262						  intDI_type_node, 0);
10263  tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10264						  intSI_type_node,
10265						  intSI_type_node, 0);
10266  tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10267						    ptr_type_node,
10268						    intSI_type_node, 0);
10269  tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10270						    ptr_type_node,
10271						    intDI_type_node, 0);
10272  tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10273						    ptr_type_node,
10274						    ptr_type_node, 0);
10275  tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10276						    ptr_type_node,
10277						    ptr_type_node, 0);
10278  tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10279						      v4hi, v4hi, 0);
10280  tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10281						      v2si, v2si, 0);
10282  tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10283						      v4hi, v4hi, 0);
10284  tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10285						      v2si, v2si, 0);
10286  tree void_ftype_di = build_function_type_list (void_type_node,
10287						 intDI_type_node, 0);
10288  tree di_ftype_void = build_function_type_list (intDI_type_node,
10289						 void_type_node, 0);
10290  tree void_ftype_si = build_function_type_list (void_type_node,
10291						 intSI_type_node, 0);
10292  tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10293						  float_type_node,
10294						  float_type_node, 0);
10295  tree df_ftype_df_df = build_function_type_list (double_type_node,
10296						  double_type_node,
10297						  double_type_node, 0);
10298
10299  /* Packing and expanding vectors.  */
10300  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10301	       SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10302  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10303	       SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10304  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10305	       SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10306  def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10307		     SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10308  def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10309		     SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10310
10311  /* Multiplications.  */
10312  def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10313		     SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10314  def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10315		     SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10316  def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10317		     SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10318  def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10319		     SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10320  def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10321		     SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10322  def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10323		     SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10324  def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10325		     SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10326
10327  /* Data aligning.  */
10328  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10329	       SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10330  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10331	       SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10332  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10333	       SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10334  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10335	       SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10336
10337  def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10338	       SPARC_BUILTIN_WRGSR, void_ftype_di);
10339  def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10340	       SPARC_BUILTIN_RDGSR, di_ftype_void);
10341
10342  if (TARGET_ARCH64)
10343    {
10344      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10345		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10346      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10347		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10348    }
10349  else
10350    {
10351      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10352		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10353      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10354		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10355    }
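  /* A usage sketch, illustrative only: the classic VIS idiom for loading
     8 bytes from a possibly misaligned pointer combines the two builtins
     above:

	typedef unsigned char v8qi __attribute__ ((vector_size (8)));

	v8qi load_misaligned (const unsigned char *p)
	{
	  v8qi *q = (v8qi *) __builtin_vis_alignaddr ((void *) p, 0);
	  return __builtin_vis_faligndatav8qi (q[0], q[1]);
	}

     alignaddr rounds the address down to an 8-byte boundary and latches
     the discarded low-order bits in GSR.align, which the subsequent
     faligndata uses to splice the two aligned doublewords.  */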
10356
10357  /* Pixel distance.  */
10358  def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10359		     SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
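  /* A usage sketch, illustrative only: pdist accumulates the sum of
     absolute differences over eight byte pairs,

	typedef unsigned char v8qi __attribute__ ((vector_size (8)));

	long long sad8 (v8qi a, v8qi b, long long acc)
	{
	  return __builtin_vis_pdist (a, b, acc);
	}

     which is the inner step of typical motion-estimation kernels.  */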
10360
10361  /* Edge handling.  */
10362  if (TARGET_ARCH64)
10363    {
10364      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10365			 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10366      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10367			 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10368      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10369			 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10370      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10371			 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10372      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10373			 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10374      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10375			 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10376    }
10377  else
10378    {
10379      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10380			 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10381      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10382			 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10383      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10384			 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10385      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10386			 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10387      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10388			 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10389      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10390			 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10391    }
10392
10393  /* Pixel compare.  */
10394  if (TARGET_ARCH64)
10395    {
10396      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10397			 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10398      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10399			 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10400      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10401			 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10402      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10403			 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10404      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10405			 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10406      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10407			 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10408      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10409			 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10410      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10411			 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10412    }
10413  else
10414    {
10415      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10416			 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10417      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10418			 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10419      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10420			 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10421      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10422			 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10423      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10424			 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10425      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10426			 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10427      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10428			 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10429      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10430			 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10431    }
10432
10433  /* Addition and subtraction.  */
10434  def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10435		     SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10436  def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10437		     SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10438  def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10439		     SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10440  def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10441		     SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10442  def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10443		     SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10444  def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10445		     SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10446  def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10447		     SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10448  def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10449		     SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10450
10451  /* Three-dimensional array addressing.  */
10452  if (TARGET_ARCH64)
10453    {
10454      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10455			 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10456      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10457			 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10458      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10459			 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10460    }
10461  else
10462    {
10463      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10464			 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10465      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10466			 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10467      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10468			 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10469    }
10470
10471  if (TARGET_VIS2)
10472    {
10473      /* Edge handling.  */
10474      if (TARGET_ARCH64)
10475	{
10476	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10477			     SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10478	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10479			     SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10480	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10481			     SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10482	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10483			     SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10484	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10485			     SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10486	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10487			     SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10488	}
10489      else
10490	{
10491	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10492			     SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10493	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10494			     SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10495	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10496			     SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10497	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10498			     SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10499	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10500			     SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10501	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10502			     SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10503	}
10504
10505      /* Byte mask and shuffle.  */
10506      if (TARGET_ARCH64)
10507	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10508		     SPARC_BUILTIN_BMASK, di_ftype_di_di);
10509      else
10510	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10511		     SPARC_BUILTIN_BMASK, si_ftype_si_si);
10512      def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10513		   SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10514      def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10515		   SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10516      def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10517		   SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10518      def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10519		   SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10520    }
10521
10522  if (TARGET_VIS3)
10523    {
10524      if (TARGET_ARCH64)
10525	{
10526	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10527		       SPARC_BUILTIN_CMASK8, void_ftype_di);
10528	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10529		       SPARC_BUILTIN_CMASK16, void_ftype_di);
10530	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10531		       SPARC_BUILTIN_CMASK32, void_ftype_di);
10532	}
10533      else
10534	{
10535	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10536		       SPARC_BUILTIN_CMASK8, void_ftype_si);
10537	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10538		       SPARC_BUILTIN_CMASK16, void_ftype_si);
10539	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10540		       SPARC_BUILTIN_CMASK32, void_ftype_si);
10541	}
10542
10543      def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10544			 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10545
10546      def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10547			 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10548      def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10549			 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10550      def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10551			 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10552      def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10553			 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10554      def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10555			 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10556      def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10557			 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10558      def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10559			 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10560      def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10561			 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10562
10563      if (TARGET_ARCH64)
10564	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10565			   SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10566      else
10567	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10568			   SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10569
10570      def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10571			 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10572      def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10573			 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10574      def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10575			 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10576
10577      def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10578			 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10579      def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10580			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10581      def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10582			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10583      def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10584			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10585      def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10586			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10587      def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10588			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10589      def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10590			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10591      def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10592			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10593
10594      if (TARGET_ARCH64)
10595	{
10596	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10597			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10598	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10599			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10600	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10601			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10602	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10603			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10604	}
10605      else
10606	{
10607	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10608			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10609	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10610			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10611	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10612			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10613	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10614			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10615	}
10616
10617      def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10618			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10619      def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10620			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10621      def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10622			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10623      def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10624			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10625      def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10626			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10627      def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10628			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10629
10630      def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10631			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10632      def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10633			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10634      def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10635			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10636    }
10637
10638  if (TARGET_VIS4)
10639    {
10640      def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10641			 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10642      def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10643			 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10644      def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10645			 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10646      def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10647			 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10648
10650      if (TARGET_ARCH64)
10651	{
10652	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10653			     SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10654	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10655			     SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10656	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10657			     SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10658	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10659			     SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10660	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10661			     SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10662	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10663			     SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10664	}
10665      else
10666	{
10667	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10668			     SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10669	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10670			     SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10671	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10672			     SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10673	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10674			     SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10675	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10676			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10677	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10678			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10679	}
10680
10681      def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10682			 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10683      def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10684			 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10685      def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10686			 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10687      def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10688			 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10689      def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10690			 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10691      def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10692			 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10693      def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10694			 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10695      def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10696			 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10697      def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10698			 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10699      def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10700			 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10701      def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10702			 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10703      def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10704			 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10705      def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10706			 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10707      def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10708			 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10709      def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10710			 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10711      def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10712			 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10713    }
10714}
10715
10716/* Implement TARGET_BUILTIN_DECL hook.  */
10717
10718static tree
10719sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10720{
10721  if (code >= SPARC_BUILTIN_MAX)
10722    return error_mark_node;
10723
10724  return sparc_builtins[code];
10725}
10726
10727/* Implement TARGET_EXPAND_BUILTIN hook.  */
10728
10729static rtx
10730sparc_expand_builtin (tree exp, rtx target,
10731		      rtx subtarget ATTRIBUTE_UNUSED,
10732		      machine_mode tmode ATTRIBUTE_UNUSED,
10733		      int ignore ATTRIBUTE_UNUSED)
10734{
10735  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10736  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10737  enum insn_code icode = sparc_builtins_icode[code];
10738  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10739  call_expr_arg_iterator iter;
10740  int arg_count = 0;
10741  rtx pat, op[4];
10742  tree arg;
10743
10744  if (nonvoid)
10745    {
10746      machine_mode tmode = insn_data[icode].operand[0].mode;
10747      if (!target
10748	  || GET_MODE (target) != tmode
10749	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10750	op[0] = gen_reg_rtx (tmode);
10751      else
10752	op[0] = target;
10753    }
10754
10755  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10756    {
10757      const struct insn_operand_data *insn_op;
10758      int idx;
10759
10760      if (arg == error_mark_node)
10761	return NULL_RTX;
10762
10763      arg_count++;
10764      idx = arg_count - !nonvoid;
10765      insn_op = &insn_data[icode].operand[idx];
10766      op[arg_count] = expand_normal (arg);
10767
10768      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10769	{
10770	  if (!address_operand (op[arg_count], SImode))
10771	    {
10772	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10773	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
10774	    }
10775	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10776	}
10777
10778      else if (insn_op->mode == V1DImode
10779	       && GET_MODE (op[arg_count]) == DImode)
10780	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10781
10782      else if (insn_op->mode == V1SImode
10783	       && GET_MODE (op[arg_count]) == SImode)
10784	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10785
10786      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10787							insn_op->mode))
10788	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10789    }
10790
10791  switch (arg_count)
10792    {
10793    case 0:
10794      pat = GEN_FCN (icode) (op[0]);
10795      break;
10796    case 1:
10797      if (nonvoid)
10798	pat = GEN_FCN (icode) (op[0], op[1]);
10799      else
10800	pat = GEN_FCN (icode) (op[1]);
10801      break;
10802    case 2:
10803      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10804      break;
10805    case 3:
10806      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10807      break;
10808    default:
10809      gcc_unreachable ();
10810    }
10811
10812  if (!pat)
10813    return NULL_RTX;
10814
10815  emit_insn (pat);
10816
10817  return (nonvoid ? op[0] : const0_rtx);
10818}
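/* For example, a call to __builtin_vis_fpadd16 reaches the expander above
   with icode == CODE_FOR_addv4hi3 (from the table built earlier) and two
   V4HI operands, so a single vector-addition pattern is emitted and is
   ultimately output as an fpadd16 instruction.  */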
10819
10820/* Return the upper 16 bits of the 8x16 multiplication, with rounding.  */
10821
10822static int
10823sparc_vis_mul8x16 (int e8, int e16)
10824{
10825  return (e8 * e16 + 128) / 256;
10826}
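/* For example, e8 == 1 and e16 == 200 give (200 + 128) / 256 == 1, where
   plain truncation of 200 / 256 would give 0; the + 128 term implements
   the hardware's rounding of the scaled product.  */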
10827
10828/* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10829   the result into the array N_ELTS, whose elements are of INNER_TYPE.  */
10830
10831static void
10832sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10833			  tree inner_type, tree cst0, tree cst1)
10834{
10835  unsigned i, num = VECTOR_CST_NELTS (cst0);
10836  int scale;
10837
10838  switch (fncode)
10839    {
10840    case SPARC_BUILTIN_FMUL8X16:
10841      for (i = 0; i < num; ++i)
10842	{
10843	  int val
10844	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10845				 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10846	  n_elts[i] = build_int_cst (inner_type, val);
10847	}
10848      break;
10849
10850    case SPARC_BUILTIN_FMUL8X16AU:
10851      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10852
10853      for (i = 0; i < num; ++i)
10854	{
10855	  int val
10856	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10857				 scale);
10858	  n_elts[i] = build_int_cst (inner_type, val);
10859	}
10860      break;
10861
10862    case SPARC_BUILTIN_FMUL8X16AL:
10863      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10864
10865      for (i = 0; i < num; ++i)
10866	{
10867	  int val
10868	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10869				 scale);
10870	  n_elts[i] = build_int_cst (inner_type, val);
10871	}
10872      break;
10873
10874    default:
10875      gcc_unreachable ();
10876    }
10877}
10878
10879/* Implement TARGET_FOLD_BUILTIN hook.
10880
10881   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
10882   result of the function call is ignored.  NULL_TREE is returned if the
10883   function could not be folded.  */
10884
10885static tree
10886sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10887		    tree *args, bool ignore)
10888{
10889  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10890  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10891  tree arg0, arg1, arg2;
10892
10893  if (ignore)
10894    switch (code)
10895      {
10896      case SPARC_BUILTIN_LDFSR:
10897      case SPARC_BUILTIN_STFSR:
10898      case SPARC_BUILTIN_ALIGNADDR:
10899      case SPARC_BUILTIN_WRGSR:
10900      case SPARC_BUILTIN_BMASK:
10901      case SPARC_BUILTIN_CMASK8:
10902      case SPARC_BUILTIN_CMASK16:
10903      case SPARC_BUILTIN_CMASK32:
10904	break;
10905
10906      default:
10907	return build_zero_cst (rtype);
10908      }
10909
10910  switch (code)
10911    {
10912    case SPARC_BUILTIN_FEXPAND:
10913      arg0 = args[0];
10914      STRIP_NOPS (arg0);
10915
10916      if (TREE_CODE (arg0) == VECTOR_CST)
10917	{
10918	  tree inner_type = TREE_TYPE (rtype);
10919	  tree *n_elts;
10920	  unsigned i;
10921
10922	  n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10923	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10924	    n_elts[i] = build_int_cst (inner_type,
10925				       TREE_INT_CST_LOW
10926				         (VECTOR_CST_ELT (arg0, i)) << 4);
10927	  return build_vector (rtype, n_elts);
10928	}
10929      break;
10930
10931    case SPARC_BUILTIN_FMUL8X16:
10932    case SPARC_BUILTIN_FMUL8X16AU:
10933    case SPARC_BUILTIN_FMUL8X16AL:
10934      arg0 = args[0];
10935      arg1 = args[1];
10936      STRIP_NOPS (arg0);
10937      STRIP_NOPS (arg1);
10938
10939      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10940	{
10941	  tree inner_type = TREE_TYPE (rtype);
10942	  tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10943	  sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10944	  return build_vector (rtype, n_elts);
10945	}
10946      break;
10947
10948    case SPARC_BUILTIN_FPMERGE:
10949      arg0 = args[0];
10950      arg1 = args[1];
10951      STRIP_NOPS (arg0);
10952      STRIP_NOPS (arg1);
10953
10954      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10955	{
10956	  tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10957	  unsigned i;
10958	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10959	    {
10960	      n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10961	      n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10962	    }
10963
10964	  return build_vector (rtype, n_elts);
10965	}
10966      break;
10967
10968    case SPARC_BUILTIN_PDIST:
10969    case SPARC_BUILTIN_PDISTN:
10970      arg0 = args[0];
10971      arg1 = args[1];
10972      STRIP_NOPS (arg0);
10973      STRIP_NOPS (arg1);
10974      if (code == SPARC_BUILTIN_PDIST)
10975	{
10976	  arg2 = args[2];
10977	  STRIP_NOPS (arg2);
10978	}
10979      else
10980	arg2 = integer_zero_node;
10981
10982      if (TREE_CODE (arg0) == VECTOR_CST
10983	  && TREE_CODE (arg1) == VECTOR_CST
10984	  && TREE_CODE (arg2) == INTEGER_CST)
10985	{
10986	  bool overflow = false;
10987	  widest_int result = wi::to_widest (arg2);
10988	  widest_int tmp;
10989	  unsigned i;
10990
10991	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10992	    {
10993	      tree e0 = VECTOR_CST_ELT (arg0, i);
10994	      tree e1 = VECTOR_CST_ELT (arg1, i);
10995
10996	      bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10997
10998	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10999	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11000	      if (wi::neg_p (tmp))
11001		tmp = wi::neg (tmp, &neg2_ovf);
11002	      else
11003		neg2_ovf = false;
11004	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
11005	      overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11006	    }
11007
11008	  gcc_assert (!overflow);
11009
11010	  return wide_int_to_tree (rtype, result);
11011	}
11012
11013    default:
11014      break;
11015    }
11016
11017  return NULL_TREE;
11018}
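/* For example, a call such as __builtin_vis_fexpand ((v4qi) { 1, 2, 3, 4 })
   with a constant operand never reaches the RTL expander: the FEXPAND case
   above folds it directly to the vector constant { 16, 32, 48, 64 }, each
   element shifted left by 4.  */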
11019
11020/* ??? This duplicates information provided to the compiler by the
11021   ??? scheduler description.  Some day, teach genautomata to output
11022   ??? the latencies and then CSE will just use that.  */
11023
11024static bool
11025sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11026		 int opno ATTRIBUTE_UNUSED,
11027		 int *total, bool speed ATTRIBUTE_UNUSED)
11028{
11029  int code = GET_CODE (x);
11030  bool float_mode_p = FLOAT_MODE_P (mode);
11031
11032  switch (code)
11033    {
11034    case CONST_INT:
11035      if (SMALL_INT (x))
11036	*total = 0;
11037      else
11038	*total = 2;
11039      return true;
11040
11041    case CONST_WIDE_INT:
11042      *total = 0;
11043      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11044	*total += 2;
11045      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11046	*total += 2;
11047      return true;
11048
11049    case HIGH:
11050      *total = 2;
11051      return true;
11052
11053    case CONST:
11054    case LABEL_REF:
11055    case SYMBOL_REF:
11056      *total = 4;
11057      return true;
11058
11059    case CONST_DOUBLE:
11060      *total = 8;
11061      return true;
11062
11063    case MEM:
11064      /* If outer-code was a sign or zero extension, a cost
11065	 of COSTS_N_INSNS (1) was already added in.  This is
11066	 why we are subtracting it back out.  */
11067      if (outer_code == ZERO_EXTEND)
11068	{
11069	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11070	}
11071      else if (outer_code == SIGN_EXTEND)
11072	{
11073	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11074	}
11075      else if (float_mode_p)
11076	{
11077	  *total = sparc_costs->float_load;
11078	}
11079      else
11080	{
11081	  *total = sparc_costs->int_load;
11082	}
11083
11084      return true;
11085
11086    case PLUS:
11087    case MINUS:
11088      if (float_mode_p)
11089	*total = sparc_costs->float_plusminus;
11090      else
11091	*total = COSTS_N_INSNS (1);
11092      return false;
11093
11094    case FMA:
11095      {
11096	rtx sub;
11097
11098	gcc_assert (float_mode_p);
11099	*total = sparc_costs->float_mul;
11100
11101	sub = XEXP (x, 0);
11102	if (GET_CODE (sub) == NEG)
11103	  sub = XEXP (sub, 0);
11104	*total += rtx_cost (sub, mode, FMA, 0, speed);
11105
11106	sub = XEXP (x, 2);
11107	if (GET_CODE (sub) == NEG)
11108	  sub = XEXP (sub, 0);
11109	*total += rtx_cost (sub, mode, FMA, 2, speed);
11110	return true;
11111      }
11112
11113    case MULT:
11114      if (float_mode_p)
11115	*total = sparc_costs->float_mul;
11116      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11117	*total = COSTS_N_INSNS (25);
11118      else
11119	{
11120	  int bit_cost;
11121
11122	  bit_cost = 0;
11123	  if (sparc_costs->int_mul_bit_factor)
11124	    {
11125	      int nbits;
11126
11127	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11128		{
11129		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11130		  for (nbits = 0; value != 0; value &= value - 1)
11131		    nbits++;
11132		}
11133	      else
11134		nbits = 7;
11135
11136	      if (nbits < 3)
11137		nbits = 3;
11138	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11139	      bit_cost = COSTS_N_INSNS (bit_cost);
11140	    }
11141
11142	  if (mode == DImode || !TARGET_HARD_MUL)
11143	    *total = sparc_costs->int_mulX + bit_cost;
11144	  else
11145	    *total = sparc_costs->int_mul + bit_cost;
11146	}
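      /* As a worked example of the computation above: a constant
	 multiplier with 7 bits set on a CPU with int_mul_bit_factor == 2
	 adds COSTS_N_INSNS ((7 - 3) / 2), i.e. COSTS_N_INSNS (2), on top
	 of the base multiply cost.  */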
11147      return false;
11148
11149    case ASHIFT:
11150    case ASHIFTRT:
11151    case LSHIFTRT:
11152      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11153      return false;
11154
11155    case DIV:
11156    case UDIV:
11157    case MOD:
11158    case UMOD:
11159      if (float_mode_p)
11160	{
11161	  if (mode == DFmode)
11162	    *total = sparc_costs->float_div_df;
11163	  else
11164	    *total = sparc_costs->float_div_sf;
11165	}
11166      else
11167	{
11168	  if (mode == DImode)
11169	    *total = sparc_costs->int_divX;
11170	  else
11171	    *total = sparc_costs->int_div;
11172	}
11173      return false;
11174
11175    case NEG:
11176      if (! float_mode_p)
11177	{
11178	  *total = COSTS_N_INSNS (1);
11179	  return false;
11180	}
11181      /* FALLTHRU */
11182
11183    case ABS:
11184    case FLOAT:
11185    case UNSIGNED_FLOAT:
11186    case FIX:
11187    case UNSIGNED_FIX:
11188    case FLOAT_EXTEND:
11189    case FLOAT_TRUNCATE:
11190      *total = sparc_costs->float_move;
11191      return false;
11192
11193    case SQRT:
11194      if (mode == DFmode)
11195	*total = sparc_costs->float_sqrt_df;
11196      else
11197	*total = sparc_costs->float_sqrt_sf;
11198      return false;
11199
11200    case COMPARE:
11201      if (float_mode_p)
11202	*total = sparc_costs->float_cmp;
11203      else
11204	*total = COSTS_N_INSNS (1);
11205      return false;
11206
11207    case IF_THEN_ELSE:
11208      if (float_mode_p)
11209	*total = sparc_costs->float_cmove;
11210      else
11211	*total = sparc_costs->int_cmove;
11212      return false;
11213
11214    case IOR:
11215      /* Handle the NAND vector patterns.  */
11216      if (sparc_vector_mode_supported_p (mode)
11217	  && GET_CODE (XEXP (x, 0)) == NOT
11218	  && GET_CODE (XEXP (x, 1)) == NOT)
11219	{
11220	  *total = COSTS_N_INSNS (1);
11221	  return true;
11222	}
11223      else
11224        return false;
11225
11226    default:
11227      return false;
11228    }
11229}
11230
11231/* Return true if CLASS is either GENERAL_REGS or I64_REGS.  */
11232
11233static inline bool
11234general_or_i64_p (reg_class_t rclass)
11235{
11236  return (rclass == GENERAL_REGS || rclass == I64_REGS);
11237}
11238
11239/* Implement TARGET_REGISTER_MOVE_COST.  */
11240
11241static int
11242sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11243			  reg_class_t from, reg_class_t to)
11244{
11245  bool need_memory = false;
11246
11247  if (from == FPCC_REGS || to == FPCC_REGS)
11248    need_memory = true;
11249  else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11250	   || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11251    {
11252      if (TARGET_VIS3)
11253	{
11254	  int size = GET_MODE_SIZE (mode);
11255	  if (size == 8 || size == 4)
11256	    {
11257	      if (! TARGET_ARCH32 || size == 4)
11258		return 4;
11259	      else
11260		return 6;
11261	    }
11262	}
11263      need_memory = true;
11264    }
11265
11266  if (need_memory)
11267    {
11268      if (sparc_cpu == PROCESSOR_ULTRASPARC
11269	  || sparc_cpu == PROCESSOR_ULTRASPARC3
11270	  || sparc_cpu == PROCESSOR_NIAGARA
11271	  || sparc_cpu == PROCESSOR_NIAGARA2
11272	  || sparc_cpu == PROCESSOR_NIAGARA3
11273	  || sparc_cpu == PROCESSOR_NIAGARA4
11274	  || sparc_cpu == PROCESSOR_NIAGARA7)
11275	return 12;
11276
11277      return 6;
11278    }
11279
11280  return 2;
11281}
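/* For example, with VIS3 a 4-byte value moves directly between the integer
   and FP register files (movwtos/movstouw) at cost 4, whereas without VIS3
   the transfer must bounce through memory, at cost 6, or 12 on the
   UltraSPARC/Niagara class of CPUs listed above.  */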
11282
11283/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11284   This is achieved by means of a manual dynamic stack space allocation in
11285   the current frame.  We make the assumption that SEQ doesn't contain any
11286   function calls, with the possible exception of calls to the GOT helper.  */
11287
11288static void
11289emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11290{
11291  /* We must preserve the lowest 16 words for the register save area.  */
11292  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11293  /* We really need only 2 words of fresh stack space.  */
11294  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11295
11296  rtx slot
11297    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11298					     SPARC_STACK_BIAS + offset));
11299
11300  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11301  emit_insn (gen_rtx_SET (slot, reg));
11302  if (reg2)
11303    emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11304			    reg2));
11305  emit_insn (seq);
11306  if (reg2)
11307    emit_insn (gen_rtx_SET (reg2,
11308			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
11309  emit_insn (gen_rtx_SET (reg, slot));
11310  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11311}
11312
11313/* Output the assembler code for a thunk function.  THUNK_DECL is the
11314   declaration for the thunk function itself, FUNCTION is the decl for
11315   the target function.  DELTA is an immediate constant offset to be
11316   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
11317   (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
11318
11319static void
11320sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11321		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11322		       tree function)
11323{
11324  rtx this_rtx, funexp;
11325  rtx_insn *insn;
11326  unsigned int int_arg_first;
11327
11328  reload_completed = 1;
11329  epilogue_completed = 1;
11330
11331  emit_note (NOTE_INSN_PROLOGUE_END);
11332
11333  if (TARGET_FLAT)
11334    {
11335      sparc_leaf_function_p = 1;
11336
11337      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11338    }
11339  else if (flag_delayed_branch)
11340    {
11341      /* We will emit a regular sibcall below, so we need to instruct
11342	 output_sibcall that we are in a leaf function.  */
11343      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11344
11345      /* This will cause final.c to invoke leaf_renumber_regs so we
11346	 must behave as if we were in a not-yet-leafified function.  */
11347      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11348    }
11349  else
11350    {
11351      /* We will emit the sibcall manually below, so we will need to
11352	 manually spill non-leaf registers.  */
11353      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11354
11355      /* We really are in a leaf function.  */
11356      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11357    }
11358
11359  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
11360     returns a structure, the structure return pointer is there instead.  */
11361  if (TARGET_ARCH64
11362      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11363    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11364  else
11365    this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11366
11367  /* Add DELTA.  When possible use a plain add, otherwise load it into
11368     a register first.  */
11369  if (delta)
11370    {
11371      rtx delta_rtx = GEN_INT (delta);
11372
11373      if (! SPARC_SIMM13_P (delta))
11374	{
11375	  rtx scratch = gen_rtx_REG (Pmode, 1);
11376	  emit_move_insn (scratch, delta_rtx);
11377	  delta_rtx = scratch;
11378	}
11379
11380      /* THIS_RTX += DELTA.  */
11381      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11382    }
11383
11384  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
11385  if (vcall_offset)
11386    {
11387      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11388      rtx scratch = gen_rtx_REG (Pmode, 1);
11389
11390      gcc_assert (vcall_offset < 0);
11391
11392      /* SCRATCH = *THIS_RTX.  */
11393      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11394
11395      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
11396	 may not have any available scratch register at this point.  */
11397      if (SPARC_SIMM13_P (vcall_offset))
11398	;
11399      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
11400      else if (! fixed_regs[5]
11401	       /* The below sequence is made up of at least 2 insns,
11402		  while the default method may need only one.  */
11403	       && vcall_offset < -8192)
11404	{
11405	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
11406	  emit_move_insn (scratch2, vcall_offset_rtx);
11407	  vcall_offset_rtx = scratch2;
11408	}
11409      else
11410	{
11411	  rtx increment = GEN_INT (-4096);
11412
11413	  /* VCALL_OFFSET is a negative number whose typical range can be
11414	     estimated as -32768..0 in 32-bit mode.  In almost all cases
11415	     it is therefore cheaper to emit multiple add insns than
11416	     spilling and loading the constant into a register (at least
11417	     6 insns).  */
11418	  while (! SPARC_SIMM13_P (vcall_offset))
11419	    {
11420	      emit_insn (gen_add2_insn (scratch, increment));
11421	      vcall_offset += 4096;
11422	    }
11423	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11424	}
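      /* As a worked example of the loop above: VCALL_OFFSET == -32768
	 emits seven add insns of -4096 each, leaving -4096, which is a
	 valid SIMM13 operand for the memory access below.  */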
11425
11426      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
11427      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11428					    gen_rtx_PLUS (Pmode,
11429							  scratch,
11430							  vcall_offset_rtx)));
11431
11432      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
11433      emit_insn (gen_add2_insn (this_rtx, scratch));
11434    }
11435
11436  /* Generate a tail call to the target function.  */
11437  if (! TREE_USED (function))
11438    {
11439      assemble_external (function);
11440      TREE_USED (function) = 1;
11441    }
11442  funexp = XEXP (DECL_RTL (function), 0);
11443
11444  if (flag_delayed_branch)
11445    {
11446      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11447      insn = emit_call_insn (gen_sibcall (funexp));
11448      SIBLING_CALL_P (insn) = 1;
11449    }
11450  else
11451    {
11452      /* The hoops we have to jump through in order to generate a sibcall
11453	 without using delay slots...  */
11454      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11455
11456      if (flag_pic)
11457        {
11458	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
11459	  start_sequence ();
11460	  load_got_register ();  /* clobbers %o7 */
11461	  scratch = sparc_legitimize_pic_address (funexp, scratch);
11462	  seq = get_insns ();
11463	  end_sequence ();
11464	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11465	}
11466      else if (TARGET_ARCH32)
11467	{
11468	  emit_insn (gen_rtx_SET (scratch,
11469				  gen_rtx_HIGH (SImode, funexp)));
11470	  emit_insn (gen_rtx_SET (scratch,
11471				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
11472	}
11473      else  /* TARGET_ARCH64 */
11474        {
11475	  switch (sparc_cmodel)
11476	    {
11477	    case CM_MEDLOW:
11478	    case CM_MEDMID:
11479	      /* The destination can serve as a temporary.  */
11480	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11481	      break;
11482
11483	    case CM_MEDANY:
11484	    case CM_EMBMEDANY:
11485	      /* The destination cannot serve as a temporary.  */
11486	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
11487	      start_sequence ();
11488	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11489	      seq = get_insns ();
11490	      end_sequence ();
11491	      emit_and_preserve (seq, spill_reg, 0);
11492	      break;
11493
11494	    default:
11495	      gcc_unreachable ();
11496	    }
11497	}
11498
11499      emit_jump_insn (gen_indirect_jump (scratch));
11500    }
11501
11502  emit_barrier ();
11503
11504  /* Run just enough of rest_of_compilation to get the insns emitted.
11505     There's not really enough bulk here to make other passes such as
11506     instruction scheduling worth while.  Note that use_thunk calls
11507     instruction scheduling worthwhile.  Note that use_thunk calls
11508  insn = get_insns ();
11509  shorten_branches (insn);
11510  final_start_function (insn, file, 1);
11511  final (insn, file, 1);
11512  final_end_function ();
11513
11514  reload_completed = 0;
11515  epilogue_completed = 0;
11516}
11517
11518/* Return true if sparc_output_mi_thunk would be able to output the
11519   assembler code for the thunk function specified by the arguments
11520   it is passed, and false otherwise.  */
11521static bool
11522sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11523			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11524			   HOST_WIDE_INT vcall_offset,
11525			   const_tree function ATTRIBUTE_UNUSED)
11526{
11527  /* Bound the loop used in the default method above.  */
11528  return (vcall_offset >= -32768 || ! fixed_regs[5]);
11529}
11530
11531/* How to allocate a 'struct machine_function'.  */
11532
11533static struct machine_function *
11534sparc_init_machine_status (void)
11535{
11536  return ggc_cleared_alloc<machine_function> ();
11537}
11538
11539/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11540   We need to emit DTP-relative relocations.  */
11541
11542static void
11543sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11544{
11545  switch (size)
11546    {
11547    case 4:
11548      fputs ("\t.word\t%r_tls_dtpoff32(", file);
11549      break;
11550    case 8:
11551      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11552      break;
11553    default:
11554      gcc_unreachable ();
11555    }
11556  output_addr_const (file, x);
11557  fputs (")", file);
11558}
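/* For a 4-byte entry referencing a symbol FOO, the code above emits
   "\t.word\t%r_tls_dtpoff32(FOO)", i.e. a 32-bit DTP-relative relocation
   against FOO.  */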
11559
11560/* Do whatever processing is required at the end of a file.  */
11561
11562static void
11563sparc_file_end (void)
11564{
11565  /* If we need to emit the special GOT helper function, do so now.  */
11566  if (got_helper_rtx)
11567    {
11568      const char *name = XSTR (got_helper_rtx, 0);
11569      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11570#ifdef DWARF2_UNWIND_INFO
11571      bool do_cfi;
11572#endif
11573
11574      if (USE_HIDDEN_LINKONCE)
11575	{
11576	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11577				  get_identifier (name),
11578				  build_function_type_list (void_type_node,
11579                                                            NULL_TREE));
11580	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11581					   NULL_TREE, void_type_node);
11582	  TREE_PUBLIC (decl) = 1;
11583	  TREE_STATIC (decl) = 1;
11584	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11585	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11586	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
11587	  resolve_unique_section (decl, 0, flag_function_sections);
11588	  allocate_struct_function (decl, true);
11589	  cfun->is_thunk = 1;
11590	  current_function_decl = decl;
11591	  init_varasm_status ();
11592	  assemble_start_function (decl, name);
11593	}
11594      else
11595	{
11596	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11597          switch_to_section (text_section);
11598	  if (align > 0)
11599	    ASM_OUTPUT_ALIGN (asm_out_file, align);
11600	  ASM_OUTPUT_LABEL (asm_out_file, name);
11601	}
11602
11603#ifdef DWARF2_UNWIND_INFO
11604      do_cfi = dwarf2out_do_cfi_asm ();
11605      if (do_cfi)
11606	fprintf (asm_out_file, "\t.cfi_startproc\n");
11607#endif
11608      if (flag_delayed_branch)
11609	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11610		 reg_name, reg_name);
11611      else
11612	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11613		 reg_name, reg_name);
11614#ifdef DWARF2_UNWIND_INFO
11615      if (do_cfi)
11616	fprintf (asm_out_file, "\t.cfi_endproc\n");
11617#endif
11618    }
11619
11620  if (NEED_INDICATE_EXEC_STACK)
11621    file_end_indicate_exec_stack ();
11622
11623#ifdef TARGET_SOLARIS
11624  solaris_file_end ();
11625#endif
11626}
11627
11628#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11629/* Implement TARGET_MANGLE_TYPE.  */
11630
11631static const char *
11632sparc_mangle_type (const_tree type)
11633{
11634  if (!TARGET_64BIT
11635      && TYPE_MAIN_VARIANT (type) == long_double_type_node
11636      && TARGET_LONG_DOUBLE_128)
11637    return "g";
11638
11639  /* For all other types, use normal C++ mangling.  */
11640  return NULL;
11641}
11642#endif
11643
11644/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
11645   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
11646   where bit 0 indicates that X is true, and bit 1 indicates Y is true.  */
11647
11648void
11649sparc_emit_membar_for_model (enum memmodel model,
11650			     int load_store, int before_after)
11651{
11652  /* Bits for the MEMBAR mmask field.  */
11653  const int LoadLoad = 1;
11654  const int StoreLoad = 2;
11655  const int LoadStore = 4;
11656  const int StoreStore = 8;
11657
11658  int mm = 0, implied = 0;
11659
11660  switch (sparc_memory_model)
11661    {
11662    case SMM_SC:
11663      /* Sequential Consistency.  All memory transactions are immediately
11664	 visible in sequential execution order.  No barriers needed.  */
11665      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11666      break;
11667
11668    case SMM_TSO:
11669      /* Total Store Ordering: all memory transactions with store semantics
11670	 are followed by an implied StoreStore.  */
11671      implied |= StoreStore;
11672
11673      /* If we're not looking for a raw barrier (before+after), then atomic
11674	 operations get the benefit of being both load and store.  */
11675      if (load_store == 3 && before_after == 1)
11676	implied |= StoreLoad;
11677      /* FALLTHRU */
11678
11679    case SMM_PSO:
11680      /* Partial Store Ordering: all memory transactions with load semantics
11681	 are followed by an implied LoadLoad | LoadStore.  */
11682      implied |= LoadLoad | LoadStore;
11683
11684      /* If we're not looking for a raw barrier (before+after), then atomic
11685	 operations get the benefit of being both load and store.  */
11686      if (load_store == 3 && before_after == 2)
11687	implied |= StoreLoad | StoreStore;
11688      /* FALLTHRU */
11689
11690    case SMM_RMO:
11691      /* Relaxed Memory Ordering: no implicit bits.  */
11692      break;
11693
11694    default:
11695      gcc_unreachable ();
11696    }
11697
11698  if (before_after & 1)
11699    {
11700      if (is_mm_release (model) || is_mm_acq_rel (model)
11701	  || is_mm_seq_cst (model))
11702	{
11703	  if (load_store & 1)
11704	    mm |= LoadLoad | StoreLoad;
11705	  if (load_store & 2)
11706	    mm |= LoadStore | StoreStore;
11707	}
11708    }
11709  if (before_after & 2)
11710    {
11711      if (is_mm_acquire (model) || is_mm_acq_rel (model)
11712	  || is_mm_seq_cst (model))
11713	{
11714	  if (load_store & 1)
11715	    mm |= LoadLoad | LoadStore;
11716	  if (load_store & 2)
11717	    mm |= StoreLoad | StoreStore;
11718	}
11719    }
11720
11721  /* Remove the bits implied by the system memory model.  */
11722  mm &= ~implied;
11723
11724  /* For raw barriers (before+after), always emit a barrier.
11725     This will become a compile-time barrier if needed.  */
11726  if (mm || before_after == 3)
11727    emit_insn (gen_membar (GEN_INT (mm)));
11728}
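/* For example, under the weakest (RMO) model nothing is implied, so an
   acquire fence after a load, i.e. sparc_emit_membar_for_model
   (MEMMODEL_ACQUIRE, 1, 2), computes mm == LoadLoad | LoadStore and emits
   membar #LoadLoad | #LoadStore.  Under TSO the same call emits nothing,
   because both bits are already implied by the memory model.  */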
11729
11730/* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
11731   32-bit compare and swap on the word containing the byte or half-word.  */
11732
11733static void
11734sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11735				  rtx oldval, rtx newval)
11736{
11737  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11738  rtx addr = gen_reg_rtx (Pmode);
11739  rtx off = gen_reg_rtx (SImode);
11740  rtx oldv = gen_reg_rtx (SImode);
11741  rtx newv = gen_reg_rtx (SImode);
11742  rtx oldvalue = gen_reg_rtx (SImode);
11743  rtx newvalue = gen_reg_rtx (SImode);
11744  rtx res = gen_reg_rtx (SImode);
11745  rtx resv = gen_reg_rtx (SImode);
11746  rtx memsi, val, mask, cc;
11747
11748  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11749
11750  if (Pmode != SImode)
11751    addr1 = gen_lowpart (SImode, addr1);
11752  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11753
11754  memsi = gen_rtx_MEM (SImode, addr);
11755  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11756  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11757
11758  val = copy_to_reg (memsi);
11759
11760  emit_insn (gen_rtx_SET (off,
11761			  gen_rtx_XOR (SImode, off,
11762				       GEN_INT (GET_MODE (mem) == QImode
11763						? 3 : 2))));
11764
11765  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11766
11767  if (GET_MODE (mem) == QImode)
11768    mask = force_reg (SImode, GEN_INT (0xff));
11769  else
11770    mask = force_reg (SImode, GEN_INT (0xffff));
11771
11772  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11773
11774  emit_insn (gen_rtx_SET (val,
11775			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11776				       val)));
11777
11778  oldval = gen_lowpart (SImode, oldval);
11779  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11780
11781  newval = gen_lowpart_common (SImode, newval);
11782  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11783
11784  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11785
11786  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11787
11788  rtx_code_label *end_label = gen_label_rtx ();
11789  rtx_code_label *loop_label = gen_label_rtx ();
11790  emit_label (loop_label);
11791
11792  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11793
11794  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11795
11796  emit_move_insn (bool_result, const1_rtx);
11797
11798  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11799
11800  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11801
11802  emit_insn (gen_rtx_SET (resv,
11803			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11804				       res)));
11805
11806  emit_move_insn (bool_result, const0_rtx);
11807
11808  cc = gen_compare_reg_1 (NE, resv, val);
11809  emit_insn (gen_rtx_SET (val, resv));
11810
11811  /* Use cbranchcc4 to separate the compare and branch!  */
11812  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11813				  cc, const0_rtx, loop_label));
11814
11815  emit_label (end_label);
11816
11817  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11818
11819  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11820
11821  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11822}
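/* As a worked example of the shift computation above: for an HImode access
   at byte offset 2, OFF becomes (2 ^ 2) << 3 == 0, so MASK is 0xffff << 0,
   selecting the low-order half of the containing word, which is exactly
   where bytes 2 and 3 live on this big-endian target.  */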
11823
11824/* Expand code to perform a compare-and-swap.  */
11825
11826void
11827sparc_expand_compare_and_swap (rtx operands[])
11828{
11829  rtx bval, retval, mem, oldval, newval;
11830  machine_mode mode;
11831  enum memmodel model;
11832
11833  bval = operands[0];
11834  retval = operands[1];
11835  mem = operands[2];
11836  oldval = operands[3];
11837  newval = operands[4];
11838  model = (enum memmodel) INTVAL (operands[6]);
11839  mode = GET_MODE (mem);
11840
11841  sparc_emit_membar_for_model (model, 3, 1);
11842
11843  if (reg_overlap_mentioned_p (retval, oldval))
11844    oldval = copy_to_reg (oldval);
11845
11846  if (mode == QImode || mode == HImode)
11847    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11848  else
11849    {
11850      rtx (*gen) (rtx, rtx, rtx, rtx);
11851      rtx x;
11852
11853      if (mode == SImode)
11854	gen = gen_atomic_compare_and_swapsi_1;
11855      else
11856	gen = gen_atomic_compare_and_swapdi_1;
11857      emit_insn (gen (retval, mem, oldval, newval));
11858
11859      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11860      if (x != bval)
11861	convert_move (bval, x, 1);
11862    }
11863
11864  sparc_emit_membar_for_model (model, 3, 2);
11865}

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
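
/* Worked example (illustrative, not from the original sources): for
   V4HImode with sel = { 1, 2, 0, 3 }, the code above packs the 3-bit
   indices into 0x01020003, and the multiply by 0x22 doubles each
   index while duplicating it into a byte pair, giving 0x22440066.
   The BMASK insn then adds 0x01010101, leaving 0x23450167 in
   GSR.mask: nibble pairs (2,3) (4,5) (0,1) (6,7), each nibble
   selecting one byte of the 16-byte source concatenation, i.e.
   exactly halfwords 1, 2, 0 and 3 for the subsequent BSHUFFLE.  */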

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf, but
     we cannot use sparc_leaf_function_p since it hasn't been computed yet.  */
  return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* Return the hard frame pointer directly to bypass the stack bias.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try to reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
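
/* For the general register-by-register case (the final else branch
   above), the emitted sequence is, illustratively, with OPCODE
   standing for the multiply mnemonic supplied by the insn pattern
   (e.g. "mulx"; the concrete mnemonic is an assumption here), and
   the leading srl insns emitted only when the low words are not
   already known to be zero-extended:

       srl   %L1, 0, %L1
       srl   %L2, 0, %L2
       sllx  %H1, 32, %3	! build the 64-bit first operand in %3
       sllx  %H2, 32, %4	! build the 64-bit second operand in %4
       or    %L1, %3, %3
       or    %L2, %4, %4
       mulx  %3, %4, %3		! 64-bit multiply
       srlx  %3, 32, %H0	! split the product into a register pair
       mov   %3, %L0  */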

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  switch (mode)
    {
    case V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
  emit_insn (final_insn);
}
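
/* Note on the BMASK constants above (an explanatory aside, not from
   the original sources): ELT sits in the low 32 bits of T1, so in
   the 16-byte T1:T1 concatenation seen by BSHUFFLE, byte 7 is the
   low byte, bytes 6-7 the low halfword and bytes 4-7 the low word of
   T1.  Each nibble of GSR.mask picks one of those bytes, hence
   0x77777777 replicates the QImode element, 0x67676767 the HImode
   one and 0x45674567 the SImode one.  */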

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}
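
/* Trace of the doubling above (an illustrative sketch, not from the
   original sources), writing E for the element byte:

       t1				 = { 0, 0, 0, E }
       t2 = fpmerge (t1, t1)		 = { 0, 0, 0, 0, 0, 0, E, E }
       t3 = fpmerge (t2_low, t2_low)	 = { 0, 0, 0, 0, E, E, E, E }
       target = fpmerge (t3_low, t3_low) = { E, E, E, E, E, E, E, E }

   Each FPMERGE interleaves its two V4QI operands, so merging the low
   half with itself doubles the number of copies of E per step.  */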

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}
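
/* How the loop above converges (an illustrative aside, not from the
   original sources): ALIGNADDR sets GSR.align to 6, so each
   FALIGNDATA extracts bytes 6..13 of the 16-byte T1:TARGET
   concatenation.  Since the element occupies bytes 6-7 of T1, every
   iteration prepends one copy of the element and shifts TARGET right
   by one halfword; after four iterations all four halfwords hold the
   element.  */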

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}
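
/* For instance (an illustrative example, not from the original
   sources), reloading a DFmode value from a merely 4-byte-aligned
   stack slot into %f32..%f62 takes the FP_REGS path above: the upper
   half of the register file has no 4-byte loads, so the value is
   first assembled from two 4-byte loads into a lower FP register
   pair and only then moved up.  */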

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
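
/* For example (an illustrative sketch, not from the original
   sources), dst = (a != 0) ? src : dst with a 64-bit register A takes
   the register-compare shortcut above and can end up as a single

       movrnz  %a, %src, %dst

   whereas a general comparison first materializes a CC register via
   gen_compare_reg_1 and then emits a movcc-style conditional move on
   that CC register.  */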

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}
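
/* In outline, the three sets above correspond to (an illustrative
   sketch, not from the original sources):

       mask = <VIS compare> (cop0, cop1)	-- the FCODE unspec
       %gsr = <cmask-style insn> (mask)		-- the CCODE unspec
       op0  = bshuffle (op1, op2)		-- per-element select

   i.e. a VIS element-wise compare producing a bit mask, a cmask-style
   insn turning it into a GSR byte-shuffle mask, and a final BSHUFFLE
   picking each element from OPERANDS[1] or OPERANDS[2].  */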

/* On sparc, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

/* Return TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}
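
/* Concretely (an illustrative aside, not from the original sources):
   on V9, SFmode and DFmode are not tieable (a 4-byte vs. an 8-byte FP
   mode), DFmode and TFmode are (both wider than 4 bytes), and any two
   integer modes remain tieable since their class is neither
   MODE_FLOAT nor MODE_VECTOR_INT.  */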

/* Implement TARGET_CSTORE_MODE.  */

static machine_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
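
  /* In the %fsr layout, the accrued-exception field (aexc) occupies
     bits 9:5 and the trap-enable mask (TEM) bits 27:23; the two masks
     above select exactly those fields.  (Explanatory aside, not part
     of the original sources.)  */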

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

#include "gt-sparc.h"