/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2020 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};
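
/* For illustration only: a minimal sketch, not used by the compiler, of how
   the variable multiply cost described above would be computed from a cost
   table.  The helper name and the use of __builtin_clz are assumptions made
   for this example; they are not part of the SPARC backend.  */

static inline int
sparc_example_imul_cost (const struct processor_costs *costs, int rs1)
{
  /* For negative values, the significant bits are the cleared ones, so
     complement RS1 before looking for the highest set bit.  */
  unsigned int bits = rs1 < 0 ? ~(unsigned int) rs1 : (unsigned int) rs1;
  int highest_bit = bits == 0 ? 0 : 31 - __builtin_clz (bits);

  if (highest_bit < 3)
    highest_bit = 3;

  /* A zero bit factor means the multiply cost is fixed.  */
  if (costs->int_mul_bit_factor == 0)
    return costs->int_mul;

  return costs->int_mul + (highest_bit - 3) / costs->int_mul_bit_factor;
}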

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon5_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (18), /* fdivd */
  COSTS_N_INSNS (25), /* fsqrts */
  COSTS_N_INSNS (26), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  4 /* branch cost */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch-always, always
   use "or %o7,%g0,X; call Y; or X,%g0,%o7" so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
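
/* For illustration (hypothetical register and target names): the relaxable
   form referred to above is

	or	%o7, %g0, %g1
	call	target
	 or	%g1, %g0, %o7

   whereas a sethi/jmp tail call would be along the lines of

	sethi	%hi(target), %g1
	jmp	%g1 + %lo(target)
	 nop

   for which the relaxation cannot be done safely.  */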

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
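
/* Note: this table is assumed to be consumed by a LEAF_REG_REMAP-style macro
   in sparc.h, e.g. (sketch, not the authoritative definition):

     #define LEAF_REG_REMAP(REGNO) ((int) leaf_reg_remap[(REGNO)])

   with a result of -1 meaning that the register cannot be remapped.  */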

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						      int *, const_tree, int);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     const function_arg_info &);
static void sparc_function_arg_advance (cumulative_args_t,
					const function_arg_info &);
static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					const function_arg_info &);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    const function_arg_info &);
static bool sparc_return_in_memory (const_tree, const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
#endif

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* True if any of INSN's source register(s) is REG.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);
  return ((REG_P (recog_data.operand[1])
	   && REGNO (recog_data.operand[1]) == reg)
	  || (recog_data.n_operands == 3
	      && REG_P (recog_data.operand[2])
	      && REGNO (recog_data.operand[2]) == reg));
}

/* True if INSN is a floating-point division or square-root.  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a floating-point instruction.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPMOVE:
    case TYPE_FPCMOVE:
    case TYPE_FP:
    case TYPE_FPCMP:
    case TYPE_FPMUL:
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is an atomic instruction.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  switch (INSN_CODE (insn))
    {
    case CODE_FOR_swapsi:
    case CODE_FOR_ldstub:
    case CODE_FOR_atomic_compare_and_swap_leon3_1:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a store instruction.  */

static bool
store_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_STORE:
    case TYPE_FPSTORE:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a load instruction.  */

static bool
load_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_LOAD:
    case TYPE_SLOAD:
    case TYPE_FPLOAD:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)						\
  (NONDEBUG_INSN_P (INSN)						\
   && GET_CODE (PATTERN (INSN)) != USE					\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

rtx_insn *
next_active_non_empty_insn (rtx_insn *insn)
{
  insn = next_active_insn (insn);

  while (insn
	 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	     || GET_CODE (PATTERN (insn)) == ASM_INPUT
	     || (USEFUL_INSN_P (insn)
		 && (asm_noperands (PATTERN (insn)) >= 0)
		 && !strcmp (decode_asm_operands (PATTERN (insn),
						  NULL, NULL, NULL,
						  NULL, NULL), ""))))
    insn = next_active_insn (insn);

  return insn;
}

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;
  bool find_first_useful = true;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Do not begin function with atomic instruction.  */
      if (sparc_fix_ut700
	  && find_first_useful
	  && USEFUL_INSN_P (insn))
	{
	  find_first_useful = false;
	  if (atomic_insn_for_leon3_p (insn))
	    emit_insn_before (gen_nop (), insn);
	}

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && load_insn_p (insn))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
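      /* For illustration (hypothetical registers), the short form of the
	 problematic sequence would be e.g.

		fdivs	%f1, %f2, %f3
		fadds	%f4, %f5, %f6	! FP op with no dependency on %f3
		fmuls	%f7, %f8, %f9	! FP op with no dependency on %f3
		fsqrts	%f10, %f11	! second div/sqrt

	 in which case two NOPs in a row are inserted right after the first
	 div/sqrt.  */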
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}
      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
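      /* For illustration (hypothetical operands), Sequence B would be e.g.

		std	%f0, [%o0]
		st	%g1, [%o1]

	 and a NOP is inserted between the two stores to break it up.  */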
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && store_insn_p (insn))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if (store_insn_p (after))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_non_empty_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && store_insn_p (after))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
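	  /* For illustration (hypothetical registers), the affected pattern
	     would be e.g.

		ld	[%o0], %f2
		ld	[%o1], %f3	! load into the sibling register
		st	%f2, [%o2]	! single-word store from %f2

	     in which case a NOP is inserted right after the first load.  */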
1513	  for (after = next, i = 0; i < n_insns; i++)
1514	    {
1515	      bool branch_p;
1516
1517	      after = next_active_insn (after);
1518	      if (!after)
1519		break;
1520
1521	      /* This is a branch with an empty delay slot.  */
1522	      if (!NONJUMP_INSN_P (after))
1523		{
1524		  if (++i == n_insns)
1525		    break;
1526		  branch_p = true;
1527		  after = NULL;
1528		}
1529	      /* This is a branch with a filled delay slot.  */
1530	      else if (rtx_sequence *seq =
1531		         dyn_cast <rtx_sequence *> (PATTERN (after)))
1532		{
1533		  if (++i == n_insns)
1534		    break;
1535		  branch_p = true;
1536		  after = seq->insn (1);
1537		}
1538	      /* This is a regular instruction.  */
1539	      else
1540		branch_p = false;
1541
1542	      if (after && (set = single_set (after)) != NULL_RTX)
1543		{
1544		  const rtx src = SET_SRC (set);
1545		  const rtx dest = SET_DEST (set);
1546		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1547
1548		  /* If the FP register is again modified before the store,
1549		     then the store isn't affected.  */
1550		  if (REG_P (dest)
1551		      && (REGNO (dest) == x
1552			  || (REGNO (dest) == y && size == 8)))
1553		    break;
1554
1555		  if (MEM_P (dest) && REG_P (src))
1556		    {
1557		      /* If there is a store from the sibling FP register
1558			 before the store, then the store is not affected.  */
1559		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1560			break;
1561
1562		      /* Otherwise, the store is affected.  */
1563		      if (REGNO (src) == x && size == 4)
1564			{
1565			  insert_nop = true;
1566			  break;
1567			}
1568		    }
1569		}
1570
1571	      /* If we have a branch in the first M instructions, then we
1572		 cannot see the (M+2)th instruction so we play safe.  */
1573	      if (branch_p && i <= (n_insns - 2))
1574		{
1575		  insert_nop = true;
1576		  break;
1577		}
1578	    }
1579	}
1580
1581      else
1582	next = NEXT_INSN (insn);
1583
1584      if (insert_nop)
1585	emit_insn_before (gen_nop (), next);
1586    }
1587
1588  return 0;
1589}
1590
1591namespace {
1592
1593const pass_data pass_data_work_around_errata =
1594{
1595  RTL_PASS, /* type */
1596  "errata", /* name */
1597  OPTGROUP_NONE, /* optinfo_flags */
1598  TV_MACH_DEP, /* tv_id */
1599  0, /* properties_required */
1600  0, /* properties_provided */
1601  0, /* properties_destroyed */
1602  0, /* todo_flags_start */
1603  0, /* todo_flags_finish */
1604};
1605
1606class pass_work_around_errata : public rtl_opt_pass
1607{
1608public:
1609  pass_work_around_errata(gcc::context *ctxt)
1610    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1611  {}
1612
1613  /* opt_pass methods: */
1614  virtual bool gate (function *)
1615    {
1616      return sparc_fix_at697f
1617	     || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1618	     || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1619    }
1620
1621  virtual unsigned int execute (function *)
1622    {
1623      return sparc_do_work_around_errata ();
1624    }
1625
1626}; // class pass_work_around_errata
1627
1628} // anon namespace
1629
1630rtl_opt_pass *
1631make_pass_work_around_errata (gcc::context *ctxt)
1632{
1633  return new pass_work_around_errata (ctxt);
1634}
1635
1636/* Helpers for TARGET_DEBUG_OPTIONS.  */
1637static void
1638dump_target_flag_bits (const int flags)
1639{
1640  if (flags & MASK_64BIT)
1641    fprintf (stderr, "64BIT ");
1642  if (flags & MASK_APP_REGS)
1643    fprintf (stderr, "APP_REGS ");
1644  if (flags & MASK_FASTER_STRUCTS)
1645    fprintf (stderr, "FASTER_STRUCTS ");
1646  if (flags & MASK_FLAT)
1647    fprintf (stderr, "FLAT ");
1648  if (flags & MASK_FMAF)
1649    fprintf (stderr, "FMAF ");
1650  if (flags & MASK_FSMULD)
1651    fprintf (stderr, "FSMULD ");
1652  if (flags & MASK_FPU)
1653    fprintf (stderr, "FPU ");
1654  if (flags & MASK_HARD_QUAD)
1655    fprintf (stderr, "HARD_QUAD ");
1656  if (flags & MASK_POPC)
1657    fprintf (stderr, "POPC ");
1658  if (flags & MASK_PTR64)
1659    fprintf (stderr, "PTR64 ");
1660  if (flags & MASK_STACK_BIAS)
1661    fprintf (stderr, "STACK_BIAS ");
1662  if (flags & MASK_UNALIGNED_DOUBLES)
1663    fprintf (stderr, "UNALIGNED_DOUBLES ");
1664  if (flags & MASK_V8PLUS)
1665    fprintf (stderr, "V8PLUS ");
1666  if (flags & MASK_VIS)
1667    fprintf (stderr, "VIS ");
1668  if (flags & MASK_VIS2)
1669    fprintf (stderr, "VIS2 ");
1670  if (flags & MASK_VIS3)
1671    fprintf (stderr, "VIS3 ");
1672  if (flags & MASK_VIS4)
1673    fprintf (stderr, "VIS4 ");
1674  if (flags & MASK_VIS4B)
1675    fprintf (stderr, "VIS4B ");
1676  if (flags & MASK_CBCOND)
1677    fprintf (stderr, "CBCOND ");
1678  if (flags & MASK_DEPRECATED_V8_INSNS)
1679    fprintf (stderr, "DEPRECATED_V8_INSNS ");
1680  if (flags & MASK_LEON)
1681    fprintf (stderr, "LEON ");
1682  if (flags & MASK_LEON3)
1683    fprintf (stderr, "LEON3 ");
1684  if (flags & MASK_SPARCLET)
1685    fprintf (stderr, "SPARCLET ");
1686  if (flags & MASK_SPARCLITE)
1687    fprintf (stderr, "SPARCLITE ");
1688  if (flags & MASK_V8)
1689    fprintf (stderr, "V8 ");
1690  if (flags & MASK_V9)
1691    fprintf (stderr, "V9 ");
1692}
1693
1694static void
1695dump_target_flags (const char *prefix, const int flags)
1696{
1697  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1698  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
1700}
1701
1702/* Validate and override various options, and do some machine dependent
1703   initialization.  */
1704
1705static void
1706sparc_option_override (void)
1707{
1708  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
1709  static struct cpu_default {
1710    const int cpu;
1711    const enum sparc_processor_type processor;
1712  } const cpu_default[] = {
1713    /* There must be one entry here for each TARGET_CPU value.  */
1714    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1715    { TARGET_CPU_v8, PROCESSOR_V8 },
1716    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1717    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1718    { TARGET_CPU_leon, PROCESSOR_LEON },
1719    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1720    { TARGET_CPU_leon5, PROCESSOR_LEON5 },
1721    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1722    { TARGET_CPU_sparclite, PROCESSOR_F930 },
1723    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1724    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1725    { TARGET_CPU_v9, PROCESSOR_V9 },
1726    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1727    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1728    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1729    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1730    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1731    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1732    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1733    { TARGET_CPU_m8, PROCESSOR_M8 },
1734    { -1, PROCESSOR_V7 }
1735  };
1736  const struct cpu_default *def;
1737  /* Table of values for -m{cpu,tune}=.  This must match the order of
1738     the enum processor_type in sparc-opts.h.  */
1739  static struct cpu_table {
1740    const char *const name;
1741    const int disable;
1742    const int enable;
1743  } const cpu_table[] = {
1744    { "v7",		MASK_ISA, 0 },
1745    { "cypress",	MASK_ISA, 0 },
1746    { "v8",		MASK_ISA, MASK_V8 },
1747    /* TI TMS390Z55 supersparc */
1748    { "supersparc",	MASK_ISA, MASK_V8 },
1749    { "hypersparc",	MASK_ISA, MASK_V8 },
1750    { "leon",		MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1751    { "leon3",		MASK_ISA, MASK_V8|MASK_LEON3 },
1752    { "leon5",		MASK_ISA, MASK_V8|MASK_LEON3 },
1753    { "leon3v7",	MASK_ISA, MASK_LEON3 },
1754    { "sparclite",	MASK_ISA, MASK_SPARCLITE },
1755    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
1756    { "f930",		MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the more recent sparclite chip, with an FPU.  */
1758    { "f934",		MASK_ISA, MASK_SPARCLITE },
1759    { "sparclite86x",	MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1760    { "sparclet",	MASK_ISA, MASK_SPARCLET },
1761    /* TEMIC sparclet */
1762    { "tsc701",		MASK_ISA, MASK_SPARCLET },
1763    { "v9",		MASK_ISA, MASK_V9 },
1764    /* UltraSPARC I, II, IIi */
1765    { "ultrasparc",	MASK_ISA,
1766    /* Although insns using %y are deprecated, it is a clear win.  */
1767      MASK_V9|MASK_DEPRECATED_V8_INSNS },
1768    /* UltraSPARC III */
1769    /* ??? Check if %y issue still holds true.  */
1770    { "ultrasparc3",	MASK_ISA,
1771      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1772    /* UltraSPARC T1 */
1773    { "niagara",	MASK_ISA,
1774      MASK_V9|MASK_DEPRECATED_V8_INSNS },
1775    /* UltraSPARC T2 */
1776    { "niagara2",	MASK_ISA,
1777      MASK_V9|MASK_POPC|MASK_VIS2 },
1778    /* UltraSPARC T3 */
1779    { "niagara3",	MASK_ISA,
1780      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1781    /* UltraSPARC T4 */
1782    { "niagara4",	MASK_ISA,
1783      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1784    /* UltraSPARC M7 */
1785    { "niagara7",	MASK_ISA,
1786      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1787    /* UltraSPARC M8 */
1788    { "m8",		MASK_ISA,
1789      MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1790  };
1791  const struct cpu_table *cpu;
1792  unsigned int i;
1793
1794  if (sparc_debug_string != NULL)
1795    {
1796      const char *q;
1797      char *p;
1798
1799      p = ASTRDUP (sparc_debug_string);
1800      while ((q = strtok (p, ",")) != NULL)
1801	{
1802	  bool invert;
1803	  int mask;
1804
1805	  p = NULL;
1806	  if (*q == '!')
1807	    {
1808	      invert = true;
1809	      q++;
1810	    }
1811	  else
1812	    invert = false;
1813
1814	  if (! strcmp (q, "all"))
1815	    mask = MASK_DEBUG_ALL;
1816	  else if (! strcmp (q, "options"))
1817	    mask = MASK_DEBUG_OPTIONS;
1818	  else
1819	    error ("unknown %<-mdebug-%s%> switch", q);
1820
1821	  if (invert)
1822	    sparc_debug &= ~mask;
1823	  else
1824	    sparc_debug |= mask;
1825	}
1826    }
1827
  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may later be disabled by the selected CPU (explicitly or
     not).  */
1830  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1831    target_flags |= MASK_FSMULD;
1832
1833  if (TARGET_DEBUG_OPTIONS)
1834    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
1837    }
1838
1839#ifdef SUBTARGET_OVERRIDE_OPTIONS
1840  SUBTARGET_OVERRIDE_OPTIONS;
1841#endif
1842
1843#ifndef SPARC_BI_ARCH
1844  /* Check for unsupported architecture size.  */
1845  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1846    error ("%s is not supported by this configuration",
1847	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
1848#endif
1849
  /* We force all 64-bit archs to use 128-bit long double.  */
1851  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1852    {
1853      error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1854      target_flags |= MASK_LONG_DOUBLE_128;
1855    }
1856
1857  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
1858  for (i = 8; i < 16; i++)
1859    if (!call_used_regs [i])
1860      {
1861	error ("%<-fcall-saved-REG%> is not supported for out registers");
1862        call_used_regs [i] = 1;
1863      }
1864
1865  /* Set the default CPU if no -mcpu option was specified.  */
1866  if (!global_options_set.x_sparc_cpu_and_features)
1867    {
1868      for (def = &cpu_default[0]; def->cpu != -1; ++def)
1869	if (def->cpu == TARGET_CPU_DEFAULT)
1870	  break;
1871      gcc_assert (def->cpu != -1);
1872      sparc_cpu_and_features = def->processor;
1873    }
1874
  /* Set the default CPU for tuning if no -mtune option was specified.  */
1876  if (!global_options_set.x_sparc_cpu)
1877    sparc_cpu = sparc_cpu_and_features;
1878
1879  cpu = &cpu_table[(int) sparc_cpu_and_features];
1880
1881  if (TARGET_DEBUG_OPTIONS)
1882    {
1883      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1884      dump_target_flags ("cpu->disable", cpu->disable);
1885      dump_target_flags ("cpu->enable", cpu->enable);
1886    }
1887
1888  target_flags &= ~cpu->disable;
1889  target_flags |= (cpu->enable
1890#ifndef HAVE_AS_FMAF_HPC_VIS3
1891		   & ~(MASK_FMAF | MASK_VIS3)
1892#endif
1893#ifndef HAVE_AS_SPARC4
1894		   & ~MASK_CBCOND
1895#endif
1896#ifndef HAVE_AS_SPARC5_VIS4
1897		   & ~(MASK_VIS4 | MASK_SUBXC)
1898#endif
1899#ifndef HAVE_AS_SPARC6
1900		   & ~(MASK_VIS4B)
1901#endif
1902#ifndef HAVE_AS_LEON
1903		   & ~(MASK_LEON | MASK_LEON3)
1904#endif
1905		   & ~(target_flags_explicit & MASK_FEATURES)
1906		   );
1907
1908  /* FsMULd is a V8 instruction.  */
1909  if (!TARGET_V8 && !TARGET_V9)
1910    target_flags &= ~MASK_FSMULD;
1911
1912  /* -mvis2 implies -mvis.  */
1913  if (TARGET_VIS2)
1914    target_flags |= MASK_VIS;
1915
1916  /* -mvis3 implies -mvis2 and -mvis.  */
1917  if (TARGET_VIS3)
1918    target_flags |= MASK_VIS2 | MASK_VIS;
1919
1920  /* -mvis4 implies -mvis3, -mvis2 and -mvis.  */
1921  if (TARGET_VIS4)
1922    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1923
1924  /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1925  if (TARGET_VIS4B)
1926    target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1927
1928  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1929     FPU is disabled.  */
1930  if (!TARGET_FPU)
1931    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1932		      | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1933
1934  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1935     are available; -m64 also implies v9.  */
1936  if (TARGET_VIS || TARGET_ARCH64)
1937    {
1938      target_flags |= MASK_V9;
1939      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1940    }
1941
1942  /* -mvis also implies -mv8plus on 32-bit.  */
1943  if (TARGET_VIS && !TARGET_ARCH64)
1944    target_flags |= MASK_V8PLUS;
1945
1946  /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
1947  if (TARGET_V9 && TARGET_ARCH32)
1948    target_flags |= MASK_DEPRECATED_V8_INSNS;
1949
1950  /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
1951  if (!TARGET_V9 || TARGET_ARCH64)
1952    target_flags &= ~MASK_V8PLUS;
1953
1954  /* Don't use stack biasing in 32-bit mode.  */
1955  if (TARGET_ARCH32)
1956    target_flags &= ~MASK_STACK_BIAS;
1957
1958  /* Use LRA instead of reload, unless otherwise instructed.  */
1959  if (!(target_flags_explicit & MASK_LRA))
1960    target_flags |= MASK_LRA;
1961
1962  /* Enable applicable errata workarounds for LEON3FT.  */
1963  if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1964    {
1965      sparc_fix_b2bst = 1;
1966      sparc_fix_lost_divsqrt = 1;
1967    }
1968
1969  /* Disable FsMULd for the UT699 since it doesn't work correctly.  */
1970  if (sparc_fix_ut699)
1971    target_flags &= ~MASK_FSMULD;
1972
1973#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1974  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1975    target_flags |= MASK_LONG_DOUBLE_128;
1976#endif
1977
1978  if (TARGET_DEBUG_OPTIONS)
1979    dump_target_flags ("Final target_flags", target_flags);
1980
1981  /* Set the code model if no -mcmodel option was specified.  */
1982  if (global_options_set.x_sparc_code_model)
1983    {
1984      if (TARGET_ARCH32)
1985	error ("%<-mcmodel=%> is not supported in 32-bit mode");
1986    }
1987  else
1988    {
1989      if (TARGET_ARCH32)
1990	sparc_code_model = CM_32;
1991      else
1992	sparc_code_model = SPARC_DEFAULT_CMODEL;
1993    }
1994
1995  /* Set the memory model if no -mmemory-model option was specified.  */
1996  if (!global_options_set.x_sparc_memory_model)
1997    {
1998      /* Choose the memory model for the operating system.  */
1999      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
2000      if (os_default != SMM_DEFAULT)
2001	sparc_memory_model = os_default;
2002      /* Choose the most relaxed model for the processor.  */
2003      else if (TARGET_V9)
2004	sparc_memory_model = SMM_RMO;
2005      else if (TARGET_LEON3)
2006	sparc_memory_model = SMM_TSO;
2007      else if (TARGET_LEON)
2008	sparc_memory_model = SMM_SC;
2009      else if (TARGET_V8)
2010	sparc_memory_model = SMM_PSO;
2011      else
2012	sparc_memory_model = SMM_SC;
2013    }
2014
2015  /* Supply a default value for align_functions.  */
2016  if (flag_align_functions && !str_align_functions)
2017    {
2018      if (sparc_cpu == PROCESSOR_ULTRASPARC
2019	  || sparc_cpu == PROCESSOR_ULTRASPARC3
2020	  || sparc_cpu == PROCESSOR_NIAGARA
2021	  || sparc_cpu == PROCESSOR_NIAGARA2
2022	  || sparc_cpu == PROCESSOR_NIAGARA3
2023	  || sparc_cpu == PROCESSOR_NIAGARA4)
2024	str_align_functions = "32";
2025      else if (sparc_cpu == PROCESSOR_NIAGARA7
2026	       || sparc_cpu == PROCESSOR_M8)
2027	str_align_functions = "64";
2028    }
2029
2030  /* Validate PCC_STRUCT_RETURN.  */
2031  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
2032    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
2033
2034  /* Only use .uaxword when compiling for a 64-bit target.  */
2035  if (!TARGET_ARCH64)
2036    targetm.asm_out.unaligned_op.di = NULL;
2037
2038  /* Set the processor costs.  */
2039  switch (sparc_cpu)
2040    {
2041    case PROCESSOR_V7:
2042    case PROCESSOR_CYPRESS:
2043      sparc_costs = &cypress_costs;
2044      break;
2045    case PROCESSOR_V8:
2046    case PROCESSOR_SPARCLITE:
2047    case PROCESSOR_SUPERSPARC:
2048      sparc_costs = &supersparc_costs;
2049      break;
2050    case PROCESSOR_F930:
2051    case PROCESSOR_F934:
2052    case PROCESSOR_HYPERSPARC:
2053    case PROCESSOR_SPARCLITE86X:
2054      sparc_costs = &hypersparc_costs;
2055      break;
2056    case PROCESSOR_LEON:
2057      sparc_costs = &leon_costs;
2058      break;
2059    case PROCESSOR_LEON3:
2060    case PROCESSOR_LEON3V7:
2061      sparc_costs = &leon3_costs;
2062      break;
2063    case PROCESSOR_LEON5:
2064      sparc_costs = &leon5_costs;
2065      break;
2066    case PROCESSOR_SPARCLET:
2067    case PROCESSOR_TSC701:
2068      sparc_costs = &sparclet_costs;
2069      break;
2070    case PROCESSOR_V9:
2071    case PROCESSOR_ULTRASPARC:
2072      sparc_costs = &ultrasparc_costs;
2073      break;
2074    case PROCESSOR_ULTRASPARC3:
2075      sparc_costs = &ultrasparc3_costs;
2076      break;
2077    case PROCESSOR_NIAGARA:
2078      sparc_costs = &niagara_costs;
2079      break;
2080    case PROCESSOR_NIAGARA2:
2081      sparc_costs = &niagara2_costs;
2082      break;
2083    case PROCESSOR_NIAGARA3:
2084      sparc_costs = &niagara3_costs;
2085      break;
2086    case PROCESSOR_NIAGARA4:
2087      sparc_costs = &niagara4_costs;
2088      break;
2089    case PROCESSOR_NIAGARA7:
2090      sparc_costs = &niagara7_costs;
2091      break;
2092    case PROCESSOR_M8:
2093      sparc_costs = &m8_costs;
2094      break;
2095    case PROCESSOR_NATIVE:
2096      gcc_unreachable ();
    }
2098
2099  /* param_simultaneous_prefetches is the number of prefetches that
2100     can run at the same time.  More important, it is the threshold
2101     defining when additional prefetches will be dropped by the
2102     hardware.
2103
2104     The UltraSPARC-III features a documented prefetch queue with a
2105     size of 8.  Additional prefetches issued in the cpu are
2106     dropped.
2107
2108     Niagara processors are different.  In these processors prefetches
2109     are handled much like regular loads.  The L1 miss buffer is 32
2110     entries, but prefetches start getting affected when 30 entries
2111     become occupied.  That occupation could be a mix of regular loads
2112     and prefetches though.  And that buffer is shared by all threads.
2113     Once the threshold is reached, if the core is running a single
2114     thread the prefetch will retry.  If more than one thread is
2115     running, the prefetch will be dropped.
2116
     All this makes it very difficult to determine how many prefetches
     can be issued simultaneously, even in a
2119     single-threaded program.  Experimental results show that setting
2120     this parameter to 32 works well when the number of threads is not
2121     high.  */
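  /* In short, the value chosen below is 2 for UltraSPARC-I/II and
     Niagara 1-4, 8 for UltraSPARC-III, 32 for Niagara-7 and M8, and 3
     for everything else.  */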
2122  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2123		       param_simultaneous_prefetches,
2124		       ((sparc_cpu == PROCESSOR_ULTRASPARC
2125			 || sparc_cpu == PROCESSOR_NIAGARA
2126			 || sparc_cpu == PROCESSOR_NIAGARA2
2127			 || sparc_cpu == PROCESSOR_NIAGARA3
2128			 || sparc_cpu == PROCESSOR_NIAGARA4)
2129			? 2
2130			: (sparc_cpu == PROCESSOR_ULTRASPARC3
2131			   ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2132				   || sparc_cpu == PROCESSOR_M8)
2133				  ? 32 : 3))));
2134
2135  /* param_l1_cache_line_size is the size of the L1 cache line, in
2136     bytes.
2137
2138     The Oracle SPARC Architecture (previously the UltraSPARC
2139     Architecture) specification states that when a PREFETCH[A]
2140     instruction is executed an implementation-specific amount of data
2141     is prefetched, and that it is at least 64 bytes long (aligned to
2142     at least 64 bytes).
2143
2144     However, this is not correct.  The M7 (and implementations prior
2145     to that) does not guarantee a 64B prefetch into a cache if the
2146     line size is smaller.  A single cache line is all that is ever
2147     prefetched.  So for the M7, where the L1D$ has 32B lines and the
2148     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2149     L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2150     is a read_n prefetch, which is the only type which allocates to
2151     the L1.)  */
2152  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2153		       param_l1_cache_line_size,
2154		       (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2155
2156  /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
     Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
     Niagara processors feature an L1D$ of 16KB.  */
2159  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2160		       param_l1_cache_size,
2161		       ((sparc_cpu == PROCESSOR_ULTRASPARC
2162			 || sparc_cpu == PROCESSOR_ULTRASPARC3
2163			 || sparc_cpu == PROCESSOR_NIAGARA
2164			 || sparc_cpu == PROCESSOR_NIAGARA2
2165			 || sparc_cpu == PROCESSOR_NIAGARA3
2166			 || sparc_cpu == PROCESSOR_NIAGARA4
2167			 || sparc_cpu == PROCESSOR_NIAGARA7
2168			 || sparc_cpu == PROCESSOR_M8)
2169			? 16 : 64));
2170
  /* param_l2_cache_size is the size of the L2 in kilobytes.  Note
2172     that 512 is the default in params.def.  */
2173  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2174		       param_l2_cache_size,
2175		       ((sparc_cpu == PROCESSOR_NIAGARA4
2176			 || sparc_cpu == PROCESSOR_M8)
2177			? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2178				 ? 256 : 512)));
2179
2181  /* Disable save slot sharing for call-clobbered registers by default.
2182     The IRA sharing algorithm works on single registers only and this
2183     pessimizes for double floating-point registers.  */
2184  if (!global_options_set.x_flag_ira_share_save_slots)
2185    flag_ira_share_save_slots = 0;
2186
2187  /* Only enable REE by default in 64-bit mode where it helps to eliminate
2188     redundant 32-to-64-bit extensions.  */
2189  if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2190    flag_ree = 0;
2191
2192  /* Do various machine dependent initializations.  */
2193  sparc_init_modes ();
2194
2195  /* Set up function hooks.  */
2196  init_machine_status = sparc_init_machine_status;
2197}
2198
2199/* Miscellaneous utilities.  */
2200
2201/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2202   or branch on register contents instructions.  */
2203
2204int
2205v9_regcmp_p (enum rtx_code code)
2206{
2207  return (code == EQ || code == NE || code == GE || code == LT
2208	  || code == LE || code == GT);
2209}
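
/* These are precisely the six conditions testable by the V9
   brz/brlez/brlz/brnz/brgz/brgez branches (and the corresponding movr
   variants), all of which compare a register directly against zero.  */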
2210
2211/* Nonzero if OP is a floating point constant which can
2212   be loaded into an integer register using a single
2213   sethi instruction.  */
2214
2215int
2216fp_sethi_p (rtx op)
2217{
2218  if (GET_CODE (op) == CONST_DOUBLE)
2219    {
2220      long i;
2221
2222      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2223      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2224    }
2225
2226  return 0;
2227}
2228
2229/* Nonzero if OP is a floating point constant which can
2230   be loaded into an integer register using a single
2231   mov instruction.  */
2232
2233int
2234fp_mov_p (rtx op)
2235{
2236  if (GET_CODE (op) == CONST_DOUBLE)
2237    {
2238      long i;
2239
2240      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2241      return SPARC_SIMM13_P (i);
2242    }
2243
2244  return 0;
2245}
2246
2247/* Nonzero if OP is a floating point constant which can
2248   be loaded into an integer register using a high/losum
2249   instruction sequence.  */
2250
2251int
2252fp_high_losum_p (rtx op)
2253{
2254  /* The constraints calling this should only be in
2255     SFmode move insns, so any constant which cannot
2256     be moved using a single insn will do.  */
2257  if (GET_CODE (op) == CONST_DOUBLE)
2258    {
2259      long i;
2260
2261      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2262      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2263    }
2264
2265  return 0;
2266}
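
/* Taken together, the three predicates above partition the SFmode
   constants by the cheapest way to materialize their 32-bit image in an
   integer register: fp_mov_p for SIMM13 values (a single mov),
   fp_sethi_p for values whose low 10 bits are clear (a single sethi),
   and fp_high_losum_p for everything else (a sethi/or pair).  */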
2267
2268/* Return true if the address of LABEL can be loaded by means of the
2269   mov{si,di}_pic_label_ref patterns in PIC mode.  */
2270
2271static bool
2272can_use_mov_pic_label_ref (rtx label)
2273{
2274  /* VxWorks does not impose a fixed gap between segments; the run-time
2275     gap can be different from the object-file gap.  We therefore can't
2276     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2277     are absolutely sure that X is in the same segment as the GOT.
2278     Unfortunately, the flexibility of linker scripts means that we
2279     can't be sure of that in general, so assume that GOT-relative
2280     accesses are never valid on VxWorks.  */
2281  if (TARGET_VXWORKS_RTP)
2282    return false;
2283
  /* Similarly, if the label is non-local, it might end up being placed
     in a different section from the current one, and mov_pic_label_ref
     requires the label and the code to be in the same section.  */
2287  if (LABEL_REF_NONLOCAL_P (label))
2288    return false;
2289
  /* Finally, if we are reordering basic blocks and partitioning them into
     hot and cold sections, this might happen for any label.  */
2292  if (flag_reorder_blocks_and_partition)
2293    return false;
2294
2295  return true;
2296}
2297
2298/* Expand a move instruction.  Return true if all work is done.  */
2299
2300bool
2301sparc_expand_move (machine_mode mode, rtx *operands)
2302{
2303  /* Handle sets of MEM first.  */
2304  if (GET_CODE (operands[0]) == MEM)
2305    {
2306      /* 0 is a register (or a pair of registers) on SPARC.  */
2307      if (register_or_zero_operand (operands[1], mode))
2308	return false;
2309
2310      if (!reload_in_progress)
2311	{
2312	  operands[0] = validize_mem (operands[0]);
2313	  operands[1] = force_reg (mode, operands[1]);
2314	}
2315    }
2316
2317  /* Fix up TLS cases.  */
2318  if (TARGET_HAVE_TLS
2319      && CONSTANT_P (operands[1])
2320      && sparc_tls_referenced_p (operands [1]))
2321    {
2322      operands[1] = sparc_legitimize_tls_address (operands[1]);
2323      return false;
2324    }
2325
2326  /* Fix up PIC cases.  */
2327  if (flag_pic && CONSTANT_P (operands[1]))
2328    {
2329      if (pic_address_needs_scratch (operands[1]))
2330	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2331
2332      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
2333      if ((GET_CODE (operands[1]) == LABEL_REF
2334	   && can_use_mov_pic_label_ref (operands[1]))
2335	  || (GET_CODE (operands[1]) == CONST
2336	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
2337	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2338	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2339	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2340	{
2341	  if (mode == SImode)
2342	    {
2343	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2344	      return true;
2345	    }
2346
2347	  if (mode == DImode)
2348	    {
2349	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2350	      return true;
2351	    }
2352	}
2353
2354      if (symbolic_operand (operands[1], mode))
2355	{
2356	  operands[1]
2357	    = sparc_legitimize_pic_address (operands[1],
2358					    reload_in_progress
2359					    ? operands[0] : NULL_RTX);
2360	  return false;
2361	}
2362    }
2363
  /* If we are trying to toss an integer constant into FP registers,
     or to load an FP or vector constant, force it into memory.  */
2366  if (CONSTANT_P (operands[1])
2367      && REG_P (operands[0])
2368      && (SPARC_FP_REG_P (REGNO (operands[0]))
2369	  || SCALAR_FLOAT_MODE_P (mode)
2370	  || VECTOR_MODE_P (mode)))
2371    {
2372      /* emit_group_store will send such bogosity to us when it is
2373         not storing directly into memory.  So fix this up to avoid
2374         crashes in output_constant_pool.  */
2375      if (operands [1] == const0_rtx)
2376	operands[1] = CONST0_RTX (mode);
2377
      /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
	 we can always do so for the other registers.  */
2380      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2381	  && (const_zero_operand (operands[1], mode)
2382	      || const_all_ones_operand (operands[1], mode)))
2383	return false;
2384
2385      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2386	  /* We are able to build any SF constant in integer registers
2387	     with at most 2 instructions.  */
2388	  && (mode == SFmode
2389	      /* And any DF constant in integer registers if needed.  */
2390	      || (mode == DFmode && !can_create_pseudo_p ())))
2391	return false;
2392
2393      operands[1] = force_const_mem (mode, operands[1]);
2394      if (!reload_in_progress)
2395	operands[1] = validize_mem (operands[1]);
2396      return false;
2397    }
2398
2399  /* Accept non-constants and valid constants unmodified.  */
2400  if (!CONSTANT_P (operands[1])
2401      || GET_CODE (operands[1]) == HIGH
2402      || input_operand (operands[1], mode))
2403    return false;
2404
2405  switch (mode)
2406    {
2407    case E_QImode:
2408      /* All QImode constants require only one insn, so proceed.  */
2409      break;
2410
2411    case E_HImode:
2412    case E_SImode:
2413      sparc_emit_set_const32 (operands[0], operands[1]);
2414      return true;
2415
2416    case E_DImode:
2417      /* input_operand should have filtered out 32-bit mode.  */
2418      sparc_emit_set_const64 (operands[0], operands[1]);
2419      return true;
2420
2421    case E_TImode:
2422      {
2423	rtx high, low;
2424	/* TImode isn't available in 32-bit mode.  */
2425	split_double (operands[1], &high, &low);
2426	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2427			      high));
2428	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2429			      low));
2430      }
2431      return true;
2432
2433    default:
2434      gcc_unreachable ();
2435    }
2436
2437  return false;
2438}
2439
2440/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here; the move expander guarantees this.  */
2443
2444static void
2445sparc_emit_set_const32 (rtx op0, rtx op1)
2446{
2447  machine_mode mode = GET_MODE (op0);
2448  rtx temp = op0;
2449
2450  if (can_create_pseudo_p ())
2451    temp = gen_reg_rtx (mode);
2452
2453  if (GET_CODE (op1) == CONST_INT)
2454    {
2455      gcc_assert (!small_int_operand (op1, mode)
2456		  && !const_high_operand (op1, mode));
2457
      /* Emit them as real moves instead of a HIGH/LO_SUM;
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
2461      emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2462					     & ~(HOST_WIDE_INT) 0x3ff)));
2463
2464      emit_insn (gen_rtx_SET (op0,
2465			      gen_rtx_IOR (mode, temp,
2466					   GEN_INT (INTVAL (op1) & 0x3ff))));
2467    }
2468  else
2469    {
2470      /* A symbol, emit in the traditional way.  */
2471      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2472      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2473    }
2474}
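
/* As a worked example, for op1 == (const_int 0x12345678), which is
   neither a SIMM13 nor a %hi-only value, the code above amounts to:

	sethi	%hi(0x12345678), %temp	! temp <- 0x12345400
	or	%temp, 0x278, %op0	! op0  <- 0x12345678

   with the first insn emitted as a masked constant SET rather than a
   HIGH so that CSE can reuse the intermediate value.  */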
2475
2476/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2477   If TEMP is nonzero, we are forbidden to use any other scratch
2478   registers.  Otherwise, we are allowed to generate them as needed.
2479
2480   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2481   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
2482
2483void
2484sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2485{
2486  rtx cst, temp1, temp2, temp3, temp4, temp5;
2487  rtx ti_temp = 0;
2488
2489  /* Deal with too large offsets.  */
2490  if (GET_CODE (op1) == CONST
2491      && GET_CODE (XEXP (op1, 0)) == PLUS
2492      && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2493      && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2494    {
2495      gcc_assert (!temp);
2496      temp1 = gen_reg_rtx (DImode);
2497      temp2 = gen_reg_rtx (DImode);
2498      sparc_emit_set_const64 (temp2, cst);
2499      sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2500				       NULL_RTX);
2501      emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2502      return;
2503    }
2504
2505  if (temp && GET_MODE (temp) == TImode)
2506    {
2507      ti_temp = temp;
2508      temp = gen_rtx_REG (DImode, REGNO (temp));
2509    }
2510
2511  /* SPARC-V9 code model support.  */
2512  switch (sparc_code_model)
2513    {
2514    case CM_MEDLOW:
2515      /* The range spanned by all instructions in the object is less
2516	 than 2^31 bytes (2GB) and the distance from any instruction
2517	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2518	 than 2^31 bytes (2GB).
2519
2520	 The executable must be in the low 4TB of the virtual address
2521	 space.
2522
2523	 sethi	%hi(symbol), %temp1
2524	 or	%temp1, %lo(symbol), %reg  */
2525      if (temp)
2526	temp1 = temp;  /* op0 is allowed.  */
2527      else
2528	temp1 = gen_reg_rtx (DImode);
2529
2530      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2531      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2532      break;
2533
2534    case CM_MEDMID:
2535      /* The range spanned by all instructions in the object is less
2536	 than 2^31 bytes (2GB) and the distance from any instruction
2537	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2538	 than 2^31 bytes (2GB).
2539
2540	 The executable must be in the low 16TB of the virtual address
2541	 space.
2542
2543	 sethi	%h44(symbol), %temp1
2544	 or	%temp1, %m44(symbol), %temp2
2545	 sllx	%temp2, 12, %temp3
2546	 or	%temp3, %l44(symbol), %reg  */
2547      if (temp)
2548	{
2549	  temp1 = op0;
2550	  temp2 = op0;
2551	  temp3 = temp;  /* op0 is allowed.  */
2552	}
2553      else
2554	{
2555	  temp1 = gen_reg_rtx (DImode);
2556	  temp2 = gen_reg_rtx (DImode);
2557	  temp3 = gen_reg_rtx (DImode);
2558	}
2559
2560      emit_insn (gen_seth44 (temp1, op1));
2561      emit_insn (gen_setm44 (temp2, temp1, op1));
2562      emit_insn (gen_rtx_SET (temp3,
2563			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2564      emit_insn (gen_setl44 (op0, temp3, op1));
2565      break;
2566
2567    case CM_MEDANY:
2568      /* The range spanned by all instructions in the object is less
2569	 than 2^31 bytes (2GB) and the distance from any instruction
2570	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2571	 than 2^31 bytes (2GB).
2572
2573	 The executable can be placed anywhere in the virtual address
2574	 space.
2575
2576	 sethi	%hh(symbol), %temp1
2577	 sethi	%lm(symbol), %temp2
2578	 or	%temp1, %hm(symbol), %temp3
2579	 sllx	%temp3, 32, %temp4
2580	 or	%temp4, %temp2, %temp5
2581	 or	%temp5, %lo(symbol), %reg  */
2582      if (temp)
2583	{
2584	  /* It is possible that one of the registers we got for operands[2]
2585	     might coincide with that of operands[0] (which is why we made
2586	     it TImode).  Pick the other one to use as our scratch.  */
2587	  if (rtx_equal_p (temp, op0))
2588	    {
2589	      gcc_assert (ti_temp);
2590	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2591	    }
2592	  temp1 = op0;
2593	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2594	  temp3 = op0;
2595	  temp4 = op0;
2596	  temp5 = op0;
2597	}
2598      else
2599	{
2600	  temp1 = gen_reg_rtx (DImode);
2601	  temp2 = gen_reg_rtx (DImode);
2602	  temp3 = gen_reg_rtx (DImode);
2603	  temp4 = gen_reg_rtx (DImode);
2604	  temp5 = gen_reg_rtx (DImode);
2605	}
2606
2607      emit_insn (gen_sethh (temp1, op1));
2608      emit_insn (gen_setlm (temp2, op1));
2609      emit_insn (gen_sethm (temp3, temp1, op1));
2610      emit_insn (gen_rtx_SET (temp4,
2611			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2612      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2613      emit_insn (gen_setlo (op0, temp5, op1));
2614      break;
2615
2616    case CM_EMBMEDANY:
      /* Old old old backwards compatibility cruft here.
2618	 Essentially it is MEDLOW with a fixed 64-bit
2619	 virtual base added to all data segment addresses.
2620	 Text-segment stuff is computed like MEDANY, we can't
2621	 reuse the code above because the relocation knobs
2622	 look different.
2623
2624	 Data segment:	sethi	%hi(symbol), %temp1
2625			add	%temp1, EMBMEDANY_BASE_REG, %temp2
2626			or	%temp2, %lo(symbol), %reg  */
2627      if (data_segment_operand (op1, GET_MODE (op1)))
2628	{
2629	  if (temp)
2630	    {
2631	      temp1 = temp;  /* op0 is allowed.  */
2632	      temp2 = op0;
2633	    }
2634	  else
2635	    {
2636	      temp1 = gen_reg_rtx (DImode);
2637	      temp2 = gen_reg_rtx (DImode);
2638	    }
2639
2640	  emit_insn (gen_embmedany_sethi (temp1, op1));
2641	  emit_insn (gen_embmedany_brsum (temp2, temp1));
2642	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
2643	}
2644
2645      /* Text segment:	sethi	%uhi(symbol), %temp1
2646			sethi	%hi(symbol), %temp2
2647			or	%temp1, %ulo(symbol), %temp3
2648			sllx	%temp3, 32, %temp4
2649			or	%temp4, %temp2, %temp5
2650			or	%temp5, %lo(symbol), %reg  */
2651      else
2652	{
2653	  if (temp)
2654	    {
2655	      /* It is possible that one of the registers we got for operands[2]
2656		 might coincide with that of operands[0] (which is why we made
2657		 it TImode).  Pick the other one to use as our scratch.  */
2658	      if (rtx_equal_p (temp, op0))
2659		{
2660		  gcc_assert (ti_temp);
2661		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2662		}
2663	      temp1 = op0;
2664	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2665	      temp3 = op0;
2666	      temp4 = op0;
2667	      temp5 = op0;
2668	    }
2669	  else
2670	    {
2671	      temp1 = gen_reg_rtx (DImode);
2672	      temp2 = gen_reg_rtx (DImode);
2673	      temp3 = gen_reg_rtx (DImode);
2674	      temp4 = gen_reg_rtx (DImode);
2675	      temp5 = gen_reg_rtx (DImode);
2676	    }
2677
2678	  emit_insn (gen_embmedany_textuhi (temp1, op1));
2679	  emit_insn (gen_embmedany_texthi  (temp2, op1));
2680	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2681	  emit_insn (gen_rtx_SET (temp4,
2682				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2683	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2684	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
2685	}
2686      break;
2687
2688    default:
2689      gcc_unreachable ();
2690    }
2691}
2692
2693/* These avoid problems when cross compiling.  If we do not
2694   go through all this hair then the optimizer will see
2695   invalid REG_EQUAL notes or in some cases none at all.  */
2696static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2697static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2698static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2699static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2700
/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH; they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits so the insn
   matches a plain movdi, to alleviate this problem.  */
2706static rtx
2707gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2708{
2709  return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2710}
2711
2712static rtx
2713gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2714{
2715  return gen_rtx_SET (dest, GEN_INT (val));
2716}
2717
2718static rtx
2719gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2720{
2721  return gen_rtx_IOR (DImode, src, GEN_INT (val));
2722}
2723
2724static rtx
2725gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2726{
2727  return gen_rtx_XOR (DImode, src, GEN_INT (val));
2728}
2729
/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to do in these emissions is to create
   as many temp REGs as possible.  This makes it possible for
   half-built constants to be reused later when similar values
   are required.  Without doing this, the optimizer cannot see
   such opportunities.  */
2737
2738static void sparc_emit_set_const64_quick1 (rtx, rtx,
2739					   unsigned HOST_WIDE_INT, int);
2740
2741static void
2742sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2743			       unsigned HOST_WIDE_INT low_bits, int is_neg)
2744{
2745  unsigned HOST_WIDE_INT high_bits;
2746
2747  if (is_neg)
2748    high_bits = (~low_bits) & 0xffffffff;
2749  else
2750    high_bits = low_bits;
2751
2752  emit_insn (gen_safe_HIGH64 (temp, high_bits));
2753  if (!is_neg)
2754    {
2755      emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2756    }
2757  else
2758    {
2759      /* If we are XOR'ing with -1, then we should emit a one's complement
2760	 instead.  This way the combiner will notice logical operations
2761	 such as ANDN later on and substitute.  */
2762      if ((low_bits & 0x3ff) == 0x3ff)
2763	{
2764	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2765	}
2766      else
2767	{
2768	  emit_insn (gen_rtx_SET (op0,
2769				  gen_safe_XOR64 (temp,
2770						  (-(HOST_WIDE_INT)0x400
2771						   | (low_bits & 0x3ff)))));
2772	}
2773    }
2774}
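
/* As a worked example, loading (const_int 0xffffffff12340000) takes the
   is_neg path with low_bits == 0x12340000, hence high_bits == ~low_bits
   == 0xedcbffff, and amounts to:

	sethi	%hi(0xedcbfc00), %temp	! temp <- 0x00000000edcbfc00
	xor	%temp, -0x400, %op0	! op0  <- 0xffffffff12340000

   where the xor with a negative simm13 sign-extends into the high
   word.  */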
2775
2776static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2777					   unsigned HOST_WIDE_INT, int);
2778
2779static void
2780sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2781			       unsigned HOST_WIDE_INT high_bits,
2782			       unsigned HOST_WIDE_INT low_immediate,
2783			       int shift_count)
2784{
2785  rtx temp2 = op0;
2786
2787  if ((high_bits & 0xfffffc00) != 0)
2788    {
2789      emit_insn (gen_safe_HIGH64 (temp, high_bits));
2790      if ((high_bits & ~0xfffffc00) != 0)
2791	emit_insn (gen_rtx_SET (op0,
2792				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2793      else
2794	temp2 = temp;
2795    }
2796  else
2797    {
2798      emit_insn (gen_safe_SET64 (temp, high_bits));
2799      temp2 = temp;
2800    }
2801
2802  /* Now shift it up into place.  */
2803  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2804					       GEN_INT (shift_count))));
2805
2806  /* If there is a low immediate part piece, finish up by
2807     putting that in as well.  */
2808  if (low_immediate != 0)
2809    emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2810}
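
/* As a worked example, loading (const_int 0x1234567800000000) comes
   here with high_bits == 0x12345678, low_immediate == 0 and
   shift_count == 32, and amounts to:

	sethi	%hi(0x12345678), %temp	! temp <- 0x12345400
	or	%temp, 0x278, %op0	! op0  <- 0x12345678
	sllx	%op0, 32, %op0		! op0  <- 0x1234567800000000  */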
2811
2812static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2813					    unsigned HOST_WIDE_INT);
2814
2815/* Full 64-bit constant decomposition.  Even though this is the
2816   'worst' case, we still optimize a few things away.  */
2817static void
2818sparc_emit_set_const64_longway (rtx op0, rtx temp,
2819				unsigned HOST_WIDE_INT high_bits,
2820				unsigned HOST_WIDE_INT low_bits)
2821{
2822  rtx sub_temp = op0;
2823
2824  if (can_create_pseudo_p ())
2825    sub_temp = gen_reg_rtx (DImode);
2826
2827  if ((high_bits & 0xfffffc00) != 0)
2828    {
2829      emit_insn (gen_safe_HIGH64 (temp, high_bits));
2830      if ((high_bits & ~0xfffffc00) != 0)
2831	emit_insn (gen_rtx_SET (sub_temp,
2832				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2833      else
2834	sub_temp = temp;
2835    }
2836  else
2837    {
2838      emit_insn (gen_safe_SET64 (temp, high_bits));
2839      sub_temp = temp;
2840    }
2841
2842  if (can_create_pseudo_p ())
2843    {
2844      rtx temp2 = gen_reg_rtx (DImode);
2845      rtx temp3 = gen_reg_rtx (DImode);
2846      rtx temp4 = gen_reg_rtx (DImode);
2847
2848      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2849						     GEN_INT (32))));
2850
2851      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2852      if ((low_bits & ~0xfffffc00) != 0)
2853	{
2854	  emit_insn (gen_rtx_SET (temp3,
2855				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2856	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2857	}
2858      else
2859	{
2860	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2861	}
2862    }
2863  else
2864    {
2865      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
2866      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
2867      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2868      int to_shift = 12;
2869
2870      /* We are in the middle of reload, so this is really
2871	 painful.  However we do still make an attempt to
2872	 avoid emitting truly stupid code.  */
2873      if (low1 != const0_rtx)
2874	{
2875	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2876						       GEN_INT (to_shift))));
2877	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2878	  sub_temp = op0;
2879	  to_shift = 12;
2880	}
2881      else
2882	{
2883	  to_shift += 12;
2884	}
2885      if (low2 != const0_rtx)
2886	{
2887	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2888						       GEN_INT (to_shift))));
2889	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2890	  sub_temp = op0;
2891	  to_shift = 8;
2892	}
2893      else
2894	{
2895	  to_shift += 8;
2896	}
2897      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2898						   GEN_INT (to_shift))));
2899      if (low3 != const0_rtx)
2900	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2901      /* phew...  */
2902    }
2903}
2904
2905/* Analyze a 64-bit constant for certain properties.  */
2906static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2907				    unsigned HOST_WIDE_INT,
2908				    int *, int *, int *);
2909
2910static void
2911analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2912			unsigned HOST_WIDE_INT low_bits,
2913			int *hbsp, int *lbsp, int *abbasp)
2914{
2915  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2916  int i;
2917
2918  lowest_bit_set = highest_bit_set = -1;
2919  i = 0;
2920  do
2921    {
2922      if ((lowest_bit_set == -1)
2923	  && ((low_bits >> i) & 1))
2924	lowest_bit_set = i;
2925      if ((highest_bit_set == -1)
2926	  && ((high_bits >> (32 - i - 1)) & 1))
2927	highest_bit_set = (64 - i - 1);
2928    }
2929  while (++i < 32
2930	 && ((highest_bit_set == -1)
2931	     || (lowest_bit_set == -1)));
2932  if (i == 32)
2933    {
2934      i = 0;
2935      do
2936	{
2937	  if ((lowest_bit_set == -1)
2938	      && ((high_bits >> i) & 1))
2939	    lowest_bit_set = i + 32;
2940	  if ((highest_bit_set == -1)
2941	      && ((low_bits >> (32 - i - 1)) & 1))
2942	    highest_bit_set = 32 - i - 1;
2943	}
2944      while (++i < 32
2945	     && ((highest_bit_set == -1)
2946		 || (lowest_bit_set == -1)));
2947    }
2948  /* If there are no bits set this should have gone out
2949     as one instruction!  */
2950  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2951  all_bits_between_are_set = 1;
2952  for (i = lowest_bit_set; i <= highest_bit_set; i++)
2953    {
2954      if (i < 32)
2955	{
2956	  if ((low_bits & (1 << i)) != 0)
2957	    continue;
2958	}
2959      else
2960	{
2961	  if ((high_bits & (1 << (i - 32))) != 0)
2962	    continue;
2963	}
2964      all_bits_between_are_set = 0;
2965      break;
2966    }
2967  *hbsp = highest_bit_set;
2968  *lbsp = lowest_bit_set;
2969  *abbasp = all_bits_between_are_set;
2970}
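
/* For example, for the constant 0x00000ff000000000 (high_bits == 0xff0,
   low_bits == 0) the loops above find lowest_bit_set == 36,
   highest_bit_set == 43 and all_bits_between_are_set == 1, which the
   2-insn code below turns into mov 0xff / sllx 36.  */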
2971
2972static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2973
2974static int
2975const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2976		   unsigned HOST_WIDE_INT low_bits)
2977{
2978  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2979
2980  if (high_bits == 0
2981      || high_bits == 0xffffffff)
2982    return 1;
2983
2984  analyze_64bit_constant (high_bits, low_bits,
2985			  &highest_bit_set, &lowest_bit_set,
2986			  &all_bits_between_are_set);
2987
2988  if ((highest_bit_set == 63
2989       || lowest_bit_set == 0)
2990      && all_bits_between_are_set != 0)
2991    return 1;
2992
2993  if ((highest_bit_set - lowest_bit_set) < 21)
2994    return 1;
2995
2996  return 0;
2997}
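
/* The three 2-insn shapes accepted above correspond to sethi+or (or
   sethi+xor) when the high word is all zeros or all ones, mov+sllx (or
   mov+srlx) when a solid run of set bits touches bit 0 or bit 63, and
   sethi+shift when the run of set bits is narrow enough to fit in the
   22-bit sethi field.  */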
2998
2999static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
3000							unsigned HOST_WIDE_INT,
3001							int, int);
3002
3003static unsigned HOST_WIDE_INT
3004create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
3005			  unsigned HOST_WIDE_INT low_bits,
3006			  int lowest_bit_set, int shift)
3007{
3008  HOST_WIDE_INT hi, lo;
3009
3010  if (lowest_bit_set < 32)
3011    {
3012      lo = (low_bits >> lowest_bit_set) << shift;
3013      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
3014    }
3015  else
3016    {
3017      lo = 0;
3018      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
3019    }
3020  gcc_assert (! (hi & lo));
3021  return (hi | lo);
3022}
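
/* For instance, for the constant 0x000ffff000000000 (set bits 36..51)
   the sethi path below calls this with lowest_bit_set == 36 and
   shift == 10, yielding (0x000ffff0 >> 4) << 10 == 0x3fffc00: the run
   of set bits repositioned so that a single sethi can materialize it
   before an sllx shifts it back into place.  */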
3023
3024/* Here we are sure to be arch64 and this is an integer constant
3025   being loaded into a register.  Emit the most efficient
3026   insn sequence possible.  Detection of all the 1-insn cases
3027   has been done already.  */
3028static void
3029sparc_emit_set_const64 (rtx op0, rtx op1)
3030{
3031  unsigned HOST_WIDE_INT high_bits, low_bits;
3032  int lowest_bit_set, highest_bit_set;
3033  int all_bits_between_are_set;
3034  rtx temp = 0;
3035
3036  /* Sanity check that we know what we are working with.  */
3037  gcc_assert (TARGET_ARCH64
3038	      && (GET_CODE (op0) == SUBREG
3039		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
3040
3041  if (! can_create_pseudo_p ())
3042    temp = op0;
3043
3044  if (GET_CODE (op1) != CONST_INT)
3045    {
3046      sparc_emit_set_symbolic_const64 (op0, op1, temp);
3047      return;
3048    }
3049
3050  if (! temp)
3051    temp = gen_reg_rtx (DImode);
3052
3053  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3054  low_bits = (INTVAL (op1) & 0xffffffff);
3055
3056  /* low_bits	bits 0  --> 31
3057     high_bits	bits 32 --> 63  */
3058
3059  analyze_64bit_constant (high_bits, low_bits,
3060			  &highest_bit_set, &lowest_bit_set,
3061			  &all_bits_between_are_set);
3062
3063  /* First try for a 2-insn sequence.  */
3064
3065  /* These situations are preferred because the optimizer can
3066   * do more things with them:
3067   * 1) mov	-1, %reg
3068   *    sllx	%reg, shift, %reg
3069   * 2) mov	-1, %reg
3070   *    srlx	%reg, shift, %reg
3071   * 3) mov	some_small_const, %reg
3072   *    sllx	%reg, shift, %reg
3073   */
3074  if (((highest_bit_set == 63
3075	|| lowest_bit_set == 0)
3076       && all_bits_between_are_set != 0)
3077      || ((highest_bit_set - lowest_bit_set) < 12))
3078    {
3079      HOST_WIDE_INT the_const = -1;
3080      int shift = lowest_bit_set;
3081
3082      if ((highest_bit_set != 63
3083	   && lowest_bit_set != 0)
3084	  || all_bits_between_are_set == 0)
3085	{
3086	  the_const =
3087	    create_simple_focus_bits (high_bits, low_bits,
3088				      lowest_bit_set, 0);
3089	}
3090      else if (lowest_bit_set == 0)
3091	shift = -(63 - highest_bit_set);
3092
3093      gcc_assert (SPARC_SIMM13_P (the_const));
3094      gcc_assert (shift != 0);
3095
3096      emit_insn (gen_safe_SET64 (temp, the_const));
3097      if (shift > 0)
3098	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3099						     GEN_INT (shift))));
3100      else if (shift < 0)
3101	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3102						       GEN_INT (-shift))));
3103      return;
3104    }
3105
  /* Now a range of 22 or fewer bits set somewhere.
3107   * 1) sethi	%hi(focus_bits), %reg
3108   *    sllx	%reg, shift, %reg
3109   * 2) sethi	%hi(focus_bits), %reg
3110   *    srlx	%reg, shift, %reg
3111   */
3112  if ((highest_bit_set - lowest_bit_set) < 21)
3113    {
3114      unsigned HOST_WIDE_INT focus_bits =
3115	create_simple_focus_bits (high_bits, low_bits,
3116				  lowest_bit_set, 10);
3117
3118      gcc_assert (SPARC_SETHI_P (focus_bits));
3119      gcc_assert (lowest_bit_set != 10);
3120
3121      emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3122
3123      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
3124      if (lowest_bit_set < 10)
3125	emit_insn (gen_rtx_SET (op0,
3126				gen_rtx_LSHIFTRT (DImode, temp,
3127						  GEN_INT (10 - lowest_bit_set))));
3128      else if (lowest_bit_set > 10)
3129	emit_insn (gen_rtx_SET (op0,
3130				gen_rtx_ASHIFT (DImode, temp,
3131						GEN_INT (lowest_bit_set - 10))));
3132      return;
3133    }
3134
3135  /* 1) sethi	%hi(low_bits), %reg
3136   *    or	%reg, %lo(low_bits), %reg
3137   * 2) sethi	%hi(~low_bits), %reg
3138   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3139   */
3140  if (high_bits == 0
3141      || high_bits == 0xffffffff)
3142    {
3143      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3144				     (high_bits == 0xffffffff));
3145      return;
3146    }
3147
3148  /* Now, try 3-insn sequences.  */
3149
3150  /* 1) sethi	%hi(high_bits), %reg
3151   *    or	%reg, %lo(high_bits), %reg
3152   *    sllx	%reg, 32, %reg
3153   */
3154  if (low_bits == 0)
3155    {
3156      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3157      return;
3158    }
3159
3160  /* We may be able to do something quick
3161     when the constant is negated, so try that.  */
3162  if (const64_is_2insns ((~high_bits) & 0xffffffff,
3163			 (~low_bits) & 0xfffffc00))
3164    {
3165      /* NOTE: The trailing bits get XOR'd so we need the
3166	 non-negated bits, not the negated ones.  */
3167      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3168
3169      if ((((~high_bits) & 0xffffffff) == 0
3170	   && ((~low_bits) & 0x80000000) == 0)
3171	  || (((~high_bits) & 0xffffffff) == 0xffffffff
3172	      && ((~low_bits) & 0x80000000) != 0))
3173	{
3174	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3175
3176	  if ((SPARC_SETHI_P (fast_int)
3177	       && (~high_bits & 0xffffffff) == 0)
3178	      || SPARC_SIMM13_P (fast_int))
3179	    emit_insn (gen_safe_SET64 (temp, fast_int));
3180	  else
3181	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3182	}
3183      else
3184	{
3185	  rtx negated_const;
3186	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3187				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3188	  sparc_emit_set_const64 (temp, negated_const);
3189	}
3190
3191      /* If we are XOR'ing with -1, then we should emit a one's complement
3192	 instead.  This way the combiner will notice logical operations
3193	 such as ANDN later on and substitute.  */
3194      if (trailing_bits == 0x3ff)
3195	{
3196	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3197	}
3198      else
3199	{
3200	  emit_insn (gen_rtx_SET (op0,
3201				  gen_safe_XOR64 (temp,
3202						  (-0x400 | trailing_bits))));
3203	}
3204      return;
3205    }
3206
3207  /* 1) sethi	%hi(xxx), %reg
3208   *    or	%reg, %lo(xxx), %reg
3209   *	sllx	%reg, yyy, %reg
3210   *
3211   * ??? This is just a generalized version of the low_bits==0
3212   * thing above, FIXME...
3213   */
3214  if ((highest_bit_set - lowest_bit_set) < 32)
3215    {
3216      unsigned HOST_WIDE_INT focus_bits =
3217	create_simple_focus_bits (high_bits, low_bits,
3218				  lowest_bit_set, 0);
3219
3220      /* We can't get here in this state.  */
3221      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3222
3223      /* So what we know is that the set bits straddle the
3224	 middle of the 64-bit word.  */
3225      sparc_emit_set_const64_quick2 (op0, temp,
3226				     focus_bits, 0,
3227				     lowest_bit_set);
3228      return;
3229    }
3230
3231  /* 1) sethi	%hi(high_bits), %reg
3232   *    or	%reg, %lo(high_bits), %reg
3233   *    sllx	%reg, 32, %reg
3234   *	or	%reg, low_bits, %reg
3235   */
3236  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3237    {
3238      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3239      return;
3240    }
3241
3242  /* The easiest way when all else fails, is full decomposition.  */
3243  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3244}
3245
3246/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  */
3247
3248static bool
3249sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3250{
3251  *p1 = SPARC_ICC_REG;
3252  *p2 = SPARC_FCC_REG;
3253  return true;
3254}
3255
3256/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
3257
3258static unsigned int
3259sparc_min_arithmetic_precision (void)
3260{
3261  return 32;
3262}
3263
3264/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3265   return the mode to be used for the comparison.  For floating-point,
   CCFP[E]mode is used.  CCNZmode should be used when the first operand
   is a PLUS, MINUS, NEG, or ASHIFT and the second operand is zero.
   CCmode should be used when no special processing is needed.  */
3269
3270machine_mode
3271select_cc_mode (enum rtx_code op, rtx x, rtx y)
3272{
3273  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3274    {
3275      switch (op)
3276	{
3277	case EQ:
3278	case NE:
3279	case UNORDERED:
3280	case ORDERED:
3281	case UNLT:
3282	case UNLE:
3283	case UNGT:
3284	case UNGE:
3285	case UNEQ:
3286	  return CCFPmode;
3287
3288	case LT:
3289	case LE:
3290	case GT:
3291	case GE:
3292	case LTGT:
3293	  return CCFPEmode;
3294
3295	default:
3296	  gcc_unreachable ();
3297	}
3298    }
3299  else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3300	    || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3301	   && y == const0_rtx)
3302    {
3303      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3304	return CCXNZmode;
3305      else
3306	return CCNZmode;
3307    }
3308  else
3309    {
3310      /* This is for the cmp<mode>_sne pattern.  */
3311      if (GET_CODE (x) == NOT && y == constm1_rtx)
3312	{
3313	  if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3314	    return CCXCmode;
3315	  else
3316	    return CCCmode;
3317	}
3318
3319      /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns.  */
3320      if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3321	{
	  if (GET_CODE (y) == UNSPEC
	      && (XINT (y, 1) == UNSPEC_ADDV
		  || XINT (y, 1) == UNSPEC_SUBV
		  || XINT (y, 1) == UNSPEC_NEGV))
3326	    return CCVmode;
3327	  else
3328	    return CCCmode;
3329	}
3330
3331      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3332	return CCXmode;
3333      else
3334	return CCmode;
3335    }
3336}
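
/* For example, comparing (plus:SI a b) against zero yields CCNZmode
   above, so that the comparison can pick up the N and Z condition codes
   set by an addcc instruction; the same comparison in DImode on a
   64-bit target yields CCXNZmode instead.  */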
3337
3338/* Emit the compare insn and return the CC reg for a CODE comparison
3339   with operands X and Y.  */
3340
3341static rtx
3342gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3343{
3344  machine_mode mode;
3345  rtx cc_reg;
3346
3347  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3348    return x;
3349
3350  mode = SELECT_CC_MODE (code, x, y);
3351
3352  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3353     fcc regs (cse can't tell they're really call clobbered regs and will
3354     remove a duplicate comparison even if there is an intervening function
3355     call - it will then try to reload the cc reg via an int reg which is why
3356     we need the movcc patterns).  It is possible to provide the movcc
3357     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
3358     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
3359     to tell cse that CCFPE mode registers (even pseudos) are call
3360     clobbered.  */
3361
3362  /* ??? This is an experiment.  Rather than making changes to cse which may
3363     or may not be easy/clean, we do our own cse.  This is possible because
3364     we will generate hard registers.  Cse knows they're call clobbered (it
3365     doesn't know the same thing about pseudos). If we guess wrong, no big
3366     deal, but if we win, great!  */
3367
3368  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3369#if 1 /* experiment */
3370    {
3371      int reg;
3372      /* We cycle through the registers to ensure they're all exercised.  */
3373      static int next_fcc_reg = 0;
3374      /* Previous x,y for each fcc reg.  */
3375      static rtx prev_args[4][2];
3376
3377      /* Scan prev_args for x,y.  */
3378      for (reg = 0; reg < 4; reg++)
3379	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3380	  break;
3381      if (reg == 4)
3382	{
3383	  reg = next_fcc_reg;
3384	  prev_args[reg][0] = x;
3385	  prev_args[reg][1] = y;
3386	  next_fcc_reg = (next_fcc_reg + 1) & 3;
3387	}
3388      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3389    }
3390#else
3391    cc_reg = gen_reg_rtx (mode);
3392#endif /* ! experiment */
3393  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3394    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3395  else
3396    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3397
  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn, so there is no point in
     asserting.  */
3400  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3401
3402  return cc_reg;
3403}
3404
3405
3406/* Emit the compare insn and return the CC reg for the comparison in CMP.  */
3407
3408rtx
3409gen_compare_reg (rtx cmp)
3410{
3411  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3412}
3413
3414/* This function is used for v9 only.
3415   DEST is the target of the Scc insn.
3416   CODE is the code for an Scc's comparison.
3417   X and Y are the values we compare.
3418
3419   This function is needed to turn
3420
3421	   (set (reg:SI 110)
3422	       (gt (reg:CCX 100 %icc)
3423	           (const_int 0)))
3424   into
3425	   (set (reg:SI 110)
3426	       (gt:DI (reg:CCX 100 %icc)
3427	           (const_int 0)))
3428
   I.e. the instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
3432
3433static int
3434gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3435{
3436  if (! TARGET_ARCH64
3437      && (GET_MODE (x) == DImode
3438	  || GET_MODE (dest) == DImode))
3439    return 0;
3440
3441  /* Try to use the movrCC insns.  */
3442  if (TARGET_ARCH64
3443      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3444      && y == const0_rtx
3445      && v9_regcmp_p (compare_code))
3446    {
3447      rtx op0 = x;
3448      rtx temp;
3449
3450      /* Special case for op0 != 0.  This can be done with one instruction if
3451	 dest == x.  */
3452
3453      if (compare_code == NE
3454	  && GET_MODE (dest) == DImode
3455	  && rtx_equal_p (op0, dest))
3456	{
3457	  emit_insn (gen_rtx_SET (dest,
3458			      gen_rtx_IF_THEN_ELSE (DImode,
3459				       gen_rtx_fmt_ee (compare_code, DImode,
3460						       op0, const0_rtx),
3461				       const1_rtx,
3462				       dest)));
3463	  return 1;
3464	}
3465
3466      if (reg_overlap_mentioned_p (dest, op0))
3467	{
3468	  /* Handle the case where dest == x.
3469	     We "early clobber" the result.  */
3470	  op0 = gen_reg_rtx (GET_MODE (x));
3471	  emit_move_insn (op0, x);
3472	}
3473
3474      emit_insn (gen_rtx_SET (dest, const0_rtx));
3475      if (GET_MODE (op0) != DImode)
3476	{
3477	  temp = gen_reg_rtx (DImode);
3478	  convert_move (temp, op0, 0);
3479	}
3480      else
3481	temp = op0;
3482      emit_insn (gen_rtx_SET (dest,
3483			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3484				   gen_rtx_fmt_ee (compare_code, DImode,
3485						   temp, const0_rtx),
3486				   const1_rtx,
3487				   dest)));
3488      return 1;
3489    }
3490  else
3491    {
3492      x = gen_compare_reg_1 (compare_code, x, y);
3493      y = const0_rtx;
3494
3495      emit_insn (gen_rtx_SET (dest, const0_rtx));
3496      emit_insn (gen_rtx_SET (dest,
3497			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3498				   gen_rtx_fmt_ee (compare_code,
3499						   GET_MODE (x), x, y),
3500				    const1_rtx, dest)));
3501      return 1;
3502    }
3503}
3504
3505
3506/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
3507   without jumps using the addx/subx instructions.  */
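
/* A sketch of the underlying idiom (register names illustrative):

     sltu:  subcc %o0, %o1, %g0   ! carry = (x <u y)
            addx  %g0, 0, %o2     ! dest = carry

     sgeu:  subcc %o0, %o1, %g0   ! carry = (x <u y)
            subx  %g0, -1, %o2    ! dest = 1 - carry

   For seq/sne, the code below first reduces the comparison to one of
   x ^ y against zero and then uses the carry set by subcc %g0, x.  */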
3508
3509bool
3510emit_scc_insn (rtx operands[])
3511{
3512  rtx tem, x, y;
3513  enum rtx_code code;
3514  machine_mode mode;
3515
3516  /* The quad-word fp compare library routines all return nonzero to indicate
3517     true, which is different from the equivalent libgcc routines, so we must
3518     handle them specially here.  */
3519  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3520    {
3521      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3522					      GET_CODE (operands[1]));
3523      operands[2] = XEXP (operands[1], 0);
3524      operands[3] = XEXP (operands[1], 1);
3525    }
3526
3527  code = GET_CODE (operands[1]);
3528  x = operands[2];
3529  y = operands[3];
3530  mode = GET_MODE (x);
3531
3532  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3533     more applications).  The exception to this is "reg != 0" which can
3534     be done in one instruction on v9 (so we do it).  */
3535  if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3536    {
3537      if (y != const0_rtx)
3538	x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3539
3540      rtx pat = gen_rtx_SET (operands[0],
3541			     gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3542					     x, const0_rtx));
3543
3544      /* If we can use addx/subx or addxc, add a clobber for CC.  */
3545      if (mode == SImode || (code == NE && TARGET_VIS3))
3546	{
3547	  rtx clobber
3548	    = gen_rtx_CLOBBER (VOIDmode,
3549			       gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3550					    SPARC_ICC_REG));
3551	  pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3552	}
3553
3554      emit_insn (pat);
3555      return true;
3556    }
3557
3558  /* We can do LTU in DImode using the addxc instruction with VIS3.  */
3559  if (TARGET_ARCH64
3560      && mode == DImode
3561      && !((code == LTU || code == GTU) && TARGET_VIS3)
3562      && gen_v9_scc (operands[0], code, x, y))
3563    return true;
3564
  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers, swap them and fall
     back to the easy case.  */
3568  if (code == GTU || code == LEU)
3569    {
3570      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3571          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3572        {
3573          tem = x;
3574          x = y;
3575          y = tem;
3576          code = swap_condition (code);
3577        }
3578    }
3579
3580  if (code == LTU || code == GEU)
3581    {
3582      emit_insn (gen_rtx_SET (operands[0],
3583			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3584					      gen_compare_reg_1 (code, x, y),
3585					      const0_rtx)));
3586      return true;
3587    }
3588
  /* All the possibilities to use addx/subx-based sequences have been
     exhausted, try for a 3-instruction sequence using v9 conditional
     moves.  */
3592  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3593    return true;
3594
3595  /* Nope, do branches.  */
3596  return false;
3597}
3598
3599/* Emit a conditional jump insn for the v9 architecture using comparison code
3600   CODE and jump target LABEL.
3601   This function exists to take advantage of the v9 brxx insns.  */
3602
3603static void
3604emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3605{
3606  emit_jump_insn (gen_rtx_SET (pc_rtx,
3607			   gen_rtx_IF_THEN_ELSE (VOIDmode,
3608				    gen_rtx_fmt_ee (code, GET_MODE (op0),
3609						    op0, const0_rtx),
3610				    gen_rtx_LABEL_REF (VOIDmode, label),
3611				    pc_rtx)));
3612}
3613
3614/* Emit a conditional jump insn for the UA2011 architecture using
3615   comparison code CODE and jump target LABEL.  This function exists
3616   to take advantage of the UA2011 Compare and Branch insns.  */
3617
3618static void
3619emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3620{
3621  rtx if_then_else;
3622
3623  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3624				       gen_rtx_fmt_ee(code, GET_MODE(op0),
3625						      op0, op1),
3626				       gen_rtx_LABEL_REF (VOIDmode, label),
3627				       pc_rtx);
3628
3629  emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3630}
3631
3632void
3633emit_conditional_branch_insn (rtx operands[])
3634{
3635  /* The quad-word fp compare library routines all return nonzero to indicate
3636     true, which is different from the equivalent libgcc routines, so we must
3637     handle them specially here.  */
3638  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3639    {
3640      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3641					      GET_CODE (operands[0]));
3642      operands[1] = XEXP (operands[0], 0);
3643      operands[2] = XEXP (operands[0], 1);
3644    }
3645
  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond
     (i.e. outside [-16, 15]), use one of the other v9 conditional branch
     sequences.  */
3649  if (TARGET_CBCOND
3650      && GET_CODE (operands[1]) == REG
3651      && (GET_MODE (operands[1]) == SImode
3652	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3653      && (GET_CODE (operands[2]) != CONST_INT
3654	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
3655    {
3656      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3657      return;
3658    }
3659
3660  if (TARGET_ARCH64 && operands[2] == const0_rtx
3661      && GET_CODE (operands[1]) == REG
3662      && GET_MODE (operands[1]) == DImode)
3663    {
3664      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3665      return;
3666    }
3667
3668  operands[1] = gen_compare_reg (operands[0]);
3669  operands[2] = const0_rtx;
3670  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3671				operands[1], operands[2]);
3672  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3673				  operands[3]));
3674}
3675
3676
/* Generate a DFmode part of a hard TFmode register.
   REG is the TFmode hard register, LOW is 1 for the
   low 64 bits of the register and 0 otherwise.  */
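
/* E.g. (illustrative, SPARC word order is big-endian): for a TFmode value
   living in %f4..%f7, LOW == 0 yields the DFmode register %f4 and LOW == 1
   yields %f6, i.e. the FP register number is bumped by 2.  */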
3681rtx
3682gen_df_reg (rtx reg, int low)
3683{
3684  int regno = REGNO (reg);
3685
3686  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3687    regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3688  return gen_rtx_REG (DFmode, regno);
3689}
3690
/* Generate a call to FUNC_NAME with OPERANDS.  Operand 0 is the return
   value.  Unlike normal calls, TFmode operands are passed by reference.
   It is assumed that no more than 3 operands are required.  */
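
/* E.g. a TFmode addition becomes a call to the Sun quad-float routine

     void _Qp_add (long double *result, const long double *a,
                   const long double *b);

   with all three TFmode values passed by reference (a sketch of the
   convention; see emit_soft_tfmode_binop below).  */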
3694
3695static void
3696emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3697{
3698  rtx ret_slot = NULL, arg[3], func_sym;
3699  int i;
3700
3701  /* We only expect to be called for conversions, unary, and binary ops.  */
3702  gcc_assert (nargs == 2 || nargs == 3);
3703
3704  for (i = 0; i < nargs; ++i)
3705    {
3706      rtx this_arg = operands[i];
3707      rtx this_slot;
3708
3709      /* TFmode arguments and return values are passed by reference.  */
3710      if (GET_MODE (this_arg) == TFmode)
3711	{
3712	  int force_stack_temp;
3713
3714	  force_stack_temp = 0;
3715	  if (TARGET_BUGGY_QP_LIB && i == 0)
3716	    force_stack_temp = 1;
3717
3718	  if (GET_CODE (this_arg) == MEM
3719	      && ! force_stack_temp)
3720	    {
3721	      tree expr = MEM_EXPR (this_arg);
3722	      if (expr)
3723		mark_addressable (expr);
3724	      this_arg = XEXP (this_arg, 0);
3725	    }
3726	  else if (CONSTANT_P (this_arg)
3727		   && ! force_stack_temp)
3728	    {
3729	      this_slot = force_const_mem (TFmode, this_arg);
3730	      this_arg = XEXP (this_slot, 0);
3731	    }
3732	  else
3733	    {
3734	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3735
3736	      /* Operand 0 is the return value.  We'll copy it out later.  */
3737	      if (i > 0)
3738		emit_move_insn (this_slot, this_arg);
3739	      else
3740		ret_slot = this_slot;
3741
3742	      this_arg = XEXP (this_slot, 0);
3743	    }
3744	}
3745
3746      arg[i] = this_arg;
3747    }
3748
3749  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3750
3751  if (GET_MODE (operands[0]) == TFmode)
3752    {
3753      if (nargs == 2)
3754	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3755			   arg[0], GET_MODE (arg[0]),
3756			   arg[1], GET_MODE (arg[1]));
3757      else
3758	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3759			   arg[0], GET_MODE (arg[0]),
3760			   arg[1], GET_MODE (arg[1]),
3761			   arg[2], GET_MODE (arg[2]));
3762
3763      if (ret_slot)
3764	emit_move_insn (operands[0], ret_slot);
3765    }
3766  else
3767    {
3768      rtx ret;
3769
3770      gcc_assert (nargs == 2);
3771
3772      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3773				     GET_MODE (operands[0]),
3774				     arg[1], GET_MODE (arg[1]));
3775
3776      if (ret != operands[0])
3777	emit_move_insn (operands[0], ret);
3778    }
3779}
3780
/* Expand soft-float TFmode calls to the SPARC ABI routines.  */
3782
3783static void
3784emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3785{
3786  const char *func;
3787
3788  switch (code)
3789    {
3790    case PLUS:
3791      func = "_Qp_add";
3792      break;
3793    case MINUS:
3794      func = "_Qp_sub";
3795      break;
3796    case MULT:
3797      func = "_Qp_mul";
3798      break;
3799    case DIV:
3800      func = "_Qp_div";
3801      break;
3802    default:
3803      gcc_unreachable ();
3804    }
3805
3806  emit_soft_tfmode_libcall (func, 3, operands);
3807}
3808
3809static void
3810emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3811{
3812  const char *func;
3813
3814  gcc_assert (code == SQRT);
3815  func = "_Qp_sqrt";
3816
3817  emit_soft_tfmode_libcall (func, 2, operands);
3818}
3819
3820static void
3821emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3822{
3823  const char *func;
3824
3825  switch (code)
3826    {
3827    case FLOAT_EXTEND:
3828      switch (GET_MODE (operands[1]))
3829	{
3830	case E_SFmode:
3831	  func = "_Qp_stoq";
3832	  break;
3833	case E_DFmode:
3834	  func = "_Qp_dtoq";
3835	  break;
3836	default:
3837	  gcc_unreachable ();
3838	}
3839      break;
3840
3841    case FLOAT_TRUNCATE:
3842      switch (GET_MODE (operands[0]))
3843	{
3844	case E_SFmode:
3845	  func = "_Qp_qtos";
3846	  break;
3847	case E_DFmode:
3848	  func = "_Qp_qtod";
3849	  break;
3850	default:
3851	  gcc_unreachable ();
3852	}
3853      break;
3854
3855    case FLOAT:
3856      switch (GET_MODE (operands[1]))
3857	{
3858	case E_SImode:
3859	  func = "_Qp_itoq";
3860	  if (TARGET_ARCH64)
3861	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3862	  break;
3863	case E_DImode:
3864	  func = "_Qp_xtoq";
3865	  break;
3866	default:
3867	  gcc_unreachable ();
3868	}
3869      break;
3870
3871    case UNSIGNED_FLOAT:
3872      switch (GET_MODE (operands[1]))
3873	{
3874	case E_SImode:
3875	  func = "_Qp_uitoq";
3876	  if (TARGET_ARCH64)
3877	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3878	  break;
3879	case E_DImode:
3880	  func = "_Qp_uxtoq";
3881	  break;
3882	default:
3883	  gcc_unreachable ();
3884	}
3885      break;
3886
3887    case FIX:
3888      switch (GET_MODE (operands[0]))
3889	{
3890	case E_SImode:
3891	  func = "_Qp_qtoi";
3892	  break;
3893	case E_DImode:
3894	  func = "_Qp_qtox";
3895	  break;
3896	default:
3897	  gcc_unreachable ();
3898	}
3899      break;
3900
3901    case UNSIGNED_FIX:
3902      switch (GET_MODE (operands[0]))
3903	{
3904	case E_SImode:
3905	  func = "_Qp_qtoui";
3906	  break;
3907	case E_DImode:
3908	  func = "_Qp_qtoux";
3909	  break;
3910	default:
3911	  gcc_unreachable ();
3912	}
3913      break;
3914
3915    default:
3916      gcc_unreachable ();
3917    }
3918
3919  emit_soft_tfmode_libcall (func, 2, operands);
3920}
3921
/* Expand a hard-float TFmode operation.  All arguments must be in
   registers.  */
3924
3925static void
3926emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3927{
3928  rtx op, dest;
3929
3930  if (GET_RTX_CLASS (code) == RTX_UNARY)
3931    {
3932      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3933      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3934    }
3935  else
3936    {
3937      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3938      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3939      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3940			   operands[1], operands[2]);
3941    }
3942
3943  if (register_operand (operands[0], VOIDmode))
3944    dest = operands[0];
3945  else
3946    dest = gen_reg_rtx (GET_MODE (operands[0]));
3947
3948  emit_insn (gen_rtx_SET (dest, op));
3949
3950  if (dest != operands[0])
3951    emit_move_insn (operands[0], dest);
3952}
3953
3954void
3955emit_tfmode_binop (enum rtx_code code, rtx *operands)
3956{
3957  if (TARGET_HARD_QUAD)
3958    emit_hard_tfmode_operation (code, operands);
3959  else
3960    emit_soft_tfmode_binop (code, operands);
3961}
3962
3963void
3964emit_tfmode_unop (enum rtx_code code, rtx *operands)
3965{
3966  if (TARGET_HARD_QUAD)
3967    emit_hard_tfmode_operation (code, operands);
3968  else
3969    emit_soft_tfmode_unop (code, operands);
3970}
3971
3972void
3973emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3974{
3975  if (TARGET_HARD_QUAD)
3976    emit_hard_tfmode_operation (code, operands);
3977  else
3978    emit_soft_tfmode_cvt (code, operands);
3979}
3980
/* Return nonzero if a branch/jump/call instruction will be emitting a
   nop into its delay slot.  */
3983
3984int
3985empty_delay_slot (rtx_insn *insn)
3986{
3987  rtx seq;
3988
  /* If there is no previous instruction (should not happen), return true.  */
3990  if (PREV_INSN (insn) == NULL)
3991    return 1;
3992
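  /* If INSN is part of a SEQUENCE (i.e. its delay slot was filled),
     NEXT_INSN (PREV_INSN (insn)) is the enclosing SEQUENCE rather than
     INSN itself (the standard idiom for detecting this).  */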
3993  seq = NEXT_INSN (PREV_INSN (insn));
3994  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3995    return 0;
3996
3997  return 1;
3998}
3999
/* Return nonzero if we should emit a nop after a cbcond instruction.
   The cbcond instruction does not have a delay slot; however, there is
   a severe performance penalty if a control transfer appears right
   after a cbcond.  Therefore we emit a nop when we detect this
   situation.  */
4005
4006int
4007emit_cbcond_nop (rtx_insn *insn)
4008{
4009  rtx next = next_active_insn (insn);
4010
4011  if (!next)
4012    return 1;
4013
4014  if (NONJUMP_INSN_P (next)
4015      && GET_CODE (PATTERN (next)) == SEQUENCE)
4016    next = XVECEXP (PATTERN (next), 0, 0);
4017  else if (CALL_P (next)
4018	   && GET_CODE (PATTERN (next)) == PARALLEL)
4019    {
4020      rtx delay = XVECEXP (PATTERN (next), 0, 1);
4021
4022      if (GET_CODE (delay) == RETURN)
4023	{
4024	  /* It's a sibling call.  Do not emit the nop if we're going
4025	     to emit something other than the jump itself as the first
4026	     instruction of the sibcall sequence.  */
4027	  if (sparc_leaf_function_p || TARGET_FLAT)
4028	    return 0;
4029	}
4030    }
4031
4032  if (NONJUMP_INSN_P (next))
4033    return 0;
4034
4035  return 1;
4036}
4037
4038/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4039   instruction.  RETURN_P is true if the v9 variant 'return' is to be
4040   considered in the test too.
4041
4042   TRIAL must be a SET whose destination is a REG appropriate for the
4043   'restore' instruction or, if RETURN_P is true, for the 'return'
4044   instruction.  */
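
/* For example (a sketch), in a function ending with 'return a + b' the
   addition can be folded into the epilogue as

     ret
     restore %i0, %i1, %o0    ! sources read in the callee's window,
                              ! result lands in the caller's %o0

   instead of a separate add followed by ret/restore.  */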
4045
4046static int
4047eligible_for_restore_insn (rtx trial, bool return_p)
4048{
4049  rtx pat = PATTERN (trial);
4050  rtx src = SET_SRC (pat);
4051  bool src_is_freg = false;
4052  rtx src_reg;
4053
4054  /* Since we now can do moves between float and integer registers when
4055     VIS3 is enabled, we have to catch this case.  We can allow such
4056     moves when doing a 'return' however.  */
4057  src_reg = src;
4058  if (GET_CODE (src_reg) == SUBREG)
4059    src_reg = SUBREG_REG (src_reg);
4060  if (GET_CODE (src_reg) == REG
4061      && SPARC_FP_REG_P (REGNO (src_reg)))
4062    src_is_freg = true;
4063
4064  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
4065  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4066      && arith_operand (src, GET_MODE (src))
4067      && ! src_is_freg)
4068    {
4069      if (TARGET_ARCH64)
4070        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4071      else
4072        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4073    }
4074
4075  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
4076  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4077	   && arith_double_operand (src, GET_MODE (src))
4078	   && ! src_is_freg)
4079    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4080
4081  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
4082  else if (! TARGET_FPU && register_operand (src, SFmode))
4083    return 1;
4084
4085  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
4086  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4087    return 1;
4088
4089  /* If we have the 'return' instruction, anything that does not use
4090     local or output registers and can go into a delay slot wins.  */
4091  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4092    return 1;
4093
4094  /* The 'restore src1,src2,dest' pattern for SImode.  */
4095  else if (GET_CODE (src) == PLUS
4096	   && register_operand (XEXP (src, 0), SImode)
4097	   && arith_operand (XEXP (src, 1), SImode))
4098    return 1;
4099
4100  /* The 'restore src1,src2,dest' pattern for DImode.  */
4101  else if (GET_CODE (src) == PLUS
4102	   && register_operand (XEXP (src, 0), DImode)
4103	   && arith_double_operand (XEXP (src, 1), DImode))
4104    return 1;
4105
4106  /* The 'restore src1,%lo(src2),dest' pattern.  */
4107  else if (GET_CODE (src) == LO_SUM
4108	   && ! TARGET_CM_MEDMID
4109	   && ((register_operand (XEXP (src, 0), SImode)
4110	        && immediate_operand (XEXP (src, 1), SImode))
4111	       || (TARGET_ARCH64
4112		   && register_operand (XEXP (src, 0), DImode)
4113		   && immediate_operand (XEXP (src, 1), DImode))))
4114    return 1;
4115
4116  /* The 'restore src,src,dest' pattern.  */
4117  else if (GET_CODE (src) == ASHIFT
4118	   && (register_operand (XEXP (src, 0), SImode)
4119	       || register_operand (XEXP (src, 0), DImode))
4120	   && XEXP (src, 1) == const1_rtx)
4121    return 1;
4122
4123  return 0;
4124}
4125
4126/* Return nonzero if TRIAL can go into the function return's delay slot.  */
4127
4128int
4129eligible_for_return_delay (rtx_insn *trial)
4130{
4131  int regno;
4132  rtx pat;
4133
4134  /* If the function uses __builtin_eh_return, the eh_return machinery
4135     occupies the delay slot.  */
4136  if (crtl->calls_eh_return)
4137    return 0;
4138
4139  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4140    return 0;
4141
4142  /* In the case of a leaf or flat function, anything can go into the slot.  */
4143  if (sparc_leaf_function_p || TARGET_FLAT)
4144    return 1;
4145
4146  if (!NONJUMP_INSN_P (trial))
4147    return 0;
4148
4149  pat = PATTERN (trial);
4150  if (GET_CODE (pat) == PARALLEL)
4151    {
4152      int i;
4153
4154      if (! TARGET_V9)
4155	return 0;
4156      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4157	{
4158	  rtx expr = XVECEXP (pat, 0, i);
4159	  if (GET_CODE (expr) != SET)
4160	    return 0;
4161	  if (GET_CODE (SET_DEST (expr)) != REG)
4162	    return 0;
4163	  regno = REGNO (SET_DEST (expr));
4164	  if (regno >= 8 && regno < 24)
4165	    return 0;
4166	}
4167      return !epilogue_renumber (&pat, 1);
4168    }
4169
4170  if (GET_CODE (pat) != SET)
4171    return 0;
4172
4173  if (GET_CODE (SET_DEST (pat)) != REG)
4174    return 0;
4175
4176  regno = REGNO (SET_DEST (pat));
4177
4178  /* Otherwise, only operations which can be done in tandem with
4179     a `restore' or `return' insn can go into the delay slot.  */
4180  if (regno >= 8 && regno < 24)
4181    return 0;
4182
  /* If this instruction sets up a floating-point register and we have a
     return instruction, it can probably go in.  But a restore will not
     work with FP_REGS.  */
4186  if (! SPARC_INT_REG_P (regno))
4187    return TARGET_V9 && !epilogue_renumber (&pat, 1);
4188
4189  return eligible_for_restore_insn (trial, true);
4190}
4191
4192/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */
4193
4194int
4195eligible_for_sibcall_delay (rtx_insn *trial)
4196{
4197  rtx pat;
4198
4199  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4200    return 0;
4201
4202  if (!NONJUMP_INSN_P (trial))
4203    return 0;
4204
4205  pat = PATTERN (trial);
4206
4207  if (sparc_leaf_function_p || TARGET_FLAT)
4208    {
4209      /* If the tail call is done using the call instruction,
4210	 we have to restore %o7 in the delay slot.  */
4211      if (LEAF_SIBCALL_SLOT_RESERVED_P)
4212	return 0;
4213
      /* %g1 is used to build the function address.  */
4215      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4216	return 0;
4217
4218      return 1;
4219    }
4220
4221  if (GET_CODE (pat) != SET)
4222    return 0;
4223
4224  /* Otherwise, only operations which can be done in tandem with
4225     a `restore' insn can go into the delay slot.  */
4226  if (GET_CODE (SET_DEST (pat)) != REG
4227      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4228      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4229    return 0;
4230
4231  /* If it mentions %o7, it can't go in, because sibcall will clobber it
4232     in most cases.  */
4233  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4234    return 0;
4235
4236  return eligible_for_restore_insn (trial, false);
4237}
4238
4239/* Determine if it's legal to put X into the constant pool.  This
4240   is not possible if X contains the address of a symbol that is
4241   not constant (TLS) or not known at final link time (PIC).  */
4242
4243static bool
4244sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4245{
4246  switch (GET_CODE (x))
4247    {
4248    case CONST_INT:
4249    case CONST_WIDE_INT:
4250    case CONST_DOUBLE:
4251    case CONST_VECTOR:
4252      /* Accept all non-symbolic constants.  */
4253      return false;
4254
4255    case LABEL_REF:
4256      /* Labels are OK iff we are non-PIC.  */
4257      return flag_pic != 0;
4258
4259    case SYMBOL_REF:
4260      /* 'Naked' TLS symbol references are never OK,
4261	 non-TLS symbols are OK iff we are non-PIC.  */
4262      if (SYMBOL_REF_TLS_MODEL (x))
4263	return true;
4264      else
4265	return flag_pic != 0;
4266
4267    case CONST:
4268      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4269    case PLUS:
4270    case MINUS:
4271      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4272         || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4273    case UNSPEC:
4274      return true;
4275    default:
4276      gcc_unreachable ();
4277    }
4278}
4279
4280/* Global Offset Table support.  */
4281static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4282static GTY(()) rtx got_register_rtx = NULL_RTX;
4283static GTY(()) rtx got_helper_rtx = NULL_RTX;
4284
4285static GTY(()) bool got_helper_needed = false;
4286
4287/* Return the SYMBOL_REF for the Global Offset Table.  */
4288
4289static rtx
4290sparc_got (void)
4291{
4292  if (!got_symbol_rtx)
4293    got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4294
4295  return got_symbol_rtx;
4296}
4297
4298/* Wrapper around the load_pcrel_sym{si,di} patterns.  */
4299
4300static rtx
4301gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4302{
4303  int orig_flag_pic = flag_pic;
4304  rtx insn;
4305
4306  /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
4307  flag_pic = 0;
4308  if (TARGET_ARCH64)
4309    insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4310  else
4311    insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4312  flag_pic = orig_flag_pic;
4313
4314  return insn;
4315}
4316
4317/* Output the load_pcrel_sym{si,di} patterns.  */
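
/* The addends below (-4/+4 with a delay slot, -8/-4 without) are chosen so
   that the two PC-relative relocations together evaluate to the symbol
   minus the address of the call instruction; the helper then completes
   the computation by adding %o7, which the call sets to exactly that
   address (a sketch of the arithmetic).  */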
4318
4319const char *
4320output_load_pcrel_sym (rtx *operands)
4321{
4322  if (flag_delayed_branch)
4323    {
4324      output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4325      output_asm_insn ("call\t%a2", operands);
4326      output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4327    }
4328  else
4329    {
4330      output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4331      output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4332      output_asm_insn ("call\t%a2", operands);
4333      output_asm_insn (" nop", NULL);
4334    }
4335
4336  if (operands[2] == got_helper_rtx)
4337    got_helper_needed = true;
4338
4339  return "";
4340}
4341
4342#ifdef HAVE_GAS_HIDDEN
4343# define USE_HIDDEN_LINKONCE 1
4344#else
4345# define USE_HIDDEN_LINKONCE 0
4346#endif
4347
4348/* Emit code to load the GOT register.  */
4349
4350void
4351load_got_register (void)
4352{
4353  rtx insn;
4354
4355  if (TARGET_VXWORKS_RTP)
4356    {
4357      if (!got_register_rtx)
4358	got_register_rtx = pic_offset_table_rtx;
4359
4360      insn = gen_vxworks_load_got ();
4361    }
4362  else
4363    {
4364      if (!got_register_rtx)
4365	got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4366
4367      /* The GOT symbol is subject to a PC-relative relocation so we need a
4368	 helper function to add the PC value and thus get the final value.  */
4369      if (!got_helper_rtx)
4370	{
4371	  char name[32];
4372
4373	  /* Skip the leading '%' as that cannot be used in a symbol name.  */
4374	  if (USE_HIDDEN_LINKONCE)
4375	    sprintf (name, "__sparc_get_pc_thunk.%s",
4376		     reg_names[REGNO (got_register_rtx)] + 1);
4377	  else
4378	    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4379					 REGNO (got_register_rtx));
4380
4381	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4382	}
4383
4384      insn
4385	= gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4386    }
4387
4388  emit_insn (insn);
4389}
4390
4391/* Ensure that we are not using patterns that are not OK with PIC.  */
4392
4393int
4394check_pic (int i)
4395{
4396  rtx op;
4397
4398  switch (flag_pic)
4399    {
4400    case 1:
4401      op = recog_data.operand[i];
4402      gcc_assert (GET_CODE (op) != SYMBOL_REF
4403	  	  && (GET_CODE (op) != CONST
4404		      || (GET_CODE (XEXP (op, 0)) == MINUS
4405			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
4406			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4407      /* fallthrough */
4408    case 2:
4409    default:
4410      return 1;
4411    }
4412}
4413
4414/* Return true if X is an address which needs a temporary register when
4415   reloaded while generating PIC code.  */
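
/* E.g. (const (plus (symbol_ref "x") (const_int 8192))) needs a scratch:
   8192 does not fit in a simm13 field, so the symbol must be loaded first
   and the offset added separately (illustrative constant).  */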
4416
4417int
4418pic_address_needs_scratch (rtx x)
4419{
  /* An address which is a symbolic operand plus a non-SMALL_INT constant
     needs a temp reg.  */
4421  if (GET_CODE (x) == CONST
4422      && GET_CODE (XEXP (x, 0)) == PLUS
4423      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4424      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4425      && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4426    return 1;
4427
4428  return 0;
4429}
4430
4431/* Determine if a given RTX is a valid constant.  We already know this
4432   satisfies CONSTANT_P.  */
4433
4434static bool
4435sparc_legitimate_constant_p (machine_mode mode, rtx x)
4436{
4437  switch (GET_CODE (x))
4438    {
4439    case CONST:
4440    case SYMBOL_REF:
4441      if (sparc_tls_referenced_p (x))
4442	return false;
4443      break;
4444
4445    case CONST_DOUBLE:
4446      /* Floating point constants are generally not ok.
4447	 The only exception is 0.0 and all-ones in VIS.  */
4448      if (TARGET_VIS
4449	  && SCALAR_FLOAT_MODE_P (mode)
4450	  && (const_zero_operand (x, mode)
4451	      || const_all_ones_operand (x, mode)))
4452	return true;
4453
4454      return false;
4455
4456    case CONST_VECTOR:
4457      /* Vector constants are generally not ok.
4458	 The only exception is 0 or -1 in VIS.  */
4459      if (TARGET_VIS
4460	  && (const_zero_operand (x, mode)
4461	      || const_all_ones_operand (x, mode)))
4462	return true;
4463
4464      return false;
4465
4466    default:
4467      break;
4468    }
4469
4470  return true;
4471}
4472
4473/* Determine if a given RTX is a valid constant address.  */
4474
4475bool
4476constant_address_p (rtx x)
4477{
4478  switch (GET_CODE (x))
4479    {
4480    case LABEL_REF:
4481    case CONST_INT:
4482    case HIGH:
4483      return true;
4484
4485    case CONST:
4486      if (flag_pic && pic_address_needs_scratch (x))
4487	return false;
4488      return sparc_legitimate_constant_p (Pmode, x);
4489
4490    case SYMBOL_REF:
4491      return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4492
4493    default:
4494      return false;
4495    }
4496}
4497
4498/* Nonzero if the constant value X is a legitimate general operand
4499   when generating PIC code.  It is given that flag_pic is on and
4500   that X satisfies CONSTANT_P.  */
4501
4502bool
4503legitimate_pic_operand_p (rtx x)
4504{
4505  if (pic_address_needs_scratch (x))
4506    return false;
4507  if (sparc_tls_referenced_p (x))
4508    return false;
4509  return true;
4510}
4511
4512/* Return true if X is a representation of the PIC register.  */
4513
4514static bool
4515sparc_pic_register_p (rtx x)
4516{
4517  if (!REG_P (x) || !pic_offset_table_rtx)
4518    return false;
4519
4520  if (x == pic_offset_table_rtx)
4521    return true;
4522
4523  if (!HARD_REGISTER_P (pic_offset_table_rtx)
4524      && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4525      && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4526    return true;
4527
4528  return false;
4529}
4530
4531#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
4532  (CONST_INT_P (X)					\
4533   && INTVAL (X) >= -0x1000				\
4534   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4535
4536#define RTX_OK_FOR_OLO10_P(X, MODE)			\
4537  (CONST_INT_P (X)					\
4538   && INTVAL (X) >= -0x1000				\
4539   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
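
/* E.g. for DImode (size 8), RTX_OK_FOR_OFFSET_P accepts [-4096, 4088] so
   that the displacement of every word of the access still fits in a simm13
   field; RTX_OK_FOR_OLO10_P further reserves headroom for a %lo() value of
   up to 0x3ff on top of the constant (a sketch of the rationale).  */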
4540
4541/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4542
4543   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4544   ordinarily.  This changes a bit when generating PIC.  */
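
/* In practice the accepted forms boil down to (illustrative assembly):

     reg                 ld [%o0], ...
     reg + reg           ld [%o0+%o1], ...
     reg + simm13        ld [%o0+64], ...
     lo_sum (reg, imm)   ld [%o0+%lo(sym)], ...

   with the PIC, TLS and OLO10 refinements checked below.  */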
4545
4546static bool
4547sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4548{
4549  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4550
4551  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4552    rs1 = addr;
4553  else if (GET_CODE (addr) == PLUS)
4554    {
4555      rs1 = XEXP (addr, 0);
4556      rs2 = XEXP (addr, 1);
4557
      /* Canonicalize.  REG comes first; if there are no regs,
	 LO_SUM comes first.  */
4560      if (!REG_P (rs1)
4561	  && GET_CODE (rs1) != SUBREG
4562	  && (REG_P (rs2)
4563	      || GET_CODE (rs2) == SUBREG
4564	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4565	{
4566	  rs1 = XEXP (addr, 1);
4567	  rs2 = XEXP (addr, 0);
4568	}
4569
4570      if ((flag_pic == 1
4571	   && sparc_pic_register_p (rs1)
4572	   && !REG_P (rs2)
4573	   && GET_CODE (rs2) != SUBREG
4574	   && GET_CODE (rs2) != LO_SUM
4575	   && GET_CODE (rs2) != MEM
4576	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4577	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4578	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4579	  || ((REG_P (rs1)
4580	       || GET_CODE (rs1) == SUBREG)
4581	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4582	{
4583	  imm1 = rs2;
4584	  rs2 = NULL;
4585	}
4586      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4587	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4588	{
4589	  /* We prohibit REG + REG for TFmode when there are no quad move insns
4590	     and we consequently need to split.  We do this because REG+REG
4591	     is not an offsettable address.  If we get the situation in reload
4592	     where source and destination of a movtf pattern are both MEMs with
4593	     REG+REG address, then only one of them gets converted to an
4594	     offsettable address.  */
4595	  if (mode == TFmode
4596	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4597	    return 0;
4598
4599	  /* Likewise for TImode, but in all cases.  */
4600	  if (mode == TImode)
4601	    return 0;
4602
	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
4607	  if (TARGET_ARCH32 && !optimize
4608	      && (mode == DFmode || mode == DImode))
4609	    return 0;
4610	}
4611      else if (USE_AS_OFFSETABLE_LO10
4612	       && GET_CODE (rs1) == LO_SUM
4613	       && TARGET_ARCH64
4614	       && ! TARGET_CM_MEDMID
4615	       && RTX_OK_FOR_OLO10_P (rs2, mode))
4616	{
4617	  rs2 = NULL;
4618	  imm1 = XEXP (rs1, 1);
4619	  rs1 = XEXP (rs1, 0);
4620	  if (!CONSTANT_P (imm1)
4621	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4622	    return 0;
4623	}
4624    }
4625  else if (GET_CODE (addr) == LO_SUM)
4626    {
4627      rs1 = XEXP (addr, 0);
4628      imm1 = XEXP (addr, 1);
4629
4630      if (!CONSTANT_P (imm1)
4631	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4632	return 0;
4633
4634      /* We can't allow TFmode in 32-bit mode, because an offset greater
4635	 than the alignment (8) may cause the LO_SUM to overflow.  */
4636      if (mode == TFmode && TARGET_ARCH32)
4637	return 0;
4638
4639      /* During reload, accept the HIGH+LO_SUM construct generated by
4640	 sparc_legitimize_reload_address.  */
4641      if (reload_in_progress
4642	  && GET_CODE (rs1) == HIGH
4643	  && XEXP (rs1, 0) == imm1)
4644	return 1;
4645    }
4646  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4647    return 1;
4648  else
4649    return 0;
4650
4651  if (GET_CODE (rs1) == SUBREG)
4652    rs1 = SUBREG_REG (rs1);
4653  if (!REG_P (rs1))
4654    return 0;
4655
4656  if (rs2)
4657    {
4658      if (GET_CODE (rs2) == SUBREG)
4659	rs2 = SUBREG_REG (rs2);
4660      if (!REG_P (rs2))
4661	return 0;
4662    }
4663
4664  if (strict)
4665    {
4666      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4667	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4668	return 0;
4669    }
4670  else
4671    {
4672      if ((! SPARC_INT_REG_P (REGNO (rs1))
4673	   && REGNO (rs1) != FRAME_POINTER_REGNUM
4674	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4675	  || (rs2
4676	      && (! SPARC_INT_REG_P (REGNO (rs2))
4677		  && REGNO (rs2) != FRAME_POINTER_REGNUM
4678		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4679	return 0;
4680    }
4681  return 1;
4682}
4683
4684/* Return the SYMBOL_REF for the tls_get_addr function.  */
4685
4686static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4687
4688static rtx
4689sparc_tls_get_addr (void)
4690{
4691  if (!sparc_tls_symbol)
4692    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4693
4694  return sparc_tls_symbol;
4695}
4696
4697/* Return the Global Offset Table to be used in TLS mode.  */
4698
4699static rtx
4700sparc_tls_got (void)
4701{
4702  /* In PIC mode, this is just the PIC offset table.  */
4703  if (flag_pic)
4704    {
4705      crtl->uses_pic_offset_table = 1;
4706      return pic_offset_table_rtx;
4707    }
4708
4709  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4710     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
4711  if (TARGET_SUN_TLS && TARGET_ARCH32)
4712    {
4713      load_got_register ();
4714      return got_register_rtx;
4715    }
4716
4717  /* In all other cases, we load a new pseudo with the GOT symbol.  */
4718  return copy_to_reg (sparc_got ());
4719}
4720
4721/* Return true if X contains a thread-local symbol.  */
4722
4723static bool
4724sparc_tls_referenced_p (rtx x)
4725{
4726  if (!TARGET_HAVE_TLS)
4727    return false;
4728
4729  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4730    x = XEXP (XEXP (x, 0), 0);
4731
4732  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4733    return true;
4734
4735  /* That's all we handle in sparc_legitimize_tls_address for now.  */
4736  return false;
4737}
4738
4739/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
4740   this (thread-local) address.  */
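
/* A sketch of the four TLS code models handled below:
     global-dynamic: %tgd_* relocations and a call to __tls_get_addr;
     local-dynamic:  one __tls_get_addr call on the %tldm_* module base,
                     then per-symbol %tldo_* offsets added to it;
     initial-exec:   the offset is loaded from the GOT via %tie_* and
                     added to the thread pointer (%g7);
     local-exec:     the offset is built with %tle_hix22/%tle_lox10 and
                     added to %g7 directly.  */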
4741
4742static rtx
4743sparc_legitimize_tls_address (rtx addr)
4744{
4745  rtx temp1, temp2, temp3, ret, o0, got;
4746  rtx_insn *insn;
4747
4748  gcc_assert (can_create_pseudo_p ());
4749
4750  if (GET_CODE (addr) == SYMBOL_REF)
4751    /* Although the various sethi/or sequences generate SImode values, many of
4752       them can be transformed by the linker when relaxing and, if relaxing to
4753       local-exec, will become a sethi/xor pair, which is signed and therefore
4754       a full DImode value in 64-bit mode.  Thus we must use Pmode, lest these
4755       values be spilled onto the stack in 64-bit mode.  */
4756    switch (SYMBOL_REF_TLS_MODEL (addr))
4757      {
4758      case TLS_MODEL_GLOBAL_DYNAMIC:
4759	start_sequence ();
4760	temp1 = gen_reg_rtx (Pmode);
4761	temp2 = gen_reg_rtx (Pmode);
4762	ret = gen_reg_rtx (Pmode);
4763	o0 = gen_rtx_REG (Pmode, 8);
4764	got = sparc_tls_got ();
4765	if (TARGET_ARCH32)
4766	  {
4767	    emit_insn (gen_tgd_hi22si (temp1, addr));
4768	    emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4769	    emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4770	    insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4771						   addr, const1_rtx));
4772	  }
4773	else
4774	  {
4775	    emit_insn (gen_tgd_hi22di (temp1, addr));
4776	    emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4777	    emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4778	    insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4779						   addr, const1_rtx));
4780	  }
4781	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4782	RTL_CONST_CALL_P (insn) = 1;
4783	insn = get_insns ();
4784	end_sequence ();
4785	emit_libcall_block (insn, ret, o0, addr);
4786	break;
4787
4788      case TLS_MODEL_LOCAL_DYNAMIC:
4789	start_sequence ();
4790	temp1 = gen_reg_rtx (Pmode);
4791	temp2 = gen_reg_rtx (Pmode);
4792	temp3 = gen_reg_rtx (Pmode);
4793	ret = gen_reg_rtx (Pmode);
4794	o0 = gen_rtx_REG (Pmode, 8);
4795	got = sparc_tls_got ();
4796	if (TARGET_ARCH32)
4797	  {
4798	    emit_insn (gen_tldm_hi22si (temp1));
4799	    emit_insn (gen_tldm_lo10si (temp2, temp1));
4800	    emit_insn (gen_tldm_addsi (o0, got, temp2));
4801	    insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4802						    const1_rtx));
4803	  }
4804	else
4805	  {
4806	    emit_insn (gen_tldm_hi22di (temp1));
4807	    emit_insn (gen_tldm_lo10di (temp2, temp1));
4808	    emit_insn (gen_tldm_adddi (o0, got, temp2));
4809	    insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4810						    const1_rtx));
4811	  }
4812	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4813	RTL_CONST_CALL_P (insn) = 1;
4814	insn = get_insns ();
4815	end_sequence ();
	/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
	   share the LD_BASE result with other LD model accesses.  */
4818	emit_libcall_block (insn, temp3, o0,
4819			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4820					    UNSPEC_TLSLD_BASE));
4821	temp1 = gen_reg_rtx (Pmode);
4822	temp2 = gen_reg_rtx (Pmode);
4823	if (TARGET_ARCH32)
4824	  {
4825	    emit_insn (gen_tldo_hix22si (temp1, addr));
4826	    emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4827	    emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4828	  }
4829	else
4830	  {
4831	    emit_insn (gen_tldo_hix22di (temp1, addr));
4832	    emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4833	    emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4834	  }
4835	break;
4836
4837      case TLS_MODEL_INITIAL_EXEC:
4838	temp1 = gen_reg_rtx (Pmode);
4839	temp2 = gen_reg_rtx (Pmode);
4840	temp3 = gen_reg_rtx (Pmode);
4841	got = sparc_tls_got ();
4842	if (TARGET_ARCH32)
4843	  {
4844	    emit_insn (gen_tie_hi22si (temp1, addr));
4845	    emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4846	    emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4847	  }
4848	else
4849	  {
4850	    emit_insn (gen_tie_hi22di (temp1, addr));
4851	    emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4852	    emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4853	  }
4854        if (TARGET_SUN_TLS)
4855	  {
4856	    ret = gen_reg_rtx (Pmode);
4857	    if (TARGET_ARCH32)
4858	      emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4859					temp3, addr));
4860	    else
4861	      emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4862					temp3, addr));
4863	  }
4864	else
4865	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4866	break;
4867
4868      case TLS_MODEL_LOCAL_EXEC:
4869	temp1 = gen_reg_rtx (Pmode);
4870	temp2 = gen_reg_rtx (Pmode);
4871	if (TARGET_ARCH32)
4872	  {
4873	    emit_insn (gen_tle_hix22si (temp1, addr));
4874	    emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4875	  }
4876	else
4877	  {
4878	    emit_insn (gen_tle_hix22di (temp1, addr));
4879	    emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4880	  }
4881	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4882	break;
4883
4884      default:
4885	gcc_unreachable ();
4886      }
4887
4888  else if (GET_CODE (addr) == CONST)
4889    {
4890      rtx base, offset;
4891
4892      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4893
4894      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4895      offset = XEXP (XEXP (addr, 0), 1);
4896
4897      base = force_operand (base, NULL_RTX);
4898      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4899	offset = force_reg (Pmode, offset);
4900      ret = gen_rtx_PLUS (Pmode, base, offset);
4901    }
4902
4903  else
4904    gcc_unreachable ();  /* for now ... */
4905
4906  return ret;
4907}
4908
4909/* Legitimize PIC addresses.  If the address is already position-independent,
4910   we return ORIG.  Newly generated position-independent addresses go into a
4911   reg.  This is REG if nonzero, otherwise we allocate register(s) as
4912   necessary.  */
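
/* E.g. with -fPIC (flag_pic == 2) a global symbol access is built roughly
   as: load the symbol's GOT-slot offset with a sethi/or-style pair into a
   temporary, then load the address from [%l7 + temp], %l7 being the PIC
   register (a sketch; the patterns below use GOTDATA operations when
   available).  */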
4913
4914static rtx
4915sparc_legitimize_pic_address (rtx orig, rtx reg)
4916{
4917  if (GET_CODE (orig) == SYMBOL_REF
4918      /* See the comment in sparc_expand_move.  */
4919      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4920    {
4921      bool gotdata_op = false;
4922      rtx pic_ref, address;
4923      rtx_insn *insn;
4924
4925      if (!reg)
4926	{
4927	  gcc_assert (can_create_pseudo_p ());
4928	  reg = gen_reg_rtx (Pmode);
4929	}
4930
4931      if (flag_pic == 2)
4932	{
4933	  /* If not during reload, allocate another temp reg here for loading
4934	     in the address, so that these instructions can be optimized
4935	     properly.  */
4936	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4937
4938	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4939	     won't get confused into thinking that these two instructions
4940	     are loading in the true address of the symbol.  If in the
4941	     future a PIC rtx exists, that should be used instead.  */
4942	  if (TARGET_ARCH64)
4943	    {
4944	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
4945	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4946	    }
4947	  else
4948	    {
4949	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
4950	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4951	    }
4952
4953	  address = temp_reg;
4954	  gotdata_op = true;
4955	}
4956      else
4957	address = orig;
4958
4959      crtl->uses_pic_offset_table = 1;
4960      if (gotdata_op)
4961	{
4962	  if (TARGET_ARCH64)
4963	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4964							pic_offset_table_rtx,
4965							address, orig));
4966	  else
4967	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4968							pic_offset_table_rtx,
4969							address, orig));
4970	}
4971      else
4972	{
4973	  pic_ref
4974	    = gen_const_mem (Pmode,
4975			     gen_rtx_PLUS (Pmode,
4976					   pic_offset_table_rtx, address));
4977	  insn = emit_move_insn (reg, pic_ref);
4978	}
4979
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by the loop optimizer.  */
4982      set_unique_reg_note (insn, REG_EQUAL, orig);
4983      return reg;
4984    }
4985  else if (GET_CODE (orig) == CONST)
4986    {
4987      rtx base, offset;
4988
4989      if (GET_CODE (XEXP (orig, 0)) == PLUS
4990	  && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4991	return orig;
4992
4993      if (!reg)
4994	{
4995	  gcc_assert (can_create_pseudo_p ());
4996	  reg = gen_reg_rtx (Pmode);
4997	}
4998
4999      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5000      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
5001      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
5002			 		     base == reg ? NULL_RTX : reg);
5003
5004      if (GET_CODE (offset) == CONST_INT)
5005	{
5006	  if (SMALL_INT (offset))
5007	    return plus_constant (Pmode, base, INTVAL (offset));
5008	  else if (can_create_pseudo_p ())
5009	    offset = force_reg (Pmode, offset);
5010	  else
5011	    /* If we reach here, then something is seriously wrong.  */
5012	    gcc_unreachable ();
5013	}
5014      return gen_rtx_PLUS (Pmode, base, offset);
5015    }
5016  else if (GET_CODE (orig) == LABEL_REF)
5017    /* ??? We ought to be checking that the register is live instead, in case
5018       it is eliminated.  */
5019    crtl->uses_pic_offset_table = 1;
5020
5021  return orig;
5022}
5023
5024/* Try machine-dependent ways of modifying an illegitimate address X
5025   to be legitimate.  If we find one, return the new, valid address.
5026
5027   OLDX is the address as it was before break_out_memory_refs was called.
5028   In some cases it is useful to look at this to decide what needs to be done.
5029
5030   MODE is the mode of the operand pointed to by X.
5031
5032   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
5033
5034static rtx
5035sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5036			  machine_mode mode)
5037{
5038  rtx orig_x = x;
5039
5040  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
5041    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5042		      force_operand (XEXP (x, 0), NULL_RTX));
5043  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
5044    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5045		      force_operand (XEXP (x, 1), NULL_RTX));
5046  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
5047    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
5048		      XEXP (x, 1));
5049  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
5050    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5051		      force_operand (XEXP (x, 1), NULL_RTX));
5052
5053  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
5054    return x;
5055
5056  if (sparc_tls_referenced_p (x))
5057    x = sparc_legitimize_tls_address (x);
5058  else if (flag_pic)
5059    x = sparc_legitimize_pic_address (x, NULL_RTX);
5060  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
5061    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5062		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
5063  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
5064    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5065		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
5066  else if (GET_CODE (x) == SYMBOL_REF
5067	   || GET_CODE (x) == CONST
5068	   || GET_CODE (x) == LABEL_REF)
5069    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5070
5071  return x;
5072}
5073
5074/* Delegitimize an address that was legitimized by the above function.  */
5075
5076static rtx
5077sparc_delegitimize_address (rtx x)
5078{
5079  x = delegitimize_mem_from_attrs (x);
5080
5081  if (GET_CODE (x) == LO_SUM)
5082    x = XEXP (x, 1);
5083
5084  if (GET_CODE (x) == UNSPEC)
5085    switch (XINT (x, 1))
5086      {
5087      case UNSPEC_MOVE_PIC:
5088      case UNSPEC_TLSLE:
5089	x = XVECEXP (x, 0, 0);
5090	gcc_assert (GET_CODE (x) == SYMBOL_REF);
5091	break;
5092      case UNSPEC_MOVE_GOTDATA:
5093	x = XVECEXP (x, 0, 2);
5094	gcc_assert (GET_CODE (x) == SYMBOL_REF);
5095	break;
5096      default:
5097	break;
5098      }
5099
5100  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
5101  if (GET_CODE (x) == MINUS
5102      && (XEXP (x, 0) == got_register_rtx
5103	  || sparc_pic_register_p (XEXP (x, 0))))
5104    {
5105      rtx y = XEXP (x, 1);
5106
5107      if (GET_CODE (y) == LO_SUM)
5108	y = XEXP (y, 1);
5109
5110      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5111	{
5112	  x = XVECEXP (y, 0, 0);
5113	  gcc_assert (GET_CODE (x) == LABEL_REF
5114		      || (GET_CODE (x) == CONST
5115			  && GET_CODE (XEXP (x, 0)) == PLUS
5116			  && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5117			  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5118	}
5119    }
5120
5121  return x;
5122}
5123
5124/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
5125   replace the input X, or the original X if no replacement is called for.
5126   The output parameter *WIN is 1 if the calling macro should goto WIN,
5127   0 if it should not.
5128
5129   For SPARC, we wish to handle addresses by splitting them into
5130   HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5131   This cuts the number of extra insns by one.
5132
5133   Do nothing when generating PIC code and the address is a symbolic
5134   operand or requires a scratch register.  */
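
/* For instance (a sketch), a medlow access to symbol VAR reloads as

     sethi %hi(var), %g1
     ld    [%g1+%lo(var)], %o0

   with the LO_SUM kept in the MEM, instead of a sethi/or pair computing
   the full address first.  */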
5135
5136rtx
5137sparc_legitimize_reload_address (rtx x, machine_mode mode,
5138				 int opnum, int type,
5139				 int ind_levels ATTRIBUTE_UNUSED, int *win)
5140{
5141  /* Decompose SImode constants into HIGH+LO_SUM.  */
5142  if (CONSTANT_P (x)
5143      && (mode != TFmode || TARGET_ARCH64)
5144      && GET_MODE (x) == SImode
5145      && GET_CODE (x) != LO_SUM
5146      && GET_CODE (x) != HIGH
5147      && sparc_code_model <= CM_MEDLOW
5148      && !(flag_pic
5149	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5150    {
5151      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5152      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5153		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5154		   opnum, (enum reload_type)type);
5155      *win = 1;
5156      return x;
5157    }
5158
5159  /* We have to recognize what we have already generated above.  */
5160  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5161    {
5162      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5163		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5164		   opnum, (enum reload_type)type);
5165      *win = 1;
5166      return x;
5167    }
5168
5169  *win = 0;
5170  return x;
5171}
5172
5173/* Return true if ADDR (a legitimate address expression)
5174   has an effect that depends on the machine mode it is used for.
5175
5176   In PIC mode,
5177
5178      (mem:HI [%l7+a])
5179
5180   is not equivalent to
5181
5182      (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5183
5184   because [%l7+a+1] is interpreted as the address of (a+1).  */
5185
5187static bool
5188sparc_mode_dependent_address_p (const_rtx addr,
5189				addr_space_t as ATTRIBUTE_UNUSED)
5190{
5191  if (GET_CODE (addr) == PLUS
5192      && sparc_pic_register_p (XEXP (addr, 0))
5193      && symbolic_operand (XEXP (addr, 1), VOIDmode))
5194    return true;
5195
5196  return false;
5197}
5198
5199/* Emit a call instruction with the pattern given by PAT.  ADDR is the
5200   address of the call target.  */
5201
5202void
5203sparc_emit_call_insn (rtx pat, rtx addr)
5204{
5205  rtx_insn *insn;
5206
5207  insn = emit_call_insn (pat);
5208
5209  /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
5210  if (TARGET_VXWORKS_RTP
5211      && flag_pic
5212      && GET_CODE (addr) == SYMBOL_REF
5213      && (SYMBOL_REF_DECL (addr)
5214	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5215	  : !SYMBOL_REF_LOCAL_P (addr)))
5216    {
5217      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5218      crtl->uses_pic_offset_table = 1;
5219    }
5220}
5221
/* Return 1 if MEM, a MEM rtx, is known to be aligned to at least a
   DESIRED-byte boundary.  */
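
/* E.g. the 64-bit move splitters can ask mem_min_alignment (mem, 8) to
   decide whether ldd/std may be used on a double-word memory access
   (an illustrative caller; see sparc.md).  */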
5224
5225int
5226mem_min_alignment (rtx mem, int desired)
5227{
5228  rtx addr, base, offset;
5229
5230  /* If it's not a MEM we can't accept it.  */
5231  if (GET_CODE (mem) != MEM)
5232    return 0;
5233
5234  /* Obviously...  */
5235  if (!TARGET_UNALIGNED_DOUBLES
5236      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5237    return 1;
5238
5239  /* ??? The rest of the function predates MEM_ALIGN so
5240     there is probably a bit of redundancy.  */
5241  addr = XEXP (mem, 0);
5242  base = offset = NULL_RTX;
5243  if (GET_CODE (addr) == PLUS)
5244    {
5245      if (GET_CODE (XEXP (addr, 0)) == REG)
5246	{
5247	  base = XEXP (addr, 0);
5248
	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG-based index upon it will be aligned
	     as well.  */
5253	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5254	    offset = XEXP (addr, 1);
5255	  else
5256	    offset = const0_rtx;
5257	}
5258    }
5259  else if (GET_CODE (addr) == REG)
5260    {
5261      base = addr;
5262      offset = const0_rtx;
5263    }
5264
5265  if (base != NULL_RTX)
5266    {
5267      int regno = REGNO (base);
5268
5269      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5270	{
5271	  /* Check if the compiler has recorded some information
5272	     about the alignment of the base REG.  If reload has
5273	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us an
	     unaligned pointer to the local stack, though.  */
5276	  if (((cfun != 0
5277		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5278	       || (optimize && reload_completed))
5279	      && (INTVAL (offset) & (desired - 1)) == 0)
5280	    return 1;
5281	}
5282      else
5283	{
5284	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5285	    return 1;
5286	}
5287    }
5288  else if (! TARGET_UNALIGNED_DOUBLES
5289	   || CONSTANT_P (addr)
5290	   || GET_CODE (addr) == LO_SUM)
5291    {
5292      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5293	 is true, in which case we can only assume that an access is aligned if
5294	 it is to a constant address, or the address involves a LO_SUM.  */
5295      return 1;
5296    }
5297
5298  /* An obviously unaligned address.  */
5299  return 0;
5300}
5301
5302
/* Vectors to keep interesting information about registers where it can easily
   be found.  We used to use the actual mode value as the bit number, but there
5305   are more than 32 modes now.  Instead we use two tables: one indexed by
5306   hard register number, and one indexed by mode.  */
5307
5308/* The purpose of sparc_mode_class is to shrink the range of modes so that
5309   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
5310   mapped into one sparc_mode_class mode.  */
5311
5312enum sparc_mode_class {
5313  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5314  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5315  CC_MODE, CCFP_MODE
5316};
5317
5318/* Modes for single-word and smaller quantities.  */
5319#define S_MODES \
5320  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5321
5322/* Modes for double-word and smaller quantities.  */
5323#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5324
5325/* Modes for quad-word and smaller quantities.  */
5326#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5327
5328/* Modes for 8-word and smaller quantities.  */
5329#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5330
5331/* Modes for single-float quantities.  */
5332#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5333
5334/* Modes for double-float and smaller quantities.  */
5335#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5336
5337/* Modes for quad-float and smaller quantities.  */
5338#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5339
5340/* Modes for quad-float pairs and smaller quantities.  */
5341#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5342
5343/* Modes for double-float only quantities.  */
5344#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5345
5346/* Modes for quad-float and double-float only quantities.  */
5347#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5348
5349/* Modes for quad-float pairs and double-float only quantities.  */
5350#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5351
5352/* Modes for condition codes.  */
5353#define CC_MODES (1 << (int) CC_MODE)
5354#define CCFP_MODES (1 << (int) CCFP_MODE)
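
/* Sketch of the intended use of these masks (an assumption, mirroring the
   usual register/mode validity check): MODE fits in hard register REGNO when

     (hard_regno_mode_classes[regno] & sparc_mode_class[(int) mode]) != 0

   For instance, DFmode maps to DF_MODE, which is present in D_MODES and
   DF_MODES but absent from SF_MODES.  */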
5355
5356/* Value is 1 if register/mode pair is acceptable on sparc.
5357
5358   The funny mixture of D and T modes is because integer operations
5359   do not specially operate on tetra quantities, so non-quad-aligned
5360   registers can hold quadword quantities (except %o4 and %i4 because
5361   they cross fixed registers).
5362
5363   ??? Note that, despite the settings, non-double-aligned parameter
5364   registers can hold double-word quantities in 32-bit mode.  */
5365
5366/* This points to either the 32-bit or the 64-bit version.  */
5367static const int *hard_regno_mode_classes;
5368
5369static const int hard_32bit_mode_classes[] = {
5370  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5371  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5372  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5373  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5374
5375  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5376  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5377  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5378  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5379
5380  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
5381     and none can hold SFmode/SImode values.  */
5382  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5383  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5384  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5385  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5386
5387  /* %fcc[0123] */
5388  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5389
5390  /* %icc, %sfp, %gsr */
5391  CC_MODES, 0, D_MODES
5392};
5393
5394static const int hard_64bit_mode_classes[] = {
5395  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5396  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5397  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5398  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5399
5400  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5401  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5402  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5403  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5404
5405  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
5406     and none can hold SFmode/SImode values.  */
5407  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5408  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5409  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5410  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5411
5412  /* %fcc[0123] */
5413  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5414
5415  /* %icc, %sfp, %gsr */
5416  CC_MODES, 0, D_MODES
5417};
5418
5419static int sparc_mode_class [NUM_MACHINE_MODES];
5420
5421enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5422
5423static void
5424sparc_init_modes (void)
5425{
5426  int i;
5427
5428  for (i = 0; i < NUM_MACHINE_MODES; i++)
5429    {
5430      machine_mode m = (machine_mode) i;
5431      unsigned int size = GET_MODE_SIZE (m);
5432
5433      switch (GET_MODE_CLASS (m))
5434	{
5435	case MODE_INT:
5436	case MODE_PARTIAL_INT:
5437	case MODE_COMPLEX_INT:
5438	  if (size < 4)
5439	    sparc_mode_class[i] = 1 << (int) H_MODE;
5440	  else if (size == 4)
5441	    sparc_mode_class[i] = 1 << (int) S_MODE;
5442	  else if (size == 8)
5443	    sparc_mode_class[i] = 1 << (int) D_MODE;
5444	  else if (size == 16)
5445	    sparc_mode_class[i] = 1 << (int) T_MODE;
5446	  else if (size == 32)
5447	    sparc_mode_class[i] = 1 << (int) O_MODE;
5448	  else
5449	    sparc_mode_class[i] = 0;
5450	  break;
5451	case MODE_VECTOR_INT:
5452	  if (size == 4)
5453	    sparc_mode_class[i] = 1 << (int) SF_MODE;
5454	  else if (size == 8)
5455	    sparc_mode_class[i] = 1 << (int) DF_MODE;
5456	  else
5457	    sparc_mode_class[i] = 0;
5458	  break;
5459	case MODE_FLOAT:
5460	case MODE_COMPLEX_FLOAT:
5461	  if (size == 4)
5462	    sparc_mode_class[i] = 1 << (int) SF_MODE;
5463	  else if (size == 8)
5464	    sparc_mode_class[i] = 1 << (int) DF_MODE;
5465	  else if (size == 16)
5466	    sparc_mode_class[i] = 1 << (int) TF_MODE;
5467	  else if (size == 32)
5468	    sparc_mode_class[i] = 1 << (int) OF_MODE;
5469	  else
5470	    sparc_mode_class[i] = 0;
5471	  break;
5472	case MODE_CC:
5473	  if (m == CCFPmode || m == CCFPEmode)
5474	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5475	  else
5476	    sparc_mode_class[i] = 1 << (int) CC_MODE;
5477	  break;
5478	default:
5479	  sparc_mode_class[i] = 0;
5480	  break;
5481	}
5482    }
5483
5484  if (TARGET_ARCH64)
5485    hard_regno_mode_classes = hard_64bit_mode_classes;
5486  else
5487    hard_regno_mode_classes = hard_32bit_mode_classes;
5488
5489  /* Initialize the array used by REGNO_REG_CLASS.  */
5490  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5491    {
5492      if (i < 16 && TARGET_V8PLUS)
5493	sparc_regno_reg_class[i] = I64_REGS;
5494      else if (i < 32 || i == FRAME_POINTER_REGNUM)
5495	sparc_regno_reg_class[i] = GENERAL_REGS;
5496      else if (i < 64)
5497	sparc_regno_reg_class[i] = FP_REGS;
5498      else if (i < 96)
5499	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5500      else if (i < 100)
5501	sparc_regno_reg_class[i] = FPCC_REGS;
5502      else
5503	sparc_regno_reg_class[i] = NO_REGS;
5504    }
5505}
5506
5507/* Return whether REGNO, a global or FP register, must be saved/restored.  */
5508
5509static inline bool
5510save_global_or_fp_reg_p (unsigned int regno,
5511			 int leaf_function ATTRIBUTE_UNUSED)
5512{
5513  return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5514}
5515
5516/* Return whether the return address register (%i7) is needed.  */
5517
5518static inline bool
5519return_addr_reg_needed_p (int leaf_function)
5520{
5521  /* If it is live, for example because of __builtin_return_address (0).  */
5522  if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5523    return true;
5524
5525  /* Otherwise, it is needed as save register if %o7 is clobbered.  */
5526  if (!leaf_function
5527      /* Loading the GOT register clobbers %o7.  */
5528      || crtl->uses_pic_offset_table
5529      || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5530    return true;
5531
5532  return false;
5533}
5534
5535/* Return whether REGNO, a local or in register, must be saved/restored.  */
5536
5537static bool
5538save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5539{
5540  /* General case: call-saved registers live at some point.  */
5541  if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5542    return true;
5543
5544  /* Frame pointer register (%fp) if needed.  */
5545  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5546    return true;
5547
5548  /* Return address register (%i7) if needed.  */
5549  if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5550    return true;
5551
5552  /* GOT register (%l7) if needed.  */
5553  if (got_register_rtx && regno == REGNO (got_register_rtx))
5554    return true;
5555
5556  /* If the function accesses prior frames, the frame pointer and the return
5557     address of the previous frame must be saved on the stack.  */
5558  if (crtl->accesses_prior_frames
5559      && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5560    return true;
5561
5562  return false;
5563}
5564
5565/* Compute the frame size required by the function.  This function is called
5566   during the reload pass and also by sparc_expand_prologue.  */
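
/* Worked example (numbers purely illustrative): with 20 bytes of locals,
   no global/FP registers to save and 12 bytes of outgoing arguments, the
   code below computes apparent_frame_size = ROUND_UP (20, 8) = 24 and
   frame_size = 24 + ROUND_UP (12, 8) + FIRST_PARM_OFFSET (cfun->decl),
   finally rounded up by SPARC_STACK_ALIGN.  */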
5567
5568static HOST_WIDE_INT
5569sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5570{
5571  HOST_WIDE_INT frame_size, apparent_frame_size;
5572  int args_size, n_global_fp_regs = 0;
5573  bool save_local_in_regs_p = false;
5574  unsigned int i;
5575
5576  /* If the function allocates dynamic stack space, the dynamic offset is
5577     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
5578  if (leaf_function && !cfun->calls_alloca)
5579    args_size = 0;
5580  else
5581    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5582
5583  /* Calculate space needed for global registers.  */
5584  if (TARGET_ARCH64)
5585    {
5586      for (i = 0; i < 8; i++)
5587	if (save_global_or_fp_reg_p (i, 0))
5588	  n_global_fp_regs += 2;
5589    }
5590  else
5591    {
5592      for (i = 0; i < 8; i += 2)
5593	if (save_global_or_fp_reg_p (i, 0)
5594	    || save_global_or_fp_reg_p (i + 1, 0))
5595	  n_global_fp_regs += 2;
5596    }
5597
5598  /* In the flat window model, find out which local and in registers need to
5599     be saved.  We don't reserve space in the current frame for them as they
5600     will be spilled into the register window save area of the caller's frame.
5601     However, as soon as we use this register window save area, we must create
5602     that of the current frame to make it the live one.  */
5603  if (TARGET_FLAT)
5604    for (i = 16; i < 32; i++)
5605      if (save_local_or_in_reg_p (i, leaf_function))
5606	{
5607	 save_local_in_regs_p = true;
5608	 break;
5609	}
5610
5611  /* Calculate space needed for FP registers.  */
5612  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5613    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5614      n_global_fp_regs += 2;
5615
5616  if (size == 0
5617      && n_global_fp_regs == 0
5618      && args_size == 0
5619      && !save_local_in_regs_p)
5620    frame_size = apparent_frame_size = 0;
5621  else
5622    {
5623      /* Start from the apparent frame size.  */
5624      apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5625
5626      /* We need to add the size of the outgoing argument area.  */
5627      frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5628
5629      /* And that of the register window save area.  */
5630      frame_size += FIRST_PARM_OFFSET (cfun->decl);
5631
5632      /* Finally, bump to the appropriate alignment.  */
5633      frame_size = SPARC_STACK_ALIGN (frame_size);
5634    }
5635
5636  /* Set up values for use in prologue and epilogue.  */
5637  sparc_frame_size = frame_size;
5638  sparc_apparent_frame_size = apparent_frame_size;
5639  sparc_n_global_fp_regs = n_global_fp_regs;
5640  sparc_save_local_in_regs_p = save_local_in_regs_p;
5641
5642  return frame_size;
5643}
5644
5645/* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */
5646
5647int
5648sparc_initial_elimination_offset (int to)
5649{
5650  int offset;
5651
5652  if (to == STACK_POINTER_REGNUM)
5653    offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5654  else
5655    offset = 0;
5656
5657  offset += SPARC_STACK_BIAS;
5658  return offset;
5659}
5660
5661/* Output any necessary .register pseudo-ops.  */
5662
5663void
5664sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5665{
5666  int i;
5667
5668  if (TARGET_ARCH32)
5669    return;
5670
5671  /* Check if %g[2367] were used without
5672     .register being printed for them already.  */
5673  for (i = 2; i < 8; i++)
5674    {
5675      if (df_regs_ever_live_p (i)
5676	  && ! sparc_hard_reg_printed [i])
5677	{
5678	  sparc_hard_reg_printed [i] = 1;
5679	  /* %g7 is used as TLS base register, use #ignore
5680	     for it instead of #scratch.  */
5681	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5682		   i == 7 ? "ignore" : "scratch");
5683	}
5684      if (i == 3) i = 5;
5685    }
5686}
5687
5688#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5689
5690#if PROBE_INTERVAL > 4096
5691#error Cannot use indexed addressing mode for stack probing
5692#endif
5693
5694/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5695   inclusive.  These are offsets from the current stack pointer.
5696
5697   Note that we don't use the REG+REG addressing mode for the probes because
   of the stack bias in 64-bit mode.  And it doesn't really buy us anything,
   so the advantage of having a single version of the code wins here.  */
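
/* As an example (a sketch assuming PROBE_INTERVAL == 4096): for FIRST == 4096
   and SIZE == 2048, the first case below emits the equivalent of

	%g1 = 4096
	%g1 = %sp - %g1
	st	%g0, [%g1 - 2048]

   i.e. a single probe at SP - 4096 - 2048 (stack bias ignored).  */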
5700
5701static void
5702sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5703{
5704  rtx g1 = gen_rtx_REG (Pmode, 1);
5705
5706  /* See if we have a constant small number of probes to generate.  If so,
5707     that's the easy case.  */
5708  if (size <= PROBE_INTERVAL)
5709    {
5710      emit_move_insn (g1, GEN_INT (first));
5711      emit_insn (gen_rtx_SET (g1,
5712			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5713      emit_stack_probe (plus_constant (Pmode, g1, -size));
5714    }
5715
  /* The run-time loop is made up of 9 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
5718  else if (size <= 4 * PROBE_INTERVAL)
5719    {
5720      HOST_WIDE_INT i;
5721
5722      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5723      emit_insn (gen_rtx_SET (g1,
5724			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5725      emit_stack_probe (g1);
5726
5727      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5728	 it exceeds SIZE.  If only two probes are needed, this will not
5729	 generate any code.  Then probe at FIRST + SIZE.  */
5730      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5731	{
5732	  emit_insn (gen_rtx_SET (g1,
5733				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5734	  emit_stack_probe (g1);
5735	}
5736
5737      emit_stack_probe (plus_constant (Pmode, g1,
5738				       (i - PROBE_INTERVAL) - size));
5739    }
5740
5741  /* Otherwise, do the same as above, but in a loop.  Note that we must be
5742     extra careful with variables wrapping around because we might be at
5743     the very top (or the very bottom) of the address space and we have
5744     to be able to handle this case properly; in particular, we use an
5745     equality test for the loop condition.  */
5746  else
5747    {
5748      HOST_WIDE_INT rounded_size;
5749      rtx g4 = gen_rtx_REG (Pmode, 4);
5750
5751      emit_move_insn (g1, GEN_INT (first));
5752
5753
5754      /* Step 1: round SIZE to the previous multiple of the interval.  */
5755
5756      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5757      emit_move_insn (g4, GEN_INT (rounded_size));
5758
5759
5760      /* Step 2: compute initial and final value of the loop counter.  */
5761
5762      /* TEST_ADDR = SP + FIRST.  */
5763      emit_insn (gen_rtx_SET (g1,
5764			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5765
5766      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
5767      emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5768
5769
5770      /* Step 3: the loop
5771
5772	 while (TEST_ADDR != LAST_ADDR)
5773	   {
5774	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5775	     probe at TEST_ADDR
5776	   }
5777
5778	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5779	 until it is equal to ROUNDED_SIZE.  */
5780
5781      if (TARGET_ARCH64)
5782	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5783      else
5784	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5785
5786
5787      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5788	 that SIZE is equal to ROUNDED_SIZE.  */
5789
5790      if (size != rounded_size)
5791	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5792    }
5793
5794  /* Make sure nothing is scheduled before we are done.  */
5795  emit_insn (gen_blockage ());
5796}
5797
5798/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
5799   absolute addresses.  */
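
/* The emitted loop looks roughly like this (a sketch assuming
   PROBE_INTERVAL == 4096 and 64-bit mode, where SPARC_STACK_BIAS is 2047;
   the probe store sits in the branch delay slot):

   .LPSRL0:
	add	%g1, -4096, %g1
	cmp	%g1, %g4
	bne,pt	%xcc, .LPSRL0
	 st	%g0, [%g1+2047]  */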
5800
5801const char *
5802output_probe_stack_range (rtx reg1, rtx reg2)
5803{
5804  static int labelno = 0;
5805  char loop_lab[32];
5806  rtx xops[2];
5807
5808  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5809
5810  /* Loop.  */
5811  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5812
5813  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
5814  xops[0] = reg1;
5815  xops[1] = GEN_INT (-PROBE_INTERVAL);
5816  output_asm_insn ("add\t%0, %1, %0", xops);
5817
5818  /* Test if TEST_ADDR == LAST_ADDR.  */
5819  xops[1] = reg2;
5820  output_asm_insn ("cmp\t%0, %1", xops);
5821
5822  /* Probe at TEST_ADDR and branch.  */
5823  if (TARGET_ARCH64)
5824    fputs ("\tbne,pt\t%xcc,", asm_out_file);
5825  else
5826    fputs ("\tbne\t", asm_out_file);
5827  assemble_name_raw (asm_out_file, loop_lab);
5828  fputc ('\n', asm_out_file);
5829  xops[1] = GEN_INT (SPARC_STACK_BIAS);
5830  output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5831
5832  return "";
5833}
5834
5835/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5836   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
5837   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
5838   is the action to be performed if SAVE_P returns true and ACTION_FALSE
5839   the action to be performed if it returns false.  Return the new offset.  */
5840
5841typedef bool (*sorr_pred_t) (unsigned int, int);
5842typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5843
5844static int
5845emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5846			   int offset, int leaf_function, sorr_pred_t save_p,
5847			   sorr_act_t action_true, sorr_act_t action_false)
5848{
5849  unsigned int i;
5850  rtx mem;
5851  rtx_insn *insn;
5852
5853  if (TARGET_ARCH64 && high <= 32)
5854    {
5855      int fp_offset = -1;
5856
5857      for (i = low; i < high; i++)
5858	{
5859	  if (save_p (i, leaf_function))
5860	    {
5861	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
5862							  base, offset));
5863	      if (action_true == SORR_SAVE)
5864		{
5865		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5866		  RTX_FRAME_RELATED_P (insn) = 1;
5867		}
5868	      else  /* action_true == SORR_RESTORE */
5869		{
5870		  /* The frame pointer must be restored last since its old
		     value may be used as the base address for the frame.
		     This is problematic in 64-bit mode only because of the
		     lack of a double-word load instruction.  */
5874		  if (i == HARD_FRAME_POINTER_REGNUM)
5875		    fp_offset = offset;
5876		  else
5877		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
5878		}
5879	      offset += 8;
5880	    }
5881	  else if (action_false == SORR_ADVANCE)
5882	    offset += 8;
5883	}
5884
5885      if (fp_offset >= 0)
5886	{
5887	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5888	  emit_move_insn (hard_frame_pointer_rtx, mem);
5889	}
5890    }
5891  else
5892    {
5893      for (i = low; i < high; i += 2)
5894	{
5895	  bool reg0 = save_p (i, leaf_function);
5896	  bool reg1 = save_p (i + 1, leaf_function);
5897	  machine_mode mode;
5898	  int regno;
5899
5900	  if (reg0 && reg1)
5901	    {
5902	      mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5903	      regno = i;
5904	    }
5905	  else if (reg0)
5906	    {
5907	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5908	      regno = i;
5909	    }
5910	  else if (reg1)
5911	    {
5912	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5913	      regno = i + 1;
5914	      offset += 4;
5915	    }
5916	  else
5917	    {
5918	      if (action_false == SORR_ADVANCE)
5919		offset += 8;
5920	      continue;
5921	    }
5922
5923	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5924	  if (action_true == SORR_SAVE)
5925	    {
5926	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5927	      RTX_FRAME_RELATED_P (insn) = 1;
5928	      if (mode == DImode)
5929		{
5930		  rtx set1, set2;
5931		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5932							      offset));
5933		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5934		  RTX_FRAME_RELATED_P (set1) = 1;
5935		  mem
5936		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
5937							    offset + 4));
5938		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5939		  RTX_FRAME_RELATED_P (set2) = 1;
5940		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5941				gen_rtx_PARALLEL (VOIDmode,
5942						  gen_rtvec (2, set1, set2)));
5943		}
5944	    }
5945	  else  /* action_true == SORR_RESTORE */
5946	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
5947
5948	  /* Bump and round down to double word
5949	     in case we already bumped by 4.  */
5950	  offset = ROUND_DOWN (offset + 8, 8);
5951	}
5952    }
5953
5954  return offset;
5955}
5956
5957/* Emit code to adjust BASE to OFFSET.  Return the new base.  */
5958
5959static rtx
5960emit_adjust_base_to_offset (rtx base, int offset)
5961{
5962  /* ??? This might be optimized a little as %g1 might already have a
5963     value close enough that a single add insn will do.  */
5964  /* ??? Although, all of this is probably only a temporary fix because
5965     if %g1 can hold a function result, then sparc_expand_epilogue will
5966     lose (the result will be clobbered).  */
5967  rtx new_base = gen_rtx_REG (Pmode, 1);
5968  emit_move_insn (new_base, GEN_INT (offset));
5969  emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5970  return new_base;
5971}
5972
5973/* Emit code to save/restore call-saved global and FP registers.  */
5974
5975static void
5976emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5977{
5978  if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5979    {
      base = emit_adjust_base_to_offset (base, offset);
5981      offset = 0;
5982    }
5983
5984  offset
5985    = emit_save_or_restore_regs (0, 8, base, offset, 0,
5986				 save_global_or_fp_reg_p, action, SORR_NONE);
5987  emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5988			     save_global_or_fp_reg_p, action, SORR_NONE);
5989}
5990
5991/* Emit code to save/restore call-saved local and in registers.  */
5992
5993static void
5994emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5995{
5996  if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5997    {
      base = emit_adjust_base_to_offset (base, offset);
5999      offset = 0;
6000    }
6001
6002  emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
6003			     save_local_or_in_reg_p, action, SORR_ADVANCE);
6004}
6005
6006/* Emit a window_save insn.  */
6007
6008static rtx_insn *
6009emit_window_save (rtx increment)
6010{
6011  rtx_insn *insn = emit_insn (gen_window_save (increment));
6012  RTX_FRAME_RELATED_P (insn) = 1;
6013
6014  /* The incoming return address (%o7) is saved in %i7.  */
6015  add_reg_note (insn, REG_CFA_REGISTER,
6016		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
6017			     gen_rtx_REG (Pmode,
6018					  INCOMING_RETURN_ADDR_REGNUM)));
6019
6020  /* The window save event.  */
6021  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
6022
6023  /* The CFA is %fp, the hard frame pointer.  */
6024  add_reg_note (insn, REG_CFA_DEF_CFA,
6025		plus_constant (Pmode, hard_frame_pointer_rtx,
6026			       INCOMING_FRAME_SP_OFFSET));
6027
6028  return insn;
6029}
6030
6031/* Generate an increment for the stack pointer.  */
6032
6033static rtx
6034gen_stack_pointer_inc (rtx increment)
6035{
6036  return gen_rtx_SET (stack_pointer_rtx,
6037		      gen_rtx_PLUS (Pmode,
6038				    stack_pointer_rtx,
6039				    increment));
6040}
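
/* E.g. gen_stack_pointer_inc (GEN_INT (-104)) builds
   (set (reg %sp) (plus (reg %sp) (const_int -104))), the RTL form of
   "add %sp, -104, %sp"; -104 is just an illustrative frame size.  */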
6041
6042/* Expand the function prologue.  The prologue is responsible for reserving
6043   storage for the frame, saving the call-saved registers and loading the
6044   GOT register if needed.  */
6045
6046void
6047sparc_expand_prologue (void)
6048{
6049  HOST_WIDE_INT size;
6050  rtx_insn *insn;
6051
6052  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
6053     on the final value of the flag means deferring the prologue/epilogue
6054     expansion until just before the second scheduling pass, which is too
6055     late to emit multiple epilogues or return insns.
6056
6057     Of course we are making the assumption that the value of the flag
6058     will not change between now and its final value.  Of the three parts
6059     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two are set to true
6061     (otherwise the last value is effectively silenced).
6062
6063     If only_leaf_regs_used returns false, the global predicate will also
6064     be false so the actual frame size calculated below will be positive.
6065     As a consequence, the save_register_window insn will be emitted in
6066     the instruction stream; now this insn explicitly references %fp
6067     which is not a leaf register so only_leaf_regs_used will always
6068     return false subsequently.
6069
6070     If only_leaf_regs_used returns true, we hope that the subsequent
6071     optimization passes won't cause non-leaf registers to pop up.  For
6072     example, the regrename pass has special provisions to not rename to
6073     non-leaf registers in a leaf function.  */
6074  sparc_leaf_function_p
6075    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6076
  size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
6078
6079  if (flag_stack_usage_info)
6080    current_function_static_stack_size = size;
6081
6082  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6083      || flag_stack_clash_protection)
6084    {
6085      if (crtl->is_leaf && !cfun->calls_alloca)
6086	{
6087	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6088	    sparc_emit_probe_stack_range (get_stack_check_protect (),
6089					  size - get_stack_check_protect ());
6090	}
6091      else if (size > 0)
6092	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6093    }
6094
6095  if (size == 0)
6096    ; /* do nothing.  */
6097  else if (sparc_leaf_function_p)
6098    {
6099      rtx size_int_rtx = GEN_INT (-size);
6100
6101      if (size <= 4096)
6102	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6103      else if (size <= 8192)
6104	{
6105	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6106	  RTX_FRAME_RELATED_P (insn) = 1;
6107
6108	  /* %sp is still the CFA register.  */
6109	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6110	}
6111      else
6112	{
6113	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
6114	  emit_move_insn (size_rtx, size_int_rtx);
6115	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6116	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6117			gen_stack_pointer_inc (size_int_rtx));
6118	}
6119
6120      RTX_FRAME_RELATED_P (insn) = 1;
6121
6122      /* Ensure no memory access is done before the frame is established.  */
6123      emit_insn (gen_frame_blockage ());
6124    }
6125  else
6126    {
6127      rtx size_int_rtx = GEN_INT (-size);
6128
6129      if (size <= 4096)
6130	emit_window_save (size_int_rtx);
6131      else if (size <= 8192)
6132	{
6133	  emit_window_save (GEN_INT (-4096));
6134
6135	  /* %sp is not the CFA register anymore.  */
6136	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6137
6138	  /* Likewise.  */
6139	  emit_insn (gen_frame_blockage ());
6140	}
6141      else
6142	{
6143	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
6144	  emit_move_insn (size_rtx, size_int_rtx);
6145	  emit_window_save (size_rtx);
6146	}
6147    }
6148
6149  if (sparc_leaf_function_p)
6150    {
6151      sparc_frame_base_reg = stack_pointer_rtx;
6152      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6153    }
6154  else
6155    {
6156      sparc_frame_base_reg = hard_frame_pointer_rtx;
6157      sparc_frame_base_offset = SPARC_STACK_BIAS;
6158    }
6159
6160  if (sparc_n_global_fp_regs > 0)
6161    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6162				         sparc_frame_base_offset
6163					   - sparc_apparent_frame_size,
6164					 SORR_SAVE);
6165
6166  /* Advertise that the data calculated just above are now valid.  */
6167  sparc_prologue_data_valid_p = true;
6168}
6169
6170/* Expand the function prologue.  The prologue is responsible for reserving
6171   storage for the frame, saving the call-saved registers and loading the
6172   GOT register if needed.  */
6173
6174void
6175sparc_flat_expand_prologue (void)
6176{
6177  HOST_WIDE_INT size;
6178  rtx_insn *insn;
6179
6180  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6181
  size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
6183
6184  if (flag_stack_usage_info)
6185    current_function_static_stack_size = size;
6186
6187  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6188      || flag_stack_clash_protection)
6189    {
6190      if (crtl->is_leaf && !cfun->calls_alloca)
6191	{
6192	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6193	    sparc_emit_probe_stack_range (get_stack_check_protect (),
6194					  size - get_stack_check_protect ());
6195	}
6196      else if (size > 0)
6197	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6198    }
6199
6200  if (sparc_save_local_in_regs_p)
6201    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6202					SORR_SAVE);
6203
6204  if (size == 0)
6205    ; /* do nothing.  */
6206  else
6207    {
6208      rtx size_int_rtx, size_rtx;
6209
6210      size_rtx = size_int_rtx = GEN_INT (-size);
6211
6212      /* We establish the frame (i.e. decrement the stack pointer) first, even
6213	 if we use a frame pointer, because we cannot clobber any call-saved
6214	 registers, including the frame pointer, if we haven't created a new
6215	 register save area, for the sake of compatibility with the ABI.  */
6216      if (size <= 4096)
6217	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6218      else if (size <= 8192 && !frame_pointer_needed)
6219	{
6220	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6221	  RTX_FRAME_RELATED_P (insn) = 1;
6222	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6223	}
6224      else
6225	{
6226	  size_rtx = gen_rtx_REG (Pmode, 1);
6227	  emit_move_insn (size_rtx, size_int_rtx);
6228	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6229	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
6230			gen_stack_pointer_inc (size_int_rtx));
6231	}
6232      RTX_FRAME_RELATED_P (insn) = 1;
6233
6234      /* Ensure no memory access is done before the frame is established.  */
6235      emit_insn (gen_frame_blockage ());
6236
6237      if (frame_pointer_needed)
6238	{
6239	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6240					 gen_rtx_MINUS (Pmode,
6241							stack_pointer_rtx,
6242							size_rtx)));
6243	  RTX_FRAME_RELATED_P (insn) = 1;
6244
6245	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
6246			gen_rtx_SET (hard_frame_pointer_rtx,
6247				     plus_constant (Pmode, stack_pointer_rtx,
6248						    size)));
6249	}
6250
6251      if (return_addr_reg_needed_p (sparc_leaf_function_p))
6252	{
6253	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6254	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6255
6256	  insn = emit_move_insn (i7, o7);
6257	  RTX_FRAME_RELATED_P (insn) = 1;
6258
6259	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6260
6261	  /* Prevent this instruction from ever being considered dead,
6262	     even if this function has no epilogue.  */
6263	  emit_use (i7);
6264	}
6265    }
6266
6267  if (frame_pointer_needed)
6268    {
6269      sparc_frame_base_reg = hard_frame_pointer_rtx;
6270      sparc_frame_base_offset = SPARC_STACK_BIAS;
6271    }
6272  else
6273    {
6274      sparc_frame_base_reg = stack_pointer_rtx;
6275      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6276    }
6277
6278  if (sparc_n_global_fp_regs > 0)
6279    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6280				         sparc_frame_base_offset
6281					   - sparc_apparent_frame_size,
6282					 SORR_SAVE);
6283
6284  /* Advertise that the data calculated just above are now valid.  */
6285  sparc_prologue_data_valid_p = true;
6286}
6287
6288/* This function generates the assembly code for function entry, which boils
6289   down to emitting the necessary .register directives.  */
6290
6291static void
6292sparc_asm_function_prologue (FILE *file)
6293{
6294  /* Check that the assumption we made in sparc_expand_prologue is valid.  */
6295  if (!TARGET_FLAT)
6296    gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6297
6298  sparc_output_scratch_registers (file);
6299}
6300
6301/* Expand the function epilogue, either normal or part of a sibcall.
6302   We emit all the instructions except the return or the call.  */
6303
6304void
6305sparc_expand_epilogue (bool for_eh)
6306{
6307  HOST_WIDE_INT size = sparc_frame_size;
6308
6309  if (cfun->calls_alloca)
6310    emit_insn (gen_frame_blockage ());
6311
6312  if (sparc_n_global_fp_regs > 0)
6313    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6314				         sparc_frame_base_offset
6315					   - sparc_apparent_frame_size,
6316					 SORR_RESTORE);
6317
6318  if (size == 0 || for_eh)
6319    ; /* do nothing.  */
6320  else if (sparc_leaf_function_p)
6321    {
6322      /* Ensure no memory access is done after the frame is destroyed.  */
6323      emit_insn (gen_frame_blockage ());
6324
6325      if (size <= 4096)
6326	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6327      else if (size <= 8192)
6328	{
6329	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6330	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6331	}
6332      else
6333	{
6334	  rtx reg = gen_rtx_REG (Pmode, 1);
6335	  emit_move_insn (reg, GEN_INT (size));
6336	  emit_insn (gen_stack_pointer_inc (reg));
6337	}
6338    }
6339}
6340
6341/* Expand the function epilogue, either normal or part of a sibcall.
6342   We emit all the instructions except the return or the call.  */
6343
6344void
6345sparc_flat_expand_epilogue (bool for_eh)
6346{
6347  HOST_WIDE_INT size = sparc_frame_size;
6348
6349  if (sparc_n_global_fp_regs > 0)
6350    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6351				         sparc_frame_base_offset
6352					   - sparc_apparent_frame_size,
6353					 SORR_RESTORE);
6354
  /* If we have a frame pointer, we need both to restore it before the
     frame is destroyed and to use its current value in destroying the frame.
6357     Since we don't have an atomic way to do that in the flat window model,
6358     we save the current value into a temporary register (%g1).  */
6359  if (frame_pointer_needed && !for_eh)
6360    emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6361
6362  if (return_addr_reg_needed_p (sparc_leaf_function_p))
6363    emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6364		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6365
6366  if (sparc_save_local_in_regs_p)
6367    emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6368					sparc_frame_base_offset,
6369					SORR_RESTORE);
6370
6371  if (size == 0 || for_eh)
6372    ; /* do nothing.  */
6373  else if (frame_pointer_needed)
6374    {
6375      /* Ensure no memory access is done after the frame is destroyed.  */
6376      emit_insn (gen_frame_blockage ());
6377
6378      emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6379    }
6380  else
6381    {
6382      /* Likewise.  */
6383      emit_insn (gen_frame_blockage ());
6384
6385      if (size <= 4096)
6386	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6387      else if (size <= 8192)
6388	{
6389	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6390	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6391	}
6392      else
6393	{
6394	  rtx reg = gen_rtx_REG (Pmode, 1);
6395	  emit_move_insn (reg, GEN_INT (size));
6396	  emit_insn (gen_stack_pointer_inc (reg));
6397	}
6398    }
6399}
6400
6401/* Return true if it is appropriate to emit `return' instructions in the
6402   body of a function.  */
6403
6404bool
6405sparc_can_use_return_insn_p (void)
6406{
  return sparc_prologue_data_valid_p
	 && sparc_n_global_fp_regs == 0
	 && (TARGET_FLAT
	     ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
	     : (sparc_frame_size == 0 || !sparc_leaf_function_p));
6412}
6413
6414/* This function generates the assembly code for function exit.  */
6415
6416static void
6417sparc_asm_function_epilogue (FILE *file)
6418{
6419  /* If the last two instructions of a function are "call foo; dslot;"
6420     the return address might point to the first instruction in the next
6421     function and we have to output a dummy nop for the sake of sane
6422     backtraces in such cases.  This is pointless for sibling calls since
6423     the return address is explicitly adjusted.  */
6424
6425  rtx_insn *insn = get_last_insn ();
6426
6427  rtx last_real_insn = prev_real_insn (insn);
6428  if (last_real_insn
6429      && NONJUMP_INSN_P (last_real_insn)
6430      && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6431    last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6432
6433  if (last_real_insn
6434      && CALL_P (last_real_insn)
6435      && !SIBLING_CALL_P (last_real_insn))
    fputs ("\tnop\n", file);
6437
6438  sparc_output_deferred_case_vectors ();
6439}
6440
6441/* Output a 'restore' instruction.  */
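
/* For instance (illustrative values): a delay-slot insn

     (set (reg:SI %o0) (plus:SI (reg:SI %i0) (const_int 1)))

   is folded into " restore %i0, 1, %o0", while a null PAT produces a
   plain " restore".  */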
6442
6443static void
6444output_restore (rtx pat)
6445{
6446  rtx operands[3];
6447
6448  if (! pat)
6449    {
6450      fputs ("\t restore\n", asm_out_file);
6451      return;
6452    }
6453
6454  gcc_assert (GET_CODE (pat) == SET);
6455
6456  operands[0] = SET_DEST (pat);
6457  pat = SET_SRC (pat);
6458
6459  switch (GET_CODE (pat))
6460    {
6461      case PLUS:
6462	operands[1] = XEXP (pat, 0);
6463	operands[2] = XEXP (pat, 1);
6464	output_asm_insn (" restore %r1, %2, %Y0", operands);
6465	break;
6466      case LO_SUM:
6467	operands[1] = XEXP (pat, 0);
6468	operands[2] = XEXP (pat, 1);
6469	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6470	break;
6471      case ASHIFT:
6472	operands[1] = XEXP (pat, 0);
6473	gcc_assert (XEXP (pat, 1) == const1_rtx);
6474	output_asm_insn (" restore %r1, %r1, %Y0", operands);
6475	break;
6476      default:
6477	operands[1] = pat;
6478	output_asm_insn (" restore %%g0, %1, %Y0", operands);
6479	break;
6480    }
6481}
6482
6483/* Output a return.  */
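
/* A sketch of the simplest output (the leaf case below): "jmp %o7+8" with
   a nop in the delay slot.  As we read the operand punctuation, %) prints
   the return offset (8, or 12 when the caller expects a struct-value unimp
   insn) and %# emits a nop when the delay slot is empty.  */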
6484
6485const char *
6486output_return (rtx_insn *insn)
6487{
6488  if (crtl->calls_eh_return)
6489    {
6490      /* If the function uses __builtin_eh_return, the eh_return
6491	 machinery occupies the delay slot.  */
6492      gcc_assert (!final_sequence);
6493
6494      if (flag_delayed_branch)
6495	{
6496	  if (!TARGET_FLAT && TARGET_V9)
6497	    fputs ("\treturn\t%i7+8\n", asm_out_file);
6498	  else
6499	    {
6500	      if (!TARGET_FLAT)
6501		fputs ("\trestore\n", asm_out_file);
6502
6503	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
6504	    }
6505
6506	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6507	}
6508      else
6509	{
6510	  if (!TARGET_FLAT)
6511	    fputs ("\trestore\n", asm_out_file);
6512
6513	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6514	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6515	}
6516    }
6517  else if (sparc_leaf_function_p || TARGET_FLAT)
6518    {
6519      /* This is a leaf or flat function so we don't have to bother restoring
6520	 the register window, which frees us from dealing with the convoluted
6521	 semantics of restore/return.  We simply output the jump to the
6522	 return address and the insn in the delay slot (if any).  */
6523
6524      return "jmp\t%%o7+%)%#";
6525    }
6526  else
6527    {
6528      /* This is a regular function so we have to restore the register window.
6529	 We may have a pending insn for the delay slot, which will be either
6530	 combined with the 'restore' instruction or put in the delay slot of
6531	 the 'return' instruction.  */
6532
6533      if (final_sequence)
6534	{
6535	  rtx_insn *delay;
6536	  rtx pat;
6537
6538	  delay = NEXT_INSN (insn);
6539	  gcc_assert (delay);
6540
6541	  pat = PATTERN (delay);
6542
6543	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6544	    {
6545	      epilogue_renumber (&pat, 0);
6546	      return "return\t%%i7+%)%#";
6547	    }
6548	  else
6549	    {
6550	      output_asm_insn ("jmp\t%%i7+%)", NULL);
6551
6552	      /* We're going to output the insn in the delay slot manually.
6553		 Make sure to output its source location first.  */
6554	      PATTERN (delay) = gen_blockage ();
6555	      INSN_CODE (delay) = -1;
6556	      final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6557	      INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6558
6559	      output_restore (pat);
6560	    }
6561	}
6562      else
6563        {
6564	  /* The delay slot is empty.  */
6565	  if (TARGET_V9)
6566	    return "return\t%%i7+%)\n\t nop";
6567	  else if (flag_delayed_branch)
6568	    return "jmp\t%%i7+%)\n\t restore";
6569	  else
6570	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
6571	}
6572    }
6573
6574  return "";
6575}
6576
6577/* Output a sibling call.  */
6578
6579const char *
6580output_sibcall (rtx_insn *insn, rtx call_operand)
6581{
6582  rtx operands[1];
6583
6584  gcc_assert (flag_delayed_branch);
6585
6586  operands[0] = call_operand;
6587
6588  if (sparc_leaf_function_p || TARGET_FLAT)
6589    {
6590      /* This is a leaf or flat function so we don't have to bother restoring
6591	 the register window.  We simply output the jump to the function and
6592	 the insn in the delay slot (if any).  */
6593
6594      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6595
6596      if (final_sequence)
6597	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6598			 operands);
6599      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into a branch if possible.  */
6602	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6603			 operands);
6604    }
6605  else
6606    {
6607      /* This is a regular function so we have to restore the register window.
6608	 We may have a pending insn for the delay slot, which will be combined
6609	 with the 'restore' instruction.  */
6610
6611      output_asm_insn ("call\t%a0, 0", operands);
6612
6613      if (final_sequence)
6614	{
6615	  rtx_insn *delay;
6616	  rtx pat;
6617
6618	  delay = NEXT_INSN (insn);
6619	  gcc_assert (delay);
6620
6621	  pat = PATTERN (delay);
6622
6623	  /* We're going to output the insn in the delay slot manually.
6624	     Make sure to output its source location first.  */
6625	  PATTERN (delay) = gen_blockage ();
6626	  INSN_CODE (delay) = -1;
6627	  final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6628	  INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6629
6630	  output_restore (pat);
6631	}
6632      else
6633	output_restore (NULL_RTX);
6634    }
6635
6636  return "";
6637}
6638
6639/* Functions for handling argument passing.
6640
6641   For 32-bit, the first 6 args are normally in registers and the rest are
6642   pushed.  Any arg that starts within the first 6 words is at least
   partially passed in a register unless its data type forbids it.
6644
6645   For 64-bit, the argument registers are laid out as an array of 16 elements
6646   and arguments are added sequentially.  The first 6 int args and up to the
6647   first 16 fp args (depending on size) are passed in regs.
6648
6649   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
6650   ----    -----   --------   -----   ------------------   ------   -----------
6651    15   [SP+248]              %f31       %f30,%f31         %d30
6652    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
6653    13   [SP+232]              %f27       %f26,%f27         %d26
6654    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
6655    11   [SP+216]              %f23       %f22,%f23         %d22
6656    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
6657     9   [SP+200]              %f19       %f18,%f19         %d18
6658     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
6659     7   [SP+184]              %f15       %f14,%f15         %d14
6660     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
6661     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
6662     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
6663     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
6664     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
6665     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
6666     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
6667
   Here SP = %sp with -mno-stack-bias, and %sp+stack_bias otherwise.
6669
6670   Integral arguments are always passed as 64-bit quantities appropriately
6671   extended.
6672
6673   Passing of floating point values is handled as follows.
6674   If a prototype is in scope:
6675     If the value is in a named argument (i.e. not a stdarg function or a
6676     value not part of the `...') then the value is passed in the appropriate
6677     fp reg.
6678     If the value is part of the `...' and is passed in one of the first 6
6679     slots then the value is passed in the appropriate int reg.
6680     If the value is part of the `...' and is not passed in one of the first 6
6681     slots then the value is passed in memory.
6682   If a prototype is not in scope:
6683     If the value is one of the first 6 arguments the value is passed in the
6684     appropriate integer reg and the appropriate fp reg.
6685     If the value is not one of the first 6 arguments the value is passed in
6686     the appropriate fp reg and in memory.
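
   For example (purely illustrative): given the prototype
   "void f (int a, double b)", A is passed in slot 0 as an integer (%o0)
   and B in slot 1 as a double (%d2), per the table above.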
6687
6688
6689   Summary of the calling conventions implemented by GCC on the SPARC:
6690
6691   32-bit ABI:
6692                                size      argument     return value
6693
6694      small integer              <4       int. reg.      int. reg.
6695      word                        4       int. reg.      int. reg.
6696      double word                 8       int. reg.      int. reg.
6697
6698      _Complex small integer     <8       int. reg.      int. reg.
6699      _Complex word               8       int. reg.      int. reg.
6700      _Complex double word       16        memory        int. reg.
6701
6702      vector integer            <=8       int. reg.       FP reg.
6703      vector integer             >8        memory         memory
6704
6705      float                       4       int. reg.       FP reg.
6706      double                      8       int. reg.       FP reg.
6707      long double                16        memory         memory
6708
6709      _Complex float              8        memory         FP reg.
6710      _Complex double            16        memory         FP reg.
6711      _Complex long double       32        memory         FP reg.
6712
6713      vector float              any        memory         memory
6714
6715      aggregate                 any        memory         memory
6716
6717
6718
6719    64-bit ABI:
6720                                size      argument     return value
6721
6722      small integer              <8       int. reg.      int. reg.
6723      word                        8       int. reg.      int. reg.
6724      double word                16       int. reg.      int. reg.
6725
6726      _Complex small integer    <16       int. reg.      int. reg.
6727      _Complex word              16       int. reg.      int. reg.
6728      _Complex double word       32        memory        int. reg.
6729
6730      vector integer           <=16        FP reg.        FP reg.
6731      vector integer       16<s<=32        memory         FP reg.
6732      vector integer            >32        memory         memory
6733
6734      float                       4        FP reg.        FP reg.
6735      double                      8        FP reg.        FP reg.
6736      long double                16        FP reg.        FP reg.
6737
6738      _Complex float              8        FP reg.        FP reg.
6739      _Complex double            16        FP reg.        FP reg.
6740      _Complex long double       32        memory         FP reg.
6741
6742      vector float             <=16        FP reg.        FP reg.
6743      vector float         16<s<=32        memory         FP reg.
6744      vector float              >32        memory         memory
6745
6746      aggregate                <=16         reg.           reg.
6747      aggregate            16<s<=32        memory          reg.
6748      aggregate                 >32        memory         memory
6749
6750
6751
6752Note #1: complex floating-point types follow the extended SPARC ABIs as
6753implemented by the Sun compiler.
6754
6755Note #2: integer vector types follow the scalar floating-point types
6756conventions to match what is implemented by the Sun VIS SDK.
6757
6758Note #3: floating-point vector types follow the aggregate types
6759conventions.  */
6760
6761
6762/* Maximum number of int regs for args.  */
6763#define SPARC_INT_ARG_MAX 6
6764/* Maximum number of fp regs for args.  */
6765#define SPARC_FP_ARG_MAX 16
6766/* Number of words (partially) occupied for a given size in units.  */
6767#define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
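
/* E.g. CEIL_NWORDS (10) is 3 when UNITS_PER_WORD is 4 (32-bit mode) and
   2 when it is 8 (64-bit mode).  */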
6768
6769/* Handle the INIT_CUMULATIVE_ARGS macro.
6770   Initialize a variable CUM of type CUMULATIVE_ARGS
6771   for a call to a function whose data type is FNTYPE.
6772   For a library call, FNTYPE is 0.  */
6773
6774void
6775init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6776{
6777  cum->words = 0;
6778  cum->prototype_p = fntype && prototype_p (fntype);
6779  cum->libcall_p = !fntype;
6780}
6781
6782/* Handle promotion of pointer and integer arguments.  */
6783
6784static machine_mode
6785sparc_promote_function_mode (const_tree type, machine_mode mode,
6786			     int *punsignedp, const_tree, int)
6787{
6788  if (type && POINTER_TYPE_P (type))
6789    {
6790      *punsignedp = POINTERS_EXTEND_UNSIGNED;
6791      return Pmode;
6792    }
6793
6794  /* Integral arguments are passed as full words, as per the ABI.  */
6795  if (GET_MODE_CLASS (mode) == MODE_INT
6796      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6797    return word_mode;
6798
6799  return mode;
6800}
6801
6802/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
6803
6804static bool
6805sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6806{
6807  return TARGET_ARCH64 ? true : false;
6808}
6809
6810/* Handle the TARGET_PASS_BY_REFERENCE target hook.
6811   Specify whether to pass the argument by reference.  */
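
/* E.g. (illustrative): in 32-bit mode a "long double" (TFmode, 16 bytes)
   is passed by reference while a "double" (8 bytes) is not; in 64-bit mode
   aggregates are passed by reference only above 16 bytes.  */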
6812
6813static bool
6814sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6815{
6816  tree type = arg.type;
6817  machine_mode mode = arg.mode;
6818  if (TARGET_ARCH32)
6819    /* Original SPARC 32-bit ABI says that structures and unions,
6820       and quad-precision floats are passed by reference.
6821       All other base types are passed in registers.
6822
6823       Extended ABI (as implemented by the Sun compiler) says that all
6824       complex floats are passed by reference.  Pass complex integers
6825       in registers up to 8 bytes.  More generally, enforce the 2-word
6826       cap for passing arguments in registers.
6827
6828       Vector ABI (as implemented by the Sun VIS SDK) says that integer
6829       vectors are passed like floats of the same size, that is in
6830       registers up to 8 bytes.  Pass all vector floats by reference
6831       like structure and unions.  */
6832    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6833	    || mode == SCmode
6834	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
6835	    || GET_MODE_SIZE (mode) > 8
6836	    || (type
6837		&& VECTOR_TYPE_P (type)
6838		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6839  else
6840    /* Original SPARC 64-bit ABI says that structures and unions
6841       smaller than 16 bytes are passed in registers, as well as
6842       all other base types.
6843
6844       Extended ABI (as implemented by the Sun compiler) says that
6845       complex floats are passed in registers up to 16 bytes.  Pass
6846       all complex integers in registers up to 16 bytes.  More generally,
6847       enforce the 2-word cap for passing arguments in registers.
6848
6849       Vector ABI (as implemented by the Sun VIS SDK) says that integer
6850       vectors are passed like floats of the same size, that is in
6851       registers (up to 16 bytes).  Pass all vector floats like structure
6852       and unions.  */
6853    return ((type
6854	     && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6855	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6856	    /* Catch CTImode and TCmode.  */
6857	    || GET_MODE_SIZE (mode) > 16);
6858}
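
/* Illustration of the rules above, with hypothetical declarations:

     struct S { int a, b, c; };	  (12 bytes)
     _Complex int ci;		  (8 bytes)

   On TARGET_ARCH32, 'struct S' and a quad-precision 'long double'
   (TFmode, 16 bytes) are passed by reference, while 'ci' fits the
   2-word cap and goes in registers.  On TARGET_ARCH64, 'struct S'
   is below the 16-byte limit and is passed by value.  */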
6859
6860/* Traverse the record TYPE recursively and call FUNC on its fields.
6861   NAMED is true if this is for a named parameter.  DATA is passed
6862   to FUNC for each field.  OFFSET is the starting position and
6863   PACKED is true if we are inside a packed record.  */
6864
6865template <typename T, void Func (const_tree, int, bool, T*)>
6866static void
6867traverse_record_type (const_tree type, bool named, T *data,
6868		      int offset = 0, bool packed = false)
6869{
6870  /* The ABI obviously doesn't specify how packed structures are passed.
6871     These are passed in integer regs if possible, otherwise memory.  */
6872  if (!packed)
6873    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6874      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6875	{
6876	  packed = true;
6877	  break;
6878	}
6879
6880  /* Walk the real fields, but skip those with no size or a zero size.
6881     ??? Fields with variable offset are handled as having zero offset.  */
6882  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6883    if (TREE_CODE (field) == FIELD_DECL)
6884      {
6885	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6886	  continue;
6887
6888	int bitpos = offset;
6889	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6890	  bitpos += int_bit_position (field);
6891
6892	tree field_type = TREE_TYPE (field);
6893	if (TREE_CODE (field_type) == RECORD_TYPE)
6894	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
6895					 packed);
6896	else
6897	  {
6898	    const bool fp_type
6899	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6900	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6901		  data);
6902	  }
6903      }
6904}
6905
6906/* Handle recursive register classifying for structure layout.  */
6907
6908typedef struct
6909{
  bool fp_regs;		/* true if the field is eligible for FP registers.  */
  bool fp_regs_in_first_word;	/* true if such a field is in the first word.  */
6912} classify_data_t;
6913
6914/* A subroutine of function_arg_slotno.  Classify the field.  */
6915
6916inline void
6917classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6918{
6919  if (fp)
6920    {
6921      data->fp_regs = true;
6922      if (bitpos < BITS_PER_WORD)
6923	data->fp_regs_in_first_word = true;
6924    }
6925}
6926
6927/* Compute the slot number to pass an argument in.
6928   Return the slot number or -1 if passing on the stack.
6929
6930   CUM is a variable of type CUMULATIVE_ARGS which gives info about
6931    the preceding args and about the function being called.
6932   MODE is the argument's machine mode.
6933   TYPE is the data type of the argument (as a tree).
6934    This is null for libcalls where that information may
6935    not be available.
6936   NAMED is nonzero if this argument is a named parameter
6937    (otherwise it is an extra parameter matching an ellipsis).
6938   INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
   *PREGNO records the register number to use if the argument has scalar type.
6940   *PPADDING records the amount of padding needed in words.  */
6941
6942static int
6943function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6944		     const_tree type, bool named, bool incoming,
6945		     int *pregno, int *ppadding)
6946{
6947  const int regbase
6948    = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6949  int slotno = cum->words, regno;
6950  enum mode_class mclass = GET_MODE_CLASS (mode);
6951
6952  /* Silence warnings in the callers.  */
6953  *pregno = -1;
6954  *ppadding = -1;
6955
6956  if (type && TREE_ADDRESSABLE (type))
6957    return -1;
6958
6959  /* In 64-bit mode, objects requiring 16-byte alignment get it.  */
6960  if (TARGET_ARCH64
6961      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6962      && (slotno & 1) != 0)
6963    {
6964      slotno++;
6965      *ppadding = 1;
6966    }
6967  else
6968    *ppadding = 0;
6969
6970  /* Vector types deserve special treatment because they are polymorphic wrt
6971     their mode, depending upon whether VIS instructions are enabled.  */
6972  if (type && VECTOR_TYPE_P (type))
6973    {
6974      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6975	{
6976	  /* The SPARC port defines no floating-point vector modes.  */
6977	  gcc_assert (mode == BLKmode);
6978	}
6979      else
6980	{
6981	  /* Integer vector types should either have a vector
6982	     mode or an integral mode, because we are guaranteed
6983	     by pass_by_reference that their size is not greater
6984	     than 16 bytes and TImode is 16-byte wide.  */
6985	  gcc_assert (mode != BLKmode);
6986
6987	  /* Integer vectors are handled like floats as per
6988	     the Sun VIS SDK.  */
6989	  mclass = MODE_FLOAT;
6990	}
6991    }
6992
6993  switch (mclass)
6994    {
6995    case MODE_FLOAT:
6996    case MODE_COMPLEX_FLOAT:
6997    case MODE_VECTOR_INT:
6998      if (TARGET_ARCH64 && TARGET_FPU && named)
6999	{
7000	  /* If all arg slots are filled, then must pass on stack.  */
7001	  if (slotno >= SPARC_FP_ARG_MAX)
7002	    return -1;
7003
7004	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
7005	  /* Arguments filling only one single FP register are
7006	     right-justified in the outer double FP register.  */
7007	  if (GET_MODE_SIZE (mode) <= 4)
7008	    regno++;
7009	  break;
7010	}
7011      /* fallthrough */
7012
7013    case MODE_INT:
7014    case MODE_COMPLEX_INT:
7015      /* If all arg slots are filled, then must pass on stack.  */
7016      if (slotno >= SPARC_INT_ARG_MAX)
7017	return -1;
7018
7019      regno = regbase + slotno;
7020      break;
7021
7022    case MODE_RANDOM:
7023      /* MODE is VOIDmode when generating the actual call.  */
7024      if (mode == VOIDmode)
7025	return -1;
7026
      if (TARGET_ARCH64 && TARGET_FPU && named
7028	  && type
7029	  && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
7030	{
7031	  /* If all arg slots are filled, then must pass on stack.  */
7032	  if (slotno >= SPARC_FP_ARG_MAX)
7033	    return -1;
7034
7035	  if (TREE_CODE (type) == RECORD_TYPE)
7036	    {
7037	      classify_data_t data = { false, false };
7038	      traverse_record_type<classify_data_t, classify_registers>
7039		(type, named, &data);
7040
7041	      if (data.fp_regs)
7042		{
7043		  /* If all FP slots are filled except for the last one and
7044		     there is no FP field in the first word, then must pass
7045		     on stack.  */
7046		  if (slotno >= SPARC_FP_ARG_MAX - 1
7047		      && !data.fp_regs_in_first_word)
7048		    return -1;
7049		}
7050	      else
7051		{
7052		  /* If all int slots are filled, then must pass on stack.  */
7053		  if (slotno >= SPARC_INT_ARG_MAX)
7054		    return -1;
7055		}
7056
7057	      /* PREGNO isn't set since both int and FP regs can be used.  */
7058	      return slotno;
7059	    }
7060
7061	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
7062	}
7063      else
7064	{
7065	  /* If all arg slots are filled, then must pass on stack.  */
7066	  if (slotno >= SPARC_INT_ARG_MAX)
7067	    return -1;
7068
7069	  regno = regbase + slotno;
7070	}
7071      break;
7072
    default:
7074      gcc_unreachable ();
7075    }
7076
7077  *pregno = regno;
7078  return slotno;
7079}
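
/* Slot assignment sketch (register names per the V9 SCD, given here
   only for illustration): for 'void f (int a, double b, float c)' on
   TARGET_ARCH64 with a prototype in scope, 'a' gets slot 0 (%o0),
   'b' gets slot 1 (SPARC_FP_ARG_FIRST + 1 * 2, i.e. %d2), and 'c'
   gets slot 2, bumped to the odd half of its FP pair (%f5) by the
   right-justification rule above.  Integer and FP arguments draw
   from one linear slot sequence.  */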
7080
7081/* Handle recursive register counting/assigning for structure layout.  */
7082
7083typedef struct
7084{
7085  int slotno;		/* slot number of the argument.  */
7086  int regbase;		/* regno of the base register.  */
7087  int intoffset;	/* offset of the first pending integer field.  */
7088  int nregs;		/* number of words passed in registers.  */
7089  bool stack;		/* true if part of the argument is on the stack.  */
7090  rtx ret;		/* return expression being built.  */
7091} assign_data_t;
7092
7093/* A subroutine of function_arg_record_value.  Compute the number of integer
7094   registers to be assigned between PARMS->intoffset and BITPOS.  Return
7095   true if at least one integer register is assigned or false otherwise.  */
7096
7097static bool
7098compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7099{
7100  if (data->intoffset < 0)
7101    return false;
7102
7103  const int intoffset = data->intoffset;
7104  data->intoffset = -1;
7105
7106  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7107  const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7108  const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7109  int nregs = (endbit - startbit) / BITS_PER_WORD;
7110
7111  if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7112    {
7113      nregs = SPARC_INT_ARG_MAX - this_slotno;
7114
7115      /* We need to pass this field (partly) on the stack.  */
      data->stack = true;
7117    }
7118
7119  if (nregs <= 0)
7120    return false;
7121
7122  *pnregs = nregs;
7123  return true;
7124}
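
/* Numeric sketch of the computation above: with intoffset == 8 and
   bitpos == 136 on a 64-bit target, startbit == 0 and endbit == 192,
   so nregs == 3 pending integer words (capped if that would run past
   SPARC_INT_ARG_MAX).  */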
7125
7126/* A subroutine of function_arg_record_value.  Compute the number and the mode
7127   of the FP registers to be assigned for FIELD.  Return true if at least one
7128   FP register is assigned or false otherwise.  */
7129
7130static bool
7131compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7132		   int *pnregs, machine_mode *pmode)
7133{
7134  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7135  machine_mode mode = DECL_MODE (field);
7136  int nregs, nslots;
7137
7138  /* Slots are counted as words while regs are counted as having the size of
7139     the (inner) mode.  */
7140  if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7141    {
7142      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7143      nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7144    }
7145  else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7146    {
7147      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7148      nregs = 2;
7149    }
7150  else
7151    nregs = 1;
7152
7153  nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7154
7155  if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7156    {
7157      nslots = SPARC_FP_ARG_MAX - this_slotno;
7158      nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7159
7160      /* We need to pass this field (partly) on the stack.  */
      data->stack = true;
7162
7163      if (nregs <= 0)
7164	return false;
7165    }
7166
7167  *pnregs = nregs;
7168  *pmode = mode;
7169  return true;
7170}
7171
7172/* A subroutine of function_arg_record_value.  Count the number of registers
7173   to be assigned for FIELD and between PARMS->intoffset and BITPOS.  */
7174
7175inline void
7176count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7177{
7178  if (fp)
7179    {
7180      int nregs;
7181      machine_mode mode;
7182
7183      if (compute_int_layout (bitpos, data, &nregs))
7184	data->nregs += nregs;
7185
7186      if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7187	data->nregs += nregs;
7188    }
7189  else
7190    {
7191      if (data->intoffset < 0)
7192	data->intoffset = bitpos;
7193    }
7194}
7195
7196/* A subroutine of function_arg_record_value.  Assign the bits of the
7197   structure between PARMS->intoffset and BITPOS to integer registers.  */
7198
7199static void
7200assign_int_registers (int bitpos, assign_data_t *data)
7201{
7202  int intoffset = data->intoffset;
7203  machine_mode mode;
7204  int nregs;
7205
7206  if (!compute_int_layout (bitpos, data, &nregs))
7207    return;
7208
7209  /* If this is the trailing part of a word, only load that much into
7210     the register.  Otherwise load the whole register.  Note that in
7211     the latter case we may pick up unwanted bits.  It's not a problem
7212     at the moment but may wish to revisit.  */
7213  if (intoffset % BITS_PER_WORD != 0)
7214    mode = smallest_int_mode_for_size (BITS_PER_WORD
7215				       - intoffset % BITS_PER_WORD);
7216  else
7217    mode = word_mode;
7218
7219  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7220  unsigned int regno = data->regbase + this_slotno;
7221  intoffset /= BITS_PER_UNIT;
7222
7223  do
7224    {
7225      rtx reg = gen_rtx_REG (mode, regno);
7226      XVECEXP (data->ret, 0, data->stack + data->nregs)
7227	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7228      data->nregs += 1;
7229      mode = word_mode;
7230      regno += 1;
7231      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7232    }
7233  while (--nregs > 0);
7234}
7235
7236/* A subroutine of function_arg_record_value.  Assign FIELD at position
7237   BITPOS to FP registers.  */
7238
7239static void
7240assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7241{
7242  int nregs;
7243  machine_mode mode;
7244
7245  if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7246    return;
7247
7248  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7249  int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7250  if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7251    regno++;
7252  int pos = bitpos / BITS_PER_UNIT;
7253
7254  do
7255    {
7256      rtx reg = gen_rtx_REG (mode, regno);
7257      XVECEXP (data->ret, 0, data->stack + data->nregs)
7258	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7259      data->nregs += 1;
7260      regno += GET_MODE_SIZE (mode) / 4;
7261      pos += GET_MODE_SIZE (mode);
7262    }
7263  while (--nregs > 0);
7264}
7265
7266/* A subroutine of function_arg_record_value.  Assign FIELD and the bits of
7267   the structure between PARMS->intoffset and BITPOS to registers.  */
7268
7269inline void
7270assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7271{
7272  if (fp)
7273    {
7274      assign_int_registers (bitpos, data);
7275
7276      assign_fp_registers (field, bitpos, data);
7277    }
7278  else
7279    {
7280      if (data->intoffset < 0)
7281	data->intoffset = bitpos;
7282    }
7283}
7284
7285/* Used by function_arg and function_value to implement the complex
7286   conventions of the 64-bit ABI for passing and returning structures.
7287   Return an expression valid as a return value for the FUNCTION_ARG
7288   and TARGET_FUNCTION_VALUE.
7289
7290   TYPE is the data type of the argument (as a tree).
7291    This is null for libcalls where that information may
7292    not be available.
7293   MODE is the argument's machine mode.
7294   SLOTNO is the index number of the argument's slot in the parameter array.
7295   NAMED is true if this argument is a named parameter
7296    (otherwise it is an extra parameter matching an ellipsis).
7297   REGBASE is the regno of the base register for the parameter array.  */
7298
7299static rtx
7300function_arg_record_value (const_tree type, machine_mode mode,
7301			   int slotno, bool named, int regbase)
7302{
7303  const int size = int_size_in_bytes (type);
7304  assign_data_t data;
7305  int nregs;
7306
7307  data.slotno = slotno;
7308  data.regbase = regbase;
7309
7310  /* Count how many registers we need.  */
7311  data.nregs = 0;
7312  data.intoffset = 0;
7313  data.stack = false;
7314  traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7315
7316  /* Take into account pending integer fields.  */
7317  if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7318    data.nregs += nregs;
7319
7320  /* Allocate the vector and handle some annoying special cases.  */
7321  nregs = data.nregs;
7322
7323  if (nregs == 0)
7324    {
7325      /* ??? Empty structure has no value?  Duh?  */
7326      if (size <= 0)
7327	{
7328	  /* Though there's nothing really to store, return a word register
7329	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
7330	     leads to breakage due to the fact that there are zero bytes to
7331	     load.  */
7332	  return gen_rtx_REG (mode, regbase);
7333	}
7334
7335      /* ??? C++ has structures with no fields, and yet a size.  Give up
7336	 for now and pass everything back in integer registers.  */
7337      nregs = CEIL_NWORDS (size);
7338      if (nregs + slotno > SPARC_INT_ARG_MAX)
7339	nregs = SPARC_INT_ARG_MAX - slotno;
7340    }
7341
7342  gcc_assert (nregs > 0);
7343
7344  data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7345
7346  /* If at least one field must be passed on the stack, generate
7347     (parallel [(expr_list (nil) ...) ...]) so that all fields will
7348     also be passed on the stack.  We can't do much better because the
7349     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7350     of structures for which the fields passed exclusively in registers
7351     are not at the beginning of the structure.  */
7352  if (data.stack)
7353    XVECEXP (data.ret, 0, 0)
7354      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7355
7356  /* Assign the registers.  */
7357  data.nregs = 0;
7358  data.intoffset = 0;
7359  traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7360
7361  /* Assign pending integer fields.  */
7362  assign_int_registers (size * BITS_PER_UNIT, &data);
7363
7364  gcc_assert (data.nregs == nregs);
7365
7366  return data.ret;
7367}
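
/* As an illustration, for 'struct { double d; long i; }' passed by
   value in slot 0 of an outgoing call on TARGET_ARCH64, the routine
   builds roughly (modulo exact register numbering):

     (parallel [(expr_list (reg:DF %f0) (const_int 0))
		(expr_list (reg:DI %o1) (const_int 8))])

   i.e. the FP field travels in an FP register, the integer field in
   the integer register of its slot, each tagged with its byte offset
   in the record.  */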
7368
7369/* Used by function_arg and function_value to implement the conventions
7370   of the 64-bit ABI for passing and returning unions.
7371   Return an expression valid as a return value for the FUNCTION_ARG
7372   and TARGET_FUNCTION_VALUE.
7373
7374   SIZE is the size in bytes of the union.
7375   MODE is the argument's machine mode.
7376   SLOTNO is the index number of the argument's slot in the parameter array.
7377   REGNO is the hard register the union will be passed in.  */
7378
7379static rtx
7380function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7381{
7382  unsigned int nwords;
7383
7384  /* See comment in function_arg_record_value for empty structures.  */
7385  if (size <= 0)
7386    return gen_rtx_REG (mode, regno);
7387
7388  if (slotno == SPARC_INT_ARG_MAX - 1)
7389    nwords = 1;
7390  else
7391    nwords = CEIL_NWORDS (size);
7392
7393  rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7394
7395  /* Unions are passed left-justified.  */
7396  for (unsigned int i = 0; i < nwords; i++)
    XVECEXP (regs, 0, i)
      = gen_rtx_EXPR_LIST (VOIDmode,
			   gen_rtx_REG (word_mode, regno + i),
			   GEN_INT (UNITS_PER_WORD * i));
7401
7402  return regs;
7403}
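
/* E.g. a 12-byte union passed in slot 0 of an outgoing call on
   TARGET_ARCH64 yields two left-justified word-sized pieces (sketch,
   modulo exact register numbering):

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
		(expr_list (reg:DI %o1) (const_int 8))])  */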
7404
7405/* Used by function_arg and function_value to implement the conventions
7406   of the 64-bit ABI for passing and returning BLKmode vectors.
7407   Return an expression valid as a return value for the FUNCTION_ARG
7408   and TARGET_FUNCTION_VALUE.
7409
7410   SIZE is the size in bytes of the vector.
7411   SLOTNO is the index number of the argument's slot in the parameter array.
7412   NAMED is true if this argument is a named parameter
7413    (otherwise it is an extra parameter matching an ellipsis).
7414   REGNO is the hard register the vector will be passed in.  */
7415
7416static rtx
7417function_arg_vector_value (int size, int slotno, bool named, int regno)
7418{
7419  const int mult = (named ? 2 : 1);
7420  unsigned int nwords;
7421
7422  if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7423    nwords = 1;
7424  else
7425    nwords = CEIL_NWORDS (size);
7426
7427  rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7428
7429  if (size < UNITS_PER_WORD)
7430    XVECEXP (regs, 0, 0)
7431      = gen_rtx_EXPR_LIST (VOIDmode,
7432			   gen_rtx_REG (SImode, regno),
7433			   const0_rtx);
7434  else
7435    for (unsigned int i = 0; i < nwords; i++)
7436      XVECEXP (regs, 0, i)
7437	= gen_rtx_EXPR_LIST (VOIDmode,
7438			     gen_rtx_REG (word_mode, regno + i * mult),
7439			     GEN_INT (i * UNITS_PER_WORD));
7440
7441  return regs;
7442}
7443
7444/* Determine where to put an argument to a function.
7445   Value is zero to push the argument on the stack,
7446   or a hard register in which to store the argument.
7447
7448   CUM is a variable of type CUMULATIVE_ARGS which gives info about
7449    the preceding args and about the function being called.
7450   ARG is a description of the argument.
7451   INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7452    TARGET_FUNCTION_INCOMING_ARG.  */
7453
7454static rtx
7455sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7456		      bool incoming)
7457{
7458  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7459  const int regbase
7460    = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7461  int slotno, regno, padding;
7462  tree type = arg.type;
7463  machine_mode mode = arg.mode;
7464  enum mode_class mclass = GET_MODE_CLASS (mode);
7465  bool named = arg.named;
7466
7467  slotno
7468    = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7469  if (slotno == -1)
7470    return 0;
7471
7472  /* Integer vectors are handled like floats as per the Sun VIS SDK.  */
7473  if (type && VECTOR_INTEGER_TYPE_P (type))
7474    mclass = MODE_FLOAT;
7475
7476  if (TARGET_ARCH32)
7477    return gen_rtx_REG (mode, regno);
7478
7479  /* Structures up to 16 bytes in size are passed in arg slots on the stack
7480     and are promoted to registers if possible.  */
7481  if (type && TREE_CODE (type) == RECORD_TYPE)
7482    {
7483      const int size = int_size_in_bytes (type);
7484      gcc_assert (size <= 16);
7485
7486      return function_arg_record_value (type, mode, slotno, named, regbase);
7487    }
7488
7489  /* Unions up to 16 bytes in size are passed in integer registers.  */
7490  else if (type && TREE_CODE (type) == UNION_TYPE)
7491    {
7492      const int size = int_size_in_bytes (type);
7493      gcc_assert (size <= 16);
7494
7495      return function_arg_union_value (size, mode, slotno, regno);
7496    }
7497
7498   /* Floating-point vectors up to 16 bytes are passed in registers.  */
7499  else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7500    {
7501      const int size = int_size_in_bytes (type);
7502      gcc_assert (size <= 16);
7503
7504      return function_arg_vector_value (size, slotno, named, regno);
7505    }
7506
7507  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7508     but also have the slot allocated for them.
7509     If no prototype is in scope fp values in register slots get passed
7510     in two places, either fp regs and int regs or fp regs and memory.  */
7511  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7512	   && SPARC_FP_REG_P (regno))
7513    {
7514      rtx reg = gen_rtx_REG (mode, regno);
7515      if (cum->prototype_p || cum->libcall_p)
7516	return reg;
7517      else
7518	{
7519	  rtx v0, v1;
7520
7521	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7522	    {
7523	      int intreg;
7524
7525	      /* On incoming, we don't need to know that the value
7526		 is passed in %f0 and %i0, and it confuses other parts
7527		 causing needless spillage even on the simplest cases.  */
7528	      if (incoming)
7529		return reg;
7530
7531	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7532			+ (regno - SPARC_FP_ARG_FIRST) / 2);
7533
7534	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7535	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7536				      const0_rtx);
7537	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7538	    }
7539	  else
7540	    {
7541	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7542	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7543	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7544	    }
7545	}
7546    }
7547
7548  /* All other aggregate types are passed in an integer register in a mode
7549     corresponding to the size of the type.  */
7550  else if (type && AGGREGATE_TYPE_P (type))
7551    {
7552      const int size = int_size_in_bytes (type);
7553      gcc_assert (size <= 16);
7554
7555      mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7556    }
7557
7558  return gen_rtx_REG (mode, regno);
7559}
7560
7561/* Handle the TARGET_FUNCTION_ARG target hook.  */
7562
7563static rtx
7564sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7565{
7566  return sparc_function_arg_1 (cum, arg, false);
7567}
7568
7569/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook.  */
7570
7571static rtx
7572sparc_function_incoming_arg (cumulative_args_t cum,
7573			     const function_arg_info &arg)
7574{
7575  return sparc_function_arg_1 (cum, arg, true);
7576}
7577
7578/* For sparc64, objects requiring 16 byte alignment are passed that way.  */
7579
7580static unsigned int
7581sparc_function_arg_boundary (machine_mode mode, const_tree type)
7582{
7583  return ((TARGET_ARCH64
7584	   && (GET_MODE_ALIGNMENT (mode) == 128
7585	       || (type && TYPE_ALIGN (type) == 128)))
7586	  ? 128
7587	  : PARM_BOUNDARY);
7588}
7589
7590/* For an arg passed partly in registers and partly in memory,
7591   this is the number of bytes of registers used.
7592   For args passed entirely in registers or entirely in memory, zero.
7593
7594   Any arg that starts in the first 6 regs but won't entirely fit in them
7595   needs partial registers on v8.  On v9, structures with integer
7596   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7597   values that begin in the last fp reg [where "last fp reg" varies with the
7598   mode] will be split between that reg and memory.  */
7599
7600static int
7601sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7602{
7603  int slotno, regno, padding;
7604
  /* We pass false for incoming here; it doesn't matter.  */
7606  slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7607				arg.named, false, &regno, &padding);
7608
7609  if (slotno == -1)
7610    return 0;
7611
7612  if (TARGET_ARCH32)
7613    {
7614      /* We are guaranteed by pass_by_reference that the size of the
7615	 argument is not greater than 8 bytes, so we only need to return
7616	 one word if the argument is partially passed in registers.  */
7617      const int size = GET_MODE_SIZE (arg.mode);
7618
7619      if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7620	return UNITS_PER_WORD;
7621    }
7622  else
7623    {
7624      /* We are guaranteed by pass_by_reference that the size of the
7625	 argument is not greater than 16 bytes, so we only need to return
7626	 one word if the argument is partially passed in registers.  */
7627      if (arg.aggregate_type_p ())
7628	{
7629	  const int size = int_size_in_bytes (arg.type);
7630
7631	  if (size > UNITS_PER_WORD
7632	      && (slotno == SPARC_INT_ARG_MAX - 1
7633		  || slotno == SPARC_FP_ARG_MAX - 1))
7634	    return UNITS_PER_WORD;
7635	}
7636      else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7637	       || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7638		    || (arg.type && VECTOR_TYPE_P (arg.type)))
7639		   && !(TARGET_FPU && arg.named)))
7640	{
7641	  const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7642			   ? int_size_in_bytes (arg.type)
7643			   : GET_MODE_SIZE (arg.mode);
7644
7645	  if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7646	    return UNITS_PER_WORD;
7647	}
7648      else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7649	       || (arg.type && VECTOR_TYPE_P (arg.type)))
7650	{
7651	  const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7652			   ? int_size_in_bytes (arg.type)
7653			   : GET_MODE_SIZE (arg.mode);
7654
7655	  if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7656	    return UNITS_PER_WORD;
7657	}
7658    }
7659
7660  return 0;
7661}
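
/* 64-bit example of the above: a 16-byte structure whose first word
   lands in the last integer slot (slotno == SPARC_INT_ARG_MAX - 1) is
   split, one word in %o5 and one on the stack, so the hook reports
   UNITS_PER_WORD partial bytes; everything else reports 0.  */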
7662
7663/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7664   Update the data in CUM to advance over argument ARG.  */
7665
7666static void
7667sparc_function_arg_advance (cumulative_args_t cum_v,
7668			    const function_arg_info &arg)
7669{
7670  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7671  tree type = arg.type;
7672  machine_mode mode = arg.mode;
7673  int regno, padding;
7674
  /* We pass false for incoming here; it doesn't matter.  */
7676  function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7677
7678  /* If argument requires leading padding, add it.  */
7679  cum->words += padding;
7680
7681  if (TARGET_ARCH32)
7682    cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7683  else
7684    {
7685      /* For types that can have BLKmode, get the size from the type.  */
7686      if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7687	{
7688	  const int size = int_size_in_bytes (type);
7689
7690	  /* See comment in function_arg_record_value for empty structures.  */
7691	  if (size <= 0)
7692	    cum->words++;
7693	  else
7694	    cum->words += CEIL_NWORDS (size);
7695	}
7696      else
7697	cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7698    }
7699}
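
/* Advance examples (illustrative): on TARGET_ARCH32 a 'double'
   (8 bytes, UNITS_PER_WORD == 4) advances cum->words by 2; on
   TARGET_ARCH64 a 12-byte aggregate advances it by CEIL_NWORDS (12)
   == 2, and an empty C++ structure still consumes one word.  */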
7700
7701/* Implement TARGET_FUNCTION_ARG_PADDING.  For the 64-bit ABI structs
7702   are always stored left shifted in their argument slot.  */
7703
7704static pad_direction
7705sparc_function_arg_padding (machine_mode mode, const_tree type)
7706{
7707  if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7708    return PAD_UPWARD;
7709
7710  /* Fall back to the default.  */
7711  return default_function_arg_padding (mode, type);
7712}
7713
7714/* Handle the TARGET_RETURN_IN_MEMORY target hook.
7715   Specify whether to return the return value in memory.  */
7716
7717static bool
7718sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7719{
7720  if (TARGET_ARCH32)
7721    /* Original SPARC 32-bit ABI says that structures and unions, and
7722       quad-precision floats are returned in memory.  But note that the
7723       first part is implemented through -fpcc-struct-return being the
7724       default, so here we only implement -freg-struct-return instead.
7725       All other base types are returned in registers.
7726
7727       Extended ABI (as implemented by the Sun compiler) says that
7728       all complex floats are returned in registers (8 FP registers
7729       at most for '_Complex long double').  Return all complex integers
7730       in registers (4 at most for '_Complex long long').
7731
7732       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7733       integers are returned like floats of the same size, that is in
7734       registers up to 8 bytes and in memory otherwise.  Return all
7735       vector floats in memory like structure and unions; note that
7736       they always have BLKmode like the latter.  */
7737    return (TYPE_MODE (type) == BLKmode
7738	    || TYPE_MODE (type) == TFmode
7739	    || (TREE_CODE (type) == VECTOR_TYPE
7740		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7741  else
7742    /* Original SPARC 64-bit ABI says that structures and unions
7743       smaller than 32 bytes are returned in registers, as well as
7744       all other base types.
7745
7746       Extended ABI (as implemented by the Sun compiler) says that all
7747       complex floats are returned in registers (8 FP registers at most
7748       for '_Complex long double').  Return all complex integers in
7749       registers (4 at most for '_Complex TItype').
7750
7751       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7752       integers are returned like floats of the same size, that is in
7753       registers.  Return all vector floats like structure and unions;
7754       note that they always have BLKmode like the latter.  */
7755    return (TYPE_MODE (type) == BLKmode
7756	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7757}
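
/* Concrete instances of these rules (illustration only): on
   TARGET_ARCH64 a 24-byte structure is returned in registers while a
   40-byte one goes to memory; on TARGET_ARCH32 'long double' (TFmode)
   and every BLKmode aggregate are returned in memory.  */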
7758
7759/* Handle the TARGET_STRUCT_VALUE target hook.
7760   Return where to find the structure return value address.  */
7761
7762static rtx
7763sparc_struct_value_rtx (tree fndecl, int incoming)
7764{
7765  if (TARGET_ARCH64)
7766    return NULL_RTX;
7767  else
7768    {
7769      rtx mem;
7770
7771      if (incoming)
7772	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7773						   STRUCT_VALUE_OFFSET));
7774      else
7775	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7776						   STRUCT_VALUE_OFFSET));
7777
      /* Only follow the SPARC ABI for fixed-size structure returns.
	 Variable-size structure returns are handled per the normal
	 procedures in GCC.  This is enabled by -mstd-struct-return.  */
7781      if (incoming == 2
7782	  && sparc_std_struct_return
7783	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7784	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7785	{
7786	  /* We must check and adjust the return address, as it is optional
7787	     as to whether the return object is really provided.  */
7788	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7789	  rtx scratch = gen_reg_rtx (SImode);
7790	  rtx_code_label *endlab = gen_label_rtx ();
7791
7792	  /* Calculate the return object size.  */
7793	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7794	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7795	  /* Construct a temporary return value.  */
7796	  rtx temp_val
7797	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7798
7799	  /* Implement SPARC 32-bit psABI callee return struct checking:
7800
7801	     Fetch the instruction where we will return to and see if
7802	     it's an unimp instruction (the most significant 10 bits
7803	     will be zero).  */
7804	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
7805						plus_constant (Pmode,
7806							       ret_reg, 8)));
7807	  /* Assume the size is valid and pre-adjust.  */
7808	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7809	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7810				   0, endlab);
7811	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7812	  /* Write the address of the memory pointed to by temp_val into
7813	     the memory pointed to by mem.  */
7814	  emit_move_insn (mem, XEXP (temp_val, 0));
7815	  emit_label (endlab);
7816	}
7817
7818      return mem;
7819    }
7820}
7821
7822/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7823   For v9, function return values are subject to the same rules as arguments,
7824   except that up to 32 bytes may be returned in registers.  */
7825
7826static rtx
7827sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7828{
7829  /* Beware that the two values are swapped here wrt function_arg.  */
7830  const int regbase
7831    = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7832  enum mode_class mclass = GET_MODE_CLASS (mode);
7833  int regno;
7834
7835  /* Integer vectors are handled like floats as per the Sun VIS SDK.
7836     Note that integer vectors larger than 16 bytes have BLKmode so
7837     they need to be handled like floating-point vectors below.  */
7838  if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7839    mclass = MODE_FLOAT;
7840
7841  if (TARGET_ARCH64 && type)
7842    {
7843      /* Structures up to 32 bytes in size are returned in registers.  */
7844      if (TREE_CODE (type) == RECORD_TYPE)
7845	{
7846	  const int size = int_size_in_bytes (type);
7847	  gcc_assert (size <= 32);
7848
7849	  return function_arg_record_value (type, mode, 0, true, regbase);
7850	}
7851
7852      /* Unions up to 32 bytes in size are returned in integer registers.  */
7853      else if (TREE_CODE (type) == UNION_TYPE)
7854	{
7855	  const int size = int_size_in_bytes (type);
7856	  gcc_assert (size <= 32);
7857
7858	  return function_arg_union_value (size, mode, 0, regbase);
7859	}
7860
7861      /* Vectors up to 32 bytes are returned in FP registers.  */
7862      else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7863	{
7864	  const int size = int_size_in_bytes (type);
7865	  gcc_assert (size <= 32);
7866
7867	  return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7868	}
7869
7870      /* Objects that require it are returned in FP registers.  */
7871      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7872	;
7873
7874      /* All other aggregate types are returned in an integer register in a
7875	 mode corresponding to the size of the type.  */
7876      else if (AGGREGATE_TYPE_P (type))
7877	{
7880	  const int size = int_size_in_bytes (type);
7881	  gcc_assert (size <= 32);
7882
7883	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7884
7885	  /* ??? We probably should have made the same ABI change in
7886	     3.4.0 as the one we made for unions.   The latter was
7887	     required by the SCD though, while the former is not
7888	     specified, so we favored compatibility and efficiency.
7889
7890	     Now we're stuck for aggregates larger than 16 bytes,
7891	     because OImode vanished in the meantime.  Let's not
7892	     try to be unduly clever, and simply follow the ABI
7893	     for unions in that case.  */
7894	  if (mode == BLKmode)
7895	    return function_arg_union_value (size, mode, 0, regbase);
7896	  else
7897	    mclass = MODE_INT;
7898	}
7899
7900      /* We should only have pointer and integer types at this point.  This
7901	 must match sparc_promote_function_mode.  */
7902      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7903	mode = word_mode;
7904    }
7905
7906  /* We should only have pointer and integer types at this point, except with
7907     -freg-struct-return.  This must match sparc_promote_function_mode.  */
7908  else if (TARGET_ARCH32
7909	   && !(type && AGGREGATE_TYPE_P (type))
7910	   && mclass == MODE_INT
7911	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7912    mode = word_mode;
7913
7914  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7915    regno = SPARC_FP_ARG_FIRST;
7916  else
7917    regno = regbase;
7918
7919  return gen_rtx_REG (mode, regno);
7920}
7921
7922/* Handle TARGET_FUNCTION_VALUE.
7923   On the SPARC, the value is found in the first "output" register, but the
7924   called function leaves it in the first "input" register.  */
7925
7926static rtx
7927sparc_function_value (const_tree valtype,
7928		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7929		      bool outgoing)
7930{
7931  return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7932}
7933
7934/* Handle TARGET_LIBCALL_VALUE.  */
7935
7936static rtx
7937sparc_libcall_value (machine_mode mode,
7938		     const_rtx fun ATTRIBUTE_UNUSED)
7939{
7940  return sparc_function_value_1 (NULL_TREE, mode, false);
7941}
7942
7943/* Handle FUNCTION_VALUE_REGNO_P.
7944   On the SPARC, the first "output" reg is used for integer values, and the
7945   first floating point register is used for floating point values.  */
7946
7947static bool
7948sparc_function_value_regno_p (const unsigned int regno)
7949{
7950  return (regno == 8 || (TARGET_FPU && regno == 32));
7951}
7952
7953/* Do what is necessary for `va_start'.  We look at the current function
7954   to determine if stdarg or varargs is used and return the address of
7955   the first unnamed parameter.  */
7956
7957static rtx
7958sparc_builtin_saveregs (void)
7959{
7960  int first_reg = crtl->args.info.words;
7961  rtx address;
7962  int regno;
7963
7964  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7965    emit_move_insn (gen_rtx_MEM (word_mode,
7966				 gen_rtx_PLUS (Pmode,
7967					       frame_pointer_rtx,
7968					       GEN_INT (FIRST_PARM_OFFSET (0)
7969							+ (UNITS_PER_WORD
7970							   * regno)))),
7971		    gen_rtx_REG (word_mode,
7972				 SPARC_INCOMING_INT_ARG_FIRST + regno));
7973
7974  address = gen_rtx_PLUS (Pmode,
7975			  frame_pointer_rtx,
7976			  GEN_INT (FIRST_PARM_OFFSET (0)
7977				   + UNITS_PER_WORD * first_reg));
7978
7979  return address;
7980}
7981
7982/* Implement `va_start' for stdarg.  */
7983
7984static void
7985sparc_va_start (tree valist, rtx nextarg)
7986{
7987  nextarg = expand_builtin_saveregs ();
7988  std_expand_builtin_va_start (valist, nextarg);
7989}
7990
7991/* Implement `va_arg' for stdarg.  */
7992
7993static tree
7994sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7995		       gimple_seq *post_p)
7996{
7997  HOST_WIDE_INT size, rsize, align;
7998  tree addr, incr;
7999  bool indirect;
8000  tree ptrtype = build_pointer_type (type);
8001
8002  if (pass_va_arg_by_reference (type))
8003    {
8004      indirect = true;
8005      size = rsize = UNITS_PER_WORD;
8006      align = 0;
8007    }
8008  else
8009    {
8010      indirect = false;
8011      size = int_size_in_bytes (type);
8012      rsize = ROUND_UP (size, UNITS_PER_WORD);
8013      align = 0;
8014
8015      if (TARGET_ARCH64)
8016	{
8017	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
8018	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
8019	    align = 2 * UNITS_PER_WORD;
8020
8021	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
8022	     are left-justified in their slots.  */
8023	  if (AGGREGATE_TYPE_P (type))
8024	    {
8025	      if (size == 0)
8026		size = rsize = UNITS_PER_WORD;
8027	      else
8028		size = rsize;
8029	    }
8030	}
8031    }
8032
8033  incr = valist;
8034  if (align)
8035    {
8036      incr = fold_build_pointer_plus_hwi (incr, align - 1);
8037      incr = fold_convert (sizetype, incr);
8038      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
8039			  size_int (-align));
8040      incr = fold_convert (ptr_type_node, incr);
8041    }
8042
8043  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
8044  addr = incr;
8045
8046  if (BYTES_BIG_ENDIAN && size < rsize)
8047    addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8048
8049  if (indirect)
8050    {
8051      addr = fold_convert (build_pointer_type (ptrtype), addr);
8052      addr = build_va_arg_indirect_ref (addr);
8053    }
8054
8055  /* If the address isn't aligned properly for the type, we need a temporary.
8056     FIXME: This is inefficient, usually we can do this in registers.  */
8057  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8058    {
8059      tree tmp = create_tmp_var (type, "va_arg_tmp");
8060      tree dest_addr = build_fold_addr_expr (tmp);
8061      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8062				   3, dest_addr, addr, size_int (rsize));
8063      TREE_ADDRESSABLE (tmp) = 1;
8064      gimplify_and_add (copy, pre_p);
8065      addr = dest_addr;
8066    }
8067
8068  else
8069    addr = fold_convert (ptrtype, addr);
8070
8071  incr = fold_build_pointer_plus_hwi (incr, rsize);
8072  gimplify_assign (valist, incr, post_p);
8073
8074  return build_va_arg_indirect_ref (addr);
8075}
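
/* Rough sketch of what the sequence above computes for
   'va_arg (ap, int)' on TARGET_ARCH64 (big-endian, size == 4,
   rsize == 8), written as C for illustration:

     addr = ap + (8 - 4);	-- right-justify in the 8-byte slot
     tmp = *(int *) addr;
     ap = ap + 8;		-- advance to the next slot

   with the extra 16-byte realignment step first for types whose
   alignment exceeds BITS_PER_WORD.  */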
8076
8077/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8078   Specify whether the vector mode is supported by the hardware.  */
8079
8080static bool
8081sparc_vector_mode_supported_p (machine_mode mode)
8082{
  return TARGET_VIS && VECTOR_MODE_P (mode);
8084}
8085
8086/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook.  */
8087
8088static machine_mode
8089sparc_preferred_simd_mode (scalar_mode mode)
8090{
8091  if (TARGET_VIS)
8092    switch (mode)
8093      {
8094      case E_SImode:
8095	return V2SImode;
8096      case E_HImode:
8097	return V4HImode;
8098      case E_QImode:
8099	return V8QImode;
8100
8101      default:;
8102      }
8103
8104  return word_mode;
8105}
8106
8107/* Implement TARGET_CAN_FOLLOW_JUMP.  */
8108
8109static bool
8110sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8111{
8112  /* Do not fold unconditional jumps that have been created for crossing
8113     partition boundaries.  */
8114  if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8115    return false;
8116
8117  return true;
8118}
8119
8120/* Return the string to output an unconditional branch to LABEL, which is
8121   the operand number of the label.
8122
8123   DEST is the destination insn (i.e. the label), INSN is the source.  */
8124
8125const char *
8126output_ubranch (rtx dest, rtx_insn *insn)
8127{
8128  static char string[64];
8129  bool v9_form = false;
8130  int delta;
8131  char *p;
8132
8133  /* Even if we are trying to use cbcond for this, evaluate
8134     whether we can use V9 branches as our backup plan.  */
8135  delta = 5000000;
8136  if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8137    delta = (INSN_ADDRESSES (INSN_UID (dest))
8138	     - INSN_ADDRESSES (INSN_UID (insn)));
8139
8140  /* Leave some instructions for "slop".  */
8141  if (TARGET_V9 && delta >= -260000 && delta < 260000)
8142    v9_form = true;
8143
8144  if (TARGET_CBCOND)
8145    {
8146      bool emit_nop = emit_cbcond_nop (insn);
8147      bool far = false;
8148      const char *rval;
8149
8150      if (delta < -500 || delta > 500)
8151	far = true;
8152
8153      if (far)
8154	{
8155	  if (v9_form)
8156	    rval = "ba,a,pt\t%%xcc, %l0";
8157	  else
8158	    rval = "b,a\t%l0";
8159	}
8160      else
8161	{
8162	  if (emit_nop)
8163	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8164	  else
8165	    rval = "cwbe\t%%g0, %%g0, %l0";
8166	}
8167      return rval;
8168    }
8169
8170  if (v9_form)
8171    strcpy (string, "ba%*,pt\t%%xcc, ");
8172  else
8173    strcpy (string, "b%*\t");
8174
8175  p = strchr (string, '\0');
8176  *p++ = '%';
8177  *p++ = 'l';
8178  *p++ = '0';
8179  *p++ = '%';
8180  *p++ = '(';
8181  *p = '\0';
8182
8183  return string;
8184}
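
/* For instance, a short forward jump on a V9 target without cbcond
   returns the template "ba%*,pt\t%%xcc, %l0%(", which final output
   renders along the lines of (label name invented for the example):

	ba,pt	%xcc, .L42
	 nop

   the %* and %( punctuation letting the delay-slot machinery add the
   annul flag or trailing nop as needed.  */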
8185
8186/* Return the string to output a conditional branch to LABEL, which is
8187   the operand number of the label.  OP is the conditional expression.
8188   XEXP (OP, 0) is assumed to be a condition code register (integer or
8189   floating point) and its mode specifies what kind of comparison we made.
8190
8191   DEST is the destination insn (i.e. the label), INSN is the source.
8192
8193   REVERSED is nonzero if we should reverse the sense of the comparison.
8194
8195   ANNUL is nonzero if we should generate an annulling branch.  */
8196
8197const char *
8198output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8199		rtx_insn *insn)
8200{
8201  static char string[64];
8202  enum rtx_code code = GET_CODE (op);
8203  rtx cc_reg = XEXP (op, 0);
8204  machine_mode mode = GET_MODE (cc_reg);
8205  const char *labelno, *branch;
8206  int spaces = 8, far;
8207  char *p;
8208
8209  /* v9 branches are limited to +-1MB.  If it is too far away,
8210     change
8211
8212     bne,pt %xcc, .LC30
8213
8214     to
8215
8216     be,pn %xcc, .+12
8217      nop
8218     ba .LC30
8219
8220     and
8221
8222     fbne,a,pn %fcc2, .LC29
8223
8224     to
8225
8226     fbe,pt %fcc2, .+16
8227      nop
8228     ba .LC29  */
8229
8230  far = TARGET_V9 && (get_attr_length (insn) >= 3);
8231  if (reversed ^ far)
8232    {
8233      /* Reversal of FP compares takes care -- an ordered compare
8234	 becomes an unordered compare and vice versa.  */
8235      if (mode == CCFPmode || mode == CCFPEmode)
8236	code = reverse_condition_maybe_unordered (code);
8237      else
8238	code = reverse_condition (code);
8239    }
8240
8241  /* Start by writing the branch condition.  */
8242  if (mode == CCFPmode || mode == CCFPEmode)
8243    {
8244      switch (code)
8245	{
8246	case NE:
8247	  branch = "fbne";
8248	  break;
8249	case EQ:
8250	  branch = "fbe";
8251	  break;
8252	case GE:
8253	  branch = "fbge";
8254	  break;
8255	case GT:
8256	  branch = "fbg";
8257	  break;
8258	case LE:
8259	  branch = "fble";
8260	  break;
8261	case LT:
8262	  branch = "fbl";
8263	  break;
8264	case UNORDERED:
8265	  branch = "fbu";
8266	  break;
8267	case ORDERED:
8268	  branch = "fbo";
8269	  break;
8270	case UNGT:
8271	  branch = "fbug";
8272	  break;
8273	case UNLT:
8274	  branch = "fbul";
8275	  break;
8276	case UNEQ:
8277	  branch = "fbue";
8278	  break;
8279	case UNGE:
8280	  branch = "fbuge";
8281	  break;
8282	case UNLE:
8283	  branch = "fbule";
8284	  break;
8285	case LTGT:
8286	  branch = "fblg";
8287	  break;
8288	default:
8289	  gcc_unreachable ();
8290	}
8291
8292      /* ??? !v9: FP branches cannot be preceded by another floating point
8293	 insn.  Because there is currently no concept of pre-delay slots,
8294	 we can fix this only by always emitting a nop before a floating
8295	 point branch.  */
8296
8297      string[0] = '\0';
8298      if (! TARGET_V9)
8299	strcpy (string, "nop\n\t");
8300      strcat (string, branch);
8301    }
8302  else
8303    {
8304      switch (code)
8305	{
8306	case NE:
8307	  if (mode == CCVmode || mode == CCXVmode)
8308	    branch = "bvs";
8309	  else
8310	    branch = "bne";
8311	  break;
8312	case EQ:
8313	  if (mode == CCVmode || mode == CCXVmode)
8314	    branch = "bvc";
8315	  else
8316	    branch = "be";
8317	  break;
8318	case GE:
8319	  if (mode == CCNZmode || mode == CCXNZmode)
8320	    branch = "bpos";
8321	  else
8322	    branch = "bge";
8323	  break;
8324	case GT:
8325	  branch = "bg";
8326	  break;
8327	case LE:
8328	  branch = "ble";
8329	  break;
8330	case LT:
8331	  if (mode == CCNZmode || mode == CCXNZmode)
8332	    branch = "bneg";
8333	  else
8334	    branch = "bl";
8335	  break;
8336	case GEU:
8337	  branch = "bgeu";
8338	  break;
8339	case GTU:
8340	  branch = "bgu";
8341	  break;
8342	case LEU:
8343	  branch = "bleu";
8344	  break;
8345	case LTU:
8346	  branch = "blu";
8347	  break;
8348	default:
8349	  gcc_unreachable ();
8350	}
8351      strcpy (string, branch);
8352    }
8353  spaces -= strlen (branch);
8354  p = strchr (string, '\0');
8355
8356  /* Now add the annulling, the label, and a possible noop.  */
8357  if (annul && ! far)
8358    {
8359      strcpy (p, ",a");
8360      p += 2;
8361      spaces -= 2;
8362    }
8363
8364  if (TARGET_V9)
8365    {
8366      rtx note;
8367      int v8 = 0;
8368
8369      if (! far && insn && INSN_ADDRESSES_SET_P ())
8370	{
8371	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
8372		       - INSN_ADDRESSES (INSN_UID (insn)));
8373	  /* Leave some instructions for "slop".  */
8374	  if (delta < -260000 || delta >= 260000)
8375	    v8 = 1;
8376	}
8377
8378      switch (mode)
8379	{
8380	case E_CCmode:
8381	case E_CCNZmode:
8382	case E_CCCmode:
8383	case E_CCVmode:
8384	  labelno = "%%icc, ";
8385	  if (v8)
8386	    labelno = "";
8387	  break;
8388	case E_CCXmode:
8389	case E_CCXNZmode:
8390	case E_CCXCmode:
8391	case E_CCXVmode:
8392	  labelno = "%%xcc, ";
8393	  gcc_assert (!v8);
8394	  break;
8395	case E_CCFPmode:
8396	case E_CCFPEmode:
8397	  {
8398	    static char v9_fcc_labelno[] = "%%fccX, ";
8399	    /* Set the char indicating the number of the fcc reg to use.  */
8400	    v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8401	    labelno = v9_fcc_labelno;
8402	    if (v8)
8403	      {
8404		gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8405		labelno = "";
8406	      }
8407	  }
8408	  break;
8409	default:
8410	  gcc_unreachable ();
8411	}
8412
8413      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8414	{
8415	  strcpy (p,
8416		  ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8417		   >= profile_probability::even ()) ^ far)
8418		  ? ",pt" : ",pn");
8419	  p += 3;
8420	  spaces -= 3;
8421	}
8422    }
8423  else
8424    labelno = "";
8425
8426  if (spaces > 0)
8427    *p++ = '\t';
8428  else
8429    *p++ = ' ';
8430  strcpy (p, labelno);
8431  p = strchr (p, '\0');
8432  if (far)
8433    {
8434      strcpy (p, ".+12\n\t nop\n\tb\t");
8435      /* Skip the next insn if requested or
8436	 if we know that it will be a nop.  */
8437      if (annul || ! final_sequence)
8438        p[3] = '6';
8439      p += 14;
8440    }
8441  *p++ = '%';
8442  *p++ = 'l';
8443  *p++ = label + '0';
8444  *p++ = '%';
8445  *p++ = '#';
8446  *p = '\0';
8447
8448  return string;
8449}
8450
8451/* Emit a library call comparison between floating point X and Y.
8452   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8453   Return the new operator to be used in the comparison sequence.
8454
8455   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8456   values as arguments instead of the TFmode registers themselves,
8457   that's why we cannot call emit_float_lib_cmp.  */
8458
8459rtx
8460sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8461{
8462  const char *qpfunc;
8463  rtx slot0, slot1, result, tem, tem2, libfunc;
8464  machine_mode mode;
8465  enum rtx_code new_comparison;
8466
8467  switch (comparison)
8468    {
8469    case EQ:
8470      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8471      break;
8472
8473    case NE:
8474      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8475      break;
8476
8477    case GT:
8478      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8479      break;
8480
8481    case GE:
8482      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8483      break;
8484
8485    case LT:
8486      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8487      break;
8488
8489    case LE:
8490      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8491      break;
8492
8493    case ORDERED:
8494    case UNORDERED:
8495    case UNGT:
8496    case UNLT:
8497    case UNEQ:
8498    case UNGE:
8499    case UNLE:
8500    case LTGT:
8501      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8502      break;
8503
8504    default:
8505      gcc_unreachable ();
8506    }
8507
8508  if (TARGET_ARCH64)
8509    {
8510      if (MEM_P (x))
8511	{
8512	  tree expr = MEM_EXPR (x);
8513	  if (expr)
8514	    mark_addressable (expr);
8515	  slot0 = x;
8516	}
8517      else
8518	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
8520	  emit_move_insn (slot0, x);
8521	}
8522
8523      if (MEM_P (y))
8524	{
8525	  tree expr = MEM_EXPR (y);
8526	  if (expr)
8527	    mark_addressable (expr);
8528	  slot1 = y;
8529	}
8530      else
8531	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
8533	  emit_move_insn (slot1, y);
8534	}
8535
8536      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8537      emit_library_call (libfunc, LCT_NORMAL,
8538			 DImode,
8539			 XEXP (slot0, 0), Pmode,
8540			 XEXP (slot1, 0), Pmode);
8541      mode = DImode;
8542    }
8543  else
8544    {
8545      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8546      emit_library_call (libfunc, LCT_NORMAL,
8547			 SImode,
8548			 x, TFmode, y, TFmode);
8549      mode = SImode;
8550    }
8551
8552
8553  /* Immediately move the result of the libcall into a pseudo
8554     register so reload doesn't clobber the value if it needs
8555     the return register for a spill reg.  */
8556  result = gen_reg_rtx (mode);
8557  emit_move_insn (result, hard_libcall_value (mode, libfunc));
8558
8559  switch (comparison)
8560    {
8561    default:
8562      return gen_rtx_NE (VOIDmode, result, const0_rtx);
8563    case ORDERED:
8564    case UNORDERED:
8565      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT (3));
8567    case UNGT:
8568    case UNGE:
8569      new_comparison = (comparison == UNGT ? GT : NE);
8570      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8571    case UNLE:
8572      return gen_rtx_NE (VOIDmode, result, const2_rtx);
8573    case UNLT:
8574      tem = gen_reg_rtx (mode);
8575      if (TARGET_ARCH32)
8576	emit_insn (gen_andsi3 (tem, result, const1_rtx));
8577      else
8578	emit_insn (gen_anddi3 (tem, result, const1_rtx));
8579      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8580    case UNEQ:
8581    case LTGT:
8582      tem = gen_reg_rtx (mode);
8583      if (TARGET_ARCH32)
8584	emit_insn (gen_addsi3 (tem, result, const1_rtx));
8585      else
8586	emit_insn (gen_adddi3 (tem, result, const1_rtx));
8587      tem2 = gen_reg_rtx (mode);
8588      if (TARGET_ARCH32)
8589	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8590      else
8591	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8592      new_comparison = (comparison == UNEQ ? EQ : NE);
8593      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8594    }
8595
8596  gcc_unreachable ();
8597}
8598
8599/* Generate an unsigned DImode to FP conversion.  This is the same code
8600   optabs would emit if we didn't have TFmode patterns.  */
8601
8602void
8603sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8604{
8605  rtx i0, i1, f0, in, out;
8606
8607  out = operands[0];
8608  in = force_reg (DImode, operands[1]);
8609  rtx_code_label *neglab = gen_label_rtx ();
8610  rtx_code_label *donelab = gen_label_rtx ();
8611  i0 = gen_reg_rtx (DImode);
8612  i1 = gen_reg_rtx (DImode);
8613  f0 = gen_reg_rtx (mode);
8614
8615  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8616
8617  emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8618  emit_jump_insn (gen_jump (donelab));
8619  emit_barrier ();
8620
8621  emit_label (neglab);
8622
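  /* The input has the high bit set, so a signed conversion would see it
     as negative.  Halve it, folding the shifted-out low bit back into
     bit 0 so that the final doubling rounds as the direct conversion
     would have (round to odd), convert, and double the result.  */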
8623  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8624  emit_insn (gen_anddi3 (i1, in, const1_rtx));
8625  emit_insn (gen_iordi3 (i0, i0, i1));
8626  emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8627  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8628
8629  emit_label (donelab);
8630}
8631
8632/* Generate an FP to unsigned DImode conversion.  This is the same code
8633   optabs would emit if we didn't have TFmode patterns.  */
8634
8635void
8636sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8637{
8638  rtx i0, i1, f0, in, out, limit;
8639
8640  out = operands[0];
8641  in = force_reg (mode, operands[1]);
8642  rtx_code_label *neglab = gen_label_rtx ();
8643  rtx_code_label *donelab = gen_label_rtx ();
8644  i0 = gen_reg_rtx (DImode);
8645  i1 = gen_reg_rtx (DImode);
8646  limit = gen_reg_rtx (mode);
8647  f0 = gen_reg_rtx (mode);
8648
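  /* LIMIT is 2^63, the first value too big for a signed DImode.  */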
8649  emit_move_insn (limit,
8650		  const_double_from_real_value (
8651		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8652  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8653
8654  emit_insn (gen_rtx_SET (out,
8655			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8656  emit_jump_insn (gen_jump (donelab));
8657  emit_barrier ();
8658
8659  emit_label (neglab);
8660
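  /* The value is at least 2^63, out of signed DImode range.  Subtract
     2^63, convert the remainder with the signed pattern, and put the
     missing high bit back with an XOR.  */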
8661  emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8662  emit_insn (gen_rtx_SET (i0,
8663			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8664  emit_insn (gen_movdi (i1, const1_rtx));
8665  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8666  emit_insn (gen_xordi3 (out, i0, i1));
8667
8668  emit_label (donelab);
8669}
8670
8671/* Return the string to output a compare and branch instruction to DEST.
8672   DEST is the destination insn (i.e. the label), INSN is the source,
8673   and OP is the conditional expression.  */
8674
8675const char *
8676output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8677{
8678  machine_mode mode = GET_MODE (XEXP (op, 0));
8679  enum rtx_code code = GET_CODE (op);
8680  const char *cond_str, *tmpl;
8681  int far, emit_nop, len;
8682  static char string[64];
8683  char size_char;
8684
8685  /* Compare and Branch is limited to +-2KB.  If it is too far away,
8686     change
8687
8688     cxbne X, Y, .LC30
8689
8690     to
8691
8692     cxbe X, Y, .+16
8693     nop
8694     ba,pt xcc, .LC30
8695      nop  */
8696
8697  len = get_attr_length (insn);
8698
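  /* The length attribute counts instruction words: 1 is a lone cbcond,
     2 a cbcond followed by a nop, and 4 the far sequence above.  */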
8699  far = len == 4;
8700  emit_nop = len == 2;
8701
8702  if (far)
8703    code = reverse_condition (code);
8704
8705  size_char = ((mode == SImode) ? 'w' : 'x');
8706
8707  switch (code)
8708    {
8709    case NE:
8710      cond_str = "ne";
8711      break;
8712
8713    case EQ:
8714      cond_str = "e";
8715      break;
8716
8717    case GE:
8718      cond_str = "ge";
8719      break;
8720
8721    case GT:
8722      cond_str = "g";
8723      break;
8724
8725    case LE:
8726      cond_str = "le";
8727      break;
8728
8729    case LT:
8730      cond_str = "l";
8731      break;
8732
8733    case GEU:
8734      cond_str = "cc";
8735      break;
8736
8737    case GTU:
8738      cond_str = "gu";
8739      break;
8740
8741    case LEU:
8742      cond_str = "leu";
8743      break;
8744
8745    case LTU:
8746      cond_str = "cs";
8747      break;
8748
8749    default:
8750      gcc_unreachable ();
8751    }
8752
8753  if (far)
8754    {
8755      int veryfar = 1, delta;
8756
8757      if (INSN_ADDRESSES_SET_P ())
8758	{
8759	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8760		   - INSN_ADDRESSES (INSN_UID (insn)));
8761	  /* Leave some instructions for "slop".  */
8762	  if (delta >= -260000 && delta < 260000)
8763	    veryfar = 0;
8764	}
8765
8766      if (veryfar)
8767	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8768      else
8769	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8770    }
8771  else
8772    {
8773      if (emit_nop)
8774	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8775      else
8776	tmpl = "c%cb%s\t%%1, %%2, %%3";
8777    }
8778
  snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8780
8781  return string;
8782}
8783
8784/* Return the string to output a conditional branch to LABEL, testing
8785   register REG.  LABEL is the operand number of the label; REG is the
8786   operand number of the reg.  OP is the conditional expression.  The mode
8787   of REG says what kind of comparison we made.
8788
8789   DEST is the destination insn (i.e. the label), INSN is the source.
8790
8791   REVERSED is nonzero if we should reverse the sense of the comparison.
8792
8793   ANNUL is nonzero if we should generate an annulling branch.  */
8794
8795const char *
8796output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8797		 int annul, rtx_insn *insn)
8798{
8799  static char string[64];
8800  enum rtx_code code = GET_CODE (op);
8801  machine_mode mode = GET_MODE (XEXP (op, 0));
8802  rtx note;
8803  int far;
8804  char *p;
8805
  /* Branch-on-register instructions are limited to +-128KB.  If the target
     is too far away, change
8808
8809     brnz,pt %g1, .LC30
8810
8811     to
8812
8813     brz,pn %g1, .+12
8814      nop
8815     ba,pt %xcc, .LC30
8816
8817     and
8818
8819     brgez,a,pn %o1, .LC29
8820
8821     to
8822
8823     brlz,pt %o1, .+16
8824      nop
8825     ba,pt %xcc, .LC29  */
8826
8827  far = get_attr_length (insn) >= 3;
8828
8829  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
8830  if (reversed ^ far)
8831    code = reverse_condition (code);
8832
8833  /* Only 64-bit versions of these instructions exist.  */
8834  gcc_assert (mode == DImode);
8835
8836  /* Start by writing the branch condition.  */
8837
8838  switch (code)
8839    {
8840    case NE:
8841      strcpy (string, "brnz");
8842      break;
8843
8844    case EQ:
8845      strcpy (string, "brz");
8846      break;
8847
8848    case GE:
8849      strcpy (string, "brgez");
8850      break;
8851
8852    case LT:
8853      strcpy (string, "brlz");
8854      break;
8855
8856    case LE:
8857      strcpy (string, "brlez");
8858      break;
8859
8860    case GT:
8861      strcpy (string, "brgz");
8862      break;
8863
8864    default:
8865      gcc_unreachable ();
8866    }
8867
8868  p = strchr (string, '\0');
8869
8870  /* Now add the annulling, reg, label, and nop.  */
8871  if (annul && ! far)
8872    {
8873      strcpy (p, ",a");
8874      p += 2;
8875    }
8876
8877  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8878    {
8879      strcpy (p,
8880	      ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8881	       >= profile_probability::even ()) ^ far)
8882	      ? ",pt" : ",pn");
8883      p += 3;
8884    }
8885
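  /* Separate the mnemonic from the operands with a tab while the mnemonic
     and its suffixes are short; from 8 characters on, a single space keeps
     the operands from jumping to the next tab stop.  */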
8886  *p = p < string + 8 ? '\t' : ' ';
8887  p++;
8888  *p++ = '%';
8889  *p++ = '0' + reg;
8890  *p++ = ',';
8891  *p++ = ' ';
8892  if (far)
8893    {
8894      int veryfar = 1, delta;
8895
8896      if (INSN_ADDRESSES_SET_P ())
8897	{
8898	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8899		   - INSN_ADDRESSES (INSN_UID (insn)));
8900	  /* Leave some instructions for "slop".  */
8901	  if (delta >= -260000 && delta < 260000)
8902	    veryfar = 0;
8903	}
8904
8905      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or if we know that it will be a
	 nop, by patching the displacement ".+12" into ".+16".  */
8908      if (annul || ! final_sequence)
8909        p[3] = '6';
8910      p += 12;
8911      if (veryfar)
8912	{
8913	  strcpy (p, "b\t");
8914	  p += 2;
8915	}
8916      else
8917	{
8918	  strcpy (p, "ba,pt\t%%xcc, ");
8919	  p += 13;
8920	}
8921    }
8922  *p++ = '%';
8923  *p++ = 'l';
8924  *p++ = '0' + label;
8925  *p++ = '%';
8926  *p++ = '#';
8927  *p = '\0';
8928
8929  return string;
8930}
8931
8932/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8933   Such instructions cannot be used in the delay slot of return insn on v9.
8934   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8935 */
8936
8937static int
8938epilogue_renumber (register rtx *where, int test)
8939{
8940  register const char *fmt;
8941  register int i;
8942  register enum rtx_code code;
8943
8944  if (*where == 0)
8945    return 0;
8946
8947  code = GET_CODE (*where);
8948
8949  switch (code)
8950    {
8951    case REG:
8952      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
8953	return 1;
8954      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8955	{
8956	  if (ORIGINAL_REGNO (*where))
8957	    {
8958	      rtx n = gen_raw_REG (GET_MODE (*where),
8959				   OUTGOING_REGNO (REGNO (*where)));
8960	      ORIGINAL_REGNO (n) = ORIGINAL_REGNO (*where);
8961	      *where = n;
8962	    }
8963	  else
8964	    *where = gen_rtx_REG (GET_MODE (*where),
8965				  OUTGOING_REGNO (REGNO (*where)));
8966	}
8967      return 0;
8968
8969    case SCRATCH:
8970    case CC0:
8971    case PC:
8972    case CONST_INT:
8973    case CONST_WIDE_INT:
8974    case CONST_DOUBLE:
8975      return 0;
8976
8977      /* Do not replace the frame pointer with the stack pointer because
8978	 it can cause the delayed instruction to load below the stack.
8979	 This occurs when instructions like:
8980
8981	 (set (reg/i:SI 24 %i0)
8982	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8983                       (const_int -20 [0xffffffec])) 0))
8984
8985	 are in the return delayed slot.  */
8986    case PLUS:
8987      if (GET_CODE (XEXP (*where, 0)) == REG
8988	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8989	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8990	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8991	return 1;
8992      break;
8993
8994    case MEM:
8995      if (SPARC_STACK_BIAS
8996	  && GET_CODE (XEXP (*where, 0)) == REG
8997	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8998	return 1;
8999      break;
9000
9001    default:
9002      break;
9003    }
9004
9005  fmt = GET_RTX_FORMAT (code);
9006
9007  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9008    {
9009      if (fmt[i] == 'E')
9010	{
9011	  register int j;
9012	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
9013	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
9014	      return 1;
9015	}
9016      else if (fmt[i] == 'e'
9017	       && epilogue_renumber (&(XEXP (*where, i)), test))
9018	return 1;
9019    }
9020  return 0;
9021}
9022
9023/* Leaf functions and non-leaf functions have different needs.  */
9024
9025static const int
9026reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
9027
9028static const int
9029reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
9030
9031static const int *const reg_alloc_orders[] = {
9032  reg_leaf_alloc_order,
9033  reg_nonleaf_alloc_order};
9034
9035void
9036order_regs_for_local_alloc (void)
9037{
9038  static int last_order_nonleaf = 1;
9039
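  /* Hard register 15 is %o7, which is written by every call instruction,
     so it is live exactly in functions that are not leaves.  */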
9040  if (df_regs_ever_live_p (15) != last_order_nonleaf)
9041    {
9042      last_order_nonleaf = !last_order_nonleaf;
9043      memcpy ((char *) reg_alloc_order,
9044	      (const char *) reg_alloc_orders[last_order_nonleaf],
9045	      FIRST_PSEUDO_REGISTER * sizeof (int));
9046    }
9047}
9048
9049/* Return 1 if REG and MEM are legitimate enough to allow the various
9050   MEM<-->REG splits to be run.  */
9051
9052int
9053sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9054{
9055  /* Punt if we are here by mistake.  */
9056  gcc_assert (reload_completed);
9057
9058  /* We must have an offsettable memory reference.  */
9059  if (!offsettable_memref_p (mem))
9060    return 0;
9061
9062  /* If we have legitimate args for ldd/std, we do not want
9063     the split to happen.  */
9064  if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9065    return 0;
9066
9067  /* Success.  */
9068  return 1;
9069}
9070
9071/* Split a REG <-- MEM move into a pair of moves in MODE.  */
9072
9073void
9074sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9075{
9076  rtx high_part = gen_highpart (mode, dest);
9077  rtx low_part = gen_lowpart (mode, dest);
9078  rtx word0 = adjust_address (src, mode, 0);
9079  rtx word1 = adjust_address (src, mode, 4);
9080
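  /* If the first destination register appears in the address of the
     second word, do the loads in the opposite order so the address
     survives the first move.  */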
9081  if (reg_overlap_mentioned_p (high_part, word1))
9082    {
9083      emit_move_insn_1 (low_part, word1);
9084      emit_move_insn_1 (high_part, word0);
9085    }
9086  else
9087    {
9088      emit_move_insn_1 (high_part, word0);
9089      emit_move_insn_1 (low_part, word1);
9090    }
9091}
9092
9093/* Split a MEM <-- REG move into a pair of moves in MODE.  */
9094
9095void
9096sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9097{
9098  rtx word0 = adjust_address (dest, mode, 0);
9099  rtx word1 = adjust_address (dest, mode, 4);
9100  rtx high_part = gen_highpart (mode, src);
9101  rtx low_part = gen_lowpart (mode, src);
9102
9103  emit_move_insn_1 (word0, high_part);
9104  emit_move_insn_1 (word1, low_part);
9105}
9106
9107/* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves.  */
9108
9109int
9110sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9111{
9112  /* Punt if we are here by mistake.  */
9113  gcc_assert (reload_completed);
9114
9115  if (GET_CODE (reg1) == SUBREG)
9116    reg1 = SUBREG_REG (reg1);
9117  if (GET_CODE (reg1) != REG)
9118    return 0;
9119  const int regno1 = REGNO (reg1);
9120
9121  if (GET_CODE (reg2) == SUBREG)
9122    reg2 = SUBREG_REG (reg2);
9123  if (GET_CODE (reg2) != REG)
9124    return 0;
9125  const int regno2 = REGNO (reg2);
9126
9127  if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9128    return 1;
9129
9130  if (TARGET_VIS3)
9131    {
9132      if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9133	  || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9134	return 1;
9135    }
9136
9137  return 0;
9138}
9139
9140/* Split a REG <--> REG move into a pair of moves in MODE.  */
9141
9142void
9143sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9144{
9145  rtx dest1 = gen_highpart (mode, dest);
9146  rtx dest2 = gen_lowpart (mode, dest);
9147  rtx src1 = gen_highpart (mode, src);
9148  rtx src2 = gen_lowpart (mode, src);
9149
9150  /* Now emit using the real source and destination we found, swapping
9151     the order if we detect overlap.  */
9152  if (reg_overlap_mentioned_p (dest1, src2))
9153    {
9154      emit_move_insn_1 (dest2, src2);
9155      emit_move_insn_1 (dest1, src1);
9156    }
9157  else
9158    {
9159      emit_move_insn_1 (dest1, src1);
9160      emit_move_insn_1 (dest2, src2);
9161    }
9162}
9163
9164/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9165   This makes them candidates for using ldd and std insns.
9166
9167   Note reg1 and reg2 *must* be hard registers.  */
9168
9169int
9170registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9171{
9172  /* We might have been passed a SUBREG.  */
9173  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9174    return 0;
9175
9176  if (REGNO (reg1) % 2 != 0)
9177    return 0;
9178
9179  /* Integer ldd is deprecated in SPARC V9 */
9180  if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9181    return 0;
9182
9183  return (REGNO (reg1) == REGNO (reg2) - 1);
9184}
9185
9186/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9187   an ldd or std insn.
9188
9189   This can only happen when addr1 and addr2, the addresses in mem1
9190   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9191   addr1 must also be aligned on a 64-bit boundary.
9192
9193   Also iff dependent_reg_rtx is not null it should not be used to
9194   compute the address for mem1, i.e. we cannot optimize a sequence
9195   like:
9196   	ld [%o0], %o0
9197	ld [%o0 + 4], %o1
9198   to
9199   	ldd [%o0], %o0
9200   nor:
9201	ld [%g3 + 4], %g3
9202	ld [%g3], %g2
9203   to
9204        ldd [%g3], %g2
9205
9206   But, note that the transformation from:
9207	ld [%g2 + 4], %g3
9208        ld [%g2], %g2
9209   to
9210	ldd [%g2], %g2
9211   is perfectly fine.  Thus, the peephole2 patterns always pass us
9212   the destination register of the first load, never the second one.
9213
9214   For stores we don't have a similar problem, so dependent_reg_rtx is
9215   NULL_RTX.  */
9216
9217int
9218mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9219{
9220  rtx addr1, addr2;
9221  unsigned int reg1;
9222  HOST_WIDE_INT offset1;
9223
9224  /* The mems cannot be volatile.  */
9225  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9226    return 0;
9227
9228  /* MEM1 should be aligned on a 64-bit boundary.  */
9229  if (MEM_ALIGN (mem1) < 64)
9230    return 0;
9231
9232  addr1 = XEXP (mem1, 0);
9233  addr2 = XEXP (mem2, 0);
9234
9235  /* Extract a register number and offset (if used) from the first addr.  */
9236  if (GET_CODE (addr1) == PLUS)
9237    {
9238      /* If not a REG, return zero.  */
9239      if (GET_CODE (XEXP (addr1, 0)) != REG)
9240	return 0;
9241      else
9242	{
9243          reg1 = REGNO (XEXP (addr1, 0));
9244	  /* The offset must be constant!  */
9245	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9246            return 0;
9247          offset1 = INTVAL (XEXP (addr1, 1));
9248	}
9249    }
9250  else if (GET_CODE (addr1) != REG)
9251    return 0;
9252  else
9253    {
9254      reg1 = REGNO (addr1);
9255      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
9256      offset1 = 0;
9257    }
9258
  /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
9260  if (GET_CODE (addr2) != PLUS)
9261    return 0;
9262
9263  if (GET_CODE (XEXP (addr2, 0)) != REG
9264      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9265    return 0;
9266
9267  if (reg1 != REGNO (XEXP (addr2, 0)))
9268    return 0;
9269
9270  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9271    return 0;
9272
9273  /* The first offset must be evenly divisible by 8 to ensure the
9274     address is 64-bit aligned.  */
9275  if (offset1 % 8 != 0)
9276    return 0;
9277
9278  /* The offset for the second addr must be 4 more than the first addr.  */
9279  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9280    return 0;
9281
9282  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
9283     instructions.  */
9284  return 1;
9285}
9286
9287/* Return the widened memory access made of MEM1 and MEM2 in MODE.  */
9288
9289rtx
9290widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9291{
9292  rtx x = widen_memory_access (mem1, mode, 0);
9293  MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9294  return x;
9295}
9296
9297/* Return 1 if reg is a pseudo, or is the first register in
9298   a hard register pair.  This makes it suitable for use in
9299   ldd and std insns.  */
9300
9301int
9302register_ok_for_ldd (rtx reg)
9303{
9304  /* We might have been passed a SUBREG.  */
9305  if (!REG_P (reg))
9306    return 0;
9307
9308  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9309    return (REGNO (reg) % 2 == 0);
9310
9311  return 1;
9312}
9313
9314/* Return 1 if OP, a MEM, has an address which is known to be
9315   aligned to an 8-byte boundary.  */
9316
9317int
9318memory_ok_for_ldd (rtx op)
9319{
9320  /* In 64-bit mode, we assume that the address is word-aligned.  */
9321  if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9322    return 0;
9323
9324  if (! can_create_pseudo_p ()
9325      && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9326    return 0;
9327
9328  return 1;
9329}
9330
9331/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
9332
9333static bool
9334sparc_print_operand_punct_valid_p (unsigned char code)
9335{
9336  if (code == '#'
9337      || code == '*'
9338      || code == '('
9339      || code == ')'
9340      || code == '_'
9341      || code == '&')
9342    return true;
9343
9344  return false;
9345}
9346
9347/* Implement TARGET_PRINT_OPERAND.
9348   Print operand X (an rtx) in assembler syntax to file FILE.
9349   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9350   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
9351
9352static void
9353sparc_print_operand (FILE *file, rtx x, int code)
9354{
9355  const char *s;
9356
9357  switch (code)
9358    {
9359    case '#':
9360      /* Output an insn in a delay slot.  */
9361      if (final_sequence)
9362        sparc_indent_opcode = 1;
9363      else
9364	fputs ("\n\t nop", file);
9365      return;
9366    case '*':
9367      /* Output an annul flag if there's nothing for the delay slot and we
9368	 are optimizing.  This is always used with '(' below.
9369         Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9370	 this is a dbx bug.  So, we only do this when optimizing.
9371         On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9372	 Always emit a nop in case the next instruction is a branch.  */
9373      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9374	fputs (",a", file);
9375      return;
9376    case '(':
9377      /* Output a 'nop' if there's nothing for the delay slot and we are
9378	 not optimizing.  This is always used with '*' above.  */
9379      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9380	fputs ("\n\t nop", file);
9381      else if (final_sequence)
9382        sparc_indent_opcode = 1;
9383      return;
9384    case ')':
9385      /* Output the right displacement from the saved PC on function return.
9386	 The caller may have placed an "unimp" insn immediately after the call
9387	 so we have to account for it.  This insn is used in the 32-bit ABI
9388	 when calling a function that returns a non zero-sized structure.  The
9389	 64-bit ABI doesn't have it.  Be careful to have this test be the same
9390	 as that for the call.  The exception is when sparc_std_struct_return
9391	 is enabled, the psABI is followed exactly and the adjustment is made
9392	 by the code in sparc_struct_value_rtx.  The call emitted is the same
9393	 when sparc_std_struct_return is enabled. */
9394     if (!TARGET_ARCH64
9395	 && cfun->returns_struct
9396	 && !sparc_std_struct_return
9397	 && DECL_SIZE (DECL_RESULT (current_function_decl))
9398	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9399	     == INTEGER_CST
9400	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9401	fputs ("12", file);
9402      else
9403        fputc ('8', file);
9404      return;
9405    case '_':
9406      /* Output the Embedded Medium/Anywhere code model base register.  */
9407      fputs (EMBMEDANY_BASE_REG, file);
9408      return;
9409    case '&':
9410      /* Print some local dynamic TLS name.  */
9411      if (const char *name = get_some_local_dynamic_name ())
9412	assemble_name (file, name);
9413      else
9414	output_operand_lossage ("'%%&' used without any "
9415				"local dynamic TLS references");
9416      return;
9417
9418    case 'Y':
9419      /* Adjust the operand to take into account a RESTORE operation.  */
9420      if (GET_CODE (x) == CONST_INT)
9421	break;
9422      else if (GET_CODE (x) != REG)
9423	output_operand_lossage ("invalid %%Y operand");
9424      else if (REGNO (x) < 8)
9425	fputs (reg_names[REGNO (x)], file);
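      /* Registers 24-31 are the %i registers; after the RESTORE their
	 values are found in the corresponding %o register, 16 hard
	 register numbers below.  */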
9426      else if (REGNO (x) >= 24 && REGNO (x) < 32)
9427	fputs (reg_names[REGNO (x)-16], file);
9428      else
9429	output_operand_lossage ("invalid %%Y operand");
9430      return;
9431    case 'L':
9432      /* Print out the low order register name of a register pair.  */
9433      if (WORDS_BIG_ENDIAN)
9434	fputs (reg_names[REGNO (x)+1], file);
9435      else
9436	fputs (reg_names[REGNO (x)], file);
9437      return;
9438    case 'H':
9439      /* Print out the high order register name of a register pair.  */
9440      if (WORDS_BIG_ENDIAN)
9441	fputs (reg_names[REGNO (x)], file);
9442      else
9443	fputs (reg_names[REGNO (x)+1], file);
9444      return;
9445    case 'R':
9446      /* Print out the second register name of a register pair or quad.
9447	 I.e., R (%o0) => %o1.  */
9448      fputs (reg_names[REGNO (x)+1], file);
9449      return;
9450    case 'S':
9451      /* Print out the third register name of a register quad.
9452	 I.e., S (%o0) => %o2.  */
9453      fputs (reg_names[REGNO (x)+2], file);
9454      return;
9455    case 'T':
9456      /* Print out the fourth register name of a register quad.
9457	 I.e., T (%o0) => %o3.  */
9458      fputs (reg_names[REGNO (x)+3], file);
9459      return;
9460    case 'x':
9461      /* Print a condition code register.  */
9462      if (REGNO (x) == SPARC_ICC_REG)
9463	{
9464	  switch (GET_MODE (x))
9465	    {
9466	    case E_CCmode:
9467	    case E_CCNZmode:
9468	    case E_CCCmode:
9469	    case E_CCVmode:
9470	      s = "%icc";
9471	      break;
9472	    case E_CCXmode:
9473	    case E_CCXNZmode:
9474	    case E_CCXCmode:
9475	    case E_CCXVmode:
9476	      s = "%xcc";
9477	      break;
9478	    default:
9479	      gcc_unreachable ();
9480	    }
9481	  fputs (s, file);
9482	}
9483      else
9484	/* %fccN register */
9485	fputs (reg_names[REGNO (x)], file);
9486      return;
9487    case 'm':
9488      /* Print the operand's address only.  */
9489      output_address (GET_MODE (x), XEXP (x, 0));
9490      return;
9491    case 'r':
9492      /* In this case we need a register.  Use %g0 if the
9493	 operand is const0_rtx.  */
9494      if (x == const0_rtx
9495	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9496	{
9497	  fputs ("%g0", file);
9498	  return;
9499	}
9500      else
9501	break;
9502
9503    case 'A':
9504      switch (GET_CODE (x))
9505	{
9506	case IOR:
9507	  s = "or";
9508	  break;
9509	case AND:
9510	  s = "and";
9511	  break;
9512	case XOR:
9513	  s = "xor";
9514	  break;
9515	default:
9516	  output_operand_lossage ("invalid %%A operand");
9517	  s = "";
9518	  break;
9519	}
9520      fputs (s, file);
9521      return;
9522
9523    case 'B':
9524      switch (GET_CODE (x))
9525	{
9526	case IOR:
9527	  s = "orn";
9528	  break;
9529	case AND:
9530	  s = "andn";
9531	  break;
9532	case XOR:
9533	  s = "xnor";
9534	  break;
9535	default:
9536	  output_operand_lossage ("invalid %%B operand");
9537	  s = "";
9538	  break;
9539	}
9540      fputs (s, file);
9541      return;
9542
9543      /* This is used by the conditional move instructions.  */
9544    case 'C':
9545      {
9546	machine_mode mode = GET_MODE (XEXP (x, 0));
9547	switch (GET_CODE (x))
9548	  {
9549	  case NE:
9550	    if (mode == CCVmode || mode == CCXVmode)
9551	      s = "vs";
9552	    else
9553	      s = "ne";
9554	    break;
9555	  case EQ:
9556	    if (mode == CCVmode || mode == CCXVmode)
9557	      s = "vc";
9558	    else
9559	      s = "e";
9560	    break;
9561	  case GE:
9562	    if (mode == CCNZmode || mode == CCXNZmode)
9563	      s = "pos";
9564	    else
9565	      s = "ge";
9566	    break;
9567	  case GT:
9568	    s = "g";
9569	    break;
9570	  case LE:
9571	    s = "le";
9572	    break;
9573	  case LT:
9574	    if (mode == CCNZmode || mode == CCXNZmode)
9575	      s = "neg";
9576	    else
9577	      s = "l";
9578	    break;
9579	  case GEU:
9580	    s = "geu";
9581	    break;
9582	  case GTU:
9583	    s = "gu";
9584	    break;
9585	  case LEU:
9586	    s = "leu";
9587	    break;
9588	  case LTU:
9589	    s = "lu";
9590	    break;
9591	  case LTGT:
9592	    s = "lg";
9593	    break;
9594	  case UNORDERED:
9595	    s = "u";
9596	    break;
9597	  case ORDERED:
9598	    s = "o";
9599	    break;
9600	  case UNLT:
9601	    s = "ul";
9602	    break;
9603	  case UNLE:
9604	    s = "ule";
9605	    break;
9606	  case UNGT:
9607	    s = "ug";
9608	    break;
	  case UNGE:
	    s = "uge";
	    break;
9612	  case UNEQ:
9613	    s = "ue";
9614	    break;
9615	  default:
9616	    output_operand_lossage ("invalid %%C operand");
9617	    s = "";
9618	    break;
9619	  }
9620	fputs (s, file);
9621	return;
9622      }
9623
      /* These are used by the movr instruction pattern.  */
9625    case 'D':
9626      {
9627	switch (GET_CODE (x))
9628	  {
9629	  case NE:
9630	    s = "ne";
9631	    break;
9632	  case EQ:
9633	    s = "e";
9634	    break;
9635	  case GE:
9636	    s = "gez";
9637	    break;
9638	  case LT:
9639	    s = "lz";
9640	    break;
9641	  case LE:
9642	    s = "lez";
9643	    break;
9644	  case GT:
9645	    s = "gz";
9646	    break;
9647	  default:
9648	    output_operand_lossage ("invalid %%D operand");
9649	    s = "";
9650	    break;
9651	  }
9652	fputs (s, file);
9653	return;
9654      }
9655
9656    case 'b':
9657      {
9658	/* Print a sign-extended character.  */
9659	int i = trunc_int_for_mode (INTVAL (x), QImode);
9660	fprintf (file, "%d", i);
9661	return;
9662      }
9663
9664    case 'f':
9665      /* Operand must be a MEM; write its address.  */
9666      if (GET_CODE (x) != MEM)
9667	output_operand_lossage ("invalid %%f operand");
9668      output_address (GET_MODE (x), XEXP (x, 0));
9669      return;
9670
9671    case 's':
9672      {
9673	/* Print a sign-extended 32-bit value.  */
9674	HOST_WIDE_INT i;
	if (GET_CODE (x) == CONST_INT)
9676	  i = INTVAL (x);
9677	else
9678	  {
9679	    output_operand_lossage ("invalid %%s operand");
9680	    return;
9681	  }
9682	i = trunc_int_for_mode (i, SImode);
9683	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9684	return;
9685      }
9686
9687    case 0:
9688      /* Do nothing special.  */
9689      break;
9690
9691    default:
9692      /* Undocumented flag.  */
9693      output_operand_lossage ("invalid operand output code");
9694    }
9695
9696  if (GET_CODE (x) == REG)
9697    fputs (reg_names[REGNO (x)], file);
9698  else if (GET_CODE (x) == MEM)
9699    {
9700      fputc ('[', file);
      /* Poor Sun assembler doesn't understand absolute addressing.  */
9702      if (CONSTANT_P (XEXP (x, 0)))
9703	fputs ("%g0+", file);
9704      output_address (GET_MODE (x), XEXP (x, 0));
9705      fputc (']', file);
9706    }
9707  else if (GET_CODE (x) == HIGH)
9708    {
9709      fputs ("%hi(", file);
9710      output_addr_const (file, XEXP (x, 0));
9711      fputc (')', file);
9712    }
9713  else if (GET_CODE (x) == LO_SUM)
9714    {
9715      sparc_print_operand (file, XEXP (x, 0), 0);
9716      if (TARGET_CM_MEDMID)
9717	fputs ("+%l44(", file);
9718      else
9719	fputs ("+%lo(", file);
9720      output_addr_const (file, XEXP (x, 1));
9721      fputc (')', file);
9722    }
9723  else if (GET_CODE (x) == CONST_DOUBLE)
9724    output_operand_lossage ("floating-point constant not a valid immediate operand");
9725  else
9726    output_addr_const (file, x);
9727}
9728
9729/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
9730
9731static void
9732sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9733{
9734  register rtx base, index = 0;
9735  int offset = 0;
9736  register rtx addr = x;
9737
9738  if (REG_P (addr))
9739    fputs (reg_names[REGNO (addr)], file);
9740  else if (GET_CODE (addr) == PLUS)
9741    {
9742      if (CONST_INT_P (XEXP (addr, 0)))
9743	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9744      else if (CONST_INT_P (XEXP (addr, 1)))
9745	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9746      else
9747	base = XEXP (addr, 0), index = XEXP (addr, 1);
9748      if (GET_CODE (base) == LO_SUM)
9749	{
9750	  gcc_assert (USE_AS_OFFSETABLE_LO10
9751		      && TARGET_ARCH64
9752		      && ! TARGET_CM_MEDMID);
9753	  output_operand (XEXP (base, 0), 0);
9754	  fputs ("+%lo(", file);
9755	  output_address (VOIDmode, XEXP (base, 1));
9756	  fprintf (file, ")+%d", offset);
9757	}
9758      else
9759	{
9760	  fputs (reg_names[REGNO (base)], file);
9761	  if (index == 0)
9762	    fprintf (file, "%+d", offset);
9763	  else if (REG_P (index))
9764	    fprintf (file, "+%s", reg_names[REGNO (index)]);
9765	  else if (GET_CODE (index) == SYMBOL_REF
9766		   || GET_CODE (index) == LABEL_REF
9767		   || GET_CODE (index) == CONST)
9768	    fputc ('+', file), output_addr_const (file, index);
9769	  else gcc_unreachable ();
9770	}
9771    }
9772  else if (GET_CODE (addr) == MINUS
9773	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9774    {
9775      output_addr_const (file, XEXP (addr, 0));
9776      fputs ("-(", file);
9777      output_addr_const (file, XEXP (addr, 1));
9778      fputs ("-.)", file);
9779    }
9780  else if (GET_CODE (addr) == LO_SUM)
9781    {
9782      output_operand (XEXP (addr, 0), 0);
9783      if (TARGET_CM_MEDMID)
9784        fputs ("+%l44(", file);
9785      else
9786        fputs ("+%lo(", file);
9787      output_address (VOIDmode, XEXP (addr, 1));
9788      fputc (')', file);
9789    }
9790  else if (flag_pic
9791	   && GET_CODE (addr) == CONST
9792	   && GET_CODE (XEXP (addr, 0)) == MINUS
9793	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9794	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9795	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9796    {
9797      addr = XEXP (addr, 0);
9798      output_addr_const (file, XEXP (addr, 0));
9799      /* Group the args of the second CONST in parenthesis.  */
9800      fputs ("-(", file);
9801      /* Skip past the second CONST--it does nothing for us.  */
9802      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9803      /* Close the parenthesis.  */
9804      fputc (')', file);
9805    }
9806  else
9807    {
9808      output_addr_const (file, addr);
9809    }
9810}
9811
9812/* Target hook for assembling integer objects.  The sparc version has
9813   special handling for aligned DI-mode objects.  */
9814
9815static bool
9816sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9817{
9818  /* ??? We only output .xword's for symbols and only then in environments
9819     where the assembler can handle them.  */
9820  if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9821    {
9822      if (TARGET_V9)
9823	{
9824	  assemble_integer_with_op ("\t.xword\t", x);
9825	  return true;
9826	}
9827      else
9828	{
9829	  assemble_aligned_integer (4, const0_rtx);
9830	  assemble_aligned_integer (4, x);
9831	  return true;
9832	}
9833    }
9834  return default_assemble_integer (x, size, aligned_p);
9835}
9836
9837/* Return the value of a code used in the .proc pseudo-op that says
9838   what kind of result this function returns.  For non-C types, we pick
9839   the closest C type.  */
9840
9841#ifndef SHORT_TYPE_SIZE
9842#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9843#endif
9844
9845#ifndef INT_TYPE_SIZE
9846#define INT_TYPE_SIZE BITS_PER_WORD
9847#endif
9848
9849#ifndef LONG_TYPE_SIZE
9850#define LONG_TYPE_SIZE BITS_PER_WORD
9851#endif
9852
9853#ifndef LONG_LONG_TYPE_SIZE
9854#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9855#endif
9856
9857#ifndef FLOAT_TYPE_SIZE
9858#define FLOAT_TYPE_SIZE BITS_PER_WORD
9859#endif
9860
9861#ifndef DOUBLE_TYPE_SIZE
9862#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9863#endif
9864
9865#ifndef LONG_DOUBLE_TYPE_SIZE
9866#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9867#endif
9868
9869unsigned long
9870sparc_type_code (register tree type)
9871{
9872  register unsigned long qualifiers = 0;
9873  register unsigned shift;
9874
9875  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
9876     setting more, since some assemblers will give an error for this.  Also,
9877     we must be careful to avoid shifts of 32 bits or more to avoid getting
9878     unpredictable results.  */
9879
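  /* For example, "unsigned int *" encodes as (1 << 6) | 14: a pointer
     qualifier at the first level plus the unsigned-int base code.  */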
9880  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9881    {
9882      switch (TREE_CODE (type))
9883	{
9884	case ERROR_MARK:
9885	  return qualifiers;
9886
9887	case ARRAY_TYPE:
9888	  qualifiers |= (3 << shift);
9889	  break;
9890
9891	case FUNCTION_TYPE:
9892	case METHOD_TYPE:
9893	  qualifiers |= (2 << shift);
9894	  break;
9895
9896	case POINTER_TYPE:
9897	case REFERENCE_TYPE:
9898	case OFFSET_TYPE:
9899	  qualifiers |= (1 << shift);
9900	  break;
9901
9902	case RECORD_TYPE:
9903	  return (qualifiers | 8);
9904
9905	case UNION_TYPE:
9906	case QUAL_UNION_TYPE:
9907	  return (qualifiers | 9);
9908
9909	case ENUMERAL_TYPE:
9910	  return (qualifiers | 10);
9911
9912	case VOID_TYPE:
9913	  return (qualifiers | 16);
9914
9915	case INTEGER_TYPE:
9916	  /* If this is a range type, consider it to be the underlying
9917	     type.  */
9918	  if (TREE_TYPE (type) != 0)
9919	    break;
9920
9921	  /* Carefully distinguish all the standard types of C,
9922	     without messing up if the language is not C.  We do this by
9923	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
9924	     look at both the names and the above fields, but that's redundant.
9925	     Any type whose size is between two C types will be considered
9926	     to be the wider of the two types.  Also, we do not have a
9927	     special code to use for "long long", so anything wider than
9928	     long is treated the same.  Note that we can't distinguish
9929	     between "int" and "long" in this code if they are the same
9930	     size, but that's fine, since neither can the assembler.  */
9931
9932	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9933	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9934
9935	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9936	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9937
9938	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9939	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9940
9941	  else
9942	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9943
9944	case REAL_TYPE:
9945	  /* If this is a range type, consider it to be the underlying
9946	     type.  */
9947	  if (TREE_TYPE (type) != 0)
9948	    break;
9949
9950	  /* Carefully distinguish all the standard types of C,
9951	     without messing up if the language is not C.  */
9952
9953	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9954	    return (qualifiers | 6);
9955
9956	  else
9957	    return (qualifiers | 7);
9958
9959	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
9960	  /* ??? We need to distinguish between double and float complex types,
9961	     but I don't know how yet because I can't reach this code from
9962	     existing front-ends.  */
9963	  return (qualifiers | 7);	/* Who knows? */
9964
9965	case VECTOR_TYPE:
9966	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
9967	case LANG_TYPE:
9968	case NULLPTR_TYPE:
9969	  return qualifiers;
9970
9971	default:
9972	  gcc_unreachable ();		/* Not a type! */
9973        }
9974    }
9975
9976  return qualifiers;
9977}
9978
9979/* Nested function support.  */
9980
9981/* Emit RTL insns to initialize the variable parts of a trampoline.
9982   FNADDR is an RTX for the address of the function's pure code.
9983   CXT is an RTX for the static chain value for the function.
9984
9985   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9986   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9987   (to store insns).  This is a bit excessive.  Perhaps a different
9988   mechanism would be better here.
9989
9990   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
9991
9992static void
9993sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9994{
9995  /* SPARC 32-bit trampoline:
9996
9997 	sethi	%hi(fn), %g1
9998 	sethi	%hi(static), %g2
9999 	jmp	%g1+%lo(fn)
10000 	or	%g2, %lo(static), %g2
10001
10002    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
10003    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
10004   */
10005
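  /* The constants below are the four instructions above with zeroed
     immediate fields: 0x03000000 is sethi 0, %g1; 0x05000000 is
     sethi 0, %g2; 0x81c06000 is jmp %g1+0; 0x8410a000 is or %g2, 0, %g2.
     The pieces of FNADDR and CXT are OR-ed into them below.  */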
10006  emit_move_insn
10007    (adjust_address (m_tramp, SImode, 0),
10008     expand_binop (SImode, ior_optab,
10009		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
10010		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
10011		   NULL_RTX, 1, OPTAB_DIRECT));
10012
10013  emit_move_insn
10014    (adjust_address (m_tramp, SImode, 4),
10015     expand_binop (SImode, ior_optab,
10016		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
10017		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
10018		   NULL_RTX, 1, OPTAB_DIRECT));
10019
10020  emit_move_insn
10021    (adjust_address (m_tramp, SImode, 8),
10022     expand_binop (SImode, ior_optab,
10023		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
10024		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
10025		   NULL_RTX, 1, OPTAB_DIRECT));
10026
10027  emit_move_insn
10028    (adjust_address (m_tramp, SImode, 12),
10029     expand_binop (SImode, ior_optab,
10030		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
10031		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
10032		   NULL_RTX, 1, OPTAB_DIRECT));
10033
10034  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
10035     aligned on a 16 byte boundary so one flush clears it all.  */
10036  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
10037  if (sparc_cpu != PROCESSOR_ULTRASPARC
10038      && sparc_cpu != PROCESSOR_ULTRASPARC3
10039      && sparc_cpu != PROCESSOR_NIAGARA
10040      && sparc_cpu != PROCESSOR_NIAGARA2
10041      && sparc_cpu != PROCESSOR_NIAGARA3
10042      && sparc_cpu != PROCESSOR_NIAGARA4
10043      && sparc_cpu != PROCESSOR_NIAGARA7
10044      && sparc_cpu != PROCESSOR_M8)
10045    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
10046
10047  /* Call __enable_execute_stack after writing onto the stack to make sure
10048     the stack address is accessible.  */
10049#ifdef HAVE_ENABLE_EXECUTE_STACK
10050  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10051                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10052#endif
10053
10054}
10055
10056/* The 64-bit version is simpler because it makes more sense to load the
10057   values as "immediate" data out of the trampoline.  It's also easier since
10058   we can read the PC without clobbering a register.  */
10059
10060static void
10061sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10062{
10063  /* SPARC 64-bit trampoline:
10064
10065	rd	%pc, %g1
10066	ldx	[%g1+24], %g5
10067	jmp	%g5
10068	ldx	[%g1+16], %g5
10069	+16 bytes data
10070   */
10071
10072  emit_move_insn (adjust_address (m_tramp, SImode, 0),
10073		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10074  emit_move_insn (adjust_address (m_tramp, SImode, 4),
10075		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10076  emit_move_insn (adjust_address (m_tramp, SImode, 8),
10077		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10078  emit_move_insn (adjust_address (m_tramp, SImode, 12),
10079		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10080  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10081  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10082  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10083
10084  if (sparc_cpu != PROCESSOR_ULTRASPARC
10085      && sparc_cpu != PROCESSOR_ULTRASPARC3
10086      && sparc_cpu != PROCESSOR_NIAGARA
10087      && sparc_cpu != PROCESSOR_NIAGARA2
10088      && sparc_cpu != PROCESSOR_NIAGARA3
10089      && sparc_cpu != PROCESSOR_NIAGARA4
10090      && sparc_cpu != PROCESSOR_NIAGARA7
10091      && sparc_cpu != PROCESSOR_M8)
10092    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10093
10094  /* Call __enable_execute_stack after writing onto the stack to make sure
10095     the stack address is accessible.  */
10096#ifdef HAVE_ENABLE_EXECUTE_STACK
10097  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10098                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10099#endif
10100}
10101
10102/* Worker for TARGET_TRAMPOLINE_INIT.  */
10103
10104static void
10105sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10106{
10107  rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10108  cxt = force_reg (Pmode, cxt);
10109  if (TARGET_ARCH64)
10110    sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10111  else
10112    sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10113}
10114
10115/* Adjust the cost of a scheduling dependency.  Return the new cost of
10116   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
10117
10118static int
10119supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10120			int cost)
10121{
10122  enum attr_type insn_type;
10123
10124  if (recog_memoized (insn) < 0)
10125    return cost;
10126
10127  insn_type = get_attr_type (insn);
10128
10129  if (dep_type == 0)
10130    {
10131      /* Data dependency; DEP_INSN writes a register that INSN reads some
10132	 cycles later.  */
10133
      /* If a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we can't
	 tell here.  */
10138      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10139	return cost + 3;
10140
10141      /* Get the delay only if the address of the store is the dependence.  */
10142      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10143	{
	  rtx pat = PATTERN (insn);
10145	  rtx dep_pat = PATTERN (dep_insn);
10146
10147	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10148	    return cost;  /* This should not happen!  */
10149
10150	  /* The dependency between the two instructions was on the data that
10151	     is being stored.  Assume that this implies that the address of the
10152	     store is not dependent.  */
10153	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10154	    return cost;
10155
10156	  return cost + 3;  /* An approximation.  */
10157	}
10158
10159      /* A shift instruction cannot receive its data from an instruction
10160	 in the same cycle; add a one cycle penalty.  */
10161      if (insn_type == TYPE_SHIFT)
10162	return cost + 3;   /* Split before cascade into shift.  */
10163    }
10164  else
10165    {
10166      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10167	 INSN writes some cycles later.  */
10168
10169      /* These are only significant for the fpu unit; writing a fp reg before
10170         the fpu has finished with it stalls the processor.  */
10171
10172      /* Reusing an integer register causes no problems.  */
10173      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10174	return 0;
10175    }
10176
10177  return cost;
10178}
10179
10180static int
10181hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10182			int cost)
10183{
10184  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN (insn);
10186  rtx dep_pat = PATTERN (dep_insn);
10187
10188  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10189    return cost;
10190
10191  insn_type = get_attr_type (insn);
10192  dep_type = get_attr_type (dep_insn);
10193
10194  switch (dtype)
10195    {
10196    case 0:
10197      /* Data dependency; DEP_INSN writes a register that INSN reads some
10198	 cycles later.  */
10199
10200      switch (insn_type)
10201	{
10202	case TYPE_STORE:
10203	case TYPE_FPSTORE:
10204	  /* Get the delay iff the address of the store is the dependence.  */
10205	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10206	    return cost;
10207
10208	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10209	    return cost;
10210	  return cost + 3;
10211
10212	case TYPE_LOAD:
10213	case TYPE_SLOAD:
10214	case TYPE_FPLOAD:
10215	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency.  */
10217	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10218	    {
10219	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10220		  || GET_CODE (SET_DEST (dep_pat)) != MEM
10221		  || GET_CODE (SET_SRC (pat)) != MEM
10222		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10223				    XEXP (SET_SRC (pat), 0)))
10224		return cost + 2;
10225
10226	      return cost + 8;
10227	    }
10228	  break;
10229
10230	case TYPE_BRANCH:
10231	  /* Compare to branch latency is 0.  There is no benefit from
10232	     separating compare and branch.  */
10233	  if (dep_type == TYPE_COMPARE)
10234	    return 0;
10235	  /* Floating point compare to branch latency is less than
10236	     compare to conditional move.  */
10237	  if (dep_type == TYPE_FPCMP)
10238	    return cost - 1;
10239	  break;
10240	default:
10241	  break;
10242	}
10243	break;
10244
10245    case REG_DEP_ANTI:
10246      /* Anti-dependencies only penalize the fpu unit.  */
10247      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10248        return 0;
10249      break;
10250
10251    default:
10252      break;
10253    }
10254
10255  return cost;
10256}
10257
10258static int
10259leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10260		   int cost)
10261{
10262  enum attr_type insn_type, dep_type;
10263  rtx pat = PATTERN (insn);
10264  rtx dep_pat = PATTERN (dep_insn);
10265
10266  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10267    return cost;
10268
10269  insn_type = get_attr_type (insn);
10270  dep_type = get_attr_type (dep_insn);
10271
10272  switch (dtype)
10273    {
10274    case REG_DEP_TRUE:
10275      /* Data dependency; DEP_INSN writes a register that INSN reads some
10276	 cycles later.  */
10277
10278      switch (insn_type)
10279	{
10280	case TYPE_STORE:
10281	  /* Try to schedule three instructions between the store and
10282	     the ALU instruction that generated the data.  */
10283	  if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
10284	    {
10285	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10286		break;
10287
10288	      if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10289		return 4;
10290	    }
10291	  break;
10292	default:
10293	  break;
10294	}
10295      break;
10296    case REG_DEP_ANTI:
10297      /* Penalize anti-dependencies for FPU instructions.  */
10298      if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
10299	return 4;
10300      break;
10301    default:
10302      break;
10303    }
10304
10305  return cost;
10306}
10307
10308static int
10309sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10310		   unsigned int)
10311{
10312  switch (sparc_cpu)
10313    {
10314    case PROCESSOR_LEON5:
10315      cost = leon5_adjust_cost (insn, dep_type, dep, cost);
10316      break;
10317    case PROCESSOR_SUPERSPARC:
10318      cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10319      break;
10320    case PROCESSOR_HYPERSPARC:
10321    case PROCESSOR_SPARCLITE86X:
10322      cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10323      break;
10324    default:
10325      break;
10326    }
10327  return cost;
10328}
10329
10330static void
10331sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10332		  int sched_verbose ATTRIBUTE_UNUSED,
10333		  int max_ready ATTRIBUTE_UNUSED)
10334{}
10335
10336static int
10337sparc_use_sched_lookahead (void)
10338{
10339  switch (sparc_cpu)
10340    {
10341    case PROCESSOR_ULTRASPARC:
10342    case PROCESSOR_ULTRASPARC3:
10343      return 4;
10344    case PROCESSOR_SUPERSPARC:
10345    case PROCESSOR_HYPERSPARC:
10346    case PROCESSOR_SPARCLITE86X:
10347      return 3;
10348    case PROCESSOR_NIAGARA4:
10349    case PROCESSOR_NIAGARA7:
10350    case PROCESSOR_M8:
10351      return 2;
10352    case PROCESSOR_NIAGARA:
10353    case PROCESSOR_NIAGARA2:
10354    case PROCESSOR_NIAGARA3:
10355    default:
10356      return 0;
10357    }
10358}
10359
10360static int
10361sparc_issue_rate (void)
10362{
10363  switch (sparc_cpu)
10364    {
10365    case PROCESSOR_ULTRASPARC:
10366    case PROCESSOR_ULTRASPARC3:
10367    case PROCESSOR_M8:
10368      return 4;
10369    case PROCESSOR_SUPERSPARC:
10370      return 3;
10371    case PROCESSOR_HYPERSPARC:
10372    case PROCESSOR_SPARCLITE86X:
10373    case PROCESSOR_V9:
10374      /* Assume V9 processors are capable of at least dual-issue.  */
10375    case PROCESSOR_NIAGARA4:
10376    case PROCESSOR_NIAGARA7:
10377      return 2;
10378    case PROCESSOR_NIAGARA:
10379    case PROCESSOR_NIAGARA2:
10380    case PROCESSOR_NIAGARA3:
10381    default:
10382      return 1;
10383    }
10384}
10385
10386int
10387sparc_branch_cost (bool speed_p, bool predictable_p)
10388{
10389  if (!speed_p)
10390    return 2;
10391
10392  /* For pre-V9 processors we use a single value (usually 3) to take into
10393     account the potential annulling of the delay slot (which ends up being
10394     a bubble in the pipeline slot) plus a cycle to take into consideration
10395     the instruction cache effects.
10396
10397     On V9 and later processors, which have branch prediction facilities,
10398     we take into account whether the branch is (easily) predictable.  */
10399  const int cost = sparc_costs->branch_cost;
10400
10401  switch (sparc_cpu)
10402    {
10403    case PROCESSOR_V9:
10404    case PROCESSOR_ULTRASPARC:
10405    case PROCESSOR_ULTRASPARC3:
10406    case PROCESSOR_NIAGARA:
10407    case PROCESSOR_NIAGARA2:
10408    case PROCESSOR_NIAGARA3:
10409    case PROCESSOR_NIAGARA4:
10410    case PROCESSOR_NIAGARA7:
10411    case PROCESSOR_M8:
10412      return cost + (predictable_p ? 0 : 2);
10413
10414    default:
10415      return cost;
10416    }
10417}
10418
10419static int
10420set_extends (rtx_insn *insn)
10421{
10422  register rtx pat = PATTERN (insn);
10423
10424  switch (GET_CODE (SET_SRC (pat)))
10425    {
10426      /* Load and some shift instructions zero extend.  */
10427    case MEM:
10428    case ZERO_EXTEND:
10429      /* sethi clears the high bits */
10430    case HIGH:
10431      /* LO_SUM is used with sethi.  sethi cleared the high
10432	 bits and the values used with lo_sum are positive */
10433    case LO_SUM:
10434      /* Store flag stores 0 or 1 */
10435    case LT: case LTU:
10436    case GT: case GTU:
10437    case LE: case LEU:
10438    case GE: case GEU:
10439    case EQ:
10440    case NE:
10441      return 1;
10442    case AND:
10443      {
10444	rtx op0 = XEXP (SET_SRC (pat), 0);
10445	rtx op1 = XEXP (SET_SRC (pat), 1);
10446	if (GET_CODE (op1) == CONST_INT)
10447	  return INTVAL (op1) >= 0;
10448	if (GET_CODE (op0) != REG)
10449	  return 0;
10450	if (sparc_check_64 (op0, insn) == 1)
10451	  return 1;
10452	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10453      }
10454    case IOR:
10455    case XOR:
10456      {
10457	rtx op0 = XEXP (SET_SRC (pat), 0);
10458	rtx op1 = XEXP (SET_SRC (pat), 1);
10459	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10460	  return 0;
10461	if (GET_CODE (op1) == CONST_INT)
10462	  return INTVAL (op1) >= 0;
10463	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10464      }
10465    case LSHIFTRT:
10466      return GET_MODE (SET_SRC (pat)) == SImode;
10467      /* Positive integers leave the high bits zero.  */
10468    case CONST_INT:
10469      return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10470    case ASHIFTRT:
10471    case SIGN_EXTEND:
10472      return - (GET_MODE (SET_SRC (pat)) == SImode);
10473    case REG:
10474      return sparc_check_64 (SET_SRC (pat), insn);
10475    default:
10476      return 0;
10477    }
10478}

/* We _ought_ to have only one kind per function, but...  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;

void
sparc_defer_case_vector (rtx lab, rtx vec, int diff)
{
  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
  if (diff)
    sparc_addr_diff_list
      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
  else
    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
}

static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}

static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}

static void
sparc_output_deferred_case_vectors (void)
{
  rtx t;
  int align;

  if (sparc_addr_list == NULL_RTX
      && sparc_addr_diff_list == NULL_RTX)
    return;

  /* Align to cache line in the function's code section.  */
  switch_to_section (current_function_section ());

  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
  if (align > 0)
    ASM_OUTPUT_ALIGN (asm_out_file, align);

  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
    sparc_output_addr_vec (XEXP (t, 0));
  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
    sparc_output_addr_diff_vec (XEXP (t, 0));

  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
}
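
/* Illustrative only: once flushed here, an absolute vector prints as a
   case label followed by one ".word .L<n>"-style entry per target, and a
   difference vector as ".word .L<n>-.L<base>"-style entries against the
   base label; the exact directives depend on which ASM_OUTPUT_* macros
   are in effect.  */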

/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  */
int
sparc_check_64 (rtx x, rtx_insn *insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
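
/* Usage sketch: output_v8plus_shift below asks sparc_check_64 whether the
   SImode low half of its 64-bit input is already known zero-extended;
   when the answer is 1 it can omit the explicit "srl %LN, 0, %LN" that
   would otherwise clear the upper 32 bits.  */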

/* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
{
  static char asm_code[60];

  /* The scratch register is only required when the destination
     register is not a 64-bit global or out register.  */
  if (which_alternative != 2)
    operands[3] = operands[0];

  /* We can only shift by constants <= 63.  */
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);

  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  strcpy (asm_code, opcode);

  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return
      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
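
/* For the non-scratch alternatives (where %3 is the destination itself),
   "sllx" as OPCODE yields a sequence along these lines (a sketch, not
   verbatim output):

	sllx	%H1, 32, %3	! move the high word into the upper half
	srl	%L1, 0, %L1	! clear upper bits if not known zero
	or	%L1, %3, %3	! assemble the full 64-bit value
	sllx	%0, %2, %L0	! the shift proper
	srlx	%L0, 32, %H0	! split the result back into the pair  */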

/* Output rtl to increment the profiler label LABELNO
   for profiling a function entry.  */

void
sparc_profile_hook (int labelno)
{
  char buf[32];
  rtx lab, fun;

  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
  if (NO_PROFILE_COUNTERS)
    {
      emit_library_call (fun, LCT_NORMAL, VOIDmode);
    }
  else
    {
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
      emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
    }
}
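
/* Net effect (sketch): without profile counters this is a bare call to
   MCOUNT_FUNCTION; with counters, the address of the internal label
   "LP<labelno>" is passed so the mcount routine can maintain a counter
   per call site.  */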

#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
				     tree decl ATTRIBUTE_UNUSED)
{
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  fprintf (asm_out_file, "\t.section\t\"%s\"", name);

  if (!(flags & SECTION_DEBUG))
    fputs (",#alloc", asm_out_file);
#if HAVE_GAS_SECTION_EXCLUDE
  if (flags & SECTION_EXCLUDE)
    fputs (",#exclude", asm_out_file);
#endif
  if (flags & SECTION_WRITE)
    fputs (",#write", asm_out_file);
  if (flags & SECTION_TLS)
    fputs (",#tls", asm_out_file);
  if (flags & SECTION_CODE)
    fputs (",#execinstr", asm_out_file);

  if (flags & SECTION_NOTYPE)
    ;
  else if (flags & SECTION_BSS)
    fputs (",#nobits", asm_out_file);
  else
    fputs (",#progbits", asm_out_file);

  fputc ('\n', asm_out_file);
}
#endif /* TARGET_SOLARIS */
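
/* For example, a writable TLS data section would come out as

	.section	".tdata",#alloc,#write,#tls,#progbits

   (illustrative; the actual name and flag set come from the caller).  */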

/* We do not allow indirect calls to be optimized into sibling calls.

   We cannot use sibling calls when delayed branches are disabled
   because they will likely require the call delay slot to be filled.

   Also, on SPARC 32-bit we cannot emit a sibling call when the
   current function returns a structure.  This is because the "unimp
   after call" convention would cause the callee to return to the
   wrong place.  The generic code already disallows cases where the
   function being called returns a structure.

   It may seem strange how this last case could occur.  Usually there
   is code after the call which jumps to epilogue code which dumps the
   return value into the struct return area.  That ought to invalidate
   the sibling call right?  Well, in the C++ case we can end up passing
   the pointer to the struct return area to a constructor (which returns
   void) and then nothing else happens.  Such a sibling call would look
   valid without the added check here.

   VxWorks PIC PLT entries require the global pointer to be initialized
   on entry.  We therefore can't emit sibling calls to them.  */
static bool
sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (decl
	  && flag_delayed_branch
	  && (TARGET_ARCH64 || ! cfun->returns_struct)
	  && !(TARGET_VXWORKS_RTP
	       && flag_pic
	       && !targetm.binds_local_p (decl)));
}

/* libfunc renaming.  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");

      set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
	 hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}
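
/* Concretely, on 32-bit SPARC a TFmode addition becomes a call to _Q_add
   and an SImode multiply a call to .umul, while on 64-bit the SImode
   entries are cleared so the hardware instructions are always used.  */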

/* SPARC builtins.  */
enum sparc_builtins
{
  /* FPU builtins.  */
  SPARC_BUILTIN_LDFSR,
  SPARC_BUILTIN_STFSR,

  /* VIS 1.0 builtins.  */
  SPARC_BUILTIN_FPACK16,
  SPARC_BUILTIN_FPACK32,
  SPARC_BUILTIN_FPACKFIX,
  SPARC_BUILTIN_FEXPAND,
  SPARC_BUILTIN_FPMERGE,
  SPARC_BUILTIN_FMUL8X16,
  SPARC_BUILTIN_FMUL8X16AU,
  SPARC_BUILTIN_FMUL8X16AL,
  SPARC_BUILTIN_FMUL8SUX16,
  SPARC_BUILTIN_FMUL8ULX16,
  SPARC_BUILTIN_FMULD8SUX16,
  SPARC_BUILTIN_FMULD8ULX16,
  SPARC_BUILTIN_FALIGNDATAV4HI,
  SPARC_BUILTIN_FALIGNDATAV8QI,
  SPARC_BUILTIN_FALIGNDATAV2SI,
  SPARC_BUILTIN_FALIGNDATADI,
  SPARC_BUILTIN_WRGSR,
  SPARC_BUILTIN_RDGSR,
  SPARC_BUILTIN_ALIGNADDR,
  SPARC_BUILTIN_ALIGNADDRL,
  SPARC_BUILTIN_PDIST,
  SPARC_BUILTIN_EDGE8,
  SPARC_BUILTIN_EDGE8L,
  SPARC_BUILTIN_EDGE16,
  SPARC_BUILTIN_EDGE16L,
  SPARC_BUILTIN_EDGE32,
  SPARC_BUILTIN_EDGE32L,
  SPARC_BUILTIN_FCMPLE16,
  SPARC_BUILTIN_FCMPLE32,
  SPARC_BUILTIN_FCMPNE16,
  SPARC_BUILTIN_FCMPNE32,
  SPARC_BUILTIN_FCMPGT16,
  SPARC_BUILTIN_FCMPGT32,
  SPARC_BUILTIN_FCMPEQ16,
  SPARC_BUILTIN_FCMPEQ32,
  SPARC_BUILTIN_FPADD16,
  SPARC_BUILTIN_FPADD16S,
  SPARC_BUILTIN_FPADD32,
  SPARC_BUILTIN_FPADD32S,
  SPARC_BUILTIN_FPSUB16,
  SPARC_BUILTIN_FPSUB16S,
  SPARC_BUILTIN_FPSUB32,
  SPARC_BUILTIN_FPSUB32S,
  SPARC_BUILTIN_ARRAY8,
  SPARC_BUILTIN_ARRAY16,
  SPARC_BUILTIN_ARRAY32,

  /* VIS 2.0 builtins.  */
  SPARC_BUILTIN_EDGE8N,
  SPARC_BUILTIN_EDGE8LN,
  SPARC_BUILTIN_EDGE16N,
  SPARC_BUILTIN_EDGE16LN,
  SPARC_BUILTIN_EDGE32N,
  SPARC_BUILTIN_EDGE32LN,
  SPARC_BUILTIN_BMASK,
  SPARC_BUILTIN_BSHUFFLEV4HI,
  SPARC_BUILTIN_BSHUFFLEV8QI,
  SPARC_BUILTIN_BSHUFFLEV2SI,
  SPARC_BUILTIN_BSHUFFLEDI,

  /* VIS 3.0 builtins.  */
  SPARC_BUILTIN_CMASK8,
  SPARC_BUILTIN_CMASK16,
  SPARC_BUILTIN_CMASK32,
  SPARC_BUILTIN_FCHKSM16,
  SPARC_BUILTIN_FSLL16,
  SPARC_BUILTIN_FSLAS16,
  SPARC_BUILTIN_FSRL16,
  SPARC_BUILTIN_FSRA16,
  SPARC_BUILTIN_FSLL32,
  SPARC_BUILTIN_FSLAS32,
  SPARC_BUILTIN_FSRL32,
  SPARC_BUILTIN_FSRA32,
  SPARC_BUILTIN_PDISTN,
  SPARC_BUILTIN_FMEAN16,
  SPARC_BUILTIN_FPADD64,
  SPARC_BUILTIN_FPSUB64,
  SPARC_BUILTIN_FPADDS16,
  SPARC_BUILTIN_FPADDS16S,
  SPARC_BUILTIN_FPSUBS16,
  SPARC_BUILTIN_FPSUBS16S,
  SPARC_BUILTIN_FPADDS32,
  SPARC_BUILTIN_FPADDS32S,
  SPARC_BUILTIN_FPSUBS32,
  SPARC_BUILTIN_FPSUBS32S,
  SPARC_BUILTIN_FUCMPLE8,
  SPARC_BUILTIN_FUCMPNE8,
  SPARC_BUILTIN_FUCMPGT8,
  SPARC_BUILTIN_FUCMPEQ8,
  SPARC_BUILTIN_FHADDS,
  SPARC_BUILTIN_FHADDD,
  SPARC_BUILTIN_FHSUBS,
  SPARC_BUILTIN_FHSUBD,
  SPARC_BUILTIN_FNHADDS,
  SPARC_BUILTIN_FNHADDD,
  SPARC_BUILTIN_UMULXHI,
  SPARC_BUILTIN_XMULX,
  SPARC_BUILTIN_XMULXHI,

  /* VIS 4.0 builtins.  */
  SPARC_BUILTIN_FPADD8,
  SPARC_BUILTIN_FPADDS8,
  SPARC_BUILTIN_FPADDUS8,
  SPARC_BUILTIN_FPADDUS16,
  SPARC_BUILTIN_FPCMPLE8,
  SPARC_BUILTIN_FPCMPGT8,
  SPARC_BUILTIN_FPCMPULE16,
  SPARC_BUILTIN_FPCMPUGT16,
  SPARC_BUILTIN_FPCMPULE32,
  SPARC_BUILTIN_FPCMPUGT32,
  SPARC_BUILTIN_FPMAX8,
  SPARC_BUILTIN_FPMAX16,
  SPARC_BUILTIN_FPMAX32,
  SPARC_BUILTIN_FPMAXU8,
  SPARC_BUILTIN_FPMAXU16,
  SPARC_BUILTIN_FPMAXU32,
  SPARC_BUILTIN_FPMIN8,
  SPARC_BUILTIN_FPMIN16,
  SPARC_BUILTIN_FPMIN32,
  SPARC_BUILTIN_FPMINU8,
  SPARC_BUILTIN_FPMINU16,
  SPARC_BUILTIN_FPMINU32,
  SPARC_BUILTIN_FPSUB8,
  SPARC_BUILTIN_FPSUBS8,
  SPARC_BUILTIN_FPSUBUS8,
  SPARC_BUILTIN_FPSUBUS16,

  /* VIS 4.0B builtins.  */

  /* Note that all the DICTUNPACK* entries should be kept
     contiguous.  */
  SPARC_BUILTIN_FIRST_DICTUNPACK,
  SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
  SPARC_BUILTIN_DICTUNPACK16,
  SPARC_BUILTIN_DICTUNPACK32,
  SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,

  /* Note that all the FPCMP*SHL entries should be kept
     contiguous.  */
  SPARC_BUILTIN_FIRST_FPCMPSHL,
  SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
  SPARC_BUILTIN_FPCMPGT8SHL,
  SPARC_BUILTIN_FPCMPEQ8SHL,
  SPARC_BUILTIN_FPCMPNE8SHL,
  SPARC_BUILTIN_FPCMPLE16SHL,
  SPARC_BUILTIN_FPCMPGT16SHL,
  SPARC_BUILTIN_FPCMPEQ16SHL,
  SPARC_BUILTIN_FPCMPNE16SHL,
  SPARC_BUILTIN_FPCMPLE32SHL,
  SPARC_BUILTIN_FPCMPGT32SHL,
  SPARC_BUILTIN_FPCMPEQ32SHL,
  SPARC_BUILTIN_FPCMPNE32SHL,
  SPARC_BUILTIN_FPCMPULE8SHL,
  SPARC_BUILTIN_FPCMPUGT8SHL,
  SPARC_BUILTIN_FPCMPULE16SHL,
  SPARC_BUILTIN_FPCMPUGT16SHL,
  SPARC_BUILTIN_FPCMPULE32SHL,
  SPARC_BUILTIN_FPCMPUGT32SHL,
  SPARC_BUILTIN_FPCMPDE8SHL,
  SPARC_BUILTIN_FPCMPDE16SHL,
  SPARC_BUILTIN_FPCMPDE32SHL,
  SPARC_BUILTIN_FPCMPUR8SHL,
  SPARC_BUILTIN_FPCMPUR16SHL,
  SPARC_BUILTIN_FPCMPUR32SHL,
  SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,

  SPARC_BUILTIN_MAX
};

static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];

/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
   The instruction should require a constant operand of some sort.  The
   function prints an error if OPVAL is not valid.  */

static bool
check_constant_argument (enum insn_code icode, int opnum, rtx opval)
{
  if (GET_CODE (opval) != CONST_INT)
    {
      error ("%qs expects a constant argument", insn_data[icode].name);
      return false;
    }

  if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
    {
      error ("constant argument out of range for %qs", insn_data[icode].name);
      return false;
    }
  return true;
}

/* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE.  Return the
   function decl or NULL_TREE if the builtin was not added.  */

static tree
def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
	     tree type)
{
  tree t
    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);

  if (t)
    {
      sparc_builtins[code] = t;
      sparc_builtins_icode[code] = icode;
    }

  return t;
}

/* Likewise, but also marks the function as "const".  */

static tree
def_builtin_const (const char *name, enum insn_code icode,
		   enum sparc_builtins code, tree type)
{
  tree t = def_builtin (name, icode, code, type);

  if (t)
    TREE_READONLY (t) = 1;

  return t;
}

/* Implement the TARGET_INIT_BUILTINS target hook.
   Create builtin functions for special SPARC instructions.  */

static void
sparc_init_builtins (void)
{
  if (TARGET_FPU)
    sparc_fpu_init_builtins ();

  if (TARGET_VIS)
    sparc_vis_init_builtins ();
}

/* Create builtin functions for FPU instructions.  */

static void
sparc_fpu_init_builtins (void)
{
  tree ftype
    = build_function_type_list (void_type_node,
				build_pointer_type (unsigned_type_node), 0);
  def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
	       SPARC_BUILTIN_LDFSR, ftype);
  def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
	       SPARC_BUILTIN_STFSR, ftype);
}
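
/* Usage sketch: both builtins take a pointer to unsigned int per the
   ftype above, so "__builtin_store_fsr (&w)" writes the %fsr into w and
   "__builtin_load_fsr (&w)" loads it back from w.  */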

/* Create builtin functions for VIS instructions.  */

static void
sparc_vis_init_builtins (void)
{
  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
  tree v4hi = build_vector_type (intHI_type_node, 4);
  tree v2hi = build_vector_type (intHI_type_node, 2);
  tree v2si = build_vector_type (intSI_type_node, 2);
  tree v1si = build_vector_type (intSI_type_node, 1);

  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
  tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
  tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
							 v8qi, v8qi,
							 intDI_type_node, 0);
  tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
						      v8qi, v8qi, 0);
  tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
						      v8qi, v8qi, 0);
  tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
						    intSI_type_node, 0);
  tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
						    intSI_type_node, 0);
  tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
						    intSI_type_node, 0);
  tree di_ftype_di_di = build_function_type_list (intDI_type_node,
						  intDI_type_node,
						  intDI_type_node, 0);
  tree si_ftype_si_si = build_function_type_list (intSI_type_node,
						  intSI_type_node,
						  intSI_type_node, 0);
  tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
						    ptr_type_node,
						    intSI_type_node, 0);
  tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
						    ptr_type_node,
						    intDI_type_node, 0);
  tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
						    ptr_type_node,
						    ptr_type_node, 0);
  tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
						    ptr_type_node,
						    ptr_type_node, 0);
  tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
						      v4hi, v4hi, 0);
  tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
						      v2si, v2si, 0);
  tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
						      v4hi, v4hi, 0);
  tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
						      v2si, v2si, 0);
  tree void_ftype_di = build_function_type_list (void_type_node,
						 intDI_type_node, 0);
  tree di_ftype_void = build_function_type_list (intDI_type_node,
						 void_type_node, 0);
  tree void_ftype_si = build_function_type_list (void_type_node,
						 intSI_type_node, 0);
  tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
						  float_type_node,
						  float_type_node, 0);
  tree df_ftype_df_df = build_function_type_list (double_type_node,
						  double_type_node,
						  double_type_node, 0);

  /* Packing and expanding vectors.  */
  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
	       SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
	       SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
	       SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
  def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
		     SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
  def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
		     SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);

  /* Multiplications.  */
  def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
		     SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
  def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
		     SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
  def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
		     SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
  def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
		     SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
  def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
		     SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
  def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
		     SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
  def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
		     SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);

  /* Data aligning.  */
  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
	       SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
	       SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
	       SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
	       SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);

  def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
	       SPARC_BUILTIN_WRGSR, void_ftype_di);
  def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
	       SPARC_BUILTIN_RDGSR, di_ftype_void);

  if (TARGET_ARCH64)
    {
      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
    }
  else
    {
      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
    }

  /* Pixel distance.  */
  def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
		     SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);

  /* Edge handling.  */
  if (TARGET_ARCH64)
    {
      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
			 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
			 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
			 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
			 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
			 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
			 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
    }
  else
    {
      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
			 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
			 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
			 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
			 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
			 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
			 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
    }

  /* Pixel compare.  */
  if (TARGET_ARCH64)
    {
      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
			 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
			 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
			 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
			 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
			 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
			 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
			 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
			 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
    }
  else
    {
      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
			 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
			 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
			 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
			 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
			 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
			 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
			 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
			 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
    }

  /* Addition and subtraction.  */
  def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
		     SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
  def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
		     SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
  def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
		     SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
  def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
		     SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
  def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
		     SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
  def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
		     SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
  def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
		     SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
  def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
		     SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);

  /* Three-dimensional array addressing.  */
  if (TARGET_ARCH64)
    {
      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
			 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
			 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
			 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
    }
  else
    {
      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
			 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
			 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
			 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
    }

  if (TARGET_VIS2)
    {
      /* Edge handling.  */
      if (TARGET_ARCH64)
	{
	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
			     SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
			     SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
			     SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
			     SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
			     SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
			     SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
	}
      else
	{
	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
			     SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
			     SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
			     SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
			     SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
			     SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
			     SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
	}

      /* Byte mask and shuffle.  */
      if (TARGET_ARCH64)
	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
		     SPARC_BUILTIN_BMASK, di_ftype_di_di);
      else
	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
		     SPARC_BUILTIN_BMASK, si_ftype_si_si);
      def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
		   SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
      def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
		   SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
      def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
		   SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
      def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
		   SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
    }

  if (TARGET_VIS3)
    {
      if (TARGET_ARCH64)
	{
	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
		       SPARC_BUILTIN_CMASK8, void_ftype_di);
	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
		       SPARC_BUILTIN_CMASK16, void_ftype_di);
	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
		       SPARC_BUILTIN_CMASK32, void_ftype_di);
	}
      else
	{
	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
		       SPARC_BUILTIN_CMASK8, void_ftype_si);
	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
		       SPARC_BUILTIN_CMASK16, void_ftype_si);
	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
		       SPARC_BUILTIN_CMASK32, void_ftype_si);
	}

      def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
			 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);

      def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
			 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
			 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
			 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
			 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
			 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
			 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
			 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
			 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);

      if (TARGET_ARCH64)
	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
			   SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
      else
	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
			   SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);

      def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
			 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
			 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
			 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);

      def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
			 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
      def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
      def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
      def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);

      if (TARGET_ARCH64)
	{
	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
	}
      else
	{
	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
	}

      def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);

      def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
    }

  if (TARGET_VIS4)
    {
      def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
			 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
			 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
			 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
			 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);

      if (TARGET_ARCH64)
	{
	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
			     SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
			     SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
			     SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
			     SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
			     SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
			     SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
	}
      else
	{
	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
			     SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
			     SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
			     SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
			     SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
	}

      def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
			 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
			 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
			 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
			 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
			 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
			 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
			 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
			 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
			 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
			 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
			 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
			 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
			 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
			 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
			 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
			 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
    }

  if (TARGET_VIS4B)
    {
      def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
			 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
      def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
			 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
      def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
			 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);

      if (TARGET_ARCH64)
	{
	  tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
								 v8qi, v8qi,
								 intSI_type_node, 0);
	  tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
								 v4hi, v4hi,
								 intSI_type_node, 0);
	  tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
								 v2si, v2si,
								 intSI_type_node, 0);

	  def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
			     SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
			     SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
			     SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
			     SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);

	  def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
			     SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
			     SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
			     SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
			     SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);

	  def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
			     SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
			     SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
			     SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
			     SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);

	  def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
			     SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
			     SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);

	  def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
			     SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
			     SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);

	  def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
			     SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
			     SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);

	  def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
			     SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
			     SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
			     SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);

	  def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
			     SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
			     SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
			     SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
	}
      else
	{
	  tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
								 v8qi, v8qi,
								 intSI_type_node, 0);
	  tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
								 v4hi, v4hi,
								 intSI_type_node, 0);
	  tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
								 v2si, v2si,
								 intSI_type_node, 0);

	  def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
			     SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
			     SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
			     SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
			     SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);

	  def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
			     SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
			     SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
			     SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
			     SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);

	  def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
			     SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
			     SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
			     SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
			     SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);

	  def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
			     SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
			     SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);

	  def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
			     SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
			     SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);

	  def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
			     SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
	  def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
			     SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);

	  def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
			     SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
			     SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
			     SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);

	  def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
			     SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
	  def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
			     SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
	  def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
			     SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
	}
    }
}
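
/* End-user sketch (assumes -mvis; the vector types mirror the ftypes
   built above):

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add4 (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);
     }

   which maps onto a single fpadd16 instruction.  */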

/* Implement TARGET_BUILTIN_DECL hook.  */

static tree
sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= SPARC_BUILTIN_MAX)
    return error_mark_node;

  return sparc_builtins[code];
}

/* Implement TARGET_EXPAND_BUILTIN hook.  */

static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum sparc_builtins code
    = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode = sparc_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }
  else
    op[0] = NULL_RTX;

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      /* Some of the builtins require constant arguments.  We check
	 for this here.  */
      if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
	   && code <= SPARC_BUILTIN_LAST_FPCMPSHL
	   && arg_count == 3)
	  || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
	      && code <= SPARC_BUILTIN_LAST_DICTUNPACK
	      && arg_count == 2))
	{
	  if (!check_constant_argument (icode, idx, op[arg_count]))
	    return const0_rtx;
	}

      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
	{
	  if (!address_operand (op[arg_count], SImode))
	    {
	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
	    }
	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
	}

      else if (insn_op->mode == V1DImode
	       && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);

      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
11892
11893/* Return the upper 16 bits of the 8x16 multiplication.  */
11894
11895static int
11896sparc_vis_mul8x16 (int e8, int e16)
11897{
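  /* Scale the product down by 256, adding 128 first so that the
     result is rounded to the nearest integer rather than truncated.  */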
11898  return (e8 * e16 + 128) / 256;
11899}
11900
11901/* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11902   the result into the array N_ELTS, whose elements are of INNER_TYPE.  */
11903
11904static void
11905sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11906			  tree inner_type, tree cst0, tree cst1)
11907{
11908  unsigned i, num = VECTOR_CST_NELTS (cst0);
11909  int scale;
11910
11911  switch (fncode)
11912    {
11913    case SPARC_BUILTIN_FMUL8X16:
11914      for (i = 0; i < num; ++i)
11915	{
11916	  int val
11917	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11918				 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11919	  n_elts->quick_push (build_int_cst (inner_type, val));
11920	}
11921      break;
11922
11923    case SPARC_BUILTIN_FMUL8X16AU:
11924      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11925
11926      for (i = 0; i < num; ++i)
11927	{
11928	  int val
11929	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11930				 scale);
11931	  n_elts->quick_push (build_int_cst (inner_type, val));
11932	}
11933      break;
11934
11935    case SPARC_BUILTIN_FMUL8X16AL:
11936      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11937
11938      for (i = 0; i < num; ++i)
11939	{
11940	  int val
11941	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11942				 scale);
11943	  n_elts->quick_push (build_int_cst (inner_type, val));
11944	}
11945      break;
11946
11947    default:
11948      gcc_unreachable ();
11949    }
11950}
11951
11952/* Implement TARGET_FOLD_BUILTIN hook.
11953
11954   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
11955   result of the function call is ignored.  NULL_TREE is returned if the
11956   function could not be folded.  */
11957
11958static tree
11959sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11960		    tree *args, bool ignore)
11961{
11962  enum sparc_builtins code
11963    = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11964  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11965  tree arg0, arg1, arg2;
11966
11967  if (ignore)
11968    switch (code)
11969      {
11970      case SPARC_BUILTIN_LDFSR:
11971      case SPARC_BUILTIN_STFSR:
11972      case SPARC_BUILTIN_ALIGNADDR:
11973      case SPARC_BUILTIN_WRGSR:
11974      case SPARC_BUILTIN_BMASK:
11975      case SPARC_BUILTIN_CMASK8:
11976      case SPARC_BUILTIN_CMASK16:
11977      case SPARC_BUILTIN_CMASK32:
11978	break;
11979
11980      default:
11981	return build_zero_cst (rtype);
11982      }
11983
11984  switch (code)
11985    {
11986    case SPARC_BUILTIN_FEXPAND:
11987      arg0 = args[0];
11988      STRIP_NOPS (arg0);
11989
11990      if (TREE_CODE (arg0) == VECTOR_CST)
11991	{
11992	  tree inner_type = TREE_TYPE (rtype);
11993	  unsigned i;
11994
11995	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11996	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11997	    {
11998	      unsigned HOST_WIDE_INT val
11999		= TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
12000	      n_elts.quick_push (build_int_cst (inner_type, val << 4));
12001	    }
12002	  return n_elts.build ();
12003	}
12004      break;
12005
12006    case SPARC_BUILTIN_FMUL8X16:
12007    case SPARC_BUILTIN_FMUL8X16AU:
12008    case SPARC_BUILTIN_FMUL8X16AL:
12009      arg0 = args[0];
12010      arg1 = args[1];
12011      STRIP_NOPS (arg0);
12012      STRIP_NOPS (arg1);
12013
12014      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12015	{
12016	  tree inner_type = TREE_TYPE (rtype);
12017	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
12018	  sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
12019	  return n_elts.build ();
12020	}
12021      break;
12022
12023    case SPARC_BUILTIN_FPMERGE:
12024      arg0 = args[0];
12025      arg1 = args[1];
12026      STRIP_NOPS (arg0);
12027      STRIP_NOPS (arg1);
12028
12029      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12030	{
12031	  tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
12032	  unsigned i;
12033	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12034	    {
12035	      n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
12036	      n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
12037	    }
12038
12039	  return n_elts.build ();
12040	}
12041      break;
12042
12043    case SPARC_BUILTIN_PDIST:
12044    case SPARC_BUILTIN_PDISTN:
12045      arg0 = args[0];
12046      arg1 = args[1];
12047      STRIP_NOPS (arg0);
12048      STRIP_NOPS (arg1);
12049      if (code == SPARC_BUILTIN_PDIST)
12050	{
12051	  arg2 = args[2];
12052	  STRIP_NOPS (arg2);
12053	}
12054      else
12055	arg2 = integer_zero_node;
12056
12057      if (TREE_CODE (arg0) == VECTOR_CST
12058	  && TREE_CODE (arg1) == VECTOR_CST
12059	  && TREE_CODE (arg2) == INTEGER_CST)
12060	{
12061	  bool overflow = false;
12062	  widest_int result = wi::to_widest (arg2);
12063	  widest_int tmp;
12064	  unsigned i;
12065
12066	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12067	    {
12068	      tree e0 = VECTOR_CST_ELT (arg0, i);
12069	      tree e1 = VECTOR_CST_ELT (arg1, i);
12070
12071	      wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
12072
12073	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
12074	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
12075	      if (wi::neg_p (tmp))
12076		tmp = wi::neg (tmp, &neg2_ovf);
12077	      else
12078		neg2_ovf = wi::OVF_NONE;
12079	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
12080	      overflow |= ((neg1_ovf != wi::OVF_NONE)
12081			   | (neg2_ovf != wi::OVF_NONE)
12082			   | (add1_ovf != wi::OVF_NONE)
12083			   | (add2_ovf != wi::OVF_NONE));
12084	    }
12085
12086	  gcc_assert (!overflow);
12087
12088	  return wide_int_to_tree (rtype, result);
12089	}
12090
12091    default:
12092      break;
12093    }
12094
12095  return NULL_TREE;
12096}
12097
12098/* ??? This duplicates information provided to the compiler by the
12099   ??? scheduler description.  Some day, teach genautomata to output
12100   ??? the latencies and then CSE will just use that.  */
12101
12102static bool
12103sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12104		 int opno ATTRIBUTE_UNUSED,
12105		 int *total, bool speed ATTRIBUTE_UNUSED)
12106{
12107  int code = GET_CODE (x);
12108  bool float_mode_p = FLOAT_MODE_P (mode);
12109
12110  switch (code)
12111    {
12112    case CONST_INT:
12113      if (SMALL_INT (x))
12114	*total = 0;
12115      else
12116	*total = 2;
12117      return true;
12118
12119    case CONST_WIDE_INT:
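      /* A two-element constant: cost each 64-bit half that does not
	 fit in a signed 13-bit immediate.  */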
12120      *total = 0;
12121      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12122	*total += 2;
12123      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12124	*total += 2;
12125      return true;
12126
12127    case HIGH:
12128      *total = 2;
12129      return true;
12130
12131    case CONST:
12132    case LABEL_REF:
12133    case SYMBOL_REF:
12134      *total = 4;
12135      return true;
12136
12137    case CONST_DOUBLE:
12138      *total = 8;
12139      return true;
12140
12141    case MEM:
12142      /* If outer-code was a sign or zero extension, a cost
12143	 of COSTS_N_INSNS (1) was already added in.  This is
12144	 why we are subtracting it back out.  */
12145      if (outer_code == ZERO_EXTEND)
12146	{
12147	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12148	}
12149      else if (outer_code == SIGN_EXTEND)
12150	{
12151	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12152	}
12153      else if (float_mode_p)
12154	{
12155	  *total = sparc_costs->float_load;
12156	}
12157      else
12158	{
12159	  *total = sparc_costs->int_load;
12160	}
12161
12162      return true;
12163
12164    case PLUS:
12165    case MINUS:
12166      if (float_mode_p)
12167	*total = sparc_costs->float_plusminus;
12168      else
12169	*total = COSTS_N_INSNS (1);
12170      return false;
12171
12172    case FMA:
12173      {
12174	rtx sub;
12175
12176	gcc_assert (float_mode_p);
12177	*total = sparc_costs->float_mul;
12178
12179	sub = XEXP (x, 0);
12180	if (GET_CODE (sub) == NEG)
12181	  sub = XEXP (sub, 0);
12182	*total += rtx_cost (sub, mode, FMA, 0, speed);
12183
12184	sub = XEXP (x, 2);
12185	if (GET_CODE (sub) == NEG)
12186	  sub = XEXP (sub, 0);
12187	*total += rtx_cost (sub, mode, FMA, 2, speed);
12188	return true;
12189      }
12190
12191    case MULT:
12192      if (float_mode_p)
12193	*total = sparc_costs->float_mul;
12194      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12195	*total = COSTS_N_INSNS (25);
12196      else
12197	{
12198	  int bit_cost;
12199
12200	  bit_cost = 0;
12201	  if (sparc_costs->int_mul_bit_factor)
12202	    {
12203	      int nbits;
12204
12205	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12206		{
12207		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
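		  /* Count the set bits: clearing the lowest set bit on
		     each iteration visits every 1 bit exactly once.  */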
12208		  for (nbits = 0; value != 0; value &= value - 1)
12209		    nbits++;
12210		}
12211	      else
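		/* Assume 7 set bits in a non-constant multiplier.  */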
12212		nbits = 7;
12213
12214	      if (nbits < 3)
12215		nbits = 3;
12216	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12217	      bit_cost = COSTS_N_INSNS (bit_cost);
12218	    }
12219
12220	  if (mode == DImode || !TARGET_HARD_MUL)
12221	    *total = sparc_costs->int_mulX + bit_cost;
12222	  else
12223	    *total = sparc_costs->int_mul + bit_cost;
12224	}
12225      return false;
12226
12227    case ASHIFT:
12228    case ASHIFTRT:
12229    case LSHIFTRT:
12230      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12231      return false;
12232
12233    case DIV:
12234    case UDIV:
12235    case MOD:
12236    case UMOD:
12237      if (float_mode_p)
12238	{
12239	  if (mode == DFmode)
12240	    *total = sparc_costs->float_div_df;
12241	  else
12242	    *total = sparc_costs->float_div_sf;
12243	}
12244      else
12245	{
12246	  if (mode == DImode)
12247	    *total = sparc_costs->int_divX;
12248	  else
12249	    *total = sparc_costs->int_div;
12250	}
12251      return false;
12252
12253    case NEG:
12254      if (! float_mode_p)
12255	{
12256	  *total = COSTS_N_INSNS (1);
12257	  return false;
12258	}
12259      /* FALLTHRU */
12260
12261    case ABS:
12262    case FLOAT:
12263    case UNSIGNED_FLOAT:
12264    case FIX:
12265    case UNSIGNED_FIX:
12266    case FLOAT_EXTEND:
12267    case FLOAT_TRUNCATE:
12268      *total = sparc_costs->float_move;
12269      return false;
12270
12271    case SQRT:
12272      if (mode == DFmode)
12273	*total = sparc_costs->float_sqrt_df;
12274      else
12275	*total = sparc_costs->float_sqrt_sf;
12276      return false;
12277
12278    case COMPARE:
12279      if (float_mode_p)
12280	*total = sparc_costs->float_cmp;
12281      else
12282	*total = COSTS_N_INSNS (1);
12283      return false;
12284
12285    case IF_THEN_ELSE:
12286      if (float_mode_p)
12287	*total = sparc_costs->float_cmove;
12288      else
12289	*total = sparc_costs->int_cmove;
12290      return false;
12291
12292    case IOR:
12293      /* Handle the NAND vector patterns.  */
12294      if (sparc_vector_mode_supported_p (mode)
12295	  && GET_CODE (XEXP (x, 0)) == NOT
12296	  && GET_CODE (XEXP (x, 1)) == NOT)
12297	{
12298	  *total = COSTS_N_INSNS (1);
12299	  return true;
12300	}
12301      else
12302        return false;
12303
12304    default:
12305      return false;
12306    }
12307}
12308
12309/* Return true if CLASS is either GENERAL_REGS or I64_REGS.  */
12310
12311static inline bool
12312general_or_i64_p (reg_class_t rclass)
12313{
12314  return (rclass == GENERAL_REGS || rclass == I64_REGS);
12315}
12316
12317/* Implement TARGET_REGISTER_MOVE_COST.  */
12318
12319static int
12320sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12321			  reg_class_t from, reg_class_t to)
12322{
12323  bool need_memory = false;
12324
12325  /* This helps postreload CSE to eliminate redundant comparisons.  */
12326  if (from == NO_REGS || to == NO_REGS)
12327    return 100;
12328
12329  if (from == FPCC_REGS || to == FPCC_REGS)
12330    need_memory = true;
12331  else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12332	   || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12333    {
12334      if (TARGET_VIS3)
12335	{
12336	  int size = GET_MODE_SIZE (mode);
12337	  if (size == 8 || size == 4)
12338	    {
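	      /* A single VIS3 move insn (e.g. movwtos/movstouw), or a
		 pair of them for an 8-byte value in 32-bit mode.  */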
12339	      if (! TARGET_ARCH32 || size == 4)
12340		return 4;
12341	      else
12342		return 6;
12343	    }
12344	}
12345      need_memory = true;
12346    }
12347
12348  if (need_memory)
12349    {
12350      if (sparc_cpu == PROCESSOR_ULTRASPARC
12351	  || sparc_cpu == PROCESSOR_ULTRASPARC3
12352	  || sparc_cpu == PROCESSOR_NIAGARA
12353	  || sparc_cpu == PROCESSOR_NIAGARA2
12354	  || sparc_cpu == PROCESSOR_NIAGARA3
12355	  || sparc_cpu == PROCESSOR_NIAGARA4
12356	  || sparc_cpu == PROCESSOR_NIAGARA7
12357	  || sparc_cpu == PROCESSOR_M8)
12358	return 12;
12359
12360      return 6;
12361    }
12362
12363  return 2;
12364}
12365
12366/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12367   This is achieved by means of a manual dynamic stack space allocation in
12368   the current frame.  We make the assumption that SEQ doesn't contain any
12369   function calls, with the possible exception of calls to the GOT helper.  */
12370
12371static void
12372emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12373{
12374  /* We must preserve the lowest 16 words for the register save area.  */
12375  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12376  /* We really need only 2 words of fresh stack space.  */
12377  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12378
12379  rtx slot
12380    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12381					     SPARC_STACK_BIAS + offset));
12382
12383  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12384  emit_insn (gen_rtx_SET (slot, reg));
12385  if (reg2)
12386    emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12387			    reg2));
12388  emit_insn (seq);
12389  if (reg2)
12390    emit_insn (gen_rtx_SET (reg2,
12391			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
12392  emit_insn (gen_rtx_SET (reg, slot));
12393  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12394}
12395
12396/* Output the assembler code for a thunk function.  THUNK_DECL is the
12397   declaration for the thunk function itself, FUNCTION is the decl for
12398   the target function.  DELTA is an immediate constant offset to be
12399   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
12400   (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
12401
12402static void
12403sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12404		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12405		       tree function)
12406{
12407  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12408  rtx this_rtx, funexp;
12409  rtx_insn *insn;
12410  unsigned int int_arg_first;
12411
12412  reload_completed = 1;
12413  epilogue_completed = 1;
12414
12415  emit_note (NOTE_INSN_PROLOGUE_END);
12416
12417  if (TARGET_FLAT)
12418    {
12419      sparc_leaf_function_p = 1;
12420
12421      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12422    }
12423  else if (flag_delayed_branch)
12424    {
12425      /* We will emit a regular sibcall below, so we need to instruct
12426	 output_sibcall that we are in a leaf function.  */
12427      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12428
12429      /* This will cause final.c to invoke leaf_renumber_regs so we
12430	 must behave as if we were in a not-yet-leafified function.  */
12431      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12432    }
12433  else
12434    {
12435      /* We will emit the sibcall manually below, so we will need to
12436	 manually spill non-leaf registers.  */
12437      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12438
12439      /* We really are in a leaf function.  */
12440      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12441    }
12442
12443  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
12444     returns a structure, the structure return pointer is there instead.  */
12445  if (TARGET_ARCH64
12446      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12447    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12448  else
12449    this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12450
12451  /* Add DELTA.  When possible use a plain add, otherwise load it into
12452     a register first.  */
12453  if (delta)
12454    {
12455      rtx delta_rtx = GEN_INT (delta);
12456
12457      if (! SPARC_SIMM13_P (delta))
12458	{
12459	  rtx scratch = gen_rtx_REG (Pmode, 1);
12460	  emit_move_insn (scratch, delta_rtx);
12461	  delta_rtx = scratch;
12462	}
12463
12464      /* THIS_RTX += DELTA.  */
12465      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12466    }
12467
12468  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
12469  if (vcall_offset)
12470    {
12471      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12472      rtx scratch = gen_rtx_REG (Pmode, 1);
12473
12474      gcc_assert (vcall_offset < 0);
12475
12476      /* SCRATCH = *THIS_RTX.  */
12477      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12478
12479      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
12480	 may not have any available scratch register at this point.  */
12481      if (SPARC_SIMM13_P (vcall_offset))
12482	;
12483      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
12484      else if (! fixed_regs[5]
12485	       /* The below sequence is made up of at least 2 insns,
12486		  while the default method may need only one.  */
12487	       && vcall_offset < -8192)
12488	{
12489	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
12490	  emit_move_insn (scratch2, vcall_offset_rtx);
12491	  vcall_offset_rtx = scratch2;
12492	}
12493      else
12494	{
12495	  rtx increment = GEN_INT (-4096);
12496
12497	  /* VCALL_OFFSET is a negative number whose typical range can be
12498	     estimated as -32768..0 in 32-bit mode.  In almost all cases
12499	     it is therefore cheaper to emit multiple add insns than
12500	     spilling and loading the constant into a register (at least
12501	     6 insns).  */
12502	  while (! SPARC_SIMM13_P (vcall_offset))
12503	    {
12504	      emit_insn (gen_add2_insn (scratch, increment));
12505	      vcall_offset += 4096;
12506	    }
12507	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12508	}
12509
12510      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
12511      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12512					    gen_rtx_PLUS (Pmode,
12513							  scratch,
12514							  vcall_offset_rtx)));
12515
12516      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
12517      emit_insn (gen_add2_insn (this_rtx, scratch));
12518    }
12519
12520  /* Generate a tail call to the target function.  */
12521  if (! TREE_USED (function))
12522    {
12523      assemble_external (function);
12524      TREE_USED (function) = 1;
12525    }
12526  funexp = XEXP (DECL_RTL (function), 0);
12527
12528  if (flag_delayed_branch)
12529    {
12530      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12531      insn = emit_call_insn (gen_sibcall (funexp));
12532      SIBLING_CALL_P (insn) = 1;
12533    }
12534  else
12535    {
12536      /* The hoops we have to jump through in order to generate a sibcall
12537	 without using delay slots...  */
12538      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12539
12540      if (flag_pic)
12541        {
12542	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
12543	  start_sequence ();
12544	  load_got_register ();  /* clobbers %o7 */
12545	  if (!TARGET_VXWORKS_RTP)
12546	    pic_offset_table_rtx = got_register_rtx;
12547	  scratch = sparc_legitimize_pic_address (funexp, scratch);
12548	  seq = get_insns ();
12549	  end_sequence ();
12550	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12551	}
12552      else if (TARGET_ARCH32)
12553	{
12554	  emit_insn (gen_rtx_SET (scratch,
12555				  gen_rtx_HIGH (SImode, funexp)));
12556	  emit_insn (gen_rtx_SET (scratch,
12557				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
12558	}
12559      else  /* TARGET_ARCH64 */
12560        {
12561	  switch (sparc_code_model)
12562	    {
12563	    case CM_MEDLOW:
12564	    case CM_MEDMID:
12565	      /* The destination can serve as a temporary.  */
12566	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12567	      break;
12568
12569	    case CM_MEDANY:
12570	    case CM_EMBMEDANY:
12571	      /* The destination cannot serve as a temporary.  */
12572	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
12573	      start_sequence ();
12574	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12575	      seq = get_insns ();
12576	      end_sequence ();
12577	      emit_and_preserve (seq, spill_reg, 0);
12578	      break;
12579
12580	    default:
12581	      gcc_unreachable ();
12582	    }
12583	}
12584
12585      emit_jump_insn (gen_indirect_jump (scratch));
12586    }
12587
12588  emit_barrier ();
12589
12590  /* Run just enough of rest_of_compilation to get the insns emitted.
12591     There's not really enough bulk here to make other passes such as
12592     instruction scheduling worth while.  */
12593  insn = get_insns ();
12594  shorten_branches (insn);
12595  assemble_start_function (thunk_fndecl, fnname);
12596  final_start_function (insn, file, 1);
12597  final (insn, file, 1);
12598  final_end_function ();
12599  assemble_end_function (thunk_fndecl, fnname);
12600
12601  reload_completed = 0;
12602  epilogue_completed = 0;
12603}
12604
12605/* Return true if sparc_output_mi_thunk would be able to output the
12606   assembler code for the thunk function specified by the arguments
12607   it is passed, and false otherwise.  */
12608static bool
12609sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12610			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12611			   HOST_WIDE_INT vcall_offset,
12612			   const_tree function ATTRIBUTE_UNUSED)
12613{
12614  /* Bound the loop used in the default method above.  */
12615  return (vcall_offset >= -32768 || ! fixed_regs[5]);
12616}
12617
12618/* How to allocate a 'struct machine_function'.  */
12619
12620static struct machine_function *
12621sparc_init_machine_status (void)
12622{
12623  return ggc_cleared_alloc<machine_function> ();
12624}
12625
12626/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
12627
12628static unsigned HOST_WIDE_INT
12629sparc_asan_shadow_offset (void)
12630{
12631  return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12632}
12633
12634/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12635   We need to emit DTP-relative relocations.  */
12636
12637static void
12638sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12639{
12640  switch (size)
12641    {
12642    case 4:
12643      fputs ("\t.word\t%r_tls_dtpoff32(", file);
12644      break;
12645    case 8:
12646      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12647      break;
12648    default:
12649      gcc_unreachable ();
12650    }
12651  output_addr_const (file, x);
12652  fputs (")", file);
12653}
12654
12655/* Do whatever processing is required at the end of a file.  */
12656
12657static void
12658sparc_file_end (void)
12659{
12660  /* If we need to emit the special GOT helper function, do so now.  */
12661  if (got_helper_needed)
12662    {
12663      const char *name = XSTR (got_helper_rtx, 0);
12664#ifdef DWARF2_UNWIND_INFO
12665      bool do_cfi;
12666#endif
12667
12668      if (USE_HIDDEN_LINKONCE)
12669	{
12670	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12671				  get_identifier (name),
12672				  build_function_type_list (void_type_node,
12673                                                            NULL_TREE));
12674	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12675					   NULL_TREE, void_type_node);
12676	  TREE_PUBLIC (decl) = 1;
12677	  TREE_STATIC (decl) = 1;
12678	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12679	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12680	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
12681	  resolve_unique_section (decl, 0, flag_function_sections);
12682	  allocate_struct_function (decl, true);
12683	  cfun->is_thunk = 1;
12684	  current_function_decl = decl;
12685	  init_varasm_status ();
12686	  assemble_start_function (decl, name);
12687	}
12688      else
12689	{
12690	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12691          switch_to_section (text_section);
12692	  if (align > 0)
12693	    ASM_OUTPUT_ALIGN (asm_out_file, align);
12694	  ASM_OUTPUT_LABEL (asm_out_file, name);
12695	}
12696
12697#ifdef DWARF2_UNWIND_INFO
12698      do_cfi = dwarf2out_do_cfi_asm ();
12699      if (do_cfi)
12700	output_asm_insn (".cfi_startproc", NULL);
12701#endif
12702      if (flag_delayed_branch)
12703	{
12704	  output_asm_insn ("jmp\t%%o7+8", NULL);
12705	  output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12706	}
12707      else
12708	{
12709	  output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12710	  output_asm_insn ("jmp\t%%o7+8", NULL);
12711	  output_asm_insn (" nop", NULL);
12712	}
12713#ifdef DWARF2_UNWIND_INFO
12714      if (do_cfi)
12715	output_asm_insn (".cfi_endproc", NULL);
12716#endif
12717    }
12718
12719  if (NEED_INDICATE_EXEC_STACK)
12720    file_end_indicate_exec_stack ();
12721
12722#ifdef TARGET_SOLARIS
12723  solaris_file_end ();
12724#endif
12725}
12726
12727#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12728/* Implement TARGET_MANGLE_TYPE.  */
12729
12730static const char *
12731sparc_mangle_type (const_tree type)
12732{
12733  if (TARGET_ARCH32
12734      && TYPE_MAIN_VARIANT (type) == long_double_type_node
12735      && TARGET_LONG_DOUBLE_128)
12736    return "g";
12737
12738  /* For all other types, use normal C++ mangling.  */
12739  return NULL;
12740}
12741#endif
12742
/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true, and bit 1 indicates Y is true.  */
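
/* For example, a full seq_cst fence (LOAD_STORE == 3, BEFORE_AFTER == 3)
   compiled for the TSO memory model reduces to a single membar #StoreLoad:
   TSO already implies the LoadLoad, LoadStore and StoreStore orderings,
   so only the StoreLoad bit survives the masking below.  */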
12746
12747void
12748sparc_emit_membar_for_model (enum memmodel model,
12749			     int load_store, int before_after)
12750{
12751  /* Bits for the MEMBAR mmask field.  */
12752  const int LoadLoad = 1;
12753  const int StoreLoad = 2;
12754  const int LoadStore = 4;
12755  const int StoreStore = 8;
12756
12757  int mm = 0, implied = 0;
12758
12759  switch (sparc_memory_model)
12760    {
12761    case SMM_SC:
12762      /* Sequential Consistency.  All memory transactions are immediately
12763	 visible in sequential execution order.  No barriers needed.  */
12764      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12765      break;
12766
12767    case SMM_TSO:
12768      /* Total Store Ordering: all memory transactions with store semantics
12769	 are followed by an implied StoreStore.  */
12770      implied |= StoreStore;
12771
      /* If we're not looking for a raw barrier (before+after), then atomic
12773	 operations get the benefit of being both load and store.  */
12774      if (load_store == 3 && before_after == 1)
12775	implied |= StoreLoad;
12776      /* FALLTHRU */
12777
12778    case SMM_PSO:
12779      /* Partial Store Ordering: all memory transactions with load semantics
12780	 are followed by an implied LoadLoad | LoadStore.  */
12781      implied |= LoadLoad | LoadStore;
12782
      /* If we're not looking for a raw barrier (before+after), then atomic
12784	 operations get the benefit of being both load and store.  */
12785      if (load_store == 3 && before_after == 2)
12786	implied |= StoreLoad | StoreStore;
12787      /* FALLTHRU */
12788
12789    case SMM_RMO:
12790      /* Relaxed Memory Ordering: no implicit bits.  */
12791      break;
12792
12793    default:
12794      gcc_unreachable ();
12795    }
12796
12797  if (before_after & 1)
12798    {
12799      if (is_mm_release (model) || is_mm_acq_rel (model)
12800	  || is_mm_seq_cst (model))
12801	{
12802	  if (load_store & 1)
12803	    mm |= LoadLoad | StoreLoad;
12804	  if (load_store & 2)
12805	    mm |= LoadStore | StoreStore;
12806	}
12807    }
12808  if (before_after & 2)
12809    {
12810      if (is_mm_acquire (model) || is_mm_acq_rel (model)
12811	  || is_mm_seq_cst (model))
12812	{
12813	  if (load_store & 1)
12814	    mm |= LoadLoad | LoadStore;
12815	  if (load_store & 2)
12816	    mm |= StoreLoad | StoreStore;
12817	}
12818    }
12819
12820  /* Remove the bits implied by the system memory model.  */
12821  mm &= ~implied;
12822
12823  /* For raw barriers (before+after), always emit a barrier.
12824     This will become a compile-time barrier if needed.  */
12825  if (mm || before_after == 3)
12826    emit_insn (gen_membar (GEN_INT (mm)));
12827}
12828
/* Expand code to perform an 8-bit or 16-bit compare and swap by doing
   a 32-bit compare and swap on the word containing the byte or half-word.  */
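
/* The strategy is to loop on a 32-bit compare and swap of the whole
   word: splice the shifted OLDVAL/NEWVAL together with the bytes of
   the word outside the sub-word, and retry whenever those outside
   bytes have changed in memory behind our back.  */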
12831
12832static void
12833sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12834				  rtx oldval, rtx newval)
12835{
12836  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12837  rtx addr = gen_reg_rtx (Pmode);
12838  rtx off = gen_reg_rtx (SImode);
12839  rtx oldv = gen_reg_rtx (SImode);
12840  rtx newv = gen_reg_rtx (SImode);
12841  rtx oldvalue = gen_reg_rtx (SImode);
12842  rtx newvalue = gen_reg_rtx (SImode);
12843  rtx res = gen_reg_rtx (SImode);
12844  rtx resv = gen_reg_rtx (SImode);
12845  rtx memsi, val, mask, cc;
12846
12847  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12848
12849  if (Pmode != SImode)
12850    addr1 = gen_lowpart (SImode, addr1);
12851  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12852
12853  memsi = gen_rtx_MEM (SImode, addr);
12854  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12855  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12856
12857  val = copy_to_reg (memsi);
12858
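  /* Compute the big-endian shift count of the sub-word within the word:
     XORing the byte offset with 3 (QImode) or 2 (HImode) mirrors it
     within the word, and the shift by 3 below converts bytes to bits.  */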
12859  emit_insn (gen_rtx_SET (off,
12860			  gen_rtx_XOR (SImode, off,
12861				       GEN_INT (GET_MODE (mem) == QImode
12862						? 3 : 2))));
12863
12864  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12865
12866  if (GET_MODE (mem) == QImode)
12867    mask = force_reg (SImode, GEN_INT (0xff));
12868  else
12869    mask = force_reg (SImode, GEN_INT (0xffff));
12870
12871  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12872
12873  emit_insn (gen_rtx_SET (val,
12874			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12875				       val)));
12876
12877  oldval = gen_lowpart (SImode, oldval);
12878  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12879
12880  newval = gen_lowpart_common (SImode, newval);
12881  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12882
12883  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12884
12885  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12886
12887  rtx_code_label *end_label = gen_label_rtx ();
12888  rtx_code_label *loop_label = gen_label_rtx ();
12889  emit_label (loop_label);
12890
12891  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12892
12893  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12894
12895  emit_move_insn (bool_result, const1_rtx);
12896
12897  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12898
12899  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12900
12901  emit_insn (gen_rtx_SET (resv,
12902			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12903				       res)));
12904
12905  emit_move_insn (bool_result, const0_rtx);
12906
12907  cc = gen_compare_reg_1 (NE, resv, val);
12908  emit_insn (gen_rtx_SET (val, resv));
12909
12910  /* Use cbranchcc4 to separate the compare and branch!  */
12911  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12912				  cc, const0_rtx, loop_label));
12913
12914  emit_label (end_label);
12915
12916  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12917
12918  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12919
12920  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12921}
12922
12923/* Expand code to perform a compare-and-swap.  */
12924
12925void
12926sparc_expand_compare_and_swap (rtx operands[])
12927{
12928  rtx bval, retval, mem, oldval, newval;
12929  machine_mode mode;
12930  enum memmodel model;
12931
12932  bval = operands[0];
12933  retval = operands[1];
12934  mem = operands[2];
12935  oldval = operands[3];
12936  newval = operands[4];
12937  model = (enum memmodel) INTVAL (operands[6]);
12938  mode = GET_MODE (mem);
12939
12940  sparc_emit_membar_for_model (model, 3, 1);
12941
12942  if (reg_overlap_mentioned_p (retval, oldval))
12943    oldval = copy_to_reg (oldval);
12944
12945  if (mode == QImode || mode == HImode)
12946    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12947  else
12948    {
12949      rtx (*gen) (rtx, rtx, rtx, rtx);
12950      rtx x;
12951
12952      if (mode == SImode)
12953	gen = gen_atomic_compare_and_swapsi_1;
12954      else
12955	gen = gen_atomic_compare_and_swapdi_1;
12956      emit_insn (gen (retval, mem, oldval, newval));
12957
12958      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12959      if (x != bval)
12960	convert_move (bval, x, 1);
12961    }
12962
12963  sparc_emit_membar_for_model (model, 3, 2);
12964}
12965
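/* Expand the vector permutation selector SEL, which holds one element
   index per element of a vector with mode VMODE, into the byte-level
   shuffle mask expected by BSHUFFLE, and load the mask into the GSR
   by means of a BMASK insn.  */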
12966void
12967sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12968{
12969  rtx t_1, t_2, t_3;
12970
12971  sel = gen_lowpart (DImode, sel);
12972  switch (vmode)
12973    {
12974    case E_V2SImode:
12975      /* inp = xxxxxxxAxxxxxxxB */
12976      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12977				 NULL_RTX, 1, OPTAB_DIRECT);
12978      /* t_1 = ....xxxxxxxAxxx. */
12979      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12980				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12981      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12982				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12983      /* sel = .......B */
12984      /* t_1 = ...A.... */
12985      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12986      /* sel = ...A...B */
12987      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12988      /* sel = AAAABBBB * 4 */
12989      t_1 = force_reg (SImode, GEN_INT (0x01230123));
12990      /* sel = { A*4, A*4+1, A*4+2, ... } */
12991      break;
12992
12993    case E_V4HImode:
12994      /* inp = xxxAxxxBxxxCxxxD */
12995      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12996				 NULL_RTX, 1, OPTAB_DIRECT);
12997      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12998				 NULL_RTX, 1, OPTAB_DIRECT);
12999      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
13000				 NULL_RTX, 1, OPTAB_DIRECT);
13001      /* t_1 = ..xxxAxxxBxxxCxx */
13002      /* t_2 = ....xxxAxxxBxxxC */
13003      /* t_3 = ......xxxAxxxBxx */
13004      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
13005				 GEN_INT (0x07),
13006				 NULL_RTX, 1, OPTAB_DIRECT);
13007      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
13008				 GEN_INT (0x0700),
13009				 NULL_RTX, 1, OPTAB_DIRECT);
13010      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
13011				 GEN_INT (0x070000),
13012				 NULL_RTX, 1, OPTAB_DIRECT);
13013      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
13014				 GEN_INT (0x07000000),
13015				 NULL_RTX, 1, OPTAB_DIRECT);
13016      /* sel = .......D */
13017      /* t_1 = .....C.. */
13018      /* t_2 = ...B.... */
13019      /* t_3 = .A...... */
13020      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
13021      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
13022      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
13023      /* sel = .A.B.C.D */
13024      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
13025      /* sel = AABBCCDD * 2 */
13026      t_1 = force_reg (SImode, GEN_INT (0x01010101));
13027      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
13028      break;
13029
13030    case E_V8QImode:
13031      /* input = xAxBxCxDxExFxGxH */
13032      sel = expand_simple_binop (DImode, AND, sel,
13033				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
13034					  | 0x0f0f0f0f),
13035				 NULL_RTX, 1, OPTAB_DIRECT);
13036      /* sel = .A.B.C.D.E.F.G.H */
13037      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
13038				 NULL_RTX, 1, OPTAB_DIRECT);
13039      /* t_1 = ..A.B.C.D.E.F.G. */
13040      sel = expand_simple_binop (DImode, IOR, sel, t_1,
13041				 NULL_RTX, 1, OPTAB_DIRECT);
13042      /* sel = .AABBCCDDEEFFGGH */
13043      sel = expand_simple_binop (DImode, AND, sel,
13044				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
13045					  | 0xff00ff),
13046				 NULL_RTX, 1, OPTAB_DIRECT);
13047      /* sel = ..AB..CD..EF..GH */
13048      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
13049				 NULL_RTX, 1, OPTAB_DIRECT);
13050      /* t_1 = ....AB..CD..EF.. */
13051      sel = expand_simple_binop (DImode, IOR, sel, t_1,
13052				 NULL_RTX, 1, OPTAB_DIRECT);
13053      /* sel = ..ABABCDCDEFEFGH */
13054      sel = expand_simple_binop (DImode, AND, sel,
13055				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
13056				 NULL_RTX, 1, OPTAB_DIRECT);
13057      /* sel = ....ABCD....EFGH */
13058      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
13059				 NULL_RTX, 1, OPTAB_DIRECT);
13060      /* t_1 = ........ABCD.... */
13061      sel = gen_lowpart (SImode, sel);
13062      t_1 = gen_lowpart (SImode, t_1);
13063      break;
13064
13065    default:
13066      gcc_unreachable ();
13067    }
13068
13069  /* Always perform the final addition/merge within the bmask insn.  */
13070  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
13071}
13072
13073/* Implement TARGET_VEC_PERM_CONST.  */
13074
13075static bool
13076sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
13077				rtx op1, const vec_perm_indices &sel)
13078{
13079  if (!TARGET_VIS2)
13080    return false;
13081
13082  /* All 8-byte permutes are supported.  */
13083  if (!target)
13084    return GET_MODE_SIZE (vmode) == 8;
13085
13086  /* Force target-independent code to convert constant permutations on other
13087     modes down to V8QI.  Rely on this to avoid the complexity of the byte
13088     order of the permutation.  */
13089  if (vmode != V8QImode)
13090    return false;
13091
13092  unsigned int i, mask;
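  /* Pack the eight byte indices into bmask nibbles, element 0 in the
     most significant nibble, as BSHUFFLE expects.  */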
13093  for (i = mask = 0; i < 8; ++i)
13094    mask |= (sel[i] & 0xf) << (28 - i*4);
13095  rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
13096
13097  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
13098  emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
13099  return true;
13100}
13101
13102/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
13103
13104static bool
13105sparc_frame_pointer_required (void)
13106{
13107  /* If the stack pointer is dynamically modified in the function, it cannot
13108     serve as the frame pointer.  */
13109  if (cfun->calls_alloca)
13110    return true;
13111
13112  /* If the function receives nonlocal gotos, it needs to save the frame
13113     pointer in the nonlocal_goto_save_area object.  */
13114  if (cfun->has_nonlocal_label)
13115    return true;
13116
13117  /* In flat mode, that's it.  */
13118  if (TARGET_FLAT)
13119    return false;
13120
13121  /* Otherwise, the frame pointer is required if the function isn't leaf, but
13122     we cannot use sparc_leaf_function_p since it hasn't been computed yet.  */
13123  return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13124}
13125
13126/* The way this is structured, we can't eliminate SFP in favor of SP
13127   if the frame pointer is required: we want to use the SFP->HFP elimination
13128   in that case.  But the test in update_eliminables doesn't know we are
13129   assuming below that we only do the former elimination.  */
13130
13131static bool
13132sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13133{
13134  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13135}
13136
13137/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13138   they won't be allocated.  */
13139
13140static void
13141sparc_conditional_register_usage (void)
13142{
13143  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13144    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
13147  if (TARGET_ARCH32 && fixed_regs[5])
13148    fixed_regs[5] = 1;
13149  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13150    fixed_regs[5] = 0;
13151  if (! TARGET_V9)
13152    {
13153      int regno;
13154      for (regno = SPARC_FIRST_V9_FP_REG;
13155	   regno <= SPARC_LAST_V9_FP_REG;
13156	   regno++)
13157	fixed_regs[regno] = 1;
13158      /* %fcc0 is used by v8 and v9.  */
13159      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13160	   regno <= SPARC_LAST_V9_FCC_REG;
13161	   regno++)
13162	fixed_regs[regno] = 1;
13163    }
13164  if (! TARGET_FPU)
13165    {
13166      int regno;
13167      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13168	fixed_regs[regno] = 1;
13169    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
13172  if (fixed_regs[2] == 2)
13173    fixed_regs[2] = ! TARGET_APP_REGS;
13174  if (fixed_regs[3] == 2)
13175    fixed_regs[3] = ! TARGET_APP_REGS;
13176  if (TARGET_ARCH32 && fixed_regs[4] == 2)
13177    fixed_regs[4] = ! TARGET_APP_REGS;
13178  else if (TARGET_CM_EMBMEDANY)
13179    fixed_regs[4] = 1;
13180  else if (fixed_regs[4] == 2)
13181    fixed_regs[4] = 0;
13182  if (TARGET_FLAT)
13183    {
13184      int regno;
13185      /* Disable leaf functions.  */
13186      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13187      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13188	leaf_reg_remap [regno] = regno;
13189    }
13190  if (TARGET_VIS)
13191    global_regs[SPARC_GSR_REG] = 1;
13192}
13193
13194/* Implement TARGET_USE_PSEUDO_PIC_REG.  */
13195
13196static bool
13197sparc_use_pseudo_pic_reg (void)
13198{
13199  return !TARGET_VXWORKS_RTP && flag_pic;
13200}
13201
13202/* Implement TARGET_INIT_PIC_REG.  */
13203
13204static void
13205sparc_init_pic_reg (void)
13206{
13207  edge entry_edge;
13208  rtx_insn *seq;
13209
13210  /* In PIC mode, we need to always initialize the PIC register if optimization
13211     is enabled, because we are called from IRA and LRA may later force things
13212     to the constant pool for optimization purposes.  */
13213  if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13214    return;
13215
13216  start_sequence ();
13217  load_got_register ();
13218  if (!TARGET_VXWORKS_RTP)
13219    emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13220  seq = get_insns ();
13221  end_sequence ();
13222
13223  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13224  insert_insn_on_edge (seq, entry_edge);
13225  commit_one_edge_insertion (entry_edge);
13226}
13227
13228/* Implement TARGET_PREFERRED_RELOAD_CLASS:
13229
13230   - We can't load constants into FP registers.
13231   - We can't load FP constants into integer registers when soft-float,
13232     because there is no soft-float pattern with a r/F constraint.
13233   - We can't load FP constants into integer registers for TFmode unless
13234     it is 0.0L, because there is no movtf pattern with a r/F constraint.
13235   - Try and reload integer constants (symbolic or otherwise) back into
13236     registers directly, rather than having them dumped to memory.  */
13237
13238static reg_class_t
13239sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13240{
13241  machine_mode mode = GET_MODE (x);
13242  if (CONSTANT_P (x))
13243    {
13244      if (FP_REG_CLASS_P (rclass)
13245	  || rclass == GENERAL_OR_FP_REGS
13246	  || rclass == GENERAL_OR_EXTRA_FP_REGS
13247	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13248	  || (mode == TFmode && ! const_zero_operand (x, mode)))
13249	return NO_REGS;
13250
13251      if (GET_MODE_CLASS (mode) == MODE_INT)
13252	return GENERAL_REGS;
13253
13254      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13255	{
13256	  if (! FP_REG_CLASS_P (rclass)
13257	      || !(const_zero_operand (x, mode)
13258		   || const_all_ones_operand (x, mode)))
13259	    return NO_REGS;
13260	}
13261    }
13262
13263  if (TARGET_VIS3
13264      && ! TARGET_ARCH64
13265      && (rclass == EXTRA_FP_REGS
13266	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
13267    {
13268      int regno = true_regnum (x);
13269
13270      if (SPARC_INT_REG_P (regno))
13271	return (rclass == EXTRA_FP_REGS
13272		? FP_REGS : GENERAL_OR_FP_REGS);
13273    }
13274
13275  return rclass;
13276}
13277
13278/* Return true if we use LRA instead of reload pass.  */
13279
13280static bool
13281sparc_lra_p (void)
13282{
13283  return TARGET_LRA;
13284}
13285
13286/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
13287   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
13288
13289const char *
13290output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13291{
13292  char mulstr[32];
13293
13294  gcc_assert (! TARGET_ARCH64);
13295
13296  if (sparc_check_64 (operands[1], insn) <= 0)
13297    output_asm_insn ("srl\t%L1, 0, %L1", operands);
13298  if (which_alternative == 1)
13299    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13300  if (GET_CODE (operands[2]) == CONST_INT)
13301    {
13302      if (which_alternative == 1)
13303	{
13304	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
13305	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13306	  output_asm_insn (mulstr, operands);
13307	  return "srlx\t%L0, 32, %H0";
13308	}
13309      else
13310	{
13311	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
13312          output_asm_insn ("or\t%L1, %3, %3", operands);
13313          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13314	  output_asm_insn (mulstr, operands);
13315	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
13316          return "mov\t%3, %L0";
13317	}
13318    }
13319  else if (rtx_equal_p (operands[1], operands[2]))
13320    {
13321      if (which_alternative == 1)
13322	{
13323	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
13324          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13325	  output_asm_insn (mulstr, operands);
13326	  return "srlx\t%L0, 32, %H0";
13327	}
13328      else
13329	{
13330	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
13331          output_asm_insn ("or\t%L1, %3, %3", operands);
13332	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13333	  output_asm_insn (mulstr, operands);
13334	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
13335          return "mov\t%3, %L0";
13336	}
13337    }
13338  if (sparc_check_64 (operands[2], insn) <= 0)
13339    output_asm_insn ("srl\t%L2, 0, %L2", operands);
13340  if (which_alternative == 1)
13341    {
13342      output_asm_insn ("or\t%L1, %H1, %H1", operands);
13343      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13344      output_asm_insn ("or\t%L2, %L1, %L1", operands);
13345      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13346      output_asm_insn (mulstr, operands);
13347      return "srlx\t%L0, 32, %H0";
13348    }
13349  else
13350    {
13351      output_asm_insn ("sllx\t%H1, 32, %3", operands);
13352      output_asm_insn ("sllx\t%H2, 32, %4", operands);
13353      output_asm_insn ("or\t%L1, %3, %3", operands);
13354      output_asm_insn ("or\t%L2, %4, %4", operands);
13355      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13356      output_asm_insn (mulstr, operands);
13357      output_asm_insn ("srlx\t%3, 32, %H0", operands);
13358      return "mov\t%3, %L0";
13359    }
13360}
13361
13362/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
13363   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
13364   and INNER_MODE are the modes describing TARGET.  */
13365
13366static void
13367vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13368		      machine_mode inner_mode)
13369{
13370  rtx t1, final_insn, sel;
13371  int bmask;
13372
13373  t1 = gen_reg_rtx (mode);
13374
13375  elt = convert_modes (SImode, inner_mode, elt, true);
13376  emit_move_insn (gen_lowpart(SImode, t1), elt);
13377
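  /* ELT now sits in the least significant bytes of T1.  Each bmask
     nibble selects one byte of the concatenated T1:T1 pair, so the
     constants below replicate bytes 4-7 (V2SI), bytes 6-7 (V4HI) or
     byte 7 (V8QI) across the whole vector.  */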
13378  switch (mode)
13379    {
13380    case E_V2SImode:
13381      final_insn = gen_bshufflev2si_vis (target, t1, t1);
13382      bmask = 0x45674567;
13383      break;
13384    case E_V4HImode:
13385      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13386      bmask = 0x67676767;
13387      break;
13388    case E_V8QImode:
13389      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13390      bmask = 0x77777777;
13391      break;
13392    default:
13393      gcc_unreachable ();
13394    }
13395
13396  sel = force_reg (SImode, GEN_INT (bmask));
13397  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13398  emit_insn (final_insn);
13399}
13400
13401/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
13402   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */
13403
13404static void
13405vector_init_fpmerge (rtx target, rtx elt)
13406{
13407  rtx t1, t2, t2_low, t3, t3_low;
13408
13409  t1 = gen_reg_rtx (V4QImode);
13410  elt = convert_modes (SImode, QImode, elt, true);
13411  emit_move_insn (gen_lowpart (SImode, t1), elt);
13412
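  /* FPMERGE interleaves the bytes of its two V4QI operands into a V8QI
     result, so merging a value with itself doubles the adjacent copies
     of ELT: three rounds turn one copy into eight.  */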
13413  t2 = gen_reg_rtx (V8QImode);
13414  t2_low = gen_lowpart (V4QImode, t2);
13415  emit_insn (gen_fpmerge_vis (t2, t1, t1));
13416
13417  t3 = gen_reg_rtx (V8QImode);
13418  t3_low = gen_lowpart (V4QImode, t3);
13419  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13420
13421  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13422}
13423
13424/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
13425   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */
13426
13427static void
13428vector_init_faligndata (rtx target, rtx elt)
13429{
13430  rtx t1 = gen_reg_rtx (V4HImode);
13431  int i;
13432
13433  elt = convert_modes (SImode, HImode, elt, true);
13434  emit_move_insn (gen_lowpart (SImode, t1), elt);
13435
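  /* Set the GSR alignment offset to 6 so that each FALIGNDATA below
     extracts bytes 6-7 of T1 (the element, in the least significant
     halfword) followed by bytes 0-5 of TARGET: every iteration shifts
     TARGET right by one halfword and inserts ELT in front, so four
     iterations fill all four fields.  */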
13436  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13437				  force_reg (SImode, GEN_INT (6)),
13438				  const0_rtx));
13439
13440  for (i = 0; i < 4; i++)
13441    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13442}
13443
13444/* Emit code to initialize TARGET to values for individual fields VALS.  */
13445
13446void
13447sparc_expand_vector_init (rtx target, rtx vals)
13448{
13449  const machine_mode mode = GET_MODE (target);
13450  const machine_mode inner_mode = GET_MODE_INNER (mode);
13451  const int n_elts = GET_MODE_NUNITS (mode);
13452  int i, n_var = 0;
13453  bool all_same = true;
13454  rtx mem;
13455
13456  for (i = 0; i < n_elts; i++)
13457    {
13458      rtx x = XVECEXP (vals, 0, i);
13459      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13460	n_var++;
13461
13462      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13463	all_same = false;
13464    }
13465
13466  if (n_var == 0)
13467    {
13468      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13469      return;
13470    }
13471
13472  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13473    {
13474      if (GET_MODE_SIZE (inner_mode) == 4)
13475	{
13476	  emit_move_insn (gen_lowpart (SImode, target),
13477			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13478	  return;
13479	}
13480      else if (GET_MODE_SIZE (inner_mode) == 8)
13481	{
13482	  emit_move_insn (gen_lowpart (DImode, target),
13483			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13484	  return;
13485	}
13486    }
13487  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13488	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13489    {
13490      emit_move_insn (gen_highpart (word_mode, target),
13491		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13492      emit_move_insn (gen_lowpart (word_mode, target),
13493		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13494      return;
13495    }
13496
13497  if (all_same && GET_MODE_SIZE (mode) == 8)
13498    {
13499      if (TARGET_VIS2)
13500	{
13501	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13502	  return;
13503	}
13504      if (mode == V8QImode)
13505	{
13506	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13507	  return;
13508	}
13509      if (mode == V4HImode)
13510	{
13511	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13512	  return;
13513	}
13514    }
13515
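  /* Otherwise build the vector in a stack temporary and load it.  */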
13516  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13517  for (i = 0; i < n_elts; i++)
13518    emit_move_insn (adjust_address_nv (mem, inner_mode,
13519				       i * GET_MODE_SIZE (inner_mode)),
13520		    XVECEXP (vals, 0, i));
13521  emit_move_insn (target, mem);
13522}
13523
/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing an HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}

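/* Illustrative scenario (not from the sources): on 32-bit VIS3, reloading
   a DImode value from an integer register pair into an upper FP register
   such as %f32 cannot be done directly, because the 4-byte fp<-->int
   moves only reach the lower, single-precision-addressable FP registers.
   The hook above therefore requests an FP_REGS intermediate and notes
   two units of extra cost for the additional moves.  */
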
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   On SPARC, when VIS3 is not available, it is not possible to move data
   directly between GENERAL_REGS and FP_REGS.  */

static bool
sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			       reg_class_t class2)
{
  return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
	  && (! TARGET_VIS3
	      || GET_MODE_SIZE (mode) > 8
	      || GET_MODE_SIZE (mode) < 4));
}

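/* For instance (illustrative): without VIS3, an SImode copy between %o0
   and %f0 must bounce through a memory slot, so the hook returns true
   whenever exactly one of the two classes is an FP class.  With VIS3 the
   direct moves cover only modes of 4 to 8 bytes, hence the size checks
   above.  */
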
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
   because the movsi and movsf patterns don't handle r/f moves.
   For v8 we copy the default definition.  */

static machine_mode
sparc_secondary_memory_needed_mode (machine_mode mode)
{
  if (TARGET_ARCH64)
    {
      if (GET_MODE_BITSIZE (mode) < 32)
	return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
      return mode;
    }
  else
    {
      if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
	return mode_for_size (BITS_PER_WORD,
			      GET_MODE_CLASS (mode), 0).require ();
      return mode;
    }
}

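/* Example (illustrative): on TARGET_ARCH64 a QImode or HImode reload
   through memory is widened only to a 32-bit mode rather than to the
   64-bit BITS_PER_WORD that get_secondary_mem would otherwise pick;
   modes of 32 bits or more are left untouched.  On 32-bit this matches
   the default word-mode widening.  */
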
/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
        rc = reverse_condition_maybe_unordered (rc);
      else
        rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

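/* A sketch of the result (illustrative): for source code like

     if (a > b) x = y;

   the expander loads the unchosen value into the destination when neither
   source already equals it, and then emits a single conditional move,

     (set (reg x) (if_then_else (gt (reg cc) (const_int 0))
				(reg y) (reg x)))

   i.e. a V9 movcc, or a movr when the comparison is a DImode register
   test against zero.  */
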
/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}

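/* The generated sequence looks roughly like this (illustrative, for a
   V4HImode greater-than vcond using the VIS3 cmask insns):

     fcmpgt16  %f_cop0, %f_cop1, %mask   ! FCODE: element-wise compare
     cmask16   %mask                     ! CCODE: set GSR.mask from %mask
     bshuffle  %f_op1, %f_op2, %f_dst    ! select bytes per GSR.mask  */
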
/* On SPARC, any mode that naturally allocates into the floating-point
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

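/* For example (illustrative): on TARGET_ARCH64, DFmode answers 4 because
   double-precision values live in pairs of 4-byte FP registers, while
   DImode answers UNITS_PER_WORD (8) because it lives in a single 64-bit
   integer register.  */
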
/* Implement TARGET_HARD_REGNO_NREGS.

   On SPARC, ordinary registers hold 32 bits worth; this means both
   integer and floating point registers.  On v9, integer regs hold 64
   bits worth; floating point regs hold 32 bits worth (this includes the
   new fp regs as even the odd ones are included in the hard register
   count).  */

static unsigned int
sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (regno == SPARC_GSR_REG)
    return 1;
  if (TARGET_ARCH64)
    {
      if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
      return CEIL (GET_MODE_SIZE (mode), 4);
    }
  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}

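/* Worked example (illustrative): on TARGET_ARCH64 a DImode value needs
   CEIL (8, 8) = 1 integer register but CEIL (8, 4) = 2 floating-point
   registers, since FP registers still count as 32 bits apiece in the
   hard register numbering.  */
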
/* Implement TARGET_HARD_REGNO_MODE_OK.

   ??? Because of the funny way we pass parameters we should allow certain
   ??? types of float/complex values to be in integer registers during
   ??? RTL generation.  This only matters on arch32.  */

static bool
sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
}

/* Implement TARGET_MODES_TIEABLE_P.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

static bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

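/* Example (illustrative): SFmode and DFmode are both MODE_FLOAT, but on
   V9 they are not tieable because the 8-byte value can land in the upper
   half of the FP register file, where no 4-byte access exists; on pre-V9
   the same pair is tieable.  */
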
/* Implement TARGET_CSTORE_MODE.  */

static scalar_int_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
         tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

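/* A sketch of how the generic code is expected to stitch the three pieces
   together when expanding an atomic compound assignment on a
   floating-point object (descriptive only, not SPARC-specific code):

     hold;                  // save the FSR, mask traps, clear flags
     do
       {
	 clear;             // discard flags from any failed attempt
	 ... compute the new value, possibly raising FP exceptions ...
       }
     while (!compare_and_swap (mem, old, new));
     update;                // restore the FSR and raise accrued flags  */
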
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Borrowed from the PA port.

   SImode loads to floating-point registers are not zero-extended.
   The definition for LOAD_EXTEND_OP specifies that integer loads
   narrower than BITS_PER_WORD will be zero-extended.  As a result,
   we inhibit changes from SImode unless they are to a mode that is
   identical in size.

   Likewise for SFmode, since word-mode paradoxical subregs are
   problematic on big-endian architectures.  */

static bool
sparc_can_change_mode_class (machine_mode from, machine_mode to,
			     reg_class_t rclass)
{
  if (TARGET_ARCH64
      && GET_MODE_SIZE (from) == 4
      && GET_MODE_SIZE (to) != 4)
    return !reg_classes_intersect_p (rclass, FP_REGS);
  return true;
}

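/* Concrete effect (an illustration, not from the sources): on
   TARGET_ARCH64, an SImode pseudo that may be allocated to FP_REGS is not
   allowed to be re-accessed as DImode, since the 32-bit FP load that
   filled it never zero-extended the upper half that LOAD_EXTEND_OP
   promises.  The same mode change on a pure integer class remains
   valid.  */
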
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST)
    return MAX (align, FASTEST_ALIGNMENT);
  return align;
}

#include "gt-sparc.h"
