/* Subroutines for insn-output.c for Sun SPARC.
   Copyright (C) 1987, 88, 89, 92-98, 1999 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "expr.h"
#include "recog.h"
#include "toplev.h"

/* 1 if the caller has placed an "unimp" insn immediately after the call.
   This is used in v8 code when calling a function that returns a structure.
   v9 doesn't have this.  Be careful to have this test be the same as that
   used on the call.  */

#define SKIP_CALLERS_UNIMP_P  \
(!TARGET_ARCH64 && current_function_returns_struct  \
 && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))  \
 && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))  \
     == INTEGER_CST))
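
/* For illustration (a sketch of the v8 convention, not code from this
   file): a caller of a function that returns a 12-byte structure emits

	call	foo, 0
	 nop
	unimp	12	! immediate field encodes the size of the struct

   and the callee returns with "jmp %i7+12" instead of the usual
   "jmp %i7+8" so that the unimp word is skipped.  SKIP_CALLERS_UNIMP_P
   is what selects the longer return offset.  */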

/* Global variables for machine-dependent things.  */

/* Size of frame.  Need to know this to emit return insns from leaf procedures.
   ACTUAL_FSIZE is set by compute_frame_size() which is called during the
   reload pass.  This is important as the value is later used in insn
   scheduling (to see what can go in a delay slot).
   APPARENT_FSIZE is the size of the stack less the register save area and less
   the outgoing argument area.  It is used when saving call preserved regs.  */
static int apparent_fsize;
static int actual_fsize;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx sparc_compare_op0, sparc_compare_op1;

/* We may need an epilogue if we spill too many registers.
   If this is non-zero, then we branch here for the epilogue.  */
static rtx leaf_label;

#ifdef LEAF_REGISTERS

/* Vector to say how input registers are mapped to output
   registers.  FRAME_POINTER_REGNUM cannot be remapped by
   this function to eliminate it.  You must use -fomit-frame-pointer
   to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100};

#endif

/* Name of where we pretend to think the frame pointer points.
   Normally, this is "%fp", but if we are in a leaf procedure,
   this is "%sp+something".  We record "something" separately as it may be
   too big for reg+constant addressing.  */

static const char *frame_base_name;
static int frame_base_offset;

static rtx pic_setup_code	PROTO((void));
static void sparc_init_modes	PROTO((void));
static int save_regs		PROTO((FILE *, int, int, const char *,
				       int, int, int));
static int restore_regs		PROTO((FILE *, int, int, const char *, int, int));
static void build_big_number	PROTO((FILE *, int, const char *));
static int function_arg_slotno	PROTO((const CUMULATIVE_ARGS *,
				       enum machine_mode, tree, int, int,
				       int *, int *));

static int supersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
static int hypersparc_adjust_cost PROTO((rtx, rtx, rtx, int));
static int ultrasparc_adjust_cost PROTO((rtx, rtx, rtx, int));

static void sparc_output_addr_vec PROTO((rtx));
static void sparc_output_addr_diff_vec PROTO((rtx));
static void sparc_output_deferred_case_vectors PROTO((void));


#ifdef DWARF2_DEBUGGING_INFO
extern char *dwarf2out_cfi_label ();
#endif

/* Option handling.  */

/* Code model option as passed by user.  */
const char *sparc_cmodel_string;
/* Parsed value.  */
enum cmodel sparc_cmodel;

/* Record alignment options as passed by user.  */
const char *sparc_align_loops_string;
const char *sparc_align_jumps_string;
const char *sparc_align_funcs_string;

/* Parsed values, as a power of two.  */
int sparc_align_loops;
int sparc_align_jumps;
int sparc_align_funcs;

struct sparc_cpu_select sparc_select[] =
{
  /* switch	name,		tune	arch */
  { (char *)0,	"default",	1,	1 },
  { (char *)0,	"-mcpu=",	1,	1 },
  { (char *)0,	"-mtune=",	1,	0 },
  { 0, 0, 0, 0 }
};

/* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
enum processor_type sparc_cpu;

/* Validate and override various options, and do some machine dependent
   initialization.  */

void
sparc_override_options ()
{
  static struct code_model {
    const char *name;
    int value;
  } cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { 0, 0 }
  };
  struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
  static struct cpu_default {
    int cpu;
    const char *name;
  } cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, "cypress" },
    { TARGET_CPU_sparclet, "tsc701" },
    { TARGET_CPU_sparclite, "f930" },
    { TARGET_CPU_v8, "v8" },
    { TARGET_CPU_hypersparc, "hypersparc" },
    { TARGET_CPU_sparclite86x, "sparclite86x" },
    { TARGET_CPU_supersparc, "supersparc" },
    { TARGET_CPU_v9, "v9" },
    { TARGET_CPU_ultrasparc, "ultrasparc" },
    { 0, 0 }
  };
  struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  */
  static struct cpu_table {
    const char *name;
    enum processor_type processor;
    int disable;
    int enable;
  } cpu_table[] = {
    { "v7",           PROCESSOR_V7, MASK_ISA, 0 },
    { "cypress",      PROCESSOR_CYPRESS, MASK_ISA, 0 },
    { "v8",           PROCESSOR_V8, MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc",   PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
    { "sparclite",    PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
       The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
    { "f930",         PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "f934",         PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "hypersparc",   PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU, MASK_V8 },
    { "sparclet",     PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",       PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    { "v9",           PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc */
    { "ultrasparc",   PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
    { 0, 0, 0, 0 }
  };
  struct cpu_table *cpu;
  struct sparc_cpu_select *sel;
  int fpu;

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    {
      error ("%s is not supported by this configuration",
	     DEFAULT_ARCH32_P ? "-m64" : "-m32");
    }
#endif

  /* At the moment we don't allow different pointer size and architecture */
  if (! TARGET_64BIT != ! TARGET_PTR64)
    {
      error ("-mptr%d not allowed on -m%d",
	     TARGET_PTR64 ? 64 : 32, TARGET_64BIT ? 64 : 32);
      if (TARGET_64BIT)
	target_flags |= MASK_PTR64;
      else
	target_flags &= ~MASK_PTR64;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  fpu = TARGET_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  for (def = &cpu_default[0]; def->name; ++def)
    if (def->cpu == TARGET_CPU_DEFAULT)
      break;
  if (! def->name)
    abort ();
  sparc_select[0].string = def->name;

  for (sel = &sparc_select[0]; sel->name; ++sel)
    {
      if (sel->string)
	{
	  for (cpu = &cpu_table[0]; cpu->name; ++cpu)
	    if (! strcmp (sel->string, cpu->name))
	      {
		if (sel->set_tune_p)
		  sparc_cpu = cpu->processor;

		if (sel->set_arch_p)
		  {
		    target_flags &= ~cpu->disable;
		    target_flags |= cpu->enable;
		  }
		break;
	      }

	  if (! cpu->name)
	    error ("bad value (%s) for %s switch", sel->string, sel->name);
	}
    }

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (TARGET_FPU_SET)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Don't allow -mvis if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~MASK_VIS;

  /* Validate -malign-loops= value, or provide default.  */
  if (sparc_align_loops_string)
    {
      sparc_align_loops = exact_log2 (atoi (sparc_align_loops_string));
      if (sparc_align_loops < 2 || sparc_align_loops > 7)
	fatal ("-malign-loops=%s is not between 4 and 128 or is not a power of two",
	       sparc_align_loops_string);
    }
  else
    {
      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
	 it's 0.  This sounds a bit kludgey.  */
      sparc_align_loops = 0;
    }

  /* Validate -malign-jumps= value, or provide default.  */
  if (sparc_align_jumps_string)
    {
      sparc_align_jumps = exact_log2 (atoi (sparc_align_jumps_string));
      if (sparc_align_jumps < 2 || sparc_align_jumps > 7)
	fatal ("-malign-jumps=%s is not between 4 and 128 or is not a power of two",
	       sparc_align_jumps_string);
    }
  else
    {
      /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
	 it's 0.  This sounds a bit kludgey.  */
      sparc_align_jumps = 0;
    }

  /* Validate -malign-functions= value, or provide default.  */
  if (sparc_align_funcs_string)
    {
      sparc_align_funcs = exact_log2 (atoi (sparc_align_funcs_string));
      if (sparc_align_funcs < 2 || sparc_align_funcs > 7)
	fatal ("-malign-functions=%s is not between 4 and 128 or is not a power of two",
	       sparc_align_funcs_string);
    }
  else
    sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  if ((profile_flag || profile_block_flag)
      && sparc_cmodel != CM_MEDLOW)
    {
      error ("profiling does not support code models other than medlow");
    }
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (code)
     enum rtx_code code;
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}


/* Operand constraints.  */

/* Return non-zero only if OP is a register of mode MODE,
   or const0_rtx.  Don't allow const0_rtx if TARGET_LIVE_G0 because
   %g0 may contain anything.  */

int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;
  if (TARGET_LIVE_G0)
    return 0;
  if (op == const0_rtx)
    return 1;
  if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
      && CONST_DOUBLE_HIGH (op) == 0
      && CONST_DOUBLE_LOW (op) == 0)
    return 1;
  if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
      && GET_CODE (op) == CONST_DOUBLE
      && fp_zero_operand (op))
    return 1;
  return 0;
}

/* Nonzero if OP is a floating point value with value 0.0.  */

int
fp_zero_operand (op)
     rtx op;
{
  REAL_VALUE_TYPE r;

  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
  return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
}

/* Nonzero if OP is an integer register.  */

int
intreg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (register_operand (op, SImode)
	  || (TARGET_ARCH64 && register_operand (op, DImode)));
}

/* Nonzero if OP is a floating point condition code register.  */

int
fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* This can happen when recog is called from combine.  Op may be a MEM.
     Fail instead of calling abort in this case.  */
  if (GET_CODE (op) != REG)
    return 0;

  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (mode == VOIDmode
      && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
    return 0;

#if 0	/* ??? ==> 1 when %fcc0-3 are pseudos first.  See gen_compare_reg().  */
  if (reg_renumber == 0)
    return REGNO (op) >= FIRST_PSEUDO_REGISTER;
  return REGNO_OK_FOR_CCFP_P (REGNO (op));
#else
  return (unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG < 4;
#endif
}

/* Nonzero if OP is an integer or floating point condition code register.  */

int
icc_or_fcc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == REG && REGNO (op) == SPARC_ICC_REG)
    {
      if (mode != VOIDmode && mode != GET_MODE (op))
	return 0;
      if (mode == VOIDmode
	  && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
	return 0;
      return 1;
    }

  return fcc_reg_operand (op, mode);
}

/* Nonzero if OP can appear as the dest of a RESTORE insn.  */
int
restore_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_CODE (op) == REG && GET_MODE (op) == mode
	  && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
}

/* Call insn on SPARC can take a PC-relative constant address, or any regular
   memory address.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) != MEM)
    abort ();
  op = XEXP (op, 0);
  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
}

int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);

      /* ??? This clause seems to be irrelevant.  */
    case CONST_DOUBLE:
      return GET_MODE (op) == mode;

    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return truth value of statement that OP is a LABEL_REF of mode MODE.  */

int
label_ref_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) != LABEL_REF)
    return 0;
  if (GET_MODE (op) != mode)
    return 0;
  return 1;
}

/* Return 1 if the operand is an argument used in generating pic references
   in either the medium/low or medium/anywhere code models of sparc64.  */

int
sp64_medium_pic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Check for (const (minus (symbol_ref:GOT)
			     (const (minus (label) (pc))))).  */
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (GET_CODE (op) != MINUS)
    return 0;
  if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
    return 0;
  /* ??? Ensure symbol is GOT.  */
  if (GET_CODE (XEXP (op, 1)) != CONST)
    return 0;
  if (GET_CODE (XEXP (XEXP (op, 1), 0)) != MINUS)
    return 0;
  return 1;
}

/* Return 1 if the operand is a data segment reference.  This includes
   the readonly data segment, or in other words anything but the text segment.
   This is needed in the medium/anywhere code model on v9.  These values
   are accessed with EMBMEDANY_BASE_REG.  */

int
data_segment_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF :
      return ! SYMBOL_REF_FLAG (op);
    case PLUS :
      /* Assume canonical format of symbol + constant.
	 Fall through.  */
    case CONST :
      return data_segment_operand (XEXP (op, 0), VOIDmode);
    default :
      return 0;
    }
}

/* Return 1 if the operand is a text segment reference.
   This is needed in the medium/anywhere code model on v9.  */

int
text_segment_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case LABEL_REF :
      return 1;
    case SYMBOL_REF :
      return SYMBOL_REF_FLAG (op);
    case PLUS :
      /* Assume canonical format of symbol + constant.
	 Fall through.  */
    case CONST :
      return text_segment_operand (XEXP (op, 0), VOIDmode);
    default :
      return 0;
    }
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

int
splittable_symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  if (! symbolic_operand (XEXP (op, 0), Pmode))
    return 0;
  return 1;
}

int
splittable_immediate_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  if (! immediate_operand (XEXP (op, 0), Pmode))
    return 0;
  return 1;
}

/* Return truth value of whether OP is EQ or NE.  */

int
eq_or_neq (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
}

/* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
   or LTU for non-floating-point.  We handle those specially.  */

int
normal_comp_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);

  if (GET_RTX_CLASS (code) != '<')
    return 0;

  if (GET_MODE (XEXP (op, 0)) == CCFPmode
      || GET_MODE (XEXP (op, 0)) == CCFPEmode)
    return 1;

  return (code != NE && code != EQ && code != GEU && code != LTU);
}

/* Return 1 if this is a comparison operator.  This allows the use of
   MATCH_OPERATOR to recognize all the branch insns.  */

int
noov_compare_op (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);

  if (GET_RTX_CLASS (code) != '<')
    return 0;

  if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
    /* These are the only branches which work with CC_NOOVmode.  */
    return (code == EQ || code == NE || code == GE || code == LT);
  return 1;
}

/* Nonzero if OP is a comparison operator suitable for use in v9
   conditional move or branch on register contents instructions.  */

int
v9_regcmp_op (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);

  if (GET_RTX_CLASS (code) != '<')
    return 0;

  return v9_regcmp_p (code);
}

/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */

int
extend_op (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
}

/* Return nonzero if OP is an operator of mode MODE which can set
   the condition codes explicitly.  We do not include PLUS and MINUS
   because these require CC_NOOVmode, which we handle explicitly.  */

int
cc_arithop (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == AND
      || GET_CODE (op) == IOR
      || GET_CODE (op) == XOR)
    return 1;

  return 0;
}

/* Return nonzero if OP is an operator of mode MODE which can bitwise
   complement its second operand and set the condition codes explicitly.  */

int
cc_arithopn (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* XOR is not here because combine canonicalizes (xor (not ...) ...)
     and (xor ... (not ...)) to (not (xor ...)).  */
  return (GET_CODE (op) == AND
	  || GET_CODE (op) == IOR);
}

/* Return true if OP is a register, or is a CONST_INT that can fit in a
   signed 13 bit immediate field.  This is an acceptable SImode operand for
   most 3 address instructions.  */

int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  int val;
  if (register_operand (op, mode))
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  val = INTVAL (op) & 0xffffffff;
  return SPARC_SIMM13_P (val);
}
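
/* For example, SPARC_SIMM13_P accepts the signed range -4096..4095, so
   "add %o1, 4095, %o0" can use an immediate directly while 4096 cannot;
   arith_4096_operand below exists so that add/sub patterns can still
   handle that one value (e.g. by using the negated constant).  */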

/* Return true if OP is a constant 4096 */

int
arith_4096_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int val;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  val = INTVAL (op) & 0xffffffff;
  return val == 4096;
}

/* Return true if OP is suitable as second operand for add/sub */

int
arith_add_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return arith_operand (op, mode) || arith_4096_operand (op, mode);
}

/* Return true if OP is a CONST_INT or a CONST_DOUBLE which can fit in the
   immediate field of OR and XOR instructions.  Used for 64-bit
   constant formation patterns.  */
int
const64_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && SPARC_SIMM13_P (INTVAL (op)))
#if HOST_BITS_PER_WIDE_INT != 64
	  || (GET_CODE (op) == CONST_DOUBLE
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
	      && (CONST_DOUBLE_HIGH (op) ==
		  ((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
		   (HOST_WIDE_INT)0xffffffff : 0)))
#endif
	  );
}

/* The same, but only for sethi instructions.  */
int
const64_high_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) & 0xfffffc00) != 0
	   && SPARC_SETHI_P (INTVAL (op))
#if HOST_BITS_PER_WIDE_INT != 64
	   /* Must be positive on non-64bit host else the
	      optimizer is fooled into thinking that sethi
	      sign extends, even though it does not.  */
	   && INTVAL (op) >= 0
#endif
	   )
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
	      && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
}
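
/* For example, 0x12345400 satisfies SPARC_SETHI_P (only bits 31..10 are
   set) and so can be loaded with a single sethi, while 0x12345678 cannot:
   its low 10 bits (0x278) are nonzero and would need a following or.  */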

/* Return true if OP is a register, or is a CONST_INT that can fit in a
   signed 11 bit immediate field.  This is an acceptable SImode operand for
   the movcc instructions.  */

int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
}

/* Return true if OP is a register, or is a CONST_INT that can fit in a
   signed 10 bit immediate field.  This is an acceptable SImode operand for
   the movrcc instructions.  */

int
arith10_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
}

/* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
   immediate field, or is a CONST_DOUBLE both of whose parts fit in a 13 bit
   immediate field.
   v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in a 13 bit immediate field.  This is an acceptable DImode operand
   for most 3 address instructions.  */

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  || (! TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return true if OP is a constant 4096 for DImode on ARCH64 */

int
arith_double_4096_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (TARGET_ARCH64 &&
	  ((GET_CODE (op) == CONST_INT && INTVAL (op) == 4096) ||
	   (GET_CODE (op) == CONST_DOUBLE &&
	    CONST_DOUBLE_LOW (op) == 4096 &&
	    CONST_DOUBLE_HIGH (op) == 0)));
}

/* Return true if OP is suitable as second operand for add/sub in DImode */

int
arith_double_add_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return arith_double_operand (op, mode) || arith_double_4096_operand (op, mode);
}

/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in an 11 bit immediate field.  This is an acceptable DImode
   operand for the movcc instructions.  */
/* ??? Replace with arith11_operand?  */

int
arith11_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x400) < 0x800));
}

/* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
   can fit in a 10 bit immediate field.  This is an acceptable DImode
   operand for the movrcc instructions.  */
/* ??? Replace with arith10_operand?  */

int
arith10_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
	      && ((CONST_DOUBLE_HIGH (op) == -1
		   && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
		  || (CONST_DOUBLE_HIGH (op) == 0
		      && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
	  || (GET_CODE (op) == CONST_INT
	      && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      && (unsigned HOST_WIDE_INT) (INTVAL (op) + 0x200) < 0x400));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in most three-address insns,
   which have a 13 bit immediate field.  */

int
small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
}

int
small_int_or_double (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
}

/* Recognize operand values for the umul instruction.  That instruction sign
   extends immediate values just like all other sparc instructions, but
   interprets the extended result as an unsigned number.  */

int
uns_small_int (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
	      || (INTVAL (op) >= 0xFFFFF000
		  && INTVAL (op) < 0x100000000)));
#else
  return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0
	      && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}

int
uns_arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || uns_small_int (op, mode);
}

/* Return truth value of statement that OP is a call-clobbered register.  */
int
clobbered_register (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
}

/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns.  */

int
zero_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const0_rtx;
}

/* Return 1 if OP is a valid operand for the source of a move insn.  */

int
input_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* If both modes are non-void they must be the same.  */
  if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
    return 0;

  /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary.  */
  if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
    return 1;

  /* Allow any one instruction integer constant, and all CONST_INT
     variants when we are working in DImode and !arch64.  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && ((GET_CODE (op) == CONST_INT
	   && ((SPARC_SETHI_P (INTVAL (op))
		&& (! TARGET_ARCH64
		    || (INTVAL (op) >= 0)
		    || mode == SImode))
	       || SPARC_SIMM13_P (INTVAL (op))
	       || (mode == DImode
		   && ! TARGET_ARCH64)))
	  || (TARGET_ARCH64
	      && GET_CODE (op) == CONST_DOUBLE
	      && ((CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
		  ||
#if HOST_BITS_PER_WIDE_INT == 64
		  (CONST_DOUBLE_HIGH (op) == 0
		   && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
#else
		  (SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
		   && (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
			&& CONST_DOUBLE_HIGH (op) == 0)
		       || (CONST_DOUBLE_HIGH (op) == -1)))
#endif
		  ))))
    return 1;

  /* If !arch64 and this is a DImode const, allow it so that
     the splits can be generated.  */
  if (! TARGET_ARCH64
      && mode == DImode
      && GET_CODE (op) == CONST_DOUBLE)
    return 1;

  if (register_operand (op, mode))
    return 1;

  /* If this is a SUBREG, look inside so that we handle
     paradoxical ones.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  /* Check for valid MEM forms.  */
  if (GET_CODE (op) == MEM)
    {
      rtx inside = XEXP (op, 0);

      if (GET_CODE (inside) == LO_SUM)
	{
	  /* We can't allow these because all of the splits
	     (eventually as they trickle down into DFmode
	     splits) require offsettable memory references.  */
	  if (! TARGET_V9
	      && GET_MODE (op) == TFmode)
	    return 0;

	  return (register_operand (XEXP (inside, 0), Pmode)
		  && CONSTANT_P (XEXP (inside, 1)));
	}
      return memory_address_p (mode, inside);
    }

  return 0;
}


/* We know it can't be done in one insn when we get here; the movsi
   expander guarantees this.  */
void
sparc_emit_set_const32 (op0, op1)
     rtx op0;
     rtx op1;
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp;

  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT value = INTVAL (op1);

      if (SPARC_SETHI_P (value)
	  || SPARC_SIMM13_P (value))
	abort ();
    }

  /* Full 2-insn decomposition is needed.  */
  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      if (TARGET_ARCH64
	  && HOST_BITS_PER_WIDE_INT != 64
	  && (INTVAL (op1) & 0x80000000) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  temp,
				  gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx,
							INTVAL (op1) & 0xfffffc00, 0)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  temp,
				  GEN_INT (INTVAL (op1) & 0xfffffc00)));
	}
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode,
					   temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      temp,
			      gen_rtx_HIGH (mode,
					    op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_LO_SUM (mode,
					      temp,
					      op1)));

    }
}
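
/* For example, loading the constant 0x12345678 (neither a sethi nor a
   simm13 value) through the CONST_INT path above produces

	sethi	%hi(0x12345678), %temp	! %temp = 0x12345400
	or	%temp, 0x278, %reg	! 0x12345678 & 0x3ff == 0x278

   with the sethi expressed as a plain move of the masked constant.  */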


/* Sparc-v9 code-model support.  */
void
sparc_emit_set_symbolic_const64 (op0, op1, temp1)
     rtx op0;
     rtx op1;
     rtx temp1;
{
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp
	 or	%temp, %lo(symbol), %reg  */
      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      emit_insn (gen_seth44 (op0, op1));
      emit_insn (gen_setm44 (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp1,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp1, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 or	%temp2, %lo(symbol), %temp4
	 sllx	%temp3, 32, %temp5
	 or	%temp4, %temp5, %reg  */

      /* Getting this right wrt. reloading is really tricky.
	 We _MUST_ have a separate temporary at this point,
	 so we barf immediately instead of generating
	 incorrect code.  */
      if (temp1 == op0)
	abort ();

      emit_insn (gen_sethh (op0, op1));
      emit_insn (gen_setlm (temp1, op1));
      emit_insn (gen_sethm (op0, op0, op1));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_PLUS (DImode, op0, temp1)));
      emit_insn (gen_setlo (op0, op0, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility cruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			or	%temp1, %lo(symbol), %temp2
			add	%temp2, EMBMEDANY_BASE_REG, %reg

	 Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			or	%temp2, %lo(symbol), %temp4
			sllx	%temp3, 32, %temp5
			or	%temp4, %temp5, %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (op0, temp1));
	  emit_insn (gen_embmedany_losum (op0, op0, op1));
	}
      else
	{
	  /* Getting this right wrt. reloading is really tricky.
	     We _MUST_ have a separate temporary at this point,
	     so we barf immediately instead of generating
	     incorrect code.  */
	  if (temp1 == op0)
	    abort ();

	  emit_insn (gen_embmedany_textuhi (op0, op1));
	  emit_insn (gen_embmedany_texthi (temp1, op1));
	  emit_insn (gen_embmedany_textulo (op0, op0, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, op0, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, op0, temp1)));
	  emit_insn (gen_embmedany_textlo (op0, op0, op1));
	}
      break;

    default:
      abort();
    }
}

/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));

#if HOST_BITS_PER_WIDE_INT == 64
#define GEN_HIGHINT64(__x)	GEN_INT ((__x) & 0xfffffc00)
#define GEN_INT64(__x)		GEN_INT (__x)
#else
#define GEN_HIGHINT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xfffffc00, 0)
#define GEN_INT64(__x) \
	gen_rtx_CONST_DOUBLE (VOIDmode, const0_rtx, \
			      (__x) & 0xffffffff, \
			      ((__x) & 0x80000000 \
			       ? 0xffffffff : 0))
#endif
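
/* For example, on a 32-bit host GEN_INT64 (0x80000000) does not produce
   GEN_INT (0x80000000), whose meaning as a 64-bit value would be
   ambiguous; it builds a CONST_DOUBLE with low = 0x80000000 and
   high = 0xffffffff, i.e. the consistently sign-extended value
   0xffffffff80000000.  */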

/* The optimizer is not allowed to assume anything about exactly
   which bits are set for a HIGH; they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, and match
   a plain movdi, to alleviate this problem.  */
static void
sparc_emit_set_safe_HIGH64 (dest, val)
     rtx dest;
     HOST_WIDE_INT val;
{
  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_HIGHINT64 (val)));
}

static rtx
gen_safe_SET64 (dest, val)
     rtx dest;
     HOST_WIDE_INT val;
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT64 (val));
}

static rtx
gen_safe_OR64 (src, val)
     rtx src;
     HOST_WIDE_INT val;
{
  return gen_rtx_IOR (DImode, src, GEN_INT64 (val));
}

static rtx
gen_safe_XOR64 (src, val)
     rtx src;
     HOST_WIDE_INT val;
{
  return gen_rtx_XOR (DImode, src, GEN_INT64 (val));
}

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));

static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT low_bits;
     int is_neg;
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  sparc_emit_set_safe_HIGH64 (temp, high_bits);
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | (low_bits & 0x3ff)))));
	}
    }
}
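
/* A worked example of the is_neg path: for the constant
   0xffffffffdeadbeef, high_bits = ~0xdeadbeef & 0xffffffff = 0x21524110,
   so the sequence is

	sethi	%hi(0x21524110), %temp	! %temp = 0x21524000
	xor	%temp, -0x111, %reg	! -0x400 | (0xdeadbeef & 0x3ff)

   where the sign-extended xor immediate flips the upper 32 bits to all
   ones and drops the low bits into place.  */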

static void sparc_emit_set_const64_quick2
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT, int));

static void
sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_immediate, shift_count)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_immediate;
     int shift_count;
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
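
/* For example, the low_bits == 0 case further below calls
   sparc_emit_set_const64_quick2 (op0, temp, 0x12345678, 0, 32),
   which produces

	sethi	%hi(0x12345678), %temp
	or	%temp, 0x278, %op0
	sllx	%op0, 32, %op0

   materializing 0x1234567800000000 in three insns.  */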

static void sparc_emit_set_const64_longway
	PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits)
     rtx op0;
     rtx temp;
     unsigned HOST_WIDE_INT high_bits;
     unsigned HOST_WIDE_INT low_bits;
{
  rtx sub_temp;

  if (reload_in_progress || reload_completed)
    sub_temp = op0;
  else
    sub_temp = gen_reg_rtx (DImode);

  if ((high_bits & 0xfffffc00) != 0)
    {
      sparc_emit_set_safe_HIGH64 (temp, high_bits);
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (!reload_in_progress && !reload_completed)
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      sparc_emit_set_safe_HIGH64 (temp2, low_bits);
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}

/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant
	PROTO((unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT,
	       int *, int *, int *));

static void
analyze_64bit_constant (high_bits, low_bits, hbsp, lbsp, abbasp)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int *hbsp, *lbsp, *abbasp;
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  if (lowest_bit_set == -1
      || highest_bit_set == -1)
    abort ();
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
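
/* Worked example: for the constant 0x00000ff000000000, high_bits is
   0x00000ff0 and low_bits is 0.  The first scan finds no bit in
   low_bits but sees bit 11 of high_bits, giving highest_bit_set = 43;
   the second scan finds bit 4 of high_bits, giving lowest_bit_set = 36;
   bits 36..43 are contiguous, so all_bits_between_are_set = 1.  */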

static int const64_is_2insns
	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));

static int
const64_is_2insns (high_bits, low_bits)
     unsigned HOST_WIDE_INT high_bits, low_bits;
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  if (high_bits == 0
      || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  if ((highest_bit_set == 63
       || lowest_bit_set == 0)
      && all_bits_between_are_set != 0)
    return 1;

  if ((highest_bit_set - lowest_bit_set) < 21)
    return 1;

  return 0;
}
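
/* Continuing the example above, 0x00000ff000000000 has
   highest_bit_set - lowest_bit_set == 7 < 21, so it is a 2-insn
   constant: a sethi of the "focus bits" plus one shift (see
   create_simple_focus_bits below).  */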

static unsigned HOST_WIDE_INT create_simple_focus_bits
	PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       int, int));

static unsigned HOST_WIDE_INT
create_simple_focus_bits (high_bits, low_bits, lowest_bit_set, shift)
     unsigned HOST_WIDE_INT high_bits, low_bits;
     int lowest_bit_set, shift;
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  if (hi & lo)
    abort ();
  return (hi | lo);
}
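
/* Worked example: for 0x00000ff000000000, lowest_bit_set is 36, so the
   second branch applies; with shift = 10 this yields
   hi = (0x00000ff0 >> 4) << 10 = 0x3fc00, a valid sethi immediate.
   The caller then shifts the result left by lowest_bit_set - 10 = 26
   to move the bits back up to positions 36..43.  */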

/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  */
void
sparc_emit_set_const64 (op0, op1)
     rtx op0;
     rtx op1;
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp;

  /* Sanity check that we know what we are working with.  */
  if (! TARGET_ARCH64
      || GET_CODE (op0) != REG
      || (REGNO (op0) >= SPARC_FIRST_FP_REG
	  && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
    abort ();

  if (reload_in_progress || reload_completed)
    temp = op0;
  else
    temp = gen_reg_rtx (DImode);

  if (GET_CODE (op1) != CONST_DOUBLE
      && GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (GET_CODE (op1) == CONST_DOUBLE)
    {
#if HOST_BITS_PER_WIDE_INT == 64
      high_bits = (CONST_DOUBLE_LOW (op1) >> 32) & 0xffffffff;
      low_bits = CONST_DOUBLE_LOW (op1) & 0xffffffff;
#else
      high_bits = CONST_DOUBLE_HIGH (op1);
      low_bits = CONST_DOUBLE_LOW (op1);
#endif
    }
  else
    {
#if HOST_BITS_PER_WIDE_INT == 64
      high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
      low_bits = (INTVAL (op1) & 0xffffffff);
#else
      high_bits = ((INTVAL (op1) < 0) ?
		   0xffffffff :
		   0x00000000);
      low_bits = INTVAL (op1);
#endif
    }

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	shift = -(63 - highest_bit_set);

      if (! SPARC_SIMM13_P (the_const))
	abort ();

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode,
						temp,
						GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode,
						  temp,
						  GEN_INT (-shift))));
      else
	abort ();
      return;
    }

  /* Now a range of 22 or fewer bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
1857 if ((highest_bit_set - lowest_bit_set) < 21)
1858 {
1859 unsigned HOST_WIDE_INT focus_bits =
1860 create_simple_focus_bits (high_bits, low_bits,
1861 lowest_bit_set, 10);
1862
1863 if (! SPARC_SETHI_P (focus_bits))
1864 abort ();
1865
1866 sparc_emit_set_safe_HIGH64 (temp, focus_bits);
1867
1868 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
1869 if (lowest_bit_set < 10)
1870 emit_insn (gen_rtx_SET (VOIDmode,
1871 op0,
1872 gen_rtx_LSHIFTRT (DImode, temp,
1873 GEN_INT (10 - lowest_bit_set))));
1874 else if (lowest_bit_set > 10)
1875 emit_insn (gen_rtx_SET (VOIDmode,
1876 op0,
1877 gen_rtx_ASHIFT (DImode, temp,
1878 GEN_INT (lowest_bit_set - 10))));
1879 else
1880 abort ();
1881 return;
1882 }
1883
1884 /* 1) sethi %hi(low_bits), %reg
1885 * or %reg, %lo(low_bits), %reg
1886 * 2) sethi %hi(~low_bits), %reg
1887 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1888 */
1889 if (high_bits == 0
1890 || high_bits == 0xffffffff)
1891 {
1892 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1893 (high_bits == 0xffffffff));
1894 return;
1895 }
1896
1897 /* Now, try 3-insn sequences. */
1898
1899 /* 1) sethi %hi(high_bits), %reg
1900 * or %reg, %lo(high_bits), %reg
1901 * sllx %reg, 32, %reg
1902 */
1903 if (low_bits == 0)
1904 {
1905 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1906 return;
1907 }
1908
1909 /* We may be able to do something quick
1910 when the constant is negated, so try that. */
1911 if (const64_is_2insns ((~high_bits) & 0xffffffff,
1912 (~low_bits) & 0xfffffc00))
1913 {
1914 /* NOTE: The trailing bits get XOR'd so we need the
1915 non-negated bits, not the negated ones. */
1916 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1917
1918 if ((((~high_bits) & 0xffffffff) == 0
1919 && ((~low_bits) & 0x80000000) == 0)
1920 || (((~high_bits) & 0xffffffff) == 0xffffffff
1921 && ((~low_bits) & 0x80000000) != 0))
1922 {
1923 int fast_int = (~low_bits & 0xffffffff);
1924
1925 if ((SPARC_SETHI_P (fast_int)
1926 && (~high_bits & 0xffffffff) == 0)
1927 || SPARC_SIMM13_P (fast_int))
1928 emit_insn (gen_safe_SET64 (temp, fast_int));
1929 else
1930 sparc_emit_set_const64 (temp, GEN_INT64 (fast_int));
1931 }
1932 else
1933 {
1934 rtx negated_const;
1935#if HOST_BITS_PER_WIDE_INT == 64
1936 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1937 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1938#else
1939 negated_const = gen_rtx_CONST_DOUBLE (DImode, const0_rtx,
1940 (~low_bits) & 0xfffffc00,
1941 (~high_bits) & 0xffffffff);
1942#endif
1943 sparc_emit_set_const64 (temp, negated_const);
1944 }
1945
1946 /* If we are XOR'ing with -1, then we should emit a one's complement
1947 instead. This way the combiner will notice logical operations
1948 such as ANDN later on and substitute. */
1949 if (trailing_bits == 0x3ff)
1950 {
1951 emit_insn (gen_rtx_SET (VOIDmode, op0,
1952 gen_rtx_NOT (DImode, temp)));
1953 }
1954 else
1955 {
1956 emit_insn (gen_rtx_SET (VOIDmode,
1957 op0,
1958 gen_safe_XOR64 (temp,
1959 (-0x400 | trailing_bits))));
1960 }
1961 return;
1962 }
1963
1964 /* 1) sethi %hi(xxx), %reg
1965 * or %reg, %lo(xxx), %reg
1966 * sllx %reg, yyy, %reg
1967 *
1968 * ??? This is just a generalized version of the low_bits==0
1969 * thing above, FIXME...
1970 */
1971 if ((highest_bit_set - lowest_bit_set) < 32)
1972 {
1973 unsigned HOST_WIDE_INT focus_bits =
1974 create_simple_focus_bits (high_bits, low_bits,
1975 lowest_bit_set, 0);
1976
1977 /* We can't get here in this state. */
1978 if (highest_bit_set < 32
1979 || lowest_bit_set >= 32)
1980 abort ();
1981
1982 /* So what we know is that the set bits straddle the
1983 middle of the 64-bit word. */
1984 sparc_emit_set_const64_quick2 (op0, temp,
1985 focus_bits, 0,
1986 lowest_bit_set);
1987 return;
1988 }
1989
1990 /* 1) sethi %hi(high_bits), %reg
1991 * or %reg, %lo(high_bits), %reg
1992 * sllx %reg, 32, %reg
1993 * or %reg, low_bits, %reg
1994 */
1995 if (SPARC_SIMM13_P(low_bits)
1996 && ((int)low_bits > 0))
1997 {
1998 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1999 return;
2000 }
2001
2002 /* The easiest way when all else fails, is full decomposition. */
2003#if 0
2004 printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
2005 high_bits, low_bits, ~high_bits, ~low_bits);
2006#endif
2007 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2008}
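
/* For illustration (the constant is hypothetical, not from any
   particular test case): a value such as 0xdeadbeef00000000 has
   low_bits == 0, so if it reaches that branch it is handled by the
   quick2 path above as roughly

        sethi   %hi(0xdeadbeef), %reg
        or      %reg, %lo(0xdeadbeef), %reg
        sllx    %reg, 32, %reg

   while a fully general 64-bit constant falls all the way through to
   the longway decomposition at the end.  */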

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the cc reg in the proper mode.  */

rtx
gen_compare_reg (code, x, y)
     enum rtx_code code;
     rtx x, y;
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg;

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
        if (prev_args[reg][0] == x && prev_args[reg][1] == y)
          break;
      if (reg == 4)
        {
          reg = next_fcc_reg;
          prev_args[reg][0] = x;
          prev_args[reg][1] = y;
          next_fcc_reg = (next_fcc_reg + 1) & 3;
        }
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
                          gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
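
/* A minimal usage sketch (the operands are hypothetical): an Scc or
   branch expander typically does

        cc_reg = gen_compare_reg (code, sparc_compare_op0, sparc_compare_op1);

   and then tests CC_REG against const0_rtx inside an IF_THEN_ELSE, as
   gen_v9_scc does below.  */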

/* This function is used for v9 only.
   CODE is the code for an Scc's comparison.
   OPERANDS[0] is the target of the Scc insn.
   OPERANDS[1] is the value we compare against const0_rtx (which hasn't
   been generated yet).

   This function is needed to turn

           (set (reg:SI 110)
               (gt (reg:CCX 100 %icc)
                   (const_int 0)))
   into
           (set (reg:SI 110)
               (gt:DI (reg:CCX 100 %icc)
                   (const_int 0)))

   I.e., the instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   We refer to the global sparc compare operands sparc_compare_op0 and
   sparc_compare_op1.  */

int
gen_v9_scc (compare_code, operands)
     enum rtx_code compare_code;
     register rtx *operands;
{
  rtx temp, op0, op1;

  if (! TARGET_ARCH64
      && (GET_MODE (sparc_compare_op0) == DImode
          || GET_MODE (operands[0]) == DImode))
    return 0;

  /* Handle the case where operands[0] == sparc_compare_op0.
     We "early clobber" the result.  */
  if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
    {
      op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
      emit_move_insn (op0, sparc_compare_op0);
    }
  else
    op0 = sparc_compare_op0;
  /* For consistency in the following.  */
  op1 = sparc_compare_op1;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
      && op1 == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      /* Special case for op0 != 0.  This can be done with one instruction if
         operands[0] == sparc_compare_op0.  We don't assume they are equal
         now though.  */

      if (compare_code == NE
          && GET_MODE (operands[0]) == DImode
          && GET_MODE (op0) == DImode)
        {
          emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
          emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                       gen_rtx_IF_THEN_ELSE (DImode,
                                gen_rtx_fmt_ee (compare_code, DImode,
                                                op0, const0_rtx),
                                const1_rtx,
                                operands[0])));
          return 1;
        }

      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      if (GET_MODE (op0) != DImode)
        {
          temp = gen_reg_rtx (DImode);
          convert_move (temp, op0, 0);
        }
      else
        temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                   gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                            gen_rtx_fmt_ee (compare_code, DImode,
                                            temp, const0_rtx),
                            const1_rtx,
                            operands[0])));
      return 1;
    }
  else
    {
      operands[1] = gen_compare_reg (compare_code, op0, op1);

      switch (GET_MODE (operands[1]))
        {
        case CCmode :
        case CCXmode :
        case CCFPEmode :
        case CCFPmode :
          break;
        default :
          abort ();
        }
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                   gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                            gen_rtx_fmt_ee (compare_code,
                                            GET_MODE (operands[1]),
                                            operands[1], const0_rtx),
                            const1_rtx, operands[0])));
      return 1;
    }
}

/* Emit a conditional jump insn for the v9 architecture using comparison code
   CODE and jump target LABEL.
   This function exists to take advantage of the v9 brxx insns.  */

void
emit_v9_brxx_insn (code, op0, label)
     enum rtx_code code;
     rtx op0, label;
{
  emit_jump_insn (gen_rtx_SET (VOIDmode,
                    pc_rtx,
                    gen_rtx_IF_THEN_ELSE (VOIDmode,
                             gen_rtx_fmt_ee (code, GET_MODE (op0),
                                             op0, const0_rtx),
                             gen_rtx_LABEL_REF (VOIDmode, label),
                             pc_rtx)));
}
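
/* For illustration: emit_v9_brxx_insn (NE, op0, label) with a DImode
   OP0 builds (set pc (if_then_else (ne op0 0) (label_ref label) pc)),
   which the brxx patterns in sparc.md can match and emit as a
   branch-on-register-value insn such as brnz (assuming such a pattern
   matches; the mode of OP0 decides).  */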

/* Return nonzero if a return peephole that merges the return with the
   setting of the output register is OK.  */
int
leaf_return_peephole_ok ()
{
  return (actual_fsize == 0);
}

/* Return nonzero if TRIAL can go into the function epilogue's
   delay slot.  SLOT is the slot we are trying to fill.  */

int
eligible_for_epilogue_delay (trial, slot)
     rtx trial;
     int slot;
{
  rtx pat, src;

  if (slot >= 1)
    return 0;

  if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
    return 0;

  if (get_attr_length (trial) != 1)
    return 0;

  /* If %g0 is live, there are lots of things we can't handle.
     Rather than trying to find them all now, let's punt and only
     optimize things as necessary.  */
  if (TARGET_LIVE_G0)
    return 0;

  /* In the case of a true leaf function, anything can go into the delay slot.
     However, a delay slot only exists if the frame size is zero; otherwise
     we will put an insn to adjust the stack after the return.  */
  if (current_function_uses_only_leaf_regs)
    {
      if (leaf_return_peephole_ok ())
        return ((get_attr_in_uncond_branch_delay (trial)
                 == IN_BRANCH_DELAY_TRUE));
      return 0;
    }

  /* If only trivial `restore' insns work, nothing can go in the
     delay slot.  */
  else if (TARGET_BROKEN_SAVERESTORE)
    return 0;

  pat = PATTERN (trial);

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || REGNO (SET_DEST (pat)) >= 32
      || REGNO (SET_DEST (pat)) < 24)
    return 0;

  /* The set of insns matched here must agree precisely with the set of
     patterns paired with a RETURN in sparc.md.  */

  src = SET_SRC (pat);

  /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64.  */
  if (arith_operand (src, GET_MODE (src)))
    {
      if (TARGET_ARCH64)
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* This matches "*return_di".  */
  else if (arith_double_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* This matches "*return_sf_no_fpu".  */
  else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
           && register_operand (src, SFmode))
    return 1;

  /* This matches "*return_addsi".  */
  else if (GET_CODE (src) == PLUS
           && arith_operand (XEXP (src, 0), SImode)
           && arith_operand (XEXP (src, 1), SImode)
           && (register_operand (XEXP (src, 0), SImode)
               || register_operand (XEXP (src, 1), SImode)))
    return 1;

  /* This matches "*return_adddi".  */
  else if (GET_CODE (src) == PLUS
           && arith_double_operand (XEXP (src, 0), DImode)
           && arith_double_operand (XEXP (src, 1), DImode)
           && (register_operand (XEXP (src, 0), DImode)
               || register_operand (XEXP (src, 1), DImode)))
    return 1;

  return 0;
}

static int
check_return_regs (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
      return IN_OR_GLOBAL_P (x);

    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case SET:
    case IOR:
    case AND:
    case XOR:
    case PLUS:
    case MINUS:
      if (check_return_regs (XEXP (x, 1)) == 0)
        return 0;
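      /* Fall through to also check the first operand.  */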
    case NOT:
    case NEG:
    case MEM:
      return check_return_regs (XEXP (x, 0));

    default:
      return 0;
    }
}

/* Return 1 if TRIAL references only in and global registers.  */
int
eligible_for_return_delay (trial)
     rtx trial;
{
  if (GET_CODE (PATTERN (trial)) != SET)
    return 0;

  return check_return_regs (PATTERN (trial));
}

int
short_branch (uid1, uid2)
     int uid1, uid2;
{
  unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
  if (delta + 1024 < 2048)
    return 1;
  /* warning ("long branch, distance %d", delta); */
  return 0;
}
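
/* A worked example of the range check above: DELTA is computed in
   unsigned arithmetic, so a displacement of -1 yields 0xffffffff and
   DELTA + 1024 wraps around to 1023, which passes.  The test thus
   accepts exactly the displacements in [-1024, 1023].  */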

/* Return non-zero if REG is not used after INSN.
   We assume REG is a reload reg, and therefore does
   not live past labels or calls or jumps.  */
int
reg_unused_after (reg, insn)
     rtx reg;
     rtx insn;
{
  enum rtx_code code, prev_code = UNKNOWN;

  while ((insn = NEXT_INSN (insn)))
    {
      if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
        return 1;

      code = GET_CODE (insn);
      if (GET_CODE (insn) == CODE_LABEL)
        return 1;

      if (GET_RTX_CLASS (code) == 'i')
        {
          rtx set = single_set (insn);
          int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
          if (set && in_src)
            return 0;
          if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
            return 1;
          if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
            return 0;
        }
      prev_code = code;
    }
  return 1;
}

/* The table we use to reference PIC data.  */
static rtx global_offset_table;

/* The function we use to get at it.  */
static rtx get_pc_symbol;
static char get_pc_symbol_name[256];

/* Ensure that we are not using patterns that are not OK with PIC.  */

int
check_pic (i)
     int i;
{
  switch (flag_pic)
    {
    case 1:
      if (GET_CODE (recog_operand[i]) == SYMBOL_REF
          || (GET_CODE (recog_operand[i]) == CONST
              && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
                    && (XEXP (XEXP (recog_operand[i], 0), 0)
                        == global_offset_table)
                    && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
                        == CONST))))
        abort ();
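      /* Fall through.  */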
    case 2:
    default:
      return 1;
    }
}

/* Return true if X is an address which needs a temporary register when
   reloaded while generating PIC code.  */

int
pic_address_needs_scratch (x)
     rtx x;
{
  /* An address which is a SYMBOL_REF plus a constant that is not a
     SMALL_INT needs a temp reg.  */
  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
    return 1;

  return 0;
}
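
/* For example (the operand is hypothetical): the address

        (const (plus (symbol_ref "foo") (const_int 0x12345)))

   needs a scratch register, because 0x12345 does not fit in the signed
   13-bit immediate that SMALL_INT accepts, while a small offset such
   as 64 would not need one.  */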

/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx reg;
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
        {
          if (reload_in_progress || reload_completed)
            abort ();
          else
            reg = gen_reg_rtx (Pmode);
        }

      if (flag_pic == 2)
        {
          /* If not during reload, allocate another temp reg here for loading
             in the address, so that these instructions can be optimized
             properly.  */
          rtx temp_reg = ((reload_in_progress || reload_completed)
                          ? reg : gen_reg_rtx (Pmode));

          /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
             won't get confused into thinking that these two instructions
             are loading in the true address of the symbol.  If in the
             future a PIC rtx exists, that should be used instead.  */
          if (Pmode == SImode)
            {
              emit_insn (gen_movsi_high_pic (temp_reg, orig));
              emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
            }
          else
            {
              emit_insn (gen_movdi_high_pic (temp_reg, orig));
              emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
            }
          address = temp_reg;
        }
      else
        address = orig;

      pic_ref = gen_rtx_MEM (Pmode,
                             gen_rtx_PLUS (Pmode,
                                           pic_offset_table_rtx, address));
      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      insn = emit_move_insn (reg, pic_ref);
      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
                                            REG_NOTES (insn));
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      if (reg == 0)
        {
          if (reload_in_progress || reload_completed)
            abort ();
          else
            reg = gen_reg_rtx (Pmode);
        }

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
        {
          base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
          offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                           base == reg ? 0 : reg);
        }
      else
        abort ();

      if (GET_CODE (offset) == CONST_INT)
        {
          if (SMALL_INT (offset))
            return plus_constant_for_output (base, INTVAL (offset));
          else if (! reload_in_progress && ! reload_completed)
            offset = force_reg (Pmode, offset);
          else
            /* If we reach here, then something is seriously wrong.  */
            abort ();
        }
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? Why do we do this?  */
    /* Now movsi_pic_label_ref uses it, but we ought to be checking that
       the register is live instead, in case it is eliminated.  */
    current_function_uses_pic_offset_table = 1;

  return orig;
}
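
/* For illustration: with flag_pic == 1, a reference to (symbol_ref "x")
   is rewritten into a GOT load,

        (set reg (mem (plus pic_offset_table_rtx (symbol_ref "x"))))

   while flag_pic == 2 first forms the GOT slot address with the
   high/lo_sum pair emitted above and uses that as the address.  */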

/* Return the RTX for insns to set the PIC register.  */

static rtx
pic_setup_code ()
{
  rtx seq;

  start_sequence ();
  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
                         get_pc_symbol));
  seq = gen_sequence ();
  end_sequence ();

  return seq;
}

/* Emit special PIC prologues and epilogues.  */

void
finalize_pic ()
{
  /* Labels to get the PC in the prologue of this function.  */
  int orig_flag_pic = flag_pic;
  rtx insn;

  if (current_function_uses_pic_offset_table == 0)
    return;

  if (! flag_pic)
    abort ();

  /* If we haven't emitted the special get_pc helper function, do so now.  */
  if (get_pc_symbol_name[0] == 0)
    {
      int align;

      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
      text_section ();

      align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
      if (align > 0)
        ASM_OUTPUT_ALIGN (asm_out_file, align);
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
    }

  /* Initialize every time through, since we can't easily
     know this to be permanent.  */
  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
  flag_pic = 0;

  emit_insn_after (pic_setup_code (), get_insns ());

  /* Insert the code in each nonlocal goto receiver.
     If you make changes here or to the nonlocal_goto_receiver
     pattern, make sure the unspec_volatile numbers still
     match.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
        && XINT (PATTERN (insn), 1) == 5)
      emit_insn_after (pic_setup_code (), insn);

  flag_pic = orig_flag_pic;

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
}

/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  */

int
mem_min_alignment (mem, desired)
     rtx mem;
     int desired;
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
        {
          base = XEXP (addr, 0);

          /* What we are saying here is that if the base
             REG is aligned properly, the compiler will make
             sure any REG based index upon it will be so
             as well.  */
          if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
            offset = XEXP (addr, 1);
          else
            offset = const0_rtx;
        }
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != FRAME_POINTER_REGNUM
          && regno != STACK_POINTER_REGNUM)
        {
          /* Check if the compiler has recorded some information
             about the alignment of the base REG.  If reload has
             completed, we already matched with proper alignments.  */
          if (((regno_pointer_align != NULL
                && REGNO_POINTER_ALIGN (regno) >= desired)
               || reload_completed)
              && ((INTVAL (offset) & (desired - 1)) == 0))
            return 1;
        }
      else
        {
          if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
            return 1;
        }
    }
  else if (! TARGET_UNALIGNED_DOUBLES
           || CONSTANT_P (addr)
           || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless
         TARGET_UNALIGNED_DOUBLES is true, in which case we can only
         assume that an access is aligned if it is to a constant address,
         or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
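
/* A usage sketch (the caller is hypothetical): before emitting a
   doubleword memory access, a pattern condition can test

        mem_min_alignment (operands[1], 8)

   and fall back to two single-word accesses when it returns 0.  */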


/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  */

enum sparc_mode_class {
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

#define DF_MODES64 DF_MODES

/* Modes for double-float only quantities.  */
#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))

/* Modes for double-float and larger quantities.  */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)

/* Value is 1 if register/mode pair is acceptable on sparc.
   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).  */

/* This points to either the 32 bit or the 64 bit version.  */
int *hard_regno_mode_classes;

static int hard_32bit_mode_classes[] = {
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

static int hard_64bit_mode_classes[] = {
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc */
  CC_MODES
};

int sparc_mode_class [NUM_MACHINE_MODES];

enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];

static void
sparc_init_modes ()
{
  int i;

  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
        {
        case MODE_INT:
        case MODE_PARTIAL_INT:
        case MODE_COMPLEX_INT:
          if (GET_MODE_SIZE (i) <= 4)
            sparc_mode_class[i] = 1 << (int) S_MODE;
          else if (GET_MODE_SIZE (i) == 8)
            sparc_mode_class[i] = 1 << (int) D_MODE;
          else if (GET_MODE_SIZE (i) == 16)
            sparc_mode_class[i] = 1 << (int) T_MODE;
          else if (GET_MODE_SIZE (i) == 32)
            sparc_mode_class[i] = 1 << (int) O_MODE;
          else
            sparc_mode_class[i] = 0;
          break;
        case MODE_FLOAT:
        case MODE_COMPLEX_FLOAT:
          if (GET_MODE_SIZE (i) <= 4)
            sparc_mode_class[i] = 1 << (int) SF_MODE;
          else if (GET_MODE_SIZE (i) == 8)
            sparc_mode_class[i] = 1 << (int) DF_MODE;
          else if (GET_MODE_SIZE (i) == 16)
            sparc_mode_class[i] = 1 << (int) TF_MODE;
          else if (GET_MODE_SIZE (i) == 32)
            sparc_mode_class[i] = 1 << (int) OF_MODE;
          else
            sparc_mode_class[i] = 0;
          break;
        case MODE_CC:
        default:
          /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
             we must explicitly check for them here.  */
          if (i == (int) CCFPmode || i == (int) CCFPEmode)
            sparc_mode_class[i] = 1 << (int) CCFP_MODE;
          else if (i == (int) CCmode || i == (int) CC_NOOVmode
                   || i == (int) CCXmode || i == (int) CCX_NOOVmode)
            sparc_mode_class[i] = 1 << (int) CC_MODE;
          else
            sparc_mode_class[i] = 0;
          break;
        }
    }

  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
        sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32)
        sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
        sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
        sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
        sparc_regno_reg_class[i] = FPCC_REGS;
      else
        sparc_regno_reg_class[i] = NO_REGS;
    }
}

/* Save the non-call-used registers from LOW to HIGH at BASE+OFFSET.
   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
   v9 int regs as it simplifies the code.  */

static int
save_regs (file, low, high, base, offset, n_regs, real_offset)
     FILE *file;
     int low, high;
     const char *base;
     int offset;
     int n_regs;
     int real_offset;
{
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      for (i = low; i < high; i++)
        {
          if (regs_ever_live[i] && ! call_used_regs[i])
            {
              fprintf (file, "\tstx\t%s, [%s+%d]\n",
                       reg_names[i], base, offset + 4 * n_regs);
              if (dwarf2out_do_frame ())
                dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
              n_regs += 2;
            }
        }
    }
  else
    {
      for (i = low; i < high; i += 2)
        {
          if (regs_ever_live[i] && ! call_used_regs[i])
            {
              if (regs_ever_live[i+1] && ! call_used_regs[i+1])
                {
                  fprintf (file, "\tstd\t%s, [%s+%d]\n",
                           reg_names[i], base, offset + 4 * n_regs);
                  if (dwarf2out_do_frame ())
                    {
                      char *l = dwarf2out_cfi_label ();
                      dwarf2out_reg_save (l, i, real_offset + 4 * n_regs);
                      dwarf2out_reg_save (l, i+1, real_offset + 4 * n_regs + 4);
                    }
                  n_regs += 2;
                }
              else
                {
                  fprintf (file, "\tst\t%s, [%s+%d]\n",
                           reg_names[i], base, offset + 4 * n_regs);
                  if (dwarf2out_do_frame ())
                    dwarf2out_reg_save ("", i, real_offset + 4 * n_regs);
                  n_regs += 2;
                }
            }
          else
            {
              if (regs_ever_live[i+1] && ! call_used_regs[i+1])
                {
                  fprintf (file, "\tst\t%s, [%s+%d]\n",
                           reg_names[i+1], base, offset + 4 * n_regs + 4);
                  if (dwarf2out_do_frame ())
                    dwarf2out_reg_save ("", i + 1, real_offset + 4 * n_regs + 4);
                  n_regs += 2;
                }
            }
        }
    }
  return n_regs;
}

/* Restore the non-call-used registers from LOW to HIGH at BASE+OFFSET.

   N_REGS is the number of 4-byte regs saved thus far.  This applies even to
   v9 int regs as it simplifies the code.  */

static int
restore_regs (file, low, high, base, offset, n_regs)
     FILE *file;
     int low, high;
     const char *base;
     int offset;
     int n_regs;
{
  int i;

  if (TARGET_ARCH64 && high <= 32)
    {
      for (i = low; i < high; i++)
        {
          if (regs_ever_live[i] && ! call_used_regs[i])
            {
              fprintf (file, "\tldx\t[%s+%d], %s\n",
                       base, offset + 4 * n_regs, reg_names[i]);
              n_regs += 2;
            }
        }
    }
  else
    {
      for (i = low; i < high; i += 2)
        {
          if (regs_ever_live[i] && ! call_used_regs[i])
            {
              if (regs_ever_live[i+1] && ! call_used_regs[i+1])
                fprintf (file, "\tldd\t[%s+%d], %s\n",
                         base, offset + 4 * n_regs, reg_names[i]);
              else
                fprintf (file, "\tld\t[%s+%d], %s\n",
                         base, offset + 4 * n_regs, reg_names[i]);
              n_regs += 2;
            }
          else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
            {
              fprintf (file, "\tld\t[%s+%d], %s\n",
                       base, offset + 4 * n_regs + 4, reg_names[i+1]);
              n_regs += 2;
            }
        }
    }
  return n_regs;
}

/* Static variables we want to share between prologue and epilogue.  */

/* Number of live general or floating point registers that need to be saved
   (as 4-byte quantities).  This is only done if TARGET_EPILOGUE.  */
static int num_gfregs;

/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by output_function_prologue().  */

int
compute_frame_size (size, leaf_function)
     int size;
     int leaf_function;
{
  int n_regs = 0, i;
  int outgoing_args_size = (current_function_outgoing_args_size
                            + REG_PARM_STACK_SPACE (current_function_decl));

  if (TARGET_EPILOGUE)
    {
      /* N_REGS is the number of 4-byte regs saved thus far.  This applies
         even to v9 int regs to be consistent with save_regs/restore_regs.  */

      if (TARGET_ARCH64)
        {
          for (i = 0; i < 8; i++)
            if (regs_ever_live[i] && ! call_used_regs[i])
              n_regs += 2;
        }
      else
        {
          for (i = 0; i < 8; i += 2)
            if ((regs_ever_live[i] && ! call_used_regs[i])
                || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
              n_regs += 2;
        }

      for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
        if ((regs_ever_live[i] && ! call_used_regs[i])
            || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
          n_regs += 2;
    }

  /* Set up values for use in `function_epilogue'.  */
  num_gfregs = n_regs;

  if (leaf_function && n_regs == 0
      && size == 0 && current_function_outgoing_args_size == 0)
    {
      actual_fsize = apparent_fsize = 0;
    }
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember that it's negative.
         The stack bias (if any) is taken out to undo its effects.  */
      apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
      apparent_fsize += n_regs * 4;
      actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
    }

  /* Make sure nothing can clobber our register windows.
     If a SAVE must be done, or there is a stack-local variable,
     the register window area must be allocated.
     ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
  if (leaf_function == 0 || size > 0)
    actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);

  return SPARC_STACK_ALIGN (actual_fsize);
}
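
/* For illustration: the register window reservation above comes to
   16 * 4 + 8 == 72 bytes on a 32-bit target (UNITS_PER_WORD == 4) and
   16 * 8 == 128 bytes on a 64-bit target, where the extra v8-only
   8 bytes are not needed.  */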

/* Build a (32 bit) big number in a register.  */
/* ??? We may be able to use the set macro here too.  */

static void
build_big_number (file, num, reg)
     FILE *file;
     int num;
     const char *reg;
{
  if (num >= 0 || ! TARGET_ARCH64)
    {
      fprintf (file, "\tsethi\t%%hi(%d), %s\n", num, reg);
      if ((num & 0x3ff) != 0)
        fprintf (file, "\tor\t%s, %%lo(%d), %s\n", reg, num, reg);
    }
  else /* num < 0 && TARGET_ARCH64 */
    {
      /* Sethi does not sign extend, so we must use a little trickery
         to use it for negative numbers.  Invert the constant before
         loading it in, then use xor immediate to invert the loaded bits
         (along with the upper 32 bits) to the desired constant.  This
         works because the sethi and immediate fields overlap.  */
      int asize = num;
      int inv = ~asize;
      int low = -0x400 + (asize & 0x3FF);

      fprintf (file, "\tsethi\t%%hi(%d), %s\n\txor\t%s, %d, %s\n",
               inv, reg, reg, low, reg);
    }
}
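
/* A worked example of the trick above (the value is hypothetical): for
   num == -4097 on v9 we get inv == 0x1000 and
   low == -0x400 + (num & 0x3ff) == -1, so the emitted sequence

        sethi   %hi(4096), %reg
        xor     %reg, -1, %reg

   first loads 0x1000 with the upper bits clear, then the sign-extended
   xor immediate inverts every bit, leaving -4097 (0x...ffffefff).  */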

/* Output code for the function prologue.  */

void
output_function_prologue (file, size, leaf_function)
     FILE *file;
     int size;
     int leaf_function;
{
  /* Need to use actual_fsize, since we are also allocating
     space for our callee (and our own register save area).  */
  actual_fsize = compute_frame_size (size, leaf_function);

  if (leaf_function)
    {
      frame_base_name = "%sp";
      frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
    }
  else
    {
      frame_base_name = "%fp";
      frame_base_offset = SPARC_STACK_BIAS;
    }

  /* This is only for the human reader.  */
  fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);

  if (actual_fsize == 0)
    /* do nothing.  */ ;
  else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
    {
      if (actual_fsize <= 4096)
        fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
        {
          fprintf (file, "\tsave\t%%sp, -4096, %%sp\n");
          fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
        }
      else
        {
          build_big_number (file, -actual_fsize, "%g1");
          fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
        }
    }
  else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
    {
      /* We assume the environment will properly handle or otherwise avoid
         trouble associated with an interrupt occurring after the `save' or
         a trap occurring during it.  */
      fprintf (file, "\tsave\n");

      if (actual_fsize <= 4096)
        fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
        {
          fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
          fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
        }
      else
        {
          build_big_number (file, -actual_fsize, "%g1");
          fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
        }
    }
  else /* leaf function */
    {
      if (actual_fsize <= 4096)
        fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize);
      else if (actual_fsize <= 8192)
        {
          fprintf (file, "\tadd\t%%sp, -4096, %%sp\n");
          fprintf (file, "\tadd\t%%sp, -%d, %%sp\n", actual_fsize - 4096);
        }
      else
        {
          build_big_number (file, -actual_fsize, "%g1");
          fprintf (file, "\tadd\t%%sp, %%g1, %%sp\n");
        }
    }

  if (dwarf2out_do_frame () && actual_fsize)
    {
      char *label = dwarf2out_cfi_label ();

      /* The canonical frame address refers to the top of the frame.  */
      dwarf2out_def_cfa (label, (leaf_function ? STACK_POINTER_REGNUM
                                 : FRAME_POINTER_REGNUM),
                         frame_base_offset);

      if (! leaf_function)
        {
          /* Note the register window save.  This tells the unwinder that
             it needs to restore the window registers from the previous
             frame's window save area at 0(cfa).  */
          dwarf2out_window_save (label);

          /* The return address (-8) is now in %i7.  */
          dwarf2out_return_reg (label, 31);
        }
    }

  /* If doing anything with PIC, do it now.  */
  if (! flag_pic)
    fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);

  /* Call saved registers are saved just above the outgoing argument area.  */
  if (num_gfregs)
    {
      int offset, real_offset, n_regs;
      const char *base;

      real_offset = -apparent_fsize;
      offset = -apparent_fsize + frame_base_offset;
      if (offset < -4096 || offset + num_gfregs * 4 > 4096)
        {
          /* ??? This might be optimized a little as %g1 might already have a
             value close enough that a single add insn will do.  */
          /* ??? Although, all of this is probably only a temporary fix
             because if %g1 can hold a function result, then
             output_function_epilogue will lose (the result will get
             clobbered).  */
          build_big_number (file, offset, "%g1");
          fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
          base = "%g1";
          offset = 0;
        }
      else
        {
          base = frame_base_name;
        }

      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
        /* ??? Originally saved regs 0-15 here.  */
        n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      else if (leaf_function)
        /* ??? Originally saved regs 0-31 here.  */
        n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
      if (TARGET_EPILOGUE)
        save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
                   real_offset);
    }

  leaf_label = 0;
  if (leaf_function && actual_fsize != 0)
    {
      /* warning ("leaf procedure with frame size %d", actual_fsize); */
      if (! TARGET_EPILOGUE)
        leaf_label = gen_label_rtx ();
    }
}
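
/* For illustration (the frame sizes are hypothetical): a non-leaf
   function with actual_fsize == 112 gets a single "save %sp, -112, %sp";
   one with actual_fsize == 6000 gets "save %sp, -4096, %sp" followed by
   "add %sp, -1904, %sp"; and anything over 8192 builds the size in %g1
   via build_big_number and emits "save %sp, %g1, %sp".  */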

/* Output code for the function epilogue.  */

void
output_function_epilogue (file, size, leaf_function)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
     int leaf_function;
{
  const char *ret;

  if (leaf_label)
    {
      emit_label_after (leaf_label, get_last_insn ());
      final_scan_insn (get_last_insn (), file, 0, 0, 1);
    }

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  else if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT(file);
    }
#endif

  else if (current_function_epilogue_delay_list == 0)
    {
      /* If code does not drop into the epilogue, we need
         do nothing except output pending case vectors.  */
      rtx insn = get_last_insn ();
      if (GET_CODE (insn) == NOTE)
        insn = prev_nonnote_insn (insn);
      if (insn && GET_CODE (insn) == BARRIER)
        goto output_vectors;
    }

  /* Restore any call saved registers.  */
  if (num_gfregs)
    {
      int offset, n_regs;
      const char *base;

      offset = -apparent_fsize + frame_base_offset;
      if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
        {
          build_big_number (file, offset, "%g1");
          fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
          base = "%g1";
          offset = 0;
        }
      else
        {
          base = frame_base_name;
        }

      n_regs = 0;
      if (TARGET_EPILOGUE && ! leaf_function)
        /* ??? Originally saved regs 0-15 here.  */
        n_regs = restore_regs (file, 0, 8, base, offset, 0);
      else if (leaf_function)
        /* ??? Originally saved regs 0-31 here.  */
        n_regs = restore_regs (file, 0, 8, base, offset, 0);
      if (TARGET_EPILOGUE)
        restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
    }

  /* Work out how to skip the caller's unimp instruction if required.  */
  if (leaf_function)
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%o7+12" : "retl");
  else
    ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");

  if (TARGET_EPILOGUE || leaf_label)
    {
      int old_target_epilogue = TARGET_EPILOGUE;
      target_flags &= ~old_target_epilogue;

      if (! leaf_function)
        {
          /* If we wound up with things in our delay slot, flush them here.  */
          if (current_function_epilogue_delay_list)
            {
              rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
                                               get_last_insn ());
              PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
                                gen_rtvec (2,
                                           PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
                                           PATTERN (insn)));
              final_scan_insn (insn, file, 1, 0, 1);
            }
          else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
            fputs ("\treturn\t%i7+8\n\tnop\n", file);
          else
            fprintf (file, "\t%s\n\trestore\n", ret);
        }
      /* All of the following cases are for leaf functions.  */
      else if (current_function_epilogue_delay_list)
        {
          /* eligible_for_epilogue_delay_slot ensures that if this is a
             leaf function, then we will only have an insn in the delay slot
             if the frame size is zero, thus no adjust for the stack is
             needed here.  */
          if (actual_fsize != 0)
            abort ();
          fprintf (file, "\t%s\n", ret);
          final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
                           file, 1, 0, 1);
        }
      /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
         avoid generating confusing assembly language output.  */
      else if (actual_fsize == 0)
        fprintf (file, "\t%s\n\tnop\n", ret);
      else if (actual_fsize <= 4096)
        fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
      else if (actual_fsize <= 8192)
        fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
                 ret, actual_fsize - 4096);
      else if ((actual_fsize & 0x3ff) == 0)
        fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
                 actual_fsize, ret);
      else
        fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
                 actual_fsize, actual_fsize, ret);
      target_flags |= old_target_epilogue;
    }

 output_vectors:
  sparc_output_deferred_case_vectors ();
}

/* Functions for handling argument passing.

   For v8 the first six args are normally in registers and the rest are
   pushed.  Any arg that starts within the first 6 words is at least
   partially passed in a register unless its data type forbids it.

   For v9, the argument registers are laid out as an array of 16 elements
   and arguments are added sequentially.  The first 6 int args and up to the
   first 16 fp args (depending on size) are passed in regs.

   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
   ----    -----   --------   -----   ------------------   ------   -----------
    15   [SP+248]              %f31       %f30,%f31         %d30
    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
    13   [SP+232]              %f27       %f26,%f27         %d26
    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
    11   [SP+216]              %f23       %f22,%f23         %d22
    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
     9   [SP+200]              %f19       %f18,%f19         %d18
     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
     7   [SP+184]              %f15       %f14,%f15         %d14
     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
     4   [SP+160]     %o4      %f9        %f8,%f9           %d8        %q8
     3   [SP+152]     %o3      %f7        %f6,%f7           %d6
     2   [SP+144]     %o2      %f5        %f4,%f5           %d4        %q4
     1   [SP+136]     %o1      %f3        %f2,%f3           %d2
     0   [SP+128]     %o0      %f1        %f0,%f1           %d0        %q0

   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.

   Integral arguments are always passed as 64 bit quantities appropriately
   extended.

   Passing of floating point values is handled as follows.
   If a prototype is in scope:
     If the value is in a named argument (i.e. not a stdarg function or a
     value not part of the `...') then the value is passed in the appropriate
     fp reg.
     If the value is part of the `...' and is passed in one of the first 6
     slots then the value is passed in the appropriate int reg.
     If the value is part of the `...' and is not passed in one of the first 6
     slots then the value is passed in memory.
   If a prototype is not in scope:
     If the value is one of the first 6 arguments the value is passed in the
     appropriate integer reg and the appropriate fp reg.
     If the value is not one of the first 6 arguments the value is passed in
     the appropriate fp reg and in memory.  */
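
/* A worked example of the v9 scheme above (a hypothetical prototyped
   call): for f (int a, double b, double c), A occupies slot 0 and is
   passed in %o0, while B and C occupy slots 1 and 2 and are passed in
   %d2 and %d4 respectively, exactly as the table reads.  */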
3435
3436/* Maximum number of int regs for args. */
3437#define SPARC_INT_ARG_MAX 6
3438/* Maximum number of fp regs for args. */
3439#define SPARC_FP_ARG_MAX 16
3440
3441#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
3442
3443/* Handle the INIT_CUMULATIVE_ARGS macro.
3444 Initialize a variable CUM of type CUMULATIVE_ARGS
3445 for a call to a function whose data type is FNTYPE.
3446 For a library call, FNTYPE is 0. */
3447
3448void
3449init_cumulative_args (cum, fntype, libname, indirect)
3450 CUMULATIVE_ARGS *cum;
3451 tree fntype;
3452 tree libname ATTRIBUTE_UNUSED;
3453 int indirect ATTRIBUTE_UNUSED;
3454{
3455 cum->words = 0;
3456 cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
3457 cum->libcall_p = fntype == 0;
3458}
3459
3460/* Compute the slot number to pass an argument in.
3461 Returns the slot number or -1 if passing on the stack.
3462
3463 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3464 the preceding args and about the function being called.
3465 MODE is the argument's machine mode.
3466 TYPE is the data type of the argument (as a tree).
3467 This is null for libcalls where that information may
3468 not be available.
3469 NAMED is nonzero if this argument is a named parameter
3470 (otherwise it is an extra parameter matching an ellipsis).
3471 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
3472 *PREGNO records the register number to use if scalar type.
3473 *PPADDING records the amount of padding needed in words. */
3474
3475static int
3476function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
3477 const CUMULATIVE_ARGS *cum;
3478 enum machine_mode mode;
3479 tree type;
3480 int named;
3481 int incoming_p;
3482 int *pregno;
3483 int *ppadding;
3484{
3485 int regbase = (incoming_p
3486 ? SPARC_INCOMING_INT_ARG_FIRST
3487 : SPARC_OUTGOING_INT_ARG_FIRST);
3488 int slotno = cum->words;
3489 int regno;
3490
3491 *ppadding = 0;
3492
3493 if (type != 0 && TREE_ADDRESSABLE (type))
3494 return -1;
3495 if (TARGET_ARCH32
3496 && type != 0 && mode == BLKmode
3497 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
3498 return -1;
3499
3500 switch (mode)
3501 {
3502 case VOIDmode :
3503 /* MODE is VOIDmode when generating the actual call.
3504 See emit_call_1. */
3505 return -1;
3506
3507 case QImode : case CQImode :
3508 case HImode : case CHImode :
3509 case SImode : case CSImode :
3510 case DImode : case CDImode :
3511 if (slotno >= SPARC_INT_ARG_MAX)
3512 return -1;
3513 regno = regbase + slotno;
3514 break;
3515
3516 case SFmode : case SCmode :
3517 case DFmode : case DCmode :
3518 case TFmode : case TCmode :
3519 if (TARGET_ARCH32)
3520 {
3521 if (slotno >= SPARC_INT_ARG_MAX)
3522 return -1;
3523 regno = regbase + slotno;
3524 }
3525 else
3526 {
3527 if ((mode == TFmode || mode == TCmode)
3528 && (slotno & 1) != 0)
3529 slotno++, *ppadding = 1;
3530 if (TARGET_FPU && named)
3531 {
3532 if (slotno >= SPARC_FP_ARG_MAX)
3533 return -1;
3534 regno = SPARC_FP_ARG_FIRST + slotno * 2;
3535 if (mode == SFmode)
3536 regno++;
3537 }
3538 else
3539 {
3540 if (slotno >= SPARC_INT_ARG_MAX)
3541 return -1;
3542 regno = regbase + slotno;
3543 }
3544 }
3545 break;
3546
3547 case BLKmode :
3548 /* For sparc64, objects requiring 16 byte alignment get it. */
3549 if (TARGET_ARCH64)
3550 {
3551 if (type && TYPE_ALIGN (type) == 128 && (slotno & 1) != 0)
3552 slotno++, *ppadding = 1;
3553 }
3554
3555 if (TARGET_ARCH32
3556 || (type && TREE_CODE (type) == UNION_TYPE))
3557 {
3558 if (slotno >= SPARC_INT_ARG_MAX)
3559 return -1;
3560 regno = regbase + slotno;
3561 }
3562 else
3563 {
3564 tree field;
3565 int intregs_p = 0, fpregs_p = 0;
3566 /* The ABI obviously doesn't specify how packed
3567 structures are passed. These are defined to be passed
3568 in int regs if possible, otherwise memory. */
3569 int packed_p = 0;
3570
3571 /* First see what kinds of registers we need. */
3572 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3573 {
3574 if (TREE_CODE (field) == FIELD_DECL)
3575 {
3576 if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3577 && TARGET_FPU)
3578 fpregs_p = 1;
3579 else
3580 intregs_p = 1;
3581 if (DECL_PACKED (field))
3582 packed_p = 1;
3583 }
3584 }
3585 if (packed_p || !named)
3586 fpregs_p = 0, intregs_p = 1;
3587
3588 /* If all arg slots are filled, then must pass on stack. */
3589 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
3590 return -1;
3591 /* If there are only int args and all int arg slots are filled,
3592 then must pass on stack. */
3593 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
3594 return -1;
3595 /* Note that even if all int arg slots are filled, fp members may
3596 still be passed in regs if such regs are available.
3597 *PREGNO isn't set because there may be more than one, it's up
3598 to the caller to compute them. */
3599 return slotno;
3600 }
3601 break;
3602
3603 default :
3604 abort ();
3605 }
3606
3607 *pregno = regno;
3608 return slotno;
3609}
3610
3611/* Handle recursive register counting for structure field layout. */
3612
3613struct function_arg_record_value_parms
3614{
3615 rtx ret;
3616 int slotno, named, regbase;
3617 int nregs, intoffset;
3618};
3619
3620static void function_arg_record_value_3
3621 PROTO((int, struct function_arg_record_value_parms *));
3622static void function_arg_record_value_2
3623 PROTO((tree, int, struct function_arg_record_value_parms *));
3624static rtx function_arg_record_value
3625 PROTO((tree, enum machine_mode, int, int, int));
3626
3627static void
3628function_arg_record_value_1 (type, startbitpos, parms)
3629 tree type;
3630 int startbitpos;
3631 struct function_arg_record_value_parms *parms;
3632{
3633 tree field;
3634
3635 /* The ABI obviously doesn't specify how packed structures are
3636 passed. These are defined to be passed in int regs if possible,
3637 otherwise memory. */
3638 int packed_p = 0;
3639
3640 /* We need to compute how many registers are needed so we can
3641 allocate the PARALLEL but before we can do that we need to know
3642 whether there are any packed fields. If there are, int regs are
3643 used regardless of whether there are fp values present. */
3644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3645 {
3646 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
3647 {
3648 packed_p = 1;
3649 break;
3650 }
3651 }
3652
3653 /* Compute how many registers we need. */
3654 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3655 {
3656 if (TREE_CODE (field) == FIELD_DECL)
3657 {
3658 int bitpos = startbitpos;
3659 if (DECL_FIELD_BITPOS (field))
3660 bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
3661 /* ??? FIXME: else assume zero offset. */
3662
3663 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
3664 {
3665 function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
3666 }
3667 else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
3668 && TARGET_FPU
3669 && ! packed_p
3670 && parms->named)
3671 {
3672 if (parms->intoffset != -1)
3673 {
3674 int intslots, this_slotno;
3675
3676 intslots = (bitpos - parms->intoffset + BITS_PER_WORD - 1)
3677 / BITS_PER_WORD;
3678 this_slotno = parms->slotno + parms->intoffset
3679 / BITS_PER_WORD;
3680
3681 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3682 intslots = MAX (intslots, 0);
3683 parms->nregs += intslots;
3684 parms->intoffset = -1;
3685 }
3686
3687 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
3688 If it wasn't true we wouldn't be here. */
3689 parms->nregs += 1;
3690 }
3691 else
3692 {
3693 if (parms->intoffset == -1)
3694 parms->intoffset = bitpos;
3695 }
3696 }
3697 }
3698}
3699
3700/* Handle recursive structure field register assignment. */
3701
3702static void
3703function_arg_record_value_3 (bitpos, parms)
3704 int bitpos;
3705 struct function_arg_record_value_parms *parms;
3706{
3707 enum machine_mode mode;
3708 int regno, this_slotno, intslots, intoffset;
3709 rtx reg;
3710
3711 if (parms->intoffset == -1)
3712 return;
3713 intoffset = parms->intoffset;
3714 parms->intoffset = -1;
3715
3716 intslots = (bitpos - intoffset + BITS_PER_WORD - 1) / BITS_PER_WORD;
3717 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
3718
3719 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
3720 if (intslots <= 0)
3721 return;
3722
3723 /* If this is the trailing part of a word, only load that much into
3724 the register. Otherwise load the whole register. Note that in
3725 the latter case we may pick up unwanted bits. It's not a problem
3726 at the moment but may wish to revisit. */
3727
3728 if (intoffset % BITS_PER_WORD != 0)
3729 {
3730 mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
3731 MODE_INT, 0);
3732 }
3733 else
3734 mode = word_mode;
3735
3736 intoffset /= BITS_PER_UNIT;
3737 do
3738 {
3739 regno = parms->regbase + this_slotno;
3740 reg = gen_rtx_REG (mode, regno);
3741 XVECEXP (parms->ret, 0, parms->nregs)
3742 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
3743
3744 this_slotno += 1;
3745 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
3746 parms->nregs += 1;
3747 intslots -= 1;
3748 }
3749 while (intslots > 0);
3750}

/* A subroutine of function_arg_record_value.  Walk TYPE and fill in the
   PARALLEL entries: FP fields get FP registers, everything else is
   batched into integer registers via function_arg_record_value_3.  */

static void
function_arg_record_value_2 (type, startbitpos, parms)
     tree type;
     int startbitpos;
     struct function_arg_record_value_parms *parms;
{
  tree field;
  int packed_p = 0;

  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed_p = 1;
	  break;
	}
    }

  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  int bitpos = startbitpos;
	  if (DECL_FIELD_BITPOS (field))
	    bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    {
	      function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
	    }
	  else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
		   && TARGET_FPU
		   && ! packed_p
		   && parms->named)
	    {
	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
	      rtx reg;

	      function_arg_record_value_3 (bitpos, parms);

	      reg = gen_rtx_REG (DECL_MODE (field),
				 (SPARC_FP_ARG_FIRST + this_slotno * 2
				  + (DECL_MODE (field) == SFmode
				     && (bitpos & 32) != 0)));
	      XVECEXP (parms->ret, 0, parms->nregs)
		= gen_rtx_EXPR_LIST (VOIDmode, reg,
				     GEN_INT (bitpos / BITS_PER_UNIT));
	      parms->nregs += 1;
	    }
	  else
	    {
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
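
/* A standalone sketch (under #if 0) of the FP register selection above.
   Each 8 byte argument slot maps to two 4 byte FP registers, so a float
   sitting in the high half of a slot (bit 32 of the slot set) lands in
   the odd register of the pair.  The value 32 for SPARC_FP_ARG_FIRST is
   an assumption made for the example.  */
#if 0
#include <stdio.h>

int
main ()
{
  int sparc_fp_arg_first = 32;	/* first FP argument register (%f0) */
  int bitpos = 96;		/* float field at byte offset 12 */
  int slotno = bitpos / 64;	/* argument slot 1 */
  int is_odd_half = (bitpos & 32) != 0;
  int regno = sparc_fp_arg_first + slotno * 2 + is_odd_half;

  printf ("%%f%d\n", regno - 32);	/* %f3 */
  return 0;
}
#endif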

/* Used by function_arg and function_value to implement the conventions
   for passing and returning structures in registers: scan TYPE and build
   a PARALLEL describing where each piece goes.  */

static rtx
function_arg_record_value (type, mode, slotno, named, regbase)
     tree type;
     enum machine_mode mode;
     int slotno, named, regbase;
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  struct function_arg_record_value_parms parms;
  int nregs;

  parms.ret = NULL_RTX;
  parms.slotno = slotno;
  parms.named = named;
  parms.regbase = regbase;

  /* Compute how many registers we need.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_1 (type, 0, &parms);

  if (parms.intoffset != -1)
    {
      int intslots, this_slotno;

      intslots = (typesize * BITS_PER_UNIT - parms.intoffset
		  + BITS_PER_WORD - 1) / BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;

      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
      intslots = MAX (intslots, 0);

      parms.nregs += intslots;
    }
  nregs = parms.nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}
      else
	{
	  /* ??? C++ has structures with no fields, and yet a size.  Give up
	     for now and pass everything back in integer registers.  */
	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	}
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }
  if (nregs == 0)
    abort ();

  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));

  /* Fill in the entries.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_2 (type, 0, &parms);
  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);

  if (parms.nregs != nregs)
    abort ();

  return parms.ret;
}
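
/* A standalone sketch (under #if 0) of the fallback sizing above for a
   C++ struct that has a size but no fields.  The 8 byte word size is an
   assumption made for the example.  */
#if 0
#include <stdio.h>

int
main ()
{
  int units_per_word = 8;
  int typesize = 12;		/* e.g. a 12 byte struct */
  /* Round the byte size up to whole words.  */
  int nregs = (typesize + units_per_word - 1) / units_per_word;

  printf ("%d registers\n", nregs);	/* 2 */
  return 0;
}
#endif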

/* Handle the FUNCTION_ARG macro.
   Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */

rtx
function_arg (cum, mode, type, named, incoming_p)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming_p;
{
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  rtx reg;

  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
				&regno, &padding);

  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      reg = gen_rtx_REG (mode, regno);
      return reg;
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  if ((GET_MODE_CLASS (mode) == MODE_FLOAT
       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
      && SPARC_FP_REG_P (regno))
    {
      reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	{
	  /* "* 2" because fp reg numbers are recorded in 4 byte
	     quantities.  */
#if 0
	  /* ??? This will cause the value to be passed in the fp reg and
	     in the stack.  When a prototype exists we want to pass the
	     value in the reg but reserve space on the stack.  That's an
	     optimization, and is deferred [for a bit].  */
	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
	    return gen_rtx_PARALLEL (mode,
			gen_rtvec (2,
				   gen_rtx_EXPR_LIST (VOIDmode,
						      NULL_RTX, const0_rtx),
				   gen_rtx_EXPR_LIST (VOIDmode,
						      reg, const0_rtx)));
	  else
#else
	  /* ??? It seems that passing back a register even when past
	     the area declared by REG_PARM_STACK_SPACE will allocate
	     space appropriately, and will not copy the data onto the
	     stack, exactly as we desire.

	     This is due to locate_and_pad_parm being called in
	     expand_call whenever reg_parm_stack_space > 0, which
	     while beneficial to our example here, would seem to be
	     in error from what had been intended.  Ho hum...  -- r~ */
#endif
	  return reg;
	}
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming_p)
		return reg;

	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }
  else if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      /* Structures up to 16 bytes in size are passed in arg slots on the
	 stack and are promoted to registers where possible.  */

      if (int_size_in_bytes (type) > 16)
	abort (); /* shouldn't get here */

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      enum machine_mode mode;
      int bytes = int_size_in_bytes (type);

      if (bytes > 16)
	abort ();

      mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
      reg = gen_rtx_REG (mode, regno);
    }
  else
    {
      /* Scalar or complex int.  */
      reg = gen_rtx_REG (mode, regno);
    }

  return reg;
}

/* Handle the FUNCTION_ARG_PARTIAL_NREGS macro.
   For an arg passed partly in registers and partly in memory,
   this is the number of registers used.
   For args passed entirely in registers or entirely in memory, zero.

   Any arg that starts in the first 6 regs but won't entirely fit in them
   needs partial registers on v8.  On v9, structures with integer
   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
   values that begin in the last fp reg [where "last fp reg" varies with the
   mode] will be split between that reg and memory.  */

int
function_arg_partial_nregs (cum, mode, type, named)
     const CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int slotno, regno, padding;

  /* We pass 0 for incoming_p here; it doesn't matter.  */
  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);

  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > NPARM_REGS (SImode))
	return NPARM_REGS (SImode) - slotno;
      return 0;
    }
  else
    {
      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);
	  int align = TYPE_ALIGN (type);

	  if (align == 16)
	    slotno += slotno & 1;
	  if (size > 8 && size <= 16
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return 1;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! TARGET_FPU))
	{
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    {
	      slotno += slotno & 1;
	      if (slotno == SPARC_INT_ARG_MAX - 2)
		return 1;
	    }
	  else
	    {
	      if (slotno == SPARC_INT_ARG_MAX - 1)
		return 1;
	    }
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  if (GET_MODE_ALIGNMENT (mode) == 128)
	    slotno += slotno & 1;
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return 1;
	}
      return 0;
    }
}
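
/* A standalone sketch (under #if 0) of the v8 case above.  It assumes
   ROUND_ADVANCE (SIZE) computes (SIZE + UNITS_PER_WORD - 1) / UNITS_PER_WORD
   with 4 byte words and 6 integer parameter registers.  */
#if 0
#include <stdio.h>

int
main ()
{
  int nparm_regs = 6;		/* %o0..%o5 */
  int units_per_word = 4;
  int slotno = 5;		/* argument starts in the last reg */
  int size = 8;			/* an 8 byte argument */
  int words = (size + units_per_word - 1) / units_per_word;

  if (slotno + words > nparm_regs)
    printf ("%d reg(s), rest in memory\n", nparm_regs - slotno); /* 1 */
  return 0;
}
#endif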

/* Handle the FUNCTION_ARG_PASS_BY_REFERENCE macro.
   !v9: The SPARC ABI stipulates passing struct arguments (of any size) and
   quad-precision floats by invisible reference.
   v9: Aggregates greater than 16 bytes are passed by reference.
   For Pascal, also pass arrays by reference.  */

int
function_arg_pass_by_reference (cum, mode, type, named)
     const CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  if (TARGET_ARCH32)
    {
      return ((type && AGGREGATE_TYPE_P (type))
	      || mode == TFmode || mode == TCmode);
    }
  else
    {
      return ((type && TREE_CODE (type) == ARRAY_TYPE)
	      /* Complex values count as aggregates, so the size check
		 below also catches TCmode.  */
	      || GET_MODE_SIZE (mode) > 16
	      || (type && AGGREGATE_TYPE_P (type)
		  && int_size_in_bytes (type) > 16));
    }
}
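
/* A standalone sketch (under #if 0): the v9 by-reference rule for
   aggregates reduced to plain C.  A 24 byte struct is passed by
   invisible reference, a 16 byte one is not.  */
#if 0
#include <stdio.h>

static int
v9_pass_by_reference (int is_aggregate, int size_in_bytes)
{
  /* Aggregates larger than 16 bytes go by reference; anything that
     still fits in two 8 byte argument slots goes by value.  */
  return is_aggregate && size_in_bytes > 16;
}

int
main ()
{
  printf ("%d %d\n",
	  v9_pass_by_reference (1, 24),		/* 1: by reference */
	  v9_pass_by_reference (1, 16));	/* 0: by value */
  return 0;
}
#endif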

/* Handle the FUNCTION_ARG_ADVANCE macro.
   Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   TYPE is null for libcalls where that information may not be available.  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int slotno, regno, padding;

  /* We pass 0 for incoming_p here; it doesn't matter.  */
  slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);

  /* If the register required leading padding, add it in.  */
  if (slotno != -1)
    cum->words += padding;

  if (TARGET_ARCH32)
    {
      cum->words += (mode != BLKmode
		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
		     : ROUND_ADVANCE (int_size_in_bytes (type)));
    }
  else
    {
      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  if (size <= 8)
	    ++cum->words;
	  else if (size <= 16)
	    cum->words += 2;
	  else /* passed by reference */
	    ++cum->words;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
	{
	  cum->words += 2;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  cum->words += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
	}
      else
	{
	  cum->words += (mode != BLKmode
			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
			 : ROUND_ADVANCE (int_size_in_bytes (type)));
	}
    }
}
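
/* A standalone sketch (under #if 0) of how cum->words advances on v9 for
   an int (1 slot), a 16 byte struct (2 slots) and a 24 byte struct
   (1 slot, because it is passed by reference and only the pointer
   occupies a slot).  */
#if 0
#include <stdio.h>

int
main ()
{
  int words = 0;

  words += 1;		/* int: one 8 byte slot */
  words += 2;		/* 16 byte aggregate: two slots */
  words += 1;		/* 24 byte aggregate: pointer only */

  printf ("next free slot: %d\n", words);	/* 4 */
  return 0;
}
#endif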

/* Handle the FUNCTION_ARG_PADDING macro.
   For the 64 bit ABI structs are always stored left shifted in their
   argument slot.  */

enum direction
function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
    return upward;

  /* This is the default definition.  */
  return (! BYTES_BIG_ENDIAN
	  ? upward
	  : ((mode == BLKmode
	      ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
		 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	      : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	     ? downward : upward));
}

/* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
   For v9, function return values are subject to the same rules as arguments,
   except that up to 32 bytes may be returned in registers.  */

rtx
function_value (type, mode, incoming_p)
     tree type;
     enum machine_mode mode;
     int incoming_p;
{
  int regno;
  int regbase = (incoming_p
		 ? SPARC_OUTGOING_INT_ARG_FIRST
		 : SPARC_INCOMING_INT_ARG_FIRST);

  if (TARGET_ARCH64 && type)
    {
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* Structures up to 32 bytes in size are passed in registers,
	     promoted to fp registers where possible.  */

	  if (int_size_in_bytes (type) > 32)
	    abort (); /* shouldn't get here */

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  int bytes = int_size_in_bytes (type);

	  if (bytes > 32)
	    abort ();

	  mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (TARGET_ARCH64
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
      && type && TREE_CODE (type) != UNION_TYPE)
    mode = DImode;

  if (incoming_p)
    regno = BASE_RETURN_VALUE_REG (mode);
  else
    regno = BASE_OUTGOING_VALUE_REG (mode);

  return gen_rtx_REG (mode, regno);
}

/* Do what is necessary for `va_start'.  The argument is ignored.

   We look at the current function to determine if stdarg or varargs
   is used and return the address of the first unnamed parameter.  */

rtx
sparc_builtin_saveregs (arglist)
     tree arglist ATTRIBUTE_UNUSED;
{
  int first_reg = current_function_args_info.words;
  rtx address;
  int regno;

  for (regno = first_reg; regno < NPARM_REGS (word_mode); regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
				 gen_rtx_PLUS (Pmode,
					       frame_pointer_rtx,
					       GEN_INT (STACK_POINTER_OFFSET
							+ UNITS_PER_WORD * regno))),
		    gen_rtx_REG (word_mode,
				 BASE_INCOMING_ARG_REG (word_mode) + regno));

  address = gen_rtx_PLUS (Pmode,
			  frame_pointer_rtx,
			  GEN_INT (STACK_POINTER_OFFSET
				   + UNITS_PER_WORD * first_reg));

  if (current_function_check_memory_usage
      && first_reg < NPARM_REGS (word_mode))
    emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
		       address, ptr_mode,
		       GEN_INT (UNITS_PER_WORD
				* (NPARM_REGS (word_mode) - first_reg)),
		       TYPE_MODE (sizetype), GEN_INT (MEMORY_USE_RW),
		       TYPE_MODE (integer_type_node));

  return address;
}

/* Return the string to output a conditional branch to LABEL, which is
   the operand number of the label.  OP is the conditional expression.
   XEXP (OP, 0) is assumed to be a condition code register (integer or
   floating point) and its mode specifies what kind of comparison we made.

   REVERSED is non-zero if we should reverse the sense of the comparison.

   ANNUL is non-zero if we should generate an annulling branch.

   NOOP is non-zero if we have to follow this branch by a noop.

   INSN, if set, is the insn.  */

char *
output_cbranch (op, label, reversed, annul, noop, insn)
     rtx op;
     int label;
     int reversed, annul, noop;
     rtx insn;
{
  static char string[32];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  static char v8_labelno[] = "%lX";
  static char v9_icc_labelno[] = "%%icc, %lX";
  static char v9_xcc_labelno[] = "%%xcc, %lX";
  static char v9_fcc_labelno[] = "%%fccX, %lY";
  char *labelno;
  int labeloff, spaces = 8;

  /* ??? !v9: FP branches cannot be preceded by another floating point insn.
     Because there is currently no concept of pre-delay slots, we can fix
     this only by always emitting a nop before a floating point branch.  */

  if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
    strcpy (string, "nop\n\t");
  else
    string[0] = '\0';

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed
      && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
    code = reverse_condition (code), reversed = 0;

  /* Start by writing the branch condition.  */
  switch (code)
    {
    case NE:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  strcat (string, "fbne");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "bne");
	  spaces -= 3;
	}
      break;

    case EQ:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  strcat (string, "fbe");
	  spaces -= 3;
	}
      else
	{
	  strcpy (string, "be");
	  spaces -= 2;
	}
      break;

    case GE:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    strcat (string, "fbul");
	  else
	    strcat (string, "fbge");
	  spaces -= 4;
	}
      else if (mode == CC_NOOVmode)
	{
	  strcpy (string, "bpos");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "bge");
	  spaces -= 3;
	}
      break;

    case GT:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    {
	      strcat (string, "fbule");
	      spaces -= 5;
	    }
	  else
	    {
	      strcat (string, "fbg");
	      spaces -= 3;
	    }
	}
      else
	{
	  strcpy (string, "bg");
	  spaces -= 2;
	}
      break;

    case LE:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    strcat (string, "fbug");
	  else
	    strcat (string, "fble");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "ble");
	  spaces -= 3;
	}
      break;

    case LT:
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  if (reversed)
	    {
	      strcat (string, "fbuge");
	      spaces -= 5;
	    }
	  else
	    {
	      strcat (string, "fbl");
	      spaces -= 3;
	    }
	}
      else if (mode == CC_NOOVmode)
	{
	  strcpy (string, "bneg");
	  spaces -= 4;
	}
      else
	{
	  strcpy (string, "bl");
	  spaces -= 2;
	}
      break;

    case GEU:
      strcpy (string, "bgeu");
      spaces -= 4;
      break;

    case GTU:
      strcpy (string, "bgu");
      spaces -= 3;
      break;

    case LEU:
      strcpy (string, "bleu");
      spaces -= 4;
      break;

    case LTU:
      strcpy (string, "blu");
      spaces -= 3;
      break;

    default:
      abort ();
    }

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul)
    {
      strcat (string, ",a");
      spaces -= 2;
    }

  if (! TARGET_V9)
    {
      labeloff = 2;
      labelno = v8_labelno;
    }
  else
    {
      rtx note;

      if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
	{
	  strcat (string,
		  INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
	  spaces -= 3;
	}

      labeloff = 9;
      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  labeloff = 10;
	  labelno = v9_fcc_labelno;
	  /* Set the char indicating the number of the fcc reg to use.  */
	  labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	labelno = v9_xcc_labelno;
      else
	labelno = v9_icc_labelno;
    }
  /* Set the char indicating the number of the operand containing the
     label_ref.  */
  labelno[labeloff] = label + '0';
  if (spaces > 0)
    strcat (string, "\t");
  else
    strcat (string, " ");
  strcat (string, labelno);

  if (noop)
    strcat (string, "\n\tnop");

  return string;
}
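
/* A standalone sketch (under #if 0) of the template patching used above:
   one character of a static pattern is overwritten with the operand
   number before the pattern is appended to the branch string.  */
#if 0
#include <stdio.h>

int
main ()
{
  static char v9_icc_labelno[] = "%%icc, %lX";	/* X is a placeholder */
  int label = 3;				/* operand number */

  v9_icc_labelno[9] = label + '0';		/* labeloff == 9 */
  printf ("%s\n", v9_icc_labelno);		/* prints "%%icc, %l3" */
  return 0;
}
#endif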

/* Return the string to output a conditional branch to LABEL, testing
   register REG.  LABEL is the operand number of the label; REG is the
   operand number of the reg.  OP is the conditional expression.  The mode
   of REG says what kind of comparison we made.

   REVERSED is non-zero if we should reverse the sense of the comparison.

   ANNUL is non-zero if we should generate an annulling branch.

   NOOP is non-zero if we have to follow this branch by a noop.  */

char *
output_v9branch (op, reg, label, reversed, annul, noop, insn)
     rtx op;
     int reg, label;
     int reversed, annul, noop;
     rtx insn;
{
  static char string[20];
  enum rtx_code code = GET_CODE (op);
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  static char labelno[] = "%X, %lX";
  rtx note;
  int spaces = 8;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed)
    code = reverse_condition (code), reversed = 0;

  /* Only 64 bit versions of these instructions exist.  */
  if (mode != DImode)
    abort ();

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      spaces -= 4;
      break;

    case EQ:
      strcpy (string, "brz");
      spaces -= 3;
      break;

    case GE:
      strcpy (string, "brgez");
      spaces -= 5;
      break;

    case LT:
      strcpy (string, "brlz");
      spaces -= 4;
      break;

    case LE:
      strcpy (string, "brlez");
      spaces -= 5;
      break;

    case GT:
      strcpy (string, "brgz");
      spaces -= 4;
      break;

    default:
      abort ();
    }

  /* Now add the annulling, reg, label, and nop.  */
  if (annul)
    {
      strcat (string, ",a");
      spaces -= 2;
    }

  if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
    {
      strcat (string,
	      INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
      spaces -= 3;
    }

  labelno[1] = reg + '0';
  labelno[6] = label + '0';
  if (spaces > 0)
    strcat (string, "\t");
  else
    strcat (string, " ");
  strcat (string, labelno);

  if (noop)
    strcat (string, "\n\tnop");

  return string;
}

/* Renumber registers in delay slot.  Replace registers instead of
   renumbering because they may be shared.

   This does not handle instructions other than move.  */

static void
epilogue_renumber (where)
     rtx *where;
{
  rtx x = *where;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case MEM:
      *where = x = copy_rtx (x);
      epilogue_renumber (&XEXP (x, 0));
      return;

    case REG:
      {
	int regno = REGNO (x);
	if (regno > 8 && regno < 24)
	  abort ();
	if (regno >= 24 && regno < 32)
	  *where = gen_rtx_REG (GET_MODE (x), regno - 16);
	return;
      }
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return;

    case IOR:
    case AND:
    case XOR:
    case PLUS:
    case MINUS:
      epilogue_renumber (&XEXP (x, 1));
      /* Fall through to renumber the first operand as well.  */
    case NEG:
    case NOT:
      epilogue_renumber (&XEXP (x, 0));
      return;

    default:
      debug_rtx (*where);
      abort ();
    }
}
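
/* A standalone sketch (under #if 0) of the renumbering above: the %i
   registers (hard regs 24..31) map to the corresponding %o registers
   (8..15) by subtracting 16, since the delay slot of a v9 `return'
   executes in the caller's window, where our %i regs appear as %o regs.  */
#if 0
#include <stdio.h>

int
main ()
{
  int regno;

  for (regno = 24; regno < 32; regno++)
    printf ("%%i%d -> %%o%d\n", regno - 24, (regno - 16) - 8);
  return 0;
}
#endif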

/* Output assembler code to return from a function.  */

const char *
output_return (operands)
     rtx *operands;
{
  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;

  if (leaf_label)
    {
      operands[0] = leaf_label;
      return "b%* %l0%(";
    }
  else if (current_function_uses_only_leaf_regs)
    {
      /* No delay slot in a leaf function.  */
      if (delay)
	abort ();

      /* If we didn't allocate a frame pointer for the current function,
	 the stack pointer might have been adjusted.  Output code to
	 restore it now.  */

      operands[0] = GEN_INT (actual_fsize);

      /* Use sub of negated value in first two cases instead of add to
	 allow actual_fsize == 4096.  */

      if (actual_fsize <= 4096)
	{
	  if (SKIP_CALLERS_UNIMP_P)
	    return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
	  else
	    return "retl\n\tsub\t%%sp, -%0, %%sp";
	}
      else if (actual_fsize <= 8192)
	{
	  operands[0] = GEN_INT (actual_fsize - 4096);
	  if (SKIP_CALLERS_UNIMP_P)
	    return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
	  else
	    return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
	}
      else if (SKIP_CALLERS_UNIMP_P)
	{
	  if ((actual_fsize & 0x3ff) != 0)
	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
	  else
	    return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
	}
      else
	{
	  if ((actual_fsize & 0x3ff) != 0)
	    return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
	  else
	    return "sethi\t%%hi(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
	}
    }
  else if (TARGET_V9)
    {
      if (delay)
	{
	  epilogue_renumber (&SET_DEST (PATTERN (delay)));
	  epilogue_renumber (&SET_SRC (PATTERN (delay)));
	}
      if (SKIP_CALLERS_UNIMP_P)
	return "return\t%%i7+12%#";
      else
	return "return\t%%i7+8%#";
    }
  else
    {
      if (delay)
	abort ();
      if (SKIP_CALLERS_UNIMP_P)
	return "jmp\t%%i7+12\n\trestore";
      else
	return "ret\n\trestore";
    }
}

/* Leaf functions and non-leaf functions have different needs.  */

static int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

static int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

static int *reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};

void
order_regs_for_local_alloc ()
{
  static int last_order_nonleaf = 1;

  if (regs_ever_live[15] != last_order_nonleaf)
    {
      last_order_nonleaf = !last_order_nonleaf;
      bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
	     (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
    }
}

/* Return 1 if REG and MEM are legitimate enough to allow the various
   mem<-->reg splits to be run.  */

int
sparc_splitdi_legitimate (reg, mem)
     rtx reg;
     rtx mem;
{
  /* Punt if we are here by mistake.  */
  if (! reload_completed)
    abort ();

  /* We must have an offsettable memory reference.  */
  if (! offsettable_memref_p (mem))
    return 0;

  /* If we have legitimate args for ldd/std, we do not want
     the split to happen.  */
  if ((REGNO (reg) % 2) == 0
      && mem_min_alignment (mem, 8))
    return 0;

  /* Success.  */
  return 1;
}

/* Return 1 if x and y are some kind of REG and they refer to
   different hard registers.  This test is guaranteed to be
   run after reload.  */

int
sparc_absnegfloat_split_legitimate (x, y)
     rtx x, y;
{
  if (GET_CODE (x) == SUBREG)
    x = alter_subreg (x);
  if (GET_CODE (x) != REG)
    return 0;
  if (GET_CODE (y) == SUBREG)
    y = alter_subreg (y);
  if (GET_CODE (y) != REG)
    return 0;
  if (REGNO (x) == REGNO (y))
    return 0;
  return 1;
}

/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
   This makes them candidates for using ldd and std insns.

   Note reg1 and reg2 *must* be hard registers.  */

int
registers_ok_for_ldd_peep (reg1, reg2)
     rtx reg1, reg2;
{
  /* We might have been passed a SUBREG.  */
  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
    return 0;

  if (REGNO (reg1) % 2 != 0)
    return 0;

  /* Integer ldd is deprecated in SPARC V9.  */
  if (TARGET_V9 && REGNO (reg1) < 32)
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}

/* Return 1 if addr1 and addr2 are suitable for use in an ldd or
   std insn.

   This can only happen when addr1 and addr2 are consecutive memory
   locations (addr1 + 4 == addr2).  addr1 must also be aligned on a
   64 bit boundary (addr1 % 8 == 0).

   We know %sp and %fp are kept aligned on a 64 bit boundary.  Other
   registers are assumed to *never* be properly aligned and are
   rejected.

   Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
   need only check that the offset for addr1 % 8 == 0.  */

int
addrs_ok_for_ldd_peep (addr1, addr2)
     rtx addr1, addr2;
{
  int reg1, offset1;

  /* Extract a register number and offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (GET_CODE (XEXP (addr1, 0)) != REG)
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (GET_CODE (addr1) != REG)
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
  if (GET_CODE (addr2) != PLUS)
    return 0;

  if (GET_CODE (XEXP (addr2, 0)) != REG
      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
    return 0;

  /* Only %fp and %sp are allowed.  Additionally both addresses must
     use the same register.  */
  if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
    return 0;

  if (reg1 != REGNO (XEXP (addr2, 0)))
    return 0;

  /* The first offset must be evenly divisible by 8 to ensure the
     address is 64 bit aligned.  */
  if (offset1 % 8 != 0)
    return 0;

  /* The offset for the second addr must be 4 more than the first addr.  */
  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
     instructions.  */
  return 1;
}
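
/* A standalone sketch (under #if 0) of the offset checks above reduced to
   plain C.  Two word accesses can merge into one ldd/std only if the
   first is doubleword aligned and the second sits exactly one word
   above it.  */
#if 0
#include <stdio.h>

static int
offsets_ok_for_ldd (int offset1, int offset2)
{
  return offset1 % 8 == 0 && offset2 == offset1 + 4;
}

int
main ()
{
  printf ("%d %d %d\n",
	  offsets_ok_for_ldd (64, 68),	/* 1: aligned, consecutive */
	  offsets_ok_for_ldd (68, 72),	/* 0: first not 8 byte aligned */
	  offsets_ok_for_ldd (64, 72));	/* 0: not consecutive words */
  return 0;
}
#endif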

/* Return 1 if reg is a pseudo, or is the first register in
   a hard register pair.  This makes it a candidate for use in
   ldd and std insns.  */

int
register_ok_for_ldd (reg)
     rtx reg;
{
  /* We might have been passed a SUBREG.  */
  if (GET_CODE (reg) != REG)
    return 0;

  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
    return (REGNO (reg) % 2 == 0);
  else
    return 1;
}

/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.  */
      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.  */
      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (dbr_sequence_length () == 0
	  && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (dbr_sequence_length () == 0
	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '@':
      /* Print out what we are using as the frame pointer.  This might
	 be %fp, or might be %sp+offset.  */
      /* ??? What if offset is too big?  Perhaps the caller knows it isn't?  */
      fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
      return;
    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("Invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("Invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* We don't handle CC[X]_NOOVmode because they're not supposed
	     to occur here.  */
	  if (GET_MODE (x) == CCmode)
	    fputs ("%icc", file);
	  else if (GET_MODE (x) == CCXmode)
	    fputs ("%xcc", file);
	  else
	    abort ();
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;

    case 'A':
      switch (GET_CODE (x))
	{
	case IOR: fputs ("or", file); break;
	case AND: fputs ("and", file); break;
	case XOR: fputs ("xor", file); break;
	default: output_operand_lossage ("Invalid %%A operand");
	}
      return;

    case 'B':
      switch (GET_CODE (x))
	{
	case IOR: fputs ("orn", file); break;
	case AND: fputs ("andn", file); break;
	case XOR: fputs ("xnor", file); break;
	default: output_operand_lossage ("Invalid %%B operand");
	}
      return;

      /* These are used by the conditional move instructions.  */
    case 'c' :
    case 'C':
      {
	enum rtx_code rc = (code == 'c'
			    ? reverse_condition (GET_CODE (x))
			    : GET_CODE (x));
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("ge", file); break;
	  case GT: fputs ("g", file); break;
	  case LE: fputs ("le", file); break;
	  case LT: fputs ("l", file); break;
	  case GEU: fputs ("geu", file); break;
	  case GTU: fputs ("gu", file); break;
	  case LEU: fputs ("leu", file); break;
	  case LTU: fputs ("lu", file); break;
	  default: output_operand_lossage (code == 'c'
					   ? "Invalid %%c operand"
					   : "Invalid %%C operand");
	  }
	return;
      }

      /* These are used by the movr instruction pattern.  */
    case 'd':
    case 'D':
      {
	enum rtx_code rc = (code == 'd'
			    ? reverse_condition (GET_CODE (x))
			    : GET_CODE (x));
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("gez", file); break;
	  case LT: fputs ("lz", file); break;
	  case LE: fputs ("lez", file); break;
	  case GT: fputs ("gz", file); break;
	  default: output_operand_lossage (code == 'd'
					   ? "Invalid %%d operand"
					   : "Invalid %%D operand");
	  }
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = INTVAL (x) & 0xff;
	if (i & 0x80)
	  i |= 0xffffff00;
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("Invalid %%f operand");
      output_address (XEXP (x, 0));
      return;

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
      /* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0))
	  && ! TARGET_LIVE_G0)
	fputs ("%g0+", file);
      output_address (XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == VOIDmode
	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
    {
      if (CONST_DOUBLE_HIGH (x) == 0)
	fprintf (file, "%u", CONST_DOUBLE_LOW (x));
      else if (CONST_DOUBLE_HIGH (x) == -1
	       && CONST_DOUBLE_LOW (x) < 0)
	fprintf (file, "%d", CONST_DOUBLE_LOW (x));
      else
	output_operand_lossage ("long long constant not a valid immediate operand");
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating point constant not a valid immediate operand");
  else
    output_addr_const (file, x);
}

/* This function outputs assembler code for VALUE to FILE, where VALUE is
   a 64 bit (DImode) value.  */

/* ??? If there is a 64 bit counterpart to .word that the assembler
   understands, then using that would simplify this code greatly.  */
/* ??? We only output .xword's for symbols and only then in environments
   where the assembler can handle them.  */

void
output_double_int (file, value)
     FILE *file;
     rtx value;
{
  if (GET_CODE (value) == CONST_INT)
    {
      /* ??? This has endianness issues.  */
#if HOST_BITS_PER_WIDE_INT == 64
      HOST_WIDE_INT xword = INTVAL (value);
      HOST_WIDE_INT high, low;

      high = (xword >> 32) & 0xffffffff;
      low = xword & 0xffffffff;
      ASM_OUTPUT_INT (file, GEN_INT (high));
      ASM_OUTPUT_INT (file, GEN_INT (low));
#else
      if (INTVAL (value) < 0)
	ASM_OUTPUT_INT (file, constm1_rtx);
      else
	ASM_OUTPUT_INT (file, const0_rtx);
      ASM_OUTPUT_INT (file, value);
#endif
    }
  else if (GET_CODE (value) == CONST_DOUBLE)
    {
      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_HIGH (value)));
      ASM_OUTPUT_INT (file, GEN_INT (CONST_DOUBLE_LOW (value)));
    }
  else if (GET_CODE (value) == SYMBOL_REF
	   || GET_CODE (value) == CONST
	   || GET_CODE (value) == PLUS
	   || (TARGET_ARCH64
	       && (GET_CODE (value) == LABEL_REF
		   || GET_CODE (value) == CODE_LABEL
		   || GET_CODE (value) == MINUS)))
    {
      if (! TARGET_V9)
	{
	  ASM_OUTPUT_INT (file, const0_rtx);
	  ASM_OUTPUT_INT (file, value);
	}
      else
	{
	  fprintf (file, "\t%s\t", ASM_LONGLONG);
	  output_addr_const (file, value);
	  fprintf (file, "\n");
	}
    }
  else
    abort ();
}
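
/* A standalone sketch (under #if 0) of the 64 bit split used above: the
   high 32 bits are emitted first, then the low 32 bits, which matches
   SPARC's big-endian layout.  */
#if 0
#include <stdio.h>

int
main ()
{
  long long xword = 0x1122334455667788LL;
  unsigned long high = (unsigned long) ((xword >> 32) & 0xffffffff);
  unsigned long low = (unsigned long) (xword & 0xffffffff);

  printf ("\t.word\t0x%08lx\n\t.word\t0x%08lx\n", high, low);
  return 0;
}
#endif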

/* Return the value of a code used in the .proc pseudo-op that says
   what kind of result this function returns.  For non-C types, we pick
   the closest C type.  */

#ifndef CHAR_TYPE_SIZE
#define CHAR_TYPE_SIZE BITS_PER_UNIT
#endif

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

unsigned long
sparc_type_code (type)
     register tree type;
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows?  */

	case CHAR_TYPE:		/* GNU Pascal CHAR type.  Not used in C.  */
	case BOOLEAN_TYPE:	/* GNU Fortran BOOLEAN type.  */
	case FILE_TYPE:		/* GNU Pascal FILE type.  */
	case SET_TYPE:		/* GNU Pascal SET type.  */
	case LANG_TYPE:		/* ? */
	  return qualifiers;

	default:
	  abort ();		/* Not a type! */
	}
    }

  return qualifiers;
}
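
/* A standalone sketch (under #if 0) of the encoding above for the C type
   "int *": the pointer derivation contributes 1 at the first derivation
   position (shift 6) and the base type "int" contributes code 4 in the
   low bits, giving 0x44.  */
#if 0
#include <stdio.h>

int
main ()
{
  unsigned long qualifiers = 0;
  unsigned shift = 6;

  qualifiers |= (1UL << shift);	/* POINTER_TYPE derivation */
  qualifiers |= 4;		/* base type: signed int */

  printf ("0x%lx\n", qualifiers);	/* 0x44 */
  return 0;
}
#endif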

/* Nested function support.  */

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
   (to store insns).  This is a bit excessive.  Perhaps a different
   mechanism would be better here.

   Emit enough FLUSH insns to synchronize the data and instruction caches.  */

void
sparc_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  /* SPARC 32 bit trampoline:

	sethi	%hi(fn), %g1
	sethi	%hi(static), %g2
	jmp	%g1+%lo(fn)
	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */
#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     0, VOIDmode, 1, tramp, Pmode);
#endif

  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
		  expand_binop (SImode, ior_optab,
				expand_shift (RSHIFT_EXPR, SImode, fnaddr,
					      size_int (10), 0, 1),
				GEN_INT (0x03000000),
				NULL_RTX, 1, OPTAB_DIRECT));

  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  expand_binop (SImode, ior_optab,
				expand_shift (RSHIFT_EXPR, SImode, cxt,
					      size_int (10), 0, 1),
				GEN_INT (0x05000000),
				NULL_RTX, 1, OPTAB_DIRECT));

  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  expand_binop (SImode, ior_optab,
				expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
				GEN_INT (0x81c06000),
				NULL_RTX, 1, OPTAB_DIRECT));

  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  expand_binop (SImode, ior_optab,
				expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
				GEN_INT (0x8410a000),
				NULL_RTX, 1, OPTAB_DIRECT));

  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC)
    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
						     plus_constant (tramp, 8)))));
}
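
/* A standalone sketch (under #if 0) of how the first trampoline word is
   assembled: the opcode template 0x03000000 encodes "sethi ..., %g1" and
   the top 22 bits of the function address are or'd into its immediate
   field.  The sample address is hypothetical.  */
#if 0
#include <stdio.h>

int
main ()
{
  unsigned long fnaddr = 0x12345678UL;	/* hypothetical code address */
  unsigned long word0 = 0x03000000UL | (fnaddr >> 10);

  printf ("0x%08lx\t! sethi %%hi(0x%08lx), %%g1\n", word0, fnaddr);
  return 0;
}
#endif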

/* The 64 bit version is simpler because it makes more sense to load the
   values as "immediate" data out of the trampoline.  It's also easier since
   we can read the PC without clobbering a register.  */

void
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     0, VOIDmode, 1, tramp, Pmode);
#endif

  /*
	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  emit_move_insn (gen_rtx_MEM (SImode, tramp),
		  GEN_INT (0x83414000));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  GEN_INT (0xca586018));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  GEN_INT (0x81c14000));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  GEN_INT (0xca586010));
  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
  emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));

  if (sparc_cpu != PROCESSOR_ULTRASPARC)
    emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
						     plus_constant (tramp, 8)))));
}

/* Subroutines to support a flat (single) register window calling
   convention.  */

/* Single-register window sparc stack frames look like:

	     Before call		    After call
	+-----------------------+	+-----------------------+
   high	|			|	|			|
   mem	|  caller's temps.	|	|  caller's temps.	|
	|			|	|			|
	+-----------------------+	+-----------------------+
	|			|	|			|
	|  arguments on stack.	|	|  arguments on stack.	|
	|			|	|			|
	+-----------------------+FP+92->+-----------------------+
	|  6 words to save	|	|  6 words to save	|
	|  arguments passed	|	|  arguments passed	|
	|  in registers, even	|	|  in registers, even	|
	|  if not passed.	|	|  if not passed.	|
 SP+68->+-----------------------+FP+68->+-----------------------+
	|  1 word struct addr	|	|  1 word struct addr	|
	+-----------------------+FP+64->+-----------------------+
	|			|	|			|
	| 16 word reg save area	|	| 16 word reg save area	|
	|			|	|			|
    SP->+-----------------------+   FP->+-----------------------+
					|  4 word area for	|
					|  fp/alu reg moves	|
				 FP-16->+-----------------------+
					|			|
					|  local variables	|
					|			|
					+-----------------------+
					|			|
					|  fp register save	|
					|			|
					+-----------------------+
					|			|
					|  gp register save	|
					|			|
					+-----------------------+
					|			|
					|  alloca allocations	|
					|			|
					+-----------------------+
					|			|
					|  arguments on stack	|
					|			|
				 SP+92->+-----------------------+
					|  6 words to save	|
					|  arguments passed	|
					|  in registers, even	|
   low					|  if not passed.	|
   memory			 SP+68->+-----------------------+
					|  1 word struct addr	|
				 SP+64->+-----------------------+
					|			|
					| 16 word reg save area	|
					|			|
				    SP->+-----------------------+  */

/* Structure to be filled in by sparc_flat_compute_frame_size with register
   save masks, and offsets for the current function.  */

struct sparc_frame_info
{
  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # bytes of extra gunk.  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers.  */
  unsigned long fmask;		/* Mask of saved fp registers.  */
  unsigned long reg_offset;	/* Offset from new sp to store regs.  */
  int initialized;		/* Nonzero if frame size already calculated.  */
};

/* Current frame information calculated by sparc_flat_compute_frame_size.  */
struct sparc_frame_info current_frame_info;

/* Zero structure to initialize current_frame_info.  */
struct sparc_frame_info zero_frame_info;

/* Tell prologue and epilogue if register REGNO should be saved / restored.  */

#define RETURN_ADDR_REGNUM 15
#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))

#define MUST_SAVE_REGISTER(regno) \
 ((regs_ever_live[regno] && !call_used_regs[regno])		\
  || (regno == FRAME_POINTER_REGNUM && frame_pointer_needed)	\
  || (regno == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))

/* Return the bytes needed to compute the frame pointer from the current
   stack pointer.  */

unsigned long
sparc_flat_compute_frame_size (size)
     int size;			/* # of var. bytes allocated.  */
{
  int regno;
  unsigned long total_size;	/* # bytes that the entire frame takes up.  */
  unsigned long var_size;	/* # bytes that variables take up.  */
  unsigned long args_size;	/* # bytes that outgoing arguments take up.  */
  unsigned long extra_size;	/* # extra bytes.  */
  unsigned int  gp_reg_size;	/* # bytes needed to store gp regs.  */
  unsigned int  fp_reg_size;	/* # bytes needed to store fp regs.  */
  unsigned long gmask;		/* Mask of saved gp registers.  */
  unsigned long fmask;		/* Mask of saved fp registers.  */
  unsigned long reg_offset;	/* Offset to register save area.  */
  int need_aligned_p;		/* 1 if need the save area 8 byte aligned.  */

  /* This is the size of the 16 word reg save area, 1 word struct addr
     area, and 4 word fp/alu register copy area.  */
  extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET (0);
  var_size = size;
  gp_reg_size = 0;
  fp_reg_size = 0;
  gmask = 0;
  fmask = 0;
  reg_offset = 0;
  need_aligned_p = 0;

  args_size = 0;
  if (!leaf_function_p ())
    {
      /* Also include the size needed for the 6 parameter registers.  */
      args_size = current_function_outgoing_args_size + 24;
    }
  total_size = var_size + args_size;

  /* Calculate space needed for gp registers.  */
  for (regno = 1; regno <= 31; regno++)
    {
      if (MUST_SAVE_REGISTER (regno))
	{
	  /* If we need to save two regs in a row, ensure there's room to bump
	     up the address to align it to a doubleword boundary.  */
	  if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
	    {
	      if (gp_reg_size % 8 != 0)
		gp_reg_size += 4;
	      gp_reg_size += 2 * UNITS_PER_WORD;
	      gmask |= 3 << regno;
	      regno++;
	      need_aligned_p = 1;
	    }
	  else
	    {
	      gp_reg_size += UNITS_PER_WORD;
	      gmask |= 1 << regno;
	    }
	}
    }

  /* Calculate space needed for fp registers.  */
  for (regno = 32; regno <= 63; regno++)
    {
      if (regs_ever_live[regno] && !call_used_regs[regno])
	{
	  fp_reg_size += UNITS_PER_WORD;
	  fmask |= 1 << (regno - 32);
	}
    }

  if (gmask || fmask)
    {
      int n;
      reg_offset = FIRST_PARM_OFFSET (0) + args_size;
      /* Ensure save area is 8 byte aligned if we need it.  */
      n = reg_offset % 8;
      if (need_aligned_p && n != 0)
	{
	  total_size += 8 - n;
	  reg_offset += 8 - n;
	}
      total_size += gp_reg_size + fp_reg_size;
    }

  /* If we must allocate a stack frame at all, we must also allocate
     room for register window spillage, so as to be binary compatible
     with libraries and operating systems that do not use -mflat.  */
  if (total_size > 0)
    total_size += extra_size;
  else
    extra_size = 0;

  total_size = SPARC_STACK_ALIGN (total_size);

  /* Save other computed information.  */
  current_frame_info.total_size  = total_size;
  current_frame_info.var_size    = var_size;
  current_frame_info.args_size   = args_size;
  current_frame_info.extra_size  = extra_size;
  current_frame_info.gp_reg_size = gp_reg_size;
  current_frame_info.fp_reg_size = fp_reg_size;
  current_frame_info.gmask       = gmask;
  current_frame_info.fmask       = fmask;
  current_frame_info.reg_offset  = reg_offset;
  current_frame_info.initialized = reload_completed;

  /* Ok, we're done.  */
  return total_size;
}
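
/* A standalone sketch (under #if 0) of the pairing logic above: an even
   register followed by a saved odd register is stored as one doubleword,
   after padding the running size up to an 8 byte boundary.  */
#if 0
#include <stdio.h>

int
main ()
{
  int units_per_word = 4;
  int gp_reg_size = 4;		/* one word already used: misaligned */

  /* Pad to a doubleword boundary, then book both regs of the pair.  */
  if (gp_reg_size % 8 != 0)
    gp_reg_size += 4;
  gp_reg_size += 2 * units_per_word;

  printf ("%d bytes\n", gp_reg_size);	/* 16: 4 used + 4 pad + 8 pair */
  return 0;
}
#endif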

/* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
   OFFSET.

   BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
   appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
   [BASE_REG+OFFSET] will always be a valid address.

   WORD_OP is either "st" for save, "ld" for restore.
   DOUBLEWORD_OP is either "std" for save, "ldd" for restore.  */

void
sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
			 doubleword_op, base_offset)
     FILE *file;
     char *base_reg;
     unsigned int offset;
     unsigned long gmask;
     unsigned long fmask;
     char *word_op;
     char *doubleword_op;
     unsigned long base_offset;
{
  int regno;

  if (gmask == 0 && fmask == 0)
    return;

  /* Save registers starting from high to low.  We've already saved the
     previous frame pointer and previous return address for the debugger's
     sake.  The debugger allows us to not need a nop in the epilogue if at
     least one register is reloaded in addition to the return address.  */
5759
5760 if (gmask)
5761 {
5762 for (regno = 1; regno <= 31; regno++)
5763 {
5764 if ((gmask & (1L << regno)) != 0)
5765 {
5766 if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
5767 {
5768 /* We can save two registers in a row. If we're not at a
5769 double word boundary, move to one.
5770 sparc_flat_compute_frame_size ensures there's room to do
5771 this. */
5772 if (offset % 8 != 0)
5773 offset += UNITS_PER_WORD;
5774
5775 if (word_op[0] == 's')
5776 {
5777 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5778 doubleword_op, reg_names[regno],
5779 base_reg, offset);
5780 if (dwarf2out_do_frame ())
5781 {
5782 char *l = dwarf2out_cfi_label ();
5783 dwarf2out_reg_save (l, regno, offset + base_offset);
5784 dwarf2out_reg_save
5785 (l, regno+1, offset+base_offset + UNITS_PER_WORD);
5786 }
5787 }
5788 else
5789 fprintf (file, "\t%s\t[%s+%d], %s\n",
5790 doubleword_op, base_reg, offset,
5791 reg_names[regno]);
5792
5793 offset += 2 * UNITS_PER_WORD;
5794 regno++;
5795 }
5796 else
5797 {
5798 if (word_op[0] == 's')
5799 {
5800 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5801 word_op, reg_names[regno],
5802 base_reg, offset);
5803 if (dwarf2out_do_frame ())
5804 dwarf2out_reg_save ("", regno, offset + base_offset);
5805 }
5806 else
5807 fprintf (file, "\t%s\t[%s+%d], %s\n",
5808 word_op, base_reg, offset, reg_names[regno]);
5809
5810 offset += UNITS_PER_WORD;
5811 }
5812 }
5813 }
5814 }
5815
5816 if (fmask)
5817 {
5818 for (regno = 32; regno <= 63; regno++)
5819 {
5820 if ((fmask & (1L << (regno - 32))) != 0)
5821 {
5822 if (word_op[0] == 's')
5823 {
5824 fprintf (file, "\t%s\t%s, [%s+%d]\n",
5825 word_op, reg_names[regno],
5826 base_reg, offset);
5827 if (dwarf2out_do_frame ())
5828 dwarf2out_reg_save ("", regno, offset + base_offset);
5829 }
5830 else
5831 fprintf (file, "\t%s\t[%s+%d], %s\n",
5832 word_op, base_reg, offset, reg_names[regno]);
5833
5834 offset += UNITS_PER_WORD;
5835 }
5836 }
5837 }
5838}
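
/* For illustration, a hypothetical call with BASE_REG "%sp", OFFSET 72,
   WORD_OP "st", DOUBLEWORD_OP "std" and a GMASK selecting %l0, %l1 and
   %l3 makes the loop above emit

	std	%l0, [%sp+72]
	st	%l3, [%sp+80]

   since %l0/%l1 form an even/odd pair at an 8 byte aligned offset
   while %l3 stands alone.  */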
5839
5840/* Set up the stack and frame (if desired) for the function. */
5841
5842void
5843sparc_flat_output_function_prologue (file, size)
5844 FILE *file;
5845 int size;
5846{
5847 char *sp_str = reg_names[STACK_POINTER_REGNUM];
5848 unsigned long gmask = current_frame_info.gmask;
5849
5850 /* This is only for the human reader. */
5851 fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
5852 fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
5853 ASM_COMMENT_START,
5854 current_frame_info.var_size,
5855 current_frame_info.gp_reg_size / 4,
5856 current_frame_info.fp_reg_size / 4,
5857 current_function_outgoing_args_size,
5858 current_frame_info.extra_size);
5859
5860 size = SPARC_STACK_ALIGN (size);
5861 size = (! current_frame_info.initialized
5862 ? sparc_flat_compute_frame_size (size)
5863 : current_frame_info.total_size);
5864
5865 /* These cases shouldn't happen. Catch them now. */
5866 if (size == 0 && (gmask || current_frame_info.fmask))
5867 abort ();
5868
5869 /* Allocate our stack frame by decrementing %sp.
5870 At present, the only algorithm gdb can use to determine if this is a
5871 flat frame is if we always set %i7 if we set %sp. This can be optimized
5872 in the future by putting in some sort of debugging information that says
5873 this is a `flat' function. However, there is still the case of debugging
5874 code without such debugging information (including cases where most fns
5875 have such info, but there is one that doesn't). So, always do this now
5876 so we don't get a lot of code out there that gdb can't handle.
5877   If the frame pointer isn't needed then that's OK - gdb won't be able to
5878 distinguish us from a non-flat function but there won't (and shouldn't)
5879 be any differences anyway. The return pc is saved (if necessary) right
5880 after %i7 so gdb won't have to look too far to find it. */
5881 if (size > 0)
5882 {
5883 unsigned int reg_offset = current_frame_info.reg_offset;
5884 char *fp_str = reg_names[FRAME_POINTER_REGNUM];
5885 const char *t1_str = "%g1";
5886
5887 /* Things get a little tricky if local variables take up more than ~4096
5888 bytes and outgoing arguments take up more than ~4096 bytes. When that
5889 happens, the register save area can't be accessed from either end of
5890 the frame. Handle this by decrementing %sp to the start of the gp
5891 register save area, save the regs, update %i7, and then set %sp to its
5892 final value. Given that we only have one scratch register to play
5893 with it is the cheapest solution, and it helps gdb out as it won't
5894 slow down recognition of flat functions.
5895 Don't change the order of insns emitted here without checking with
5896 the gdb folk first. */
5897
5898 /* Is the entire register save area offsettable from %sp? */
5899 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
5900 {
5901 if (size <= 4096)
5902 {
5903 fprintf (file, "\tadd\t%s, %d, %s\n",
5904 sp_str, -size, sp_str);
5905 if (gmask & FRAME_POINTER_MASK)
5906 {
5907 fprintf (file, "\tst\t%s, [%s+%d]\n",
5908 fp_str, sp_str, reg_offset);
5909 fprintf (file, "\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5910 sp_str, -size, fp_str, ASM_COMMENT_START);
5911 reg_offset += 4;
5912 }
5913 }
5914 else
5915 {
5916 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5917 size, t1_str, sp_str, t1_str, sp_str);
5918 if (gmask & FRAME_POINTER_MASK)
5919 {
5920 fprintf (file, "\tst\t%s, [%s+%d]\n",
5921 fp_str, sp_str, reg_offset);
5922 fprintf (file, "\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5923 sp_str, t1_str, fp_str, ASM_COMMENT_START);
5924 reg_offset += 4;
5925 }
5926 }
5927 if (dwarf2out_do_frame ())
5928 {
5929 char *l = dwarf2out_cfi_label ();
5930 if (gmask & FRAME_POINTER_MASK)
5931 {
5932 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5933 reg_offset - 4 - size);
5934 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5935 }
5936 else
5937 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size);
5938 }
5939 if (gmask & RETURN_ADDR_MASK)
5940 {
5941 fprintf (file, "\tst\t%s, [%s+%d]\n",
5942 reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
5943 if (dwarf2out_do_frame ())
5944 dwarf2out_return_save ("", reg_offset - size);
5945 reg_offset += 4;
5946 }
5947 sparc_flat_save_restore (file, sp_str, reg_offset,
5948 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
5949 current_frame_info.fmask,
5950 "st", "std", -size);
5951 }
5952 else
5953 {
5954 /* Subtract %sp in two steps, but make sure there is always a
5955 64 byte register save area, and %sp is properly aligned. */
5956 /* Amount to decrement %sp by, the first time. */
5957 unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
5958 /* Offset to register save area from %sp. */
5959 unsigned int offset = size1 - (size - reg_offset);
5960
5961 if (size1 <= 4096)
5962 {
5963 fprintf (file, "\tadd\t%s, %d, %s\n",
5964 sp_str, -size1, sp_str);
5965 if (gmask & FRAME_POINTER_MASK)
5966 {
5967 fprintf (file, "\tst\t%s, [%s+%d]\n\tsub\t%s, %d, %s\t%s# set up frame pointer\n",
5968 fp_str, sp_str, offset, sp_str, -size1, fp_str,
5969 ASM_COMMENT_START);
5970 offset += 4;
5971 }
5972 }
5973 else
5974 {
5975 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
5976 size1, t1_str, sp_str, t1_str, sp_str);
5977 if (gmask & FRAME_POINTER_MASK)
5978 {
5979 fprintf (file, "\tst\t%s, [%s+%d]\n\tadd\t%s, %s, %s\t%s# set up frame pointer\n",
5980 fp_str, sp_str, offset, sp_str, t1_str, fp_str,
5981 ASM_COMMENT_START);
5982 offset += 4;
5983 }
5984 }
5985 if (dwarf2out_do_frame ())
5986 {
5987 char *l = dwarf2out_cfi_label ();
5988 if (gmask & FRAME_POINTER_MASK)
5989 {
5990 dwarf2out_reg_save (l, FRAME_POINTER_REGNUM,
5991 offset - 4 - size1);
5992 dwarf2out_def_cfa (l, FRAME_POINTER_REGNUM, 0);
5993 }
5994 else
5995 dwarf2out_def_cfa (l, STACK_POINTER_REGNUM, size1);
5996 }
5997 if (gmask & RETURN_ADDR_MASK)
5998 {
5999 fprintf (file, "\tst\t%s, [%s+%d]\n",
6000 reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
6001 if (dwarf2out_do_frame ())
6002 /* offset - size1 == reg_offset - size
6003 if reg_offset were updated above like offset. */
6004 dwarf2out_return_save ("", offset - size1);
6005 offset += 4;
6006 }
6007 sparc_flat_save_restore (file, sp_str, offset,
6008 gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6009 current_frame_info.fmask,
6010 "st", "std", -size1);
6011 fprintf (file, "\tset\t%d, %s\n\tsub\t%s, %s, %s\n",
6012 size - size1, t1_str, sp_str, t1_str, sp_str);
6013 if (dwarf2out_do_frame ())
6014 if (! (gmask & FRAME_POINTER_MASK))
6015 dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, size);
6016 }
6017 }
6018
6019 fprintf (file, "\t%s#PROLOGUE# 1\n", ASM_COMMENT_START);
6020}
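
/* For illustration, with a hypothetical frame of total_size 96,
   reg_offset 68, %fp and the return address both saved, and
   ASM_COMMENT_START assumed to be "!", the small frame path above
   emits

	add	%sp, -96, %sp
	st	%fp, [%sp+68]
	sub	%sp, -96, %fp	!# set up frame pointer
	st	%o7, [%sp+72]

   followed by the sparc_flat_save_restore sequence for the remaining
   registers.  */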
6021
6022/* Do any necessary cleanup after a function to restore stack, frame,
6023 and regs. */
6024
6025void
6026sparc_flat_output_function_epilogue (file, size)
6027 FILE *file;
6028 int size;
6029{
6030 rtx epilogue_delay = current_function_epilogue_delay_list;
6031 int noepilogue = FALSE;
6032
6033 /* This is only for the human reader. */
6034 fprintf (file, "\t%s#EPILOGUE#\n", ASM_COMMENT_START);
6035
6036  /* The epilogue does not depend on any registers but the stack
6037     registers, so we assume that if we have 1 pending nop, it can be
6038     ignored, and if we have 2 they must be filled (2 nops occur for
6039     integer multiply and divide).  */
6040
6041 size = SPARC_STACK_ALIGN (size);
6042 size = (!current_frame_info.initialized
6043 ? sparc_flat_compute_frame_size (size)
6044 : current_frame_info.total_size);
6045
6046 if (size == 0 && epilogue_delay == 0)
6047 {
6048 rtx insn = get_last_insn ();
6049
6050 /* If the last insn was a BARRIER, we don't have to write any code
6051 because a jump (aka return) was put there. */
6052 if (GET_CODE (insn) == NOTE)
6053 insn = prev_nonnote_insn (insn);
6054 if (insn && GET_CODE (insn) == BARRIER)
6055 noepilogue = TRUE;
6056 }
6057
6058 if (!noepilogue)
6059 {
6060 unsigned int reg_offset = current_frame_info.reg_offset;
6061 unsigned int size1;
6062 char *sp_str = reg_names[STACK_POINTER_REGNUM];
6063 char *fp_str = reg_names[FRAME_POINTER_REGNUM];
6064 const char *t1_str = "%g1";
6065
6066      /* In the reload sequence, we don't need to fill the load delay
6067	 slots for most of the loads; also see if we can fill the final
6068	 delay slot if it is not otherwise filled by the reload sequence.  */
6069
6070 if (size > 4095)
6071 fprintf (file, "\tset\t%d, %s\n", size, t1_str);
6072
6073 if (frame_pointer_needed)
6074 {
6075 if (size > 4095)
6076 fprintf (file,"\tsub\t%s, %s, %s\t\t%s# sp not trusted here\n",
6077 fp_str, t1_str, sp_str, ASM_COMMENT_START);
6078 else
6079 fprintf (file,"\tsub\t%s, %d, %s\t\t%s# sp not trusted here\n",
6080 fp_str, size, sp_str, ASM_COMMENT_START);
6081 }
6082
6083 /* Is the entire register save area offsettable from %sp? */
6084 if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
6085 {
6086 size1 = 0;
6087 }
6088 else
6089 {
6090 /* Restore %sp in two steps, but make sure there is always a
6091 64 byte register save area, and %sp is properly aligned. */
6092 /* Amount to increment %sp by, the first time. */
6093 size1 = ((reg_offset - 64 - 16) + 15) & -16;
6094 /* Offset to register save area from %sp. */
6095 reg_offset = size1 - reg_offset;
6096
6097 fprintf (file, "\tset\t%d, %s\n\tadd\t%s, %s, %s\n",
6098 size1, t1_str, sp_str, t1_str, sp_str);
6099 }
6100
6101 /* We must restore the frame pointer and return address reg first
6102 because they are treated specially by the prologue output code. */
6103 if (current_frame_info.gmask & FRAME_POINTER_MASK)
6104 {
6105 fprintf (file, "\tld\t[%s+%d], %s\n",
6106 sp_str, reg_offset, fp_str);
6107 reg_offset += 4;
6108 }
6109 if (current_frame_info.gmask & RETURN_ADDR_MASK)
6110 {
6111 fprintf (file, "\tld\t[%s+%d], %s\n",
6112 sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
6113 reg_offset += 4;
6114 }
6115
6116 /* Restore any remaining saved registers. */
6117 sparc_flat_save_restore (file, sp_str, reg_offset,
6118 current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
6119 current_frame_info.fmask,
6120 "ld", "ldd", 0);
6121
6122 /* If we had to increment %sp in two steps, record it so the second
6123 restoration in the epilogue finishes up. */
6124 if (size1 > 0)
6125 {
6126 size -= size1;
6127 if (size > 4095)
6128 fprintf (file, "\tset\t%d, %s\n",
6129 size, t1_str);
6130 }
6131
6132 if (current_function_returns_struct)
6133 fprintf (file, "\tjmp\t%%o7+12\n");
6134 else
6135 fprintf (file, "\tretl\n");
6136
6137 /* If the only register saved is the return address, we need a
6138 nop, unless we have an instruction to put into it. Otherwise
6139 we don't since reloading multiple registers doesn't reference
6140 the register being loaded. */
6141
6142 if (epilogue_delay)
6143 {
6144 if (size)
6145 abort ();
6146 final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
6147 }
6148
6149 else if (size > 4095)
6150 fprintf (file, "\tadd\t%s, %s, %s\n", sp_str, t1_str, sp_str);
6151
6152 else if (size > 0)
6153 fprintf (file, "\tadd\t%s, %d, %s\n", sp_str, size, sp_str);
6154
6155 else
6156 fprintf (file, "\tnop\n");
6157 }
6158
6159 /* Reset state info for each function. */
6160 current_frame_info = zero_frame_info;
6161
6162 sparc_output_deferred_case_vectors ();
6163}
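
/* For illustration, undoing the hypothetical 96 byte frame from the
   prologue example above (frame pointer in use, only %fp and %o7 to
   restore, ASM_COMMENT_START assumed to be "!") gives

	sub	%fp, 96, %sp		!# sp not trusted here
	ld	[%sp+68], %fp
	ld	[%sp+72], %o7
	retl
	add	%sp, 96, %sp

   with the final %sp adjustment sitting in the retl delay slot.  */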
6164
6165/* Define the number of delay slots needed for the function epilogue.
6166
6167 On the sparc, we need a slot if either no stack has been allocated,
6168 or the only register saved is the return register. */
6169
6170int
6171sparc_flat_epilogue_delay_slots ()
6172{
6173 if (!current_frame_info.initialized)
6174 (void) sparc_flat_compute_frame_size (get_frame_size ());
6175
6176 if (current_frame_info.total_size == 0)
6177 return 1;
6178
6179 return 0;
6180}
6181
6182/* Return true if TRIAL is a valid insn for the epilogue delay slot.
6183 Any single length instruction which doesn't reference the stack or frame
6184 pointer is OK. */
6185
6186int
6187sparc_flat_eligible_for_epilogue_delay (trial, slot)
6188 rtx trial;
6189 int slot ATTRIBUTE_UNUSED;
6190{
6191 rtx pat = PATTERN (trial);
6192
6193 if (get_attr_length (trial) != 1)
6194 return 0;
6195
6196 /* If %g0 is live, there are lots of things we can't handle.
6197 Rather than trying to find them all now, let's punt and only
6198 optimize things as necessary. */
6199 if (TARGET_LIVE_G0)
6200 return 0;
6201
6202 if (! reg_mentioned_p (stack_pointer_rtx, pat)
6203 && ! reg_mentioned_p (frame_pointer_rtx, pat))
6204 return 1;
6205
6206 return 0;
6207}
6208
6209/* Adjust the cost of a scheduling dependency. Return the new cost of
6210 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6211
6212static int
6213supersparc_adjust_cost (insn, link, dep_insn, cost)
6214 rtx insn;
6215 rtx link;
6216 rtx dep_insn;
6217 int cost;
6218{
6219 enum attr_type insn_type;
6220
6221 if (! recog_memoized (insn))
6222 return 0;
6223
6224 insn_type = get_attr_type (insn);
6225
6226 if (REG_NOTE_KIND (link) == 0)
6227 {
6228 /* Data dependency; DEP_INSN writes a register that INSN reads some
6229 cycles later. */
6230
6231      /* If a load, then the dependence must be on the memory address;
6232	 add an extra "cycle".  Note that the cost could be two cycles
6233	 if the reg was written late in an instruction group; we cannot tell
6234	 here.  */
6235 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
6236 return cost + 3;
6237
6238 /* Get the delay only if the address of the store is the dependence. */
6239 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
6240 {
6241	  rtx pat = PATTERN (insn);
6242 rtx dep_pat = PATTERN (dep_insn);
6243
6244 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6245 return cost; /* This should not happen! */
6246
6247 /* The dependency between the two instructions was on the data that
6248 is being stored. Assume that this implies that the address of the
6249 store is not dependent. */
6250 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6251 return cost;
6252
6253 return cost + 3; /* An approximation. */
6254 }
6255
6256 /* A shift instruction cannot receive its data from an instruction
6257 in the same cycle; add a one cycle penalty. */
6258 if (insn_type == TYPE_SHIFT)
6259 return cost + 3; /* Split before cascade into shift. */
6260 }
6261 else
6262 {
6263 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
6264 INSN writes some cycles later. */
6265
6266 /* These are only significant for the fpu unit; writing a fp reg before
6267 the fpu has finished with it stalls the processor. */
6268
6269 /* Reusing an integer register causes no problems. */
6270 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6271 return 0;
6272 }
6273
6274 return cost;
6275}
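
/* For illustration of the data dependency case above: if DEP_INSN is
   "add %o1, %o2, %o3" and INSN is "ld [%o3], %o4" (hypothetical
   registers), the load depends on DEP_INSN through its address, so a
   base cost of 1 is adjusted to 1 + 3 = 4; had the dependency instead
   been on the data stored by a "st", the cost would be left alone.  */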
6276
6277static int
6278hypersparc_adjust_cost (insn, link, dep_insn, cost)
6279 rtx insn;
6280 rtx link;
6281 rtx dep_insn;
6282 int cost;
6283{
6284 enum attr_type insn_type, dep_type;
6285  rtx pat = PATTERN (insn);
6286 rtx dep_pat = PATTERN (dep_insn);
6287
6288 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6289 return cost;
6290
6291 insn_type = get_attr_type (insn);
6292 dep_type = get_attr_type (dep_insn);
6293
6294 switch (REG_NOTE_KIND (link))
6295 {
6296 case 0:
6297 /* Data dependency; DEP_INSN writes a register that INSN reads some
6298 cycles later. */
6299
6300 switch (insn_type)
6301 {
6302 case TYPE_STORE:
6303 case TYPE_FPSTORE:
6304 /* Get the delay iff the address of the store is the dependence. */
6305 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6306 return cost;
6307
6308 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6309 return cost;
6310 return cost + 3;
6311
6312 case TYPE_LOAD:
6313 case TYPE_SLOAD:
6314 case TYPE_FPLOAD:
6315      /* If a load, then the dependence must be on the memory address.  If
6316	 the addresses aren't equal, then it might be a false dependency.  */
6317 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6318 {
6319 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6320 || GET_CODE (SET_DEST (dep_pat)) != MEM
6321 || GET_CODE (SET_SRC (pat)) != MEM
6322 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
6323 XEXP (SET_SRC (pat), 0)))
6324 return cost + 2;
6325
6326 return cost + 8;
6327 }
6328 break;
6329
6330 case TYPE_BRANCH:
6331 /* Compare to branch latency is 0. There is no benefit from
6332 separating compare and branch. */
6333 if (dep_type == TYPE_COMPARE)
6334 return 0;
6335 /* Floating point compare to branch latency is less than
6336 compare to conditional move. */
6337 if (dep_type == TYPE_FPCMP)
6338 return cost - 1;
6339 break;
6340 default:
6341 break;
6342 }
6343 break;
6344
6345 case REG_DEP_ANTI:
6346 /* Anti-dependencies only penalize the fpu unit. */
6347 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
6348 return 0;
6349 break;
6350
6351 default:
6352 break;
6353 }
6354
6355 return cost;
6356}
6357
6358static int
6359ultrasparc_adjust_cost (insn, link, dep_insn, cost)
6360 rtx insn;
6361 rtx link;
6362 rtx dep_insn;
6363 int cost;
6364{
6365 enum attr_type insn_type, dep_type;
6366  rtx pat = PATTERN (insn);
6367 rtx dep_pat = PATTERN (dep_insn);
6368
6369 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
6370 return cost;
6371
6372 insn_type = get_attr_type (insn);
6373 dep_type = get_attr_type (dep_insn);
6374
6375  /* Nothing issues in parallel with integer multiplies, so
6376     mark as zero cost since the scheduler cannot do anything
6377     about it.  */
6378 if (insn_type == TYPE_IMUL)
6379 return 0;
6380
6381#define SLOW_FP(dep_type) \
6382(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
6383
6384 switch (REG_NOTE_KIND (link))
6385 {
6386 case 0:
6387 /* Data dependency; DEP_INSN writes a register that INSN reads some
6388 cycles later. */
6389
6390 if (dep_type == TYPE_CMOVE)
6391 {
6392 /* Instructions that read the result of conditional moves cannot
6393 be in the same group or the following group. */
6394 return cost + 1;
6395 }
6396
6397 switch (insn_type)
6398 {
6399 /* UltraSPARC can dual issue a store and an instruction setting
6400 the value stored, except for divide and square root. */
6401 case TYPE_FPSTORE:
6402 if (! SLOW_FP (dep_type))
6403 return 0;
6404 return cost;
6405
6406 case TYPE_STORE:
6407 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
6408 return cost;
6409
6410 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
6411 /* The dependency between the two instructions is on the data
6412 that is being stored. Assume that the address of the store
6413 is not also dependent. */
6414 return 0;
6415 return cost;
6416
6417 case TYPE_LOAD:
6418 case TYPE_SLOAD:
6419 case TYPE_FPLOAD:
6420 /* A load does not return data until at least 11 cycles after
6421 a store to the same location. 3 cycles are accounted for
6422 in the load latency; add the other 8 here. */
6423 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
6424 {
6425 /* If the addresses are not equal this may be a false
6426 dependency because pointer aliasing could not be
6427 determined. Add only 2 cycles in that case. 2 is
6428 an arbitrary compromise between 8, which would cause
6429 the scheduler to generate worse code elsewhere to
6430 compensate for a dependency which might not really
6431 exist, and 0. */
6432 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
6433 || GET_CODE (SET_SRC (pat)) != MEM
6434 || GET_CODE (SET_DEST (dep_pat)) != MEM
6435 || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
6436 XEXP (SET_DEST (dep_pat), 0)))
6437 return cost + 2;
6438
6439 return cost + 8;
6440 }
6441 return cost;
6442
6443 case TYPE_BRANCH:
6444 /* Compare to branch latency is 0. There is no benefit from
6445 separating compare and branch. */
6446 if (dep_type == TYPE_COMPARE)
6447 return 0;
6448 /* Floating point compare to branch latency is less than
6449 compare to conditional move. */
6450 if (dep_type == TYPE_FPCMP)
6451 return cost - 1;
6452 return cost;
6453
6454 case TYPE_FPCMOVE:
6455	  /* FMOVR class instructions cannot issue in the same cycle
6456	     or the cycle after an instruction which writes any
6457	     integer register.  Model this as cost 2 for dependent
6458	     instructions.  */
6459 if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
6460 || dep_type == TYPE_BINARY)
6461 && cost < 2)
6462 return 2;
6463	  /* Otherwise check as for integer conditional moves; fall through.  */
6464
6465 case TYPE_CMOVE:
6466 /* Conditional moves involving integer registers wait until
6467 3 cycles after loads return data. The interlock applies
6468 to all loads, not just dependent loads, but that is hard
6469 to model. */
6470 if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
6471 return cost + 3;
6472 return cost;
6473
6474 default:
6475 break;
6476 }
6477 break;
6478
6479 case REG_DEP_ANTI:
6480 /* Divide and square root lock destination registers for full latency. */
6481 if (! SLOW_FP (dep_type))
6482 return 0;
6483 break;
6484
6485 case REG_DEP_OUTPUT:
6486 /* IEU and FPU instruction that have the same destination
6487 register cannot be grouped together. */
6488 return cost + 1;
6489
6490 default:
6491 break;
6492 }
6493
6494 /* Other costs not accounted for:
6495 - Single precision floating point loads lock the other half of
6496 the even/odd register pair.
6497 - Several hazards associated with ldd/std are ignored because these
6498 instructions are rarely generated for V9.
6499     - The floating point pipeline cannot have both a single and double
6500 precision operation active at the same time. Format conversions
6501 and graphics instructions are given honorary double precision status.
6502 - call and jmpl are always the first instruction in a group. */
6503
6504 return cost;
6505
6506#undef SLOW_FP
6507}
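
/* For illustration of the load-after-store case above: if DEP_INSN is
   "st %o1, [%o2]" and INSN is "ld [%o2], %o3" (hypothetical registers,
   provably equal addresses), 8 cycles are added on top of the load
   latency; if the addresses cannot be proven equal, only 2 are added
   as a compromise against penalizing possibly false dependencies.  */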
6508
6509int
6510sparc_adjust_cost (insn, link, dep, cost)
6511 rtx insn;
6512 rtx link;
6513 rtx dep;
6514 int cost;
6515{
6516 switch (sparc_cpu)
6517 {
6518 case PROCESSOR_SUPERSPARC:
6519 cost = supersparc_adjust_cost (insn, link, dep, cost);
6520 break;
6521 case PROCESSOR_HYPERSPARC:
6522 case PROCESSOR_SPARCLITE86X:
6523 cost = hypersparc_adjust_cost (insn, link, dep, cost);
6524 break;
6525 case PROCESSOR_ULTRASPARC:
6526 cost = ultrasparc_adjust_cost (insn, link, dep, cost);
6527 break;
6528 default:
6529 break;
6530 }
6531 return cost;
6532}
6533
6534/* This describes the state of the UltraSPARC pipeline during
6535 instruction scheduling. */
6536
6537#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
6538#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
6539
6540enum ultra_code { NONE=0, /* no insn at all */
6541 IEU0, /* shifts and conditional moves */
6542 IEU1, /* condition code setting insns, calls+jumps */
6543 IEUN, /* all other single cycle ieu insns */
6544 LSU, /* loads and stores */
6545 CTI, /* branches */
6546 FPM, /* FPU pipeline 1, multiplies and divides */
6547 FPA, /* FPU pipeline 2, all other operations */
6548 SINGLE, /* single issue instructions */
6549 NUM_ULTRA_CODES };
6550
6551static const char *ultra_code_names[NUM_ULTRA_CODES] = {
6552 "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
6553 "FPM", "FPA", "SINGLE" };
6554
6555struct ultrasparc_pipeline_state {
6556 /* The insns in this group. */
6557 rtx group[4];
6558
6559 /* The code for each insn. */
6560 enum ultra_code codes[4];
6561
6562 /* Which insns in this group have been committed by the
6563 scheduler. This is how we determine how many more
6564 can issue this cycle. */
6565 char commit[4];
6566
6567 /* How many insns in this group. */
6568 char group_size;
6569
6570 /* Mask of free slots still in this group. */
6571 char free_slot_mask;
6572
6573 /* The slotter uses the following to determine what other
6574 insn types can still make their way into this group. */
6575 char contents [NUM_ULTRA_CODES];
6576 char num_ieu_insns;
6577};
6578
6579#define ULTRA_NUM_HIST 8
6580static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
6581static int ultra_cur_hist;
6582static int ultra_cycles_elapsed;
6583
6584#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
6585
6586/* Given TYPE_MASK compute the ultra_code it has. */
6587static enum ultra_code
6588ultra_code_from_mask (type_mask)
6589 int type_mask;
6590{
6591 if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
6592 return IEU0;
6593 else if (type_mask & (TMASK (TYPE_COMPARE) |
6594 TMASK (TYPE_CALL) |
6595 TMASK (TYPE_UNCOND_BRANCH)))
6596 return IEU1;
6597 else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
6598 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
6599 return IEUN;
6600 else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
6601 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
6602 TMASK (TYPE_FPSTORE)))
6603 return LSU;
6604 else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
6605 TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
6606 return FPM;
6607 else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
6608 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
6609 return FPA;
6610 else if (type_mask & TMASK (TYPE_BRANCH))
6611 return CTI;
6612
6613 return SINGLE;
6614}
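
/* For example (hypothetical insns): a TYPE_SHIFT insn such as
   "sll %o1, 2, %o2" maps to IEU0, a TYPE_IALU insn such as
   "add %o1, %o2, %o3" maps to IEUN, and a TYPE_LOAD insn such as
   "ld [%o3], %o4" maps to LSU; anything not matched above must
   issue alone, i.e. SINGLE.  */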
6615
6616/* Check INSN (a conditional move) and make sure that its
6617   results are available at this cycle.  Return 1 if the
6618   results are in fact ready.  */
6619static int
6620ultra_cmove_results_ready_p (insn)
6621 rtx insn;
6622{
6623 struct ultrasparc_pipeline_state *up;
6624 int entry, slot;
6625
6626 /* If this got dispatched in the previous
6627 group, the results are not ready. */
6628 entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6629 up = &ultra_pipe_hist[entry];
6630 slot = 4;
6631 while (--slot >= 0)
6632 if (up->group[slot] == insn)
6633 return 0;
6634
6635 return 1;
6636}
6637
6638/* Walk backwards in pipeline history looking for FPU
6639 operations which use a mode different than FPMODE and
6640 will create a stall if an insn using FPMODE were to be
6641 dispatched this cycle. */
6642static int
6643ultra_fpmode_conflict_exists (fpmode)
6644 enum machine_mode fpmode;
6645{
6646 int hist_ent;
6647 int hist_lim;
6648
6649 hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
6650 if (ultra_cycles_elapsed < 4)
6651 hist_lim = ultra_cycles_elapsed;
6652 else
6653 hist_lim = 4;
6654 while (hist_lim > 0)
6655 {
6656 struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
6657 int slot = 4;
6658
6659 while (--slot >= 0)
6660 {
6661 rtx insn = up->group[slot];
6662 enum machine_mode this_mode;
6663 rtx pat;
6664
6665 if (! insn
6666 || GET_CODE (insn) != INSN
6667 || (pat = PATTERN (insn)) == 0
6668 || GET_CODE (pat) != SET)
6669 continue;
6670
6671 this_mode = GET_MODE (SET_DEST (pat));
6672 if ((this_mode != SFmode
6673 && this_mode != DFmode)
6674 || this_mode == fpmode)
6675 continue;
6676
6677 /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
6678	     we will get a stall.  Loads and stores are independent
6679	     of these rules.  */
6680 if (GET_CODE (SET_SRC (pat)) != ABS
6681 && GET_CODE (SET_SRC (pat)) != NEG
6682 && ((TMASK (get_attr_type (insn)) &
6683 (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
6684 TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
6685 TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
6686 return 1;
6687 }
6688 hist_lim--;
6689 hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
6690 }
6691
6692 /* No conflicts, safe to dispatch. */
6693 return 0;
6694}
6695
6696/* Find an instruction in LIST which has one of the
6697 type attributes enumerated in TYPE_MASK. START
6698 says where to begin the search.
6699
6700 NOTE: This scheme depends upon the fact that we
6701   have fewer than 32 distinct type attributes.  */
6702
6703static int ultra_types_avail;
6704
6705static rtx *
6706ultra_find_type (type_mask, list, start)
6707 int type_mask;
6708 rtx *list;
6709 int start;
6710{
6711 int i;
6712
6713 /* Short circuit if no such insn exists in the ready
6714 at the moment. */
6715 if ((type_mask & ultra_types_avail) == 0)
6716 return 0;
6717
6718 for (i = start; i >= 0; i--)
6719 {
6720 rtx insn = list[i];
6721
6722 if (recog_memoized (insn) >= 0
6723	&& (TMASK (get_attr_type (insn)) & type_mask))
6724 {
6725 enum machine_mode fpmode = SFmode;
6726 rtx pat = 0;
6727 int slot;
6728 int check_depend = 0;
6729 int check_fpmode_conflict = 0;
6730
6731 if (GET_CODE (insn) == INSN
6732	      && (pat = PATTERN (insn)) != 0
6733 && GET_CODE (pat) == SET
6734 && !(type_mask & (TMASK (TYPE_STORE) |
6735 TMASK (TYPE_FPSTORE))))
6736 {
6737 check_depend = 1;
6738 if (GET_MODE (SET_DEST (pat)) == SFmode
6739 || GET_MODE (SET_DEST (pat)) == DFmode)
6740 {
6741 fpmode = GET_MODE (SET_DEST (pat));
6742 check_fpmode_conflict = 1;
6743 }
6744 }
6745
6746 slot = 4;
6747	  while (--slot >= 0)
6748 {
6749 rtx slot_insn = ultra_pipe.group[slot];
6750 rtx slot_pat;
6751
6752 /* Already issued, bad dependency, or FPU
6753 mode conflict. */
6754 if (slot_insn != 0
6755 && (slot_pat = PATTERN (slot_insn)) != 0
6756 && ((insn == slot_insn)
6757 || (check_depend == 1
6758 && GET_CODE (slot_insn) == INSN
6759 && GET_CODE (slot_pat) == SET
6760 && ((GET_CODE (SET_DEST (slot_pat)) == REG
6761 && GET_CODE (SET_SRC (pat)) == REG
6762 && REGNO (SET_DEST (slot_pat)) ==
6763 REGNO (SET_SRC (pat)))
6764 || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
6765 && GET_CODE (SET_SRC (pat)) == SUBREG
6766 && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
6767 REGNO (SUBREG_REG (SET_SRC (pat)))
6768 && SUBREG_WORD (SET_DEST (slot_pat)) ==
6769 SUBREG_WORD (SET_SRC (pat)))))
6770 || (check_fpmode_conflict == 1
6771 && GET_CODE (slot_insn) == INSN
6772 && GET_CODE (slot_pat) == SET
6773 && (GET_MODE (SET_DEST (slot_pat)) == SFmode
6774 || GET_MODE (SET_DEST (slot_pat)) == DFmode)
6775 && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
6776 goto next;
6777 }
6778
6779 /* Check for peculiar result availability and dispatch
6780 interference situations. */
6781 if (pat != 0
6782 && ultra_cycles_elapsed > 0)
6783 {
6784 rtx link;
6785
6786 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6787 {
6788 rtx link_insn = XEXP (link, 0);
6789 if (GET_CODE (link_insn) == INSN
6790 && recog_memoized (link_insn) >= 0
6791 && (TMASK (get_attr_type (link_insn)) &
6792 (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
6793 && ! ultra_cmove_results_ready_p (link_insn))
6794 goto next;
6795 }
6796
6797 if (check_fpmode_conflict
6798 && ultra_fpmode_conflict_exists (fpmode))
6799 goto next;
6800 }
6801
6802 return &list[i];
6803 }
6804 next:
6805 ;
6806 }
6807 return 0;
6808}
6809
6810static void
6811ultra_build_types_avail (ready, n_ready)
6812 rtx *ready;
6813 int n_ready;
6814{
6815 int i = n_ready - 1;
6816
6817 ultra_types_avail = 0;
6818  while (i >= 0)
6819 {
6820 rtx insn = ready[i];
6821
6822 if (recog_memoized (insn) >= 0)
6823 ultra_types_avail |= TMASK (get_attr_type (insn));
6824
6825 i -= 1;
6826 }
6827}
6828
6829/* Place the insn pointed to by IP into the pipeline.
6830 Make element THIS of READY be that insn if it
6831 is not already. TYPE indicates the pipeline class
6832 this insn falls into. */
6833static void
6834ultra_schedule_insn (ip, ready, this, type)
6835 rtx *ip;
6836 rtx *ready;
6837 int this;
6838 enum ultra_code type;
6839{
6840 int pipe_slot;
6841 char mask = ultra_pipe.free_slot_mask;
6842
6843 /* Obtain free slot. */
6844 for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
6845 if ((mask & (1 << pipe_slot)) != 0)
6846 break;
6847 if (pipe_slot == 4)
6848 abort ();
6849
6850 /* In it goes, and it hasn't been committed yet. */
6851 ultra_pipe.group[pipe_slot] = *ip;
6852 ultra_pipe.codes[pipe_slot] = type;
6853 ultra_pipe.contents[type] = 1;
6854 if (UMASK (type) &
6855 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6856 ultra_pipe.num_ieu_insns += 1;
6857
6858 ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
6859 ultra_pipe.group_size += 1;
6860 ultra_pipe.commit[pipe_slot] = 0;
6861
6862 /* Update ready list. */
6863 if (ip != &ready[this])
6864 {
6865 rtx temp = *ip;
6866
6867 *ip = ready[this];
6868 ready[this] = temp;
6869 }
6870}
6871
6872/* Advance to the next pipeline group. */
6873static void
6874ultra_flush_pipeline ()
6875{
6876 ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
6877 ultra_cycles_elapsed += 1;
6878 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
6879 ultra_pipe.free_slot_mask = 0xf;
6880}
6881
6882static int ultra_reorder_called_this_block;
6883
6884/* Init our data structures for this current block. */
6885void
6886ultrasparc_sched_init (dump, sched_verbose)
6887 FILE *dump ATTRIBUTE_UNUSED;
6888 int sched_verbose ATTRIBUTE_UNUSED;
6889{
6890 bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
6891 ultra_cur_hist = 0;
6892 ultra_cycles_elapsed = 0;
6893 ultra_reorder_called_this_block = 0;
6894 ultra_pipe.free_slot_mask = 0xf;
6895}
6896
6897/* INSN has been scheduled, update pipeline commit state
6898 and return how many instructions are still to be
6899 scheduled in this group. */
6900int
6901ultrasparc_variable_issue (insn)
6902 rtx insn;
6903{
6904 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6905 int i, left_to_fire;
6906
6907 left_to_fire = 0;
6908 for (i = 0; i < 4; i++)
6909 {
6910 if (up->group[i] == 0)
6911 continue;
6912
6913 if (up->group[i] == insn)
6914 {
6915 up->commit[i] = 1;
6916 }
6917 else if (! up->commit[i])
6918 left_to_fire++;
6919 }
6920
6921 return left_to_fire;
6922}
6923
6924/* In actual_hazard_this_instance, we may have yanked some
6925 instructions from the ready list due to conflict cost
6926 adjustments. If so, and such an insn was in our pipeline
6927 group, remove it and update state. */
6928static void
6929ultra_rescan_pipeline_state (ready, n_ready)
6930 rtx *ready;
6931 int n_ready;
6932{
6933 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6934 int i;
6935
6936 for (i = 0; i < 4; i++)
6937 {
6938 rtx insn = up->group[i];
6939 int j;
6940
6941 if (! insn)
6942 continue;
6943
6944 /* If it has been committed, then it was removed from
6945 the ready list because it was actually scheduled,
6946 and that is not the case we are searching for here. */
6947 if (up->commit[i] != 0)
6948 continue;
6949
6950 for (j = n_ready - 1; j >= 0; j--)
6951 if (ready[j] == insn)
6952 break;
6953
6954 /* If we didn't find it, toss it. */
6955 if (j < 0)
6956 {
6957 enum ultra_code ucode = up->codes[i];
6958
6959 up->group[i] = 0;
6960 up->codes[i] = NONE;
6961 up->contents[ucode] = 0;
6962 if (UMASK (ucode) &
6963 (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
6964 up->num_ieu_insns -= 1;
6965
6966 up->free_slot_mask |= (1 << i);
6967 up->group_size -= 1;
6968 up->commit[i] = 0;
6969 }
6970 }
6971}
6972
6973void
6974ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
6975 FILE *dump;
6976 int sched_verbose;
6977 rtx *ready;
6978 int n_ready;
6979{
6980 struct ultrasparc_pipeline_state *up = &ultra_pipe;
6981 int i, this_insn;
6982
6983 /* We get called once unnecessarily per block of insns
6984 scheduled. */
6985 if (ultra_reorder_called_this_block == 0)
6986 {
6987 ultra_reorder_called_this_block = 1;
6988 return;
6989 }
6990
6991 if (sched_verbose)
6992 {
6993 int n;
6994
6995 fprintf (dump, "\n;;\tUltraSPARC Looking at [");
6996 for (n = n_ready - 1; n >= 0; n--)
6997 {
6998 rtx insn = ready[n];
6999 enum ultra_code ucode;
7000
7001 if (recog_memoized (insn) < 0)
7002 continue;
7003 ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
7004 if (n != 0)
7005 fprintf (dump, "%s(%d) ",
7006 ultra_code_names[ucode],
7007 INSN_UID (insn));
7008 else
7009 fprintf (dump, "%s(%d)",
7010 ultra_code_names[ucode],
7011 INSN_UID (insn));
7012 }
7013 fprintf (dump, "]\n");
7014 }
7015
7016 this_insn = n_ready - 1;
7017
7018 /* Skip over junk we don't understand. */
7019 while ((this_insn >= 0)
7020 && recog_memoized (ready[this_insn]) < 0)
7021 this_insn--;
7022
7023 ultra_build_types_avail (ready, this_insn + 1);
7024
7025 while (this_insn >= 0) {
7026 int old_group_size = up->group_size;
7027
7028 if (up->group_size != 0)
7029 {
7030 int num_committed;
7031
7032 num_committed = (up->commit[0] + up->commit[1] +
7033 up->commit[2] + up->commit[3]);
7034	  /* If nothing has been committed from our group, or all of
7035	     them have, clear out the (current cycle's) pipeline
7036	     state and start afresh.  */
7037 if (num_committed == 0
7038 || num_committed == up->group_size)
7039 {
7040 ultra_flush_pipeline ();
7041 up = &ultra_pipe;
7042 old_group_size = 0;
7043 }
7044 else
7045 {
7046 /* OK, some ready list insns got requeued and thus removed
7047 from the ready list. Account for this fact. */
7048 ultra_rescan_pipeline_state (ready, n_ready);
7049
7050 /* Something "changed", make this look like a newly
7051 formed group so the code at the end of the loop
7052 knows that progress was in fact made. */
7053 if (up->group_size != old_group_size)
7054 old_group_size = 0;
7055 }
7056 }
7057
7058 if (up->group_size == 0)
7059 {
7060 /* If the pipeline is (still) empty and we have any single
7061 group insns, get them out now as this is a good time. */
7062 rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
7063 TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
7064 TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
7065 ready, this_insn);
7066 if (ip)
7067 {
7068 ultra_schedule_insn (ip, ready, this_insn, SINGLE);
7069 break;
7070 }
7071
7072 /* If we are not in the process of emptying out the pipe, try to
7073	 obtain an instruction which must be the first in its group.  */
7074 ip = ultra_find_type ((TMASK (TYPE_CALL) |
7075 TMASK (TYPE_CALL_NO_DELAY_SLOT) |
7076 TMASK (TYPE_UNCOND_BRANCH)),
7077 ready, this_insn);
7078 if (ip)
7079 {
7080 ultra_schedule_insn (ip, ready, this_insn, IEU1);
7081 this_insn--;
7082 }
7083 else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
7084 TMASK (TYPE_FPDIVD) |
7085 TMASK (TYPE_FPSQRT)),
7086 ready, this_insn)) != 0)
7087 {
7088 ultra_schedule_insn (ip, ready, this_insn, FPM);
7089 this_insn--;
7090 }
7091 }
7092
7093 /* Try to fill the integer pipeline. First, look for an IEU0 specific
7094 operation. We can't do more IEU operations if the first 3 slots are
7095 all full or we have dispatched two IEU insns already. */
7096 if ((up->free_slot_mask & 0x7) != 0
7097 && up->num_ieu_insns < 2
7098 && up->contents[IEU0] == 0
7099 && up->contents[IEUN] == 0)
7100 {
7101	  rtx *ip = ultra_find_type (TMASK (TYPE_SHIFT), ready, this_insn);
7102 if (ip)
7103 {
7104 ultra_schedule_insn (ip, ready, this_insn, IEU0);
7105 this_insn--;
7106 }
7107 }
7108
7109 /* If we can, try to find an IEU1 specific or an unnamed
7110 IEU instruction. */
7111 if ((up->free_slot_mask & 0x7) != 0
7112 && up->num_ieu_insns < 2)
7113 {
7114 rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7115 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
7116 (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
7117 ready, this_insn);
7118 if (ip)
7119 {
7120 rtx insn = *ip;
7121
7122 ultra_schedule_insn (ip, ready, this_insn,
7123 (!up->contents[IEU1]
7124 && get_attr_type (insn) == TYPE_COMPARE)
7125 ? IEU1 : IEUN);
7126 this_insn--;
7127 }
7128 }
7129
7130 /* If only one IEU insn has been found, try to find another unnamed
7131 IEU operation or an IEU1 specific one. */
7132 if ((up->free_slot_mask & 0x7) != 0
7133 && up->num_ieu_insns < 2)
7134 {
7135 rtx *ip;
7136 int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
7137 TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
7138
7139 if (!up->contents[IEU1])
7140 tmask |= TMASK (TYPE_COMPARE);
7141 ip = ultra_find_type (tmask, ready, this_insn);
7142 if (ip)
7143 {
7144 rtx insn = *ip;
7145
7146 ultra_schedule_insn (ip, ready, this_insn,
7147 (!up->contents[IEU1]
7148 && get_attr_type (insn) == TYPE_COMPARE)
7149 ? IEU1 : IEUN);
7150 this_insn--;
7151 }
7152 }
7153
7154 /* Try for a load or store, but such an insn can only be issued
7155     if it is within one of the first 3 slots.  */
7156 if ((up->free_slot_mask & 0x7) != 0
7157 && up->contents[LSU] == 0)
7158 {
7159 rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
7160 TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
7161 TMASK (TYPE_FPSTORE)), ready, this_insn);
7162 if (ip)
7163 {
7164 ultra_schedule_insn (ip, ready, this_insn, LSU);
7165 this_insn--;
7166 }
7167 }
7168
7169 /* Now find FPU operations, first FPM class. But not divisions or
7170 square-roots because those will break the group up. Unlike all
7171 the previous types, these can go in any slot. */
7172 if (up->free_slot_mask != 0
7173 && up->contents[FPM] == 0)
7174 {
7175 rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
7176 if (ip)
7177 {
7178 ultra_schedule_insn (ip, ready, this_insn, FPM);
7179 this_insn--;
7180 }
7181 }
7182
7183 /* Continue on with FPA class if we have not filled the group already. */
7184 if (up->free_slot_mask != 0
7185 && up->contents[FPA] == 0)
7186 {
7187 rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
7188 TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
7189 ready, this_insn);
7190 if (ip)
7191 {
7192 ultra_schedule_insn (ip, ready, this_insn, FPA);
7193 this_insn--;
7194 }
7195 }
7196
7197 /* Finally, maybe stick a branch in here. */
7198 if (up->free_slot_mask != 0
7199 && up->contents[CTI] == 0)
7200 {
7201 rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
7202
7203 /* Try to slip in a branch only if it is one of the
7204 next 2 in the ready list. */
7205 if (ip && ((&ready[this_insn] - ip) < 2))
7206 {
7207 ultra_schedule_insn (ip, ready, this_insn, CTI);
7208 this_insn--;
7209 }
7210 }
7211
7212 up->group_size = 0;
7213 for (i = 0; i < 4; i++)
7214 if ((up->free_slot_mask & (1 << i)) == 0)
7215 up->group_size++;
7216
7217 /* See if we made any progress... */
7218 if (old_group_size != up->group_size)
7219 break;
7220
7221 /* Clean out the (current cycle's) pipeline state
7222 and try once more. If we placed no instructions
7223 into the pipeline at all, it means a real hard
7224 conflict exists with some earlier issued instruction
7225 so we must advance to the next cycle to clear it up. */
7226 if (up->group_size == 0)
7227 {
7228 ultra_flush_pipeline ();
7229 up = &ultra_pipe;
7230 }
7231 else
7232 {
7233 bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
7234 ultra_pipe.free_slot_mask = 0xf;
7235 }
7236 }
7237
7238 if (sched_verbose)
7239 {
7240 int n, gsize;
7241
7242 fprintf (dump, ";;\tUltraSPARC Launched [");
7243 gsize = up->group_size;
7244 for (n = 0; n < 4; n++)
7245 {
7246 rtx insn = up->group[n];
7247
7248 if (! insn)
7249 continue;
7250
7251 gsize -= 1;
7252 if (gsize != 0)
7253 fprintf (dump, "%s(%d) ",
7254 ultra_code_names[up->codes[n]],
7255 INSN_UID (insn));
7256 else
7257 fprintf (dump, "%s(%d)",
7258 ultra_code_names[up->codes[n]],
7259 INSN_UID (insn));
7260 }
7261 fprintf (dump, "]\n");
7262 }
7263}
7264
7265int
7266sparc_issue_rate ()
7267{
7268 switch (sparc_cpu)
7269 {
7270 default:
7271 return 1;
7272 case PROCESSOR_V9:
7273 /* Assume V9 processors are capable of at least dual-issue. */
7274 return 2;
7275 case PROCESSOR_SUPERSPARC:
7276 return 3;
7277 case PROCESSOR_HYPERSPARC:
7278 case PROCESSOR_SPARCLITE86X:
7279 return 2;
7280 case PROCESSOR_ULTRASPARC:
7281 return 4;
7282 }
7283}
7284
7285static int
7286set_extends (x, insn)
7287 rtx x, insn;
7288{
7289 register rtx pat = PATTERN (insn);
7290
7291 switch (GET_CODE (SET_SRC (pat)))
7292 {
7293 /* Load and some shift instructions zero extend. */
7294 case MEM:
7295 case ZERO_EXTEND:
7296 /* sethi clears the high bits */
7297 case HIGH:
7298    /* LO_SUM is used with sethi; sethi clears the high
7299       bits and the values used with lo_sum are positive.  */
7300 case LO_SUM:
7301 /* Store flag stores 0 or 1 */
7302 case LT: case LTU:
7303 case GT: case GTU:
7304 case LE: case LEU:
7305 case GE: case GEU:
7306 case EQ:
7307 case NE:
7308 return 1;
7309 case AND:
7310 {
7311 rtx op1 = XEXP (SET_SRC (pat), 1);
7312 if (GET_CODE (op1) == CONST_INT)
7313 return INTVAL (op1) >= 0;
7314 if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
7315 && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
7316 return 1;
7317 if (GET_CODE (op1) == REG
7318 && sparc_check_64 ((op1), insn) == 1)
7319 return 1;
7320 }
7321 case ASHIFT:
7322 case LSHIFTRT:
7323 return GET_MODE (SET_SRC (pat)) == SImode;
7324 /* Positive integers leave the high bits zero. */
7325 case CONST_DOUBLE:
7326 return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
7327 case CONST_INT:
7328 return ! (INTVAL (x) & 0x80000000);
7329 case ASHIFTRT:
7330 case SIGN_EXTEND:
7331 return - (GET_MODE (SET_SRC (pat)) == SImode);
7332 default:
7333 return 0;
7334 }
7335}
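
/* For illustration: a hypothetical insn "(set (reg:DI %o2)
   (and:DI (reg:DI %o1) (const_int 255)))" returns 1 from the AND case
   above because the mask is non-negative, a sign_extend source
   returns -1, and an unrecognized source returns 0.  */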
7336
7337/* We _ought_ to have only one kind per function, but... */
7338static rtx sparc_addr_diff_list;
7339static rtx sparc_addr_list;
7340
7341void
7342sparc_defer_case_vector (lab, vec, diff)
7343 rtx lab, vec;
7344 int diff;
7345{
7346 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7347 if (diff)
7348 sparc_addr_diff_list
7349 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7350 else
7351 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7352}
7353
7354static void
7355sparc_output_addr_vec (vec)
7356 rtx vec;
7357{
7358 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7359 int idx, vlen = XVECLEN (body, 0);
7360
7361#ifdef ASM_OUTPUT_ADDR_VEC_START
7362 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7363#endif
7364
7365#ifdef ASM_OUTPUT_CASE_LABEL
7366 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7367 NEXT_INSN (lab));
7368#else
7369 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7370#endif
7371
7372 for (idx = 0; idx < vlen; idx++)
7373 {
7374 ASM_OUTPUT_ADDR_VEC_ELT
7375 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7376 }
7377
7378#ifdef ASM_OUTPUT_ADDR_VEC_END
7379 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7380#endif
7381}
7382
7383static void
7384sparc_output_addr_diff_vec (vec)
7385 rtx vec;
7386{
7387 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7388 rtx base = XEXP (XEXP (body, 0), 0);
7389 int idx, vlen = XVECLEN (body, 1);
7390
7391#ifdef ASM_OUTPUT_ADDR_VEC_START
7392 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7393#endif
7394
7395#ifdef ASM_OUTPUT_CASE_LABEL
7396 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7397 NEXT_INSN (lab));
7398#else
7399 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7400#endif
7401
7402 for (idx = 0; idx < vlen; idx++)
7403 {
7404 ASM_OUTPUT_ADDR_DIFF_ELT
7405 (asm_out_file,
7406 body,
7407 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7408 CODE_LABEL_NUMBER (base));
7409 }
7410
7411#ifdef ASM_OUTPUT_ADDR_VEC_END
7412 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7413#endif
7414}
7415
7416static void
7417sparc_output_deferred_case_vectors ()
7418{
7419 rtx t;
7420 int align;
7421
7422 if (sparc_addr_list == NULL_RTX
7423 && sparc_addr_diff_list == NULL_RTX)
7424 return;
7425
7426 /* Align to cache line in the function's code section. */
7427 function_section (current_function_decl);
7428
7429 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7430 if (align > 0)
7431 ASM_OUTPUT_ALIGN (asm_out_file, align);
7432
7433 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7434 sparc_output_addr_vec (XEXP (t, 0));
7435 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7436 sparc_output_addr_diff_vec (XEXP (t, 0));
7437
7438 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7439}
7440
7441/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7442 unknown. Return 1 if the high bits are zero, -1 if the register is
7443 sign extended. */
7444int
7445sparc_check_64 (x, insn)
7446 rtx x, insn;
7447{
7448 /* If a register is set only once it is safe to ignore insns this
7449 code does not know how to handle. The loop will either recognize
7450 the single set and return the correct value or fail to recognize
7451 it and return 0. */
7452 int set_once = 0;
7453
7454 if (GET_CODE (x) == REG
7455 && flag_expensive_optimizations
7456 && REG_N_SETS (REGNO (x)) == 1)
7457 set_once = 1;
7458
7459 if (insn == 0)
7460 {
7461 if (set_once)
7462 insn = get_last_insn_anywhere ();
7463 else
7464 return 0;
7465 }
7466
7467 while ((insn = PREV_INSN (insn)))
7468 {
7469 switch (GET_CODE (insn))
7470 {
7471 case JUMP_INSN:
7472 case NOTE:
7473 break;
7474 case CODE_LABEL:
7475 case CALL_INSN:
7476 default:
7477 if (! set_once)
7478 return 0;
7479 break;
7480 case INSN:
7481 {
7482 rtx pat = PATTERN (insn);
7483 if (GET_CODE (pat) != SET)
7484 return 0;
7485 if (rtx_equal_p (x, SET_DEST (pat)))
7486 return set_extends (x, insn);
7487 if (reg_overlap_mentioned_p (SET_DEST (pat), x))
7488 return 0;
7489 }
7490 }
7491 }
7492 return 0;
7493}
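
/* Usage sketch (hypothetical caller): sparc_check_64 (op, insn) walks
   backwards from INSN looking for the SET of OP; if it finds, say,
   "(set (reg:SI %o0) (zero_extend:SI ...))" it returns 1 via
   set_extends, while reaching a CODE_LABEL or CALL_INSN first (for a
   register set more than once) makes it give up and return 0.  */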
7494
7495char *
7496sparc_v8plus_shift (operands, insn, opcode)
7497 rtx *operands;
7498 rtx insn;
7499 char *opcode;
7500{
7501 static char asm_code[60];
7502
7503 if (GET_CODE (operands[3]) == SCRATCH)
7504 operands[3] = operands[0];
7505 if (GET_CODE (operands[1]) == CONST_INT)
7506 {
7507 output_asm_insn ("mov %1,%3", operands);
7508 }
7509 else
7510 {
7511 output_asm_insn ("sllx %H1,32,%3", operands);
7512 if (sparc_check_64 (operands[1], insn) <= 0)
7513 output_asm_insn ("srl %L1,0,%L1", operands);
7514 output_asm_insn ("or %L1,%3,%3", operands);
7515 }
7516
7517  strcpy (asm_code, opcode);
7518 if (which_alternative != 2)
7519 return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
7520 else
7521 return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
7522}
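
/* For illustration, with OPCODE "srax", a register source and
   which_alternative != 2 (all choices here hypothetical), the routine
   above assembles the 64 bit value in the scratch register and then
   shifts it, emitting roughly

	sllx	%H1, 32, %3
	srl	%L1, 0, %L1	! only if the low word may hold garbage
	or	%L1, %3, %3
	srax	%0, %2, %L0
	srlx	%L0, 32, %H0

   where %H1, %L1 etc. are the operand letters later interpreted by
   output_asm_insn.  */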
7523
7524
7525/* Return 1 if DEST and SRC reference only global and `in' registers.  */
7526
7527int
7528sparc_return_peephole_ok (dest, src)
7529 rtx dest, src;
7530{
7531 if (! TARGET_V9)
7532 return 0;
7533 if (current_function_uses_only_leaf_regs)
7534 return 0;
7535 if (GET_CODE (src) != CONST_INT
7536 && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
7537 return 0;
7538 return IN_OR_GLOBAL_P (dest);
7539}
7540
7541/* Output assembler code to FILE to increment profiler label # LABELNO
7542 for profiling a function entry.
7543
7544 32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
7545 during profiling so we need to save/restore it around the call to mcount.
7546 We're guaranteed that a save has just been done, and we use the space
7547 allocated for intreg/fpreg value passing. */
7548
7549void
7550sparc_function_profiler (file, labelno)
7551 FILE *file;
7552 int labelno;
7553{
7554 char buf[32];
7555 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7556
7557 if (! TARGET_ARCH64)
7558 fputs ("\tst\t%g2,[%fp-4]\n", file);
7559
7560 fputs ("\tsethi\t%hi(", file);
7561 assemble_name (file, buf);
7562 fputs ("),%o0\n", file);
7563
7564 fputs ("\tcall\t", file);
7565 assemble_name (file, MCOUNT_FUNCTION);
7566 putc ('\n', file);
7567
7568 fputs ("\t or\t%o0,%lo(", file);
7569 assemble_name (file, buf);
7570 fputs ("),%o0\n", file);
7571
7572 if (! TARGET_ARCH64)
7573 fputs ("\tld\t[%fp-4],%g2\n", file);
7574}
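
/* For a 32 bit target with label number 0, and assuming the "LP"
   internal label assembles as ".LLP0" and MCOUNT_FUNCTION as "mcount"
   (both spellings depend on the target macros), the code above emits

	st	%g2,[%fp-4]
	sethi	%hi(.LLP0),%o0
	call	mcount
	 or	%o0,%lo(.LLP0),%o0
	ld	[%fp-4],%g2
   */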
7575
7576
7577/* The following macro shall output assembler code to FILE
7578 to initialize basic-block profiling.
7579
7580 If profile_block_flag == 2
7581
7582 Output code to call the subroutine `__bb_init_trace_func'
7583 and pass two parameters to it. The first parameter is
7584 the address of a block allocated in the object module.
7585 The second parameter is the number of the first basic block
7586 of the function.
7587
7588 The name of the block is a local symbol made with this statement:
7589
7590 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
7591
7592 Of course, since you are writing the definition of
7593 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
7594 can take a short cut in the definition of this macro and use the
7595 name that you know will result.
7596
7597 The number of the first basic block of the function is
7598 passed to the macro in BLOCK_OR_LABEL.
7599
7600 If described in a virtual assembler language the code to be
7601 output looks like:
7602
7603 parameter1 <- LPBX0
7604 parameter2 <- BLOCK_OR_LABEL
7605 call __bb_init_trace_func
7606
7607 else if profile_block_flag != 0
7608
7609 Output code to call the subroutine `__bb_init_func'
7610 and pass one single parameter to it, which is the same
7611 as the first parameter to `__bb_init_trace_func'.
7612
7613 The first word of this parameter is a flag which will be nonzero if
7614 the object module has already been initialized. So test this word
7615 first, and do not call `__bb_init_func' if the flag is nonzero.
7616 Note: When profile_block_flag == 2 the test need not be done
7617 but `__bb_init_trace_func' *must* be called.
7618
7619 BLOCK_OR_LABEL may be used to generate a label number as a
7620 branch destination in case `__bb_init_func' will not be called.
7621
7622 If described in a virtual assembler language the code to be
7623 output looks like:
7624
7625 cmp (LPBX0),0
7626 jne local_label
7627 parameter1 <- LPBX0
7628 call __bb_init_func
7629 local_label:
7630
7631*/
7632
7633void
7634sparc_function_block_profiler (file, block_or_label)
7635 FILE *file;
7636 int block_or_label;
7637{
7638 char LPBX[32];
7639 ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
7640
7641 if (profile_block_flag == 2)
7642 {
7643 fputs ("\tsethi\t%hi(", file);
7644 assemble_name (file, LPBX);
7645 fputs ("),%o0\n", file);
7646
7647 fprintf (file, "\tsethi\t%%hi(%d),%%o1\n", block_or_label);
7648
7649 fputs ("\tor\t%o0,%lo(", file);
7650 assemble_name (file, LPBX);
7651 fputs ("),%o0\n", file);
7652
7653 fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);
7654
7655 fprintf (file, "\t or\t%%o1,%%lo(%d),%%o1\n", block_or_label);
7656 }
7657 else if (profile_block_flag != 0)
7658 {
7659 char LPBY[32];
7660 ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);
7661
7662 fputs ("\tsethi\t%hi(", file);
7663 assemble_name (file, LPBX);
7664 fputs ("),%o0\n", file);
7665
7666 fputs ("\tld\t[%lo(", file);
7667 assemble_name (file, LPBX);
7668 fputs (")+%o0],%o1\n", file);
7669
7670 fputs ("\ttst\t%o1\n", file);
7671
7672 if (TARGET_V9)
7673 {
7674 fputs ("\tbne,pn\t%icc,", file);
7675 assemble_name (file, LPBY);
7676 putc ('\n', file);
7677 }
7678 else
7679 {
7680 fputs ("\tbne\t", file);
7681 assemble_name (file, LPBY);
7682 putc ('\n', file);
7683 }
7684
7685 fputs ("\t or\t%o0,%lo(", file);
7686 assemble_name (file, LPBX);
7687 fputs ("),%o0\n", file);
7688
7689 fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);
7690
7691 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
7692 }
7693}
7694
/* The following function implements the `BLOCK_PROFILER' macro: it
   outputs assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

        Output code to initialize the global structure `__bb' and
        call the function `__bb_trace_func' which will increment the
        counter.

        `__bb' consists of two words.  In the first word the number
        of the basic block has to be stored.  In the second word
        the address of a block allocated in the object module
        has to be stored.

        The basic block number is given by BLOCKNO.

        The address of the block is given by the label created with

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        by FUNCTION_BLOCK_PROFILER.

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

            move BLOCKNO -> (__bb)
            move LPBX0 -> (__bb+4)
            call __bb_trace_func

        Note that the function `__bb_trace_func' must not change the
        machine state, especially the flag register.  To guarantee
        this, you must output code to save and restore registers
        either in this macro or in the macros MACHINE_STATE_SAVE
        and MACHINE_STATE_RESTORE.  The last two macros will be
        used in the function `__bb_trace_func', so you must make
        sure that the function prologue does not change any
        register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

        Output code to increment the counter directly.
        Basic blocks are numbered separately from zero within each
        compiled object module.  The count associated with block number
        BLOCKNO is at index BLOCKNO in an array of words; the name of
        this array is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language, the code to be
        output looks like:

            inc (LPBX2+4*BLOCKNO)

*/

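/* SPARC has no memory-increment instruction, so the direct-count case
   expands the "inc" above into a load/add/store sequence through the
   global scratch registers %g1 and %g2, as emitted below.  */
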
void
sparc_block_profiler(file, blockno)
     FILE *file;
     int blockno;
{
  char LPBX[32];

  if (profile_block_flag == 2)
    {
      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);

      fprintf (file, "\tsethi\t%%hi(%s__bb),%%g1\n", user_label_prefix);
      fprintf (file, "\tsethi\t%%hi(%d),%%g2\n", blockno);
      fprintf (file, "\tor\t%%g1,%%lo(%s__bb),%%g1\n", user_label_prefix);
      fprintf (file, "\tor\t%%g2,%%lo(%d),%%g2\n", blockno);

      fputs ("\tst\t%g2,[%g1]\n", file);

      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fputs ("),%g2\n", file);

      fputs ("\tor\t%g2,%lo(", file);
      assemble_name (file, LPBX);
      fputs ("),%g2\n", file);

      fputs ("\tst\t%g2,[%g1+4]\n", file);
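
      /* The call below overwrites the return address in %o7, so save
         it in %g2 first and restore it afterwards.  */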
      fputs ("\tmov\t%o7,%g2\n", file);

      fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);

      fputs ("\tmov\t%g2,%o7\n", file);
    }
  else if (profile_block_flag != 0)
    {
      ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);

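      /* Form the counter's address with the usual sethi %hi/%lo pair,
         folding the byte offset 4*BLOCKNO into the relocations, then
         load, increment, and store the count.  */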
      fputs ("\tsethi\t%hi(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d),%%g1\n", blockno*4);

      fputs ("\tld\t[%g1+%lo(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d)],%%g2\n", blockno*4);

      fputs ("\tadd\t%g2,1,%g2\n", file);

      fputs ("\tst\t%g2,[%g1+%lo(", file);
      assemble_name (file, LPBX);
      fprintf (file, "+%d)]\n", blockno*4);
    }
}

/* The following function implements the `FUNCTION_BLOCK_PROFILER_EXIT'
   macro: it outputs assembler code to FILE to indicate a return from a
   function during basic-block profiling.

   If profile_block_flag == 2:

        Output assembler code to call the function `__bb_trace_ret'.

        Note that the function `__bb_trace_ret' must not change the
        machine state, especially the flag register.  To guarantee
        this, you must output code to save and restore registers
        either in this macro or in the macros MACHINE_STATE_SAVE_RET
        and MACHINE_STATE_RESTORE_RET.  The last two macros will be
        used in the function `__bb_trace_ret', so you must make
        sure that the function prologue does not change any
        register prior to saving it with MACHINE_STATE_SAVE_RET.

   else if profile_block_flag != 0:

        The macro will not be used, so it need not distinguish
        these cases.  */

void
sparc_function_block_profiler_exit(file)
     FILE *file;
{
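  /* Per the comment above, only the profile_block_flag == 2
     instrumentation ever uses this hook; reaching it with any other
     setting is a compiler bug, hence the abort.  */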
  if (profile_block_flag == 2)
    fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
  else
    abort ();
}