ia64.c revision 122180
1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3   Contributed by James E. Wilson <wilson@cygnus.com> and
4   		  David Mosberger <davidm@hpl.hp.com>.
5
6This file is part of GNU CC.
7
8GNU CC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GNU CC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU CC; see the file COPYING.  If not, write to
20the Free Software Foundation, 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA.  */
22
23#include "config.h"
24#include "system.h"
25#include "rtl.h"
26#include "tree.h"
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
32#include "output.h"
33#include "insn-attr.h"
34#include "flags.h"
35#include "recog.h"
36#include "expr.h"
37#include "optabs.h"
38#include "except.h"
39#include "function.h"
40#include "ggc.h"
41#include "basic-block.h"
42#include "toplev.h"
43#include "sched-int.h"
44#include "timevar.h"
45#include "target.h"
46#include "target-def.h"
47#include "tm_p.h"
48#include "langhooks.h"
49
50/* This is used for communication between ASM_OUTPUT_LABEL and
51   ASM_OUTPUT_LABELREF.  */
52int ia64_asm_output_label = 0;
53
54/* Define the information needed to generate branch and scc insns.  This is
55   stored from the compare operation.  */
56struct rtx_def * ia64_compare_op0;
57struct rtx_def * ia64_compare_op1;
58
59/* Register names for ia64_expand_prologue.  */
60static const char * const ia64_reg_numbers[96] =
61{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70  "r104","r105","r106","r107","r108","r109","r110","r111",
71  "r112","r113","r114","r115","r116","r117","r118","r119",
72  "r120","r121","r122","r123","r124","r125","r126","r127"};
73
74/* ??? These strings could be shared with REGISTER_NAMES.  */
75static const char * const ia64_input_reg_names[8] =
76{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
77
78/* ??? These strings could be shared with REGISTER_NAMES.  */
79static const char * const ia64_local_reg_names[80] =
80{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90
91/* ??? These strings could be shared with REGISTER_NAMES.  */
92static const char * const ia64_output_reg_names[8] =
93{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94
95/* String used with the -mfixed-range= option.  */
96const char *ia64_fixed_range_string;
97
98/* Determines whether we use adds, addl, or movl to generate our
99   TLS immediate offsets.  */
100int ia64_tls_size = 22;
101
102/* String used with the -mtls-size= option.  */
103const char *ia64_tls_size_string;
104
105/* Determines whether we run our final scheduling pass or not.  We always
106   avoid the normal second scheduling pass.  */
107static int ia64_flag_schedule_insns2;
108
109/* Variables which are this size or smaller are put in the sdata/sbss
110   sections.  */
111
112unsigned int ia64_section_threshold;
113
114/* Structure to be filled in by ia64_compute_frame_size with register
115   save masks and offsets for the current function.  */
116
117struct ia64_frame_info
118{
119  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
120				   the caller's scratch area.  */
121  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
122  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
123  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
124  HARD_REG_SET mask;		/* mask of saved registers.  */
125  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
126				   registers or long-term scratches.  */
127  int n_spilled;		/* number of spilled registers.  */
128  int reg_fp;			/* register for fp.  */
129  int reg_save_b0;		/* save register for b0.  */
130  int reg_save_pr;		/* save register for prs.  */
131  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
132  int reg_save_ar_unat;		/* save register for ar.unat.  */
133  int reg_save_ar_lc;		/* save register for ar.lc.  */
134  int reg_save_gp;		/* save register for gp.  */
135  int n_input_regs;		/* number of input registers used.  */
136  int n_local_regs;		/* number of local registers used.  */
137  int n_output_regs;		/* number of output registers used.  */
138  int n_rotate_regs;		/* number of rotating registers used.  */
139
140  char need_regstk;		/* true if a .regstk directive needed.  */
141  char initialized;		/* true if the data is finalized.  */
142};
143
144/* Current frame information calculated by ia64_compute_frame_size.  */
145static struct ia64_frame_info current_frame_info;
146
147static rtx gen_tls_get_addr PARAMS ((void));
148static rtx gen_thread_pointer PARAMS ((void));
149static int find_gr_spill PARAMS ((int));
150static int next_scratch_gr_reg PARAMS ((void));
151static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
152static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
153static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
154static void finish_spill_pointers PARAMS ((void));
155static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
156static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
157static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
158static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
159static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
160static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
161
162static enum machine_mode hfa_element_mode PARAMS ((tree, int));
163static void fix_range PARAMS ((const char *));
164static struct machine_function * ia64_init_machine_status PARAMS ((void));
165static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
166static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
167static void emit_predicate_relation_info PARAMS ((void));
168static bool ia64_in_small_data_p PARAMS ((tree));
169static void ia64_encode_section_info PARAMS ((tree, int));
170static const char *ia64_strip_name_encoding PARAMS ((const char *));
171static void process_epilogue PARAMS ((void));
172static int process_set PARAMS ((FILE *, rtx));
173
174static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
175					     tree, rtx));
176static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
177					     tree, rtx));
178static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
179						 enum machine_mode,
180						 int, tree, rtx));
181static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
182						  tree, rtx));
183static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
184static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
185static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
186static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
187static void ia64_output_function_end_prologue PARAMS ((FILE *));
188
189static int ia64_issue_rate PARAMS ((void));
190static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
191static void ia64_sched_init PARAMS ((FILE *, int, int));
192static void ia64_sched_finish PARAMS ((FILE *, int));
193static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
194						int *, int, int));
195static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
196static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
197static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
198
199static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
200					  HOST_WIDE_INT, tree));
201
202static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
203					     unsigned HOST_WIDE_INT));
204static void ia64_rwreloc_select_section PARAMS ((tree, int,
205					         unsigned HOST_WIDE_INT))
206     ATTRIBUTE_UNUSED;
207static void ia64_rwreloc_unique_section PARAMS ((tree, int))
208     ATTRIBUTE_UNUSED;
209static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
210					             unsigned HOST_WIDE_INT))
211     ATTRIBUTE_UNUSED;
212static unsigned int ia64_rwreloc_section_type_flags
213     PARAMS ((tree, const char *, int))
214     ATTRIBUTE_UNUSED;
215
216static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
217     ATTRIBUTE_UNUSED;
218
219/* Table of valid machine attributes.  */
220static const struct attribute_spec ia64_attribute_table[] =
221{
222  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
223  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
224  { NULL,              0, 0, false, false, false, NULL }
225};
226
227/* Initialize the GCC target structure.  */
228#undef TARGET_ATTRIBUTE_TABLE
229#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
230
231#undef TARGET_INIT_BUILTINS
232#define TARGET_INIT_BUILTINS ia64_init_builtins
233
234#undef TARGET_EXPAND_BUILTIN
235#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
236
237#undef TARGET_ASM_BYTE_OP
238#define TARGET_ASM_BYTE_OP "\tdata1\t"
239#undef TARGET_ASM_ALIGNED_HI_OP
240#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
241#undef TARGET_ASM_ALIGNED_SI_OP
242#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
243#undef TARGET_ASM_ALIGNED_DI_OP
244#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
245#undef TARGET_ASM_UNALIGNED_HI_OP
246#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
247#undef TARGET_ASM_UNALIGNED_SI_OP
248#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
249#undef TARGET_ASM_UNALIGNED_DI_OP
250#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
251#undef TARGET_ASM_INTEGER
252#define TARGET_ASM_INTEGER ia64_assemble_integer
253
254#undef TARGET_ASM_FUNCTION_PROLOGUE
255#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
256#undef TARGET_ASM_FUNCTION_END_PROLOGUE
257#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
258#undef TARGET_ASM_FUNCTION_EPILOGUE
259#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
260
261#undef TARGET_IN_SMALL_DATA_P
262#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
263#undef TARGET_ENCODE_SECTION_INFO
264#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
265#undef TARGET_STRIP_NAME_ENCODING
266#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding
267
268#undef TARGET_SCHED_ADJUST_COST
269#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
270#undef TARGET_SCHED_ISSUE_RATE
271#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
272#undef TARGET_SCHED_VARIABLE_ISSUE
273#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
274#undef TARGET_SCHED_INIT
275#define TARGET_SCHED_INIT ia64_sched_init
276#undef TARGET_SCHED_FINISH
277#define TARGET_SCHED_FINISH ia64_sched_finish
278#undef TARGET_SCHED_REORDER
279#define TARGET_SCHED_REORDER ia64_sched_reorder
280#undef TARGET_SCHED_REORDER2
281#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
282
283#ifdef HAVE_AS_TLS
284#undef TARGET_HAVE_TLS
285#define TARGET_HAVE_TLS true
286#endif
287
288#undef TARGET_ASM_OUTPUT_MI_THUNK
289#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
290#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
291#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
292
293struct gcc_target targetm = TARGET_INITIALIZER;
294
295/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
296
297int
298call_operand (op, mode)
299     rtx op;
300     enum machine_mode mode;
301{
302  if (mode != GET_MODE (op) && mode != VOIDmode)
303    return 0;
304
305  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
306	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
307}
308
309/* Return 1 if OP refers to a symbol in the sdata section.  */
310
311int
312sdata_symbolic_operand (op, mode)
313     rtx op;
314     enum machine_mode mode ATTRIBUTE_UNUSED;
315{
316  switch (GET_CODE (op))
317    {
318    case CONST:
319      if (GET_CODE (XEXP (op, 0)) != PLUS
320	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
321	break;
322      op = XEXP (XEXP (op, 0), 0);
323      /* FALLTHRU */
324
325    case SYMBOL_REF:
326      if (CONSTANT_POOL_ADDRESS_P (op))
327	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
328      else
329	{
330	  const char *str = XSTR (op, 0);
331          return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
332	}
333
334    default:
335      break;
336    }
337
338  return 0;
339}
340
341/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */
342
343int
344got_symbolic_operand (op, mode)
345     rtx op;
346     enum machine_mode mode ATTRIBUTE_UNUSED;
347{
348  switch (GET_CODE (op))
349    {
350    case CONST:
351      op = XEXP (op, 0);
352      if (GET_CODE (op) != PLUS)
353	return 0;
354      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
355	return 0;
356      op = XEXP (op, 1);
357      if (GET_CODE (op) != CONST_INT)
358	return 0;
359
360	return 1;
361
362      /* Ok if we're not using GOT entries at all.  */
363      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
364	return 1;
365
366      /* "Ok" while emitting rtl, since otherwise we won't be provided
367	 with the entire offset during emission, which makes it very
368	 hard to split the offset into high and low parts.  */
369      if (rtx_equal_function_value_matters)
370	return 1;
371
372      /* Force the low 14 bits of the constant to zero so that we do not
373	 use up so many GOT entries.  */
374      return (INTVAL (op) & 0x3fff) == 0;
375
376    case SYMBOL_REF:
377    case LABEL_REF:
378      return 1;
379
380    default:
381      break;
382    }
383  return 0;
384}
385
386/* Return 1 if OP refers to a symbol.  */
387
388int
389symbolic_operand (op, mode)
390     rtx op;
391     enum machine_mode mode ATTRIBUTE_UNUSED;
392{
393  switch (GET_CODE (op))
394    {
395    case CONST:
396    case SYMBOL_REF:
397    case LABEL_REF:
398      return 1;
399
400    default:
401      break;
402    }
403  return 0;
404}
405
406/* Return tls_model if OP refers to a TLS symbol.  */
407
408int
409tls_symbolic_operand (op, mode)
410     rtx op;
411     enum machine_mode mode ATTRIBUTE_UNUSED;
412{
413  const char *str;
414
415  if (GET_CODE (op) != SYMBOL_REF)
416    return 0;
417  str = XSTR (op, 0);
418  if (str[0] != ENCODE_SECTION_INFO_CHAR)
419    return 0;
420  switch (str[1])
421    {
422    case 'G':
423      return TLS_MODEL_GLOBAL_DYNAMIC;
424    case 'L':
425      return TLS_MODEL_LOCAL_DYNAMIC;
426    case 'i':
427      return TLS_MODEL_INITIAL_EXEC;
428    case 'l':
429      return TLS_MODEL_LOCAL_EXEC;
430    }
431  return 0;
432}
433
434
435/* Return 1 if OP refers to a function.  */
436
437int
438function_operand (op, mode)
439     rtx op;
440     enum machine_mode mode ATTRIBUTE_UNUSED;
441{
442  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
443    return 1;
444  else
445    return 0;
446}
447
448/* Return 1 if OP is setjmp or a similar function.  */
449
450/* ??? This is an unsatisfying solution.  Should rethink.  */
451
452int
453setjmp_operand (op, mode)
454     rtx op;
455     enum machine_mode mode ATTRIBUTE_UNUSED;
456{
457  const char *name;
458  int retval = 0;
459
460  if (GET_CODE (op) != SYMBOL_REF)
461    return 0;
462
463  name = XSTR (op, 0);
464
465  /* The following code is borrowed from special_function_p in calls.c.  */
466
467  /* Disregard prefix _, __ or __x.  */
468  if (name[0] == '_')
469    {
470      if (name[1] == '_' && name[2] == 'x')
471	name += 3;
472      else if (name[1] == '_')
473	name += 2;
474      else
475	name += 1;
476    }
477
478  if (name[0] == 's')
479    {
480      retval
481	= ((name[1] == 'e'
482	    && (! strcmp (name, "setjmp")
483		|| ! strcmp (name, "setjmp_syscall")))
484	   || (name[1] == 'i'
485	       && ! strcmp (name, "sigsetjmp"))
486	   || (name[1] == 'a'
487	       && ! strcmp (name, "savectx")));
488    }
489  else if ((name[0] == 'q' && name[1] == 's'
490	    && ! strcmp (name, "qsetjmp"))
491	   || (name[0] == 'v' && name[1] == 'f'
492	       && ! strcmp (name, "vfork")))
493    retval = 1;
494
495  return retval;
496}
497
498/* Return 1 if OP is a general operand, but when pic exclude symbolic
499   operands.  */
500
501/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
502   from PREDICATE_CODES.  */
503
504int
505move_operand (op, mode)
506     rtx op;
507     enum machine_mode mode;
508{
509  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
510    return 0;
511
512  return general_operand (op, mode);
513}
514
515/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */
516
517int
518gr_register_operand (op, mode)
519     rtx op;
520     enum machine_mode mode;
521{
522  if (! register_operand (op, mode))
523    return 0;
524  if (GET_CODE (op) == SUBREG)
525    op = SUBREG_REG (op);
526  if (GET_CODE (op) == REG)
527    {
528      unsigned int regno = REGNO (op);
529      if (regno < FIRST_PSEUDO_REGISTER)
530	return GENERAL_REGNO_P (regno);
531    }
532  return 1;
533}
534
535/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */
536
537int
538fr_register_operand (op, mode)
539     rtx op;
540     enum machine_mode mode;
541{
542  if (! register_operand (op, mode))
543    return 0;
544  if (GET_CODE (op) == SUBREG)
545    op = SUBREG_REG (op);
546  if (GET_CODE (op) == REG)
547    {
548      unsigned int regno = REGNO (op);
549      if (regno < FIRST_PSEUDO_REGISTER)
550	return FR_REGNO_P (regno);
551    }
552  return 1;
553}
554
555/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */
556
557int
558grfr_register_operand (op, mode)
559     rtx op;
560     enum machine_mode mode;
561{
562  if (! register_operand (op, mode))
563    return 0;
564  if (GET_CODE (op) == SUBREG)
565    op = SUBREG_REG (op);
566  if (GET_CODE (op) == REG)
567    {
568      unsigned int regno = REGNO (op);
569      if (regno < FIRST_PSEUDO_REGISTER)
570	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
571    }
572  return 1;
573}
574
575/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */
576
577int
578gr_nonimmediate_operand (op, mode)
579     rtx op;
580     enum machine_mode mode;
581{
582  if (! nonimmediate_operand (op, mode))
583    return 0;
584  if (GET_CODE (op) == SUBREG)
585    op = SUBREG_REG (op);
586  if (GET_CODE (op) == REG)
587    {
588      unsigned int regno = REGNO (op);
589      if (regno < FIRST_PSEUDO_REGISTER)
590	return GENERAL_REGNO_P (regno);
591    }
592  return 1;
593}
594
595/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */
596
597int
598fr_nonimmediate_operand (op, mode)
599     rtx op;
600     enum machine_mode mode;
601{
602  if (! nonimmediate_operand (op, mode))
603    return 0;
604  if (GET_CODE (op) == SUBREG)
605    op = SUBREG_REG (op);
606  if (GET_CODE (op) == REG)
607    {
608      unsigned int regno = REGNO (op);
609      if (regno < FIRST_PSEUDO_REGISTER)
610	return FR_REGNO_P (regno);
611    }
612  return 1;
613}
614
615/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
616
617int
618grfr_nonimmediate_operand (op, mode)
619     rtx op;
620     enum machine_mode mode;
621{
622  if (! nonimmediate_operand (op, mode))
623    return 0;
624  if (GET_CODE (op) == SUBREG)
625    op = SUBREG_REG (op);
626  if (GET_CODE (op) == REG)
627    {
628      unsigned int regno = REGNO (op);
629      if (regno < FIRST_PSEUDO_REGISTER)
630	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
631    }
632  return 1;
633}
634
635/* Return 1 if OP is a GR register operand, or zero.  */
636
637int
638gr_reg_or_0_operand (op, mode)
639     rtx op;
640     enum machine_mode mode;
641{
642  return (op == const0_rtx || gr_register_operand (op, mode));
643}
644
645/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */
646
647int
648gr_reg_or_5bit_operand (op, mode)
649     rtx op;
650     enum machine_mode mode;
651{
652  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
653	  || GET_CODE (op) == CONSTANT_P_RTX
654	  || gr_register_operand (op, mode));
655}
656
657/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */
658
659int
660gr_reg_or_6bit_operand (op, mode)
661     rtx op;
662     enum machine_mode mode;
663{
664  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
665	  || GET_CODE (op) == CONSTANT_P_RTX
666	  || gr_register_operand (op, mode));
667}
668
669/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */
670
671int
672gr_reg_or_8bit_operand (op, mode)
673     rtx op;
674     enum machine_mode mode;
675{
676  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
677	  || GET_CODE (op) == CONSTANT_P_RTX
678	  || gr_register_operand (op, mode));
679}
680
681/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */
682
683int
684grfr_reg_or_8bit_operand (op, mode)
685     rtx op;
686     enum machine_mode mode;
687{
688  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
689	  || GET_CODE (op) == CONSTANT_P_RTX
690	  || grfr_register_operand (op, mode));
691}
692
693/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
694   operand.  */
695
696int
697gr_reg_or_8bit_adjusted_operand (op, mode)
698     rtx op;
699     enum machine_mode mode;
700{
701  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
702	  || GET_CODE (op) == CONSTANT_P_RTX
703	  || gr_register_operand (op, mode));
704}
705
706/* Return 1 if OP is a register operand, or is valid for both an 8 bit
707   immediate and an 8 bit adjusted immediate operand.  This is necessary
708   because when we emit a compare, we don't know what the condition will be,
709   so we need the union of the immediates accepted by GT and LT.  */
710
711int
712gr_reg_or_8bit_and_adjusted_operand (op, mode)
713     rtx op;
714     enum machine_mode mode;
715{
716  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
717	   && CONST_OK_FOR_L (INTVAL (op)))
718	  || GET_CODE (op) == CONSTANT_P_RTX
719	  || gr_register_operand (op, mode));
720}
721
722/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */
723
724int
725gr_reg_or_14bit_operand (op, mode)
726     rtx op;
727     enum machine_mode mode;
728{
729  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
730	  || GET_CODE (op) == CONSTANT_P_RTX
731	  || gr_register_operand (op, mode));
732}
733
734/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */
735
736int
737gr_reg_or_22bit_operand (op, mode)
738     rtx op;
739     enum machine_mode mode;
740{
741  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
742	  || GET_CODE (op) == CONSTANT_P_RTX
743	  || gr_register_operand (op, mode));
744}
745
746/* Return 1 if OP is a 6 bit immediate operand.  */
747
748int
749shift_count_operand (op, mode)
750     rtx op;
751     enum machine_mode mode ATTRIBUTE_UNUSED;
752{
753  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
754	  || GET_CODE (op) == CONSTANT_P_RTX);
755}
756
757/* Return 1 if OP is a 5 bit immediate operand.  */
758
759int
760shift_32bit_count_operand (op, mode)
761     rtx op;
762     enum machine_mode mode ATTRIBUTE_UNUSED;
763{
764  return ((GET_CODE (op) == CONST_INT
765	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
766	  || GET_CODE (op) == CONSTANT_P_RTX);
767}
768
769/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
770
771int
772shladd_operand (op, mode)
773     rtx op;
774     enum machine_mode mode ATTRIBUTE_UNUSED;
775{
776  return (GET_CODE (op) == CONST_INT
777	  && (INTVAL (op) == 2 || INTVAL (op) == 4
778	      || INTVAL (op) == 8 || INTVAL (op) == 16));
779}
780
781/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
782
783int
784fetchadd_operand (op, mode)
785     rtx op;
786     enum machine_mode mode ATTRIBUTE_UNUSED;
787{
788  return (GET_CODE (op) == CONST_INT
789          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
790              INTVAL (op) == -4  || INTVAL (op) == -1 ||
791              INTVAL (op) == 1   || INTVAL (op) == 4  ||
792              INTVAL (op) == 8   || INTVAL (op) == 16));
793}
794
795/* Return 1 if OP is a floating-point constant zero, one, or a register.  */
796
797int
798fr_reg_or_fp01_operand (op, mode)
799     rtx op;
800     enum machine_mode mode;
801{
802  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
803	  || fr_register_operand (op, mode));
804}
805
806/* Like nonimmediate_operand, but don't allow MEMs that try to use a
807   POST_MODIFY with a REG as displacement.  */
808
809int
810destination_operand (op, mode)
811     rtx op;
812     enum machine_mode mode;
813{
814  if (! nonimmediate_operand (op, mode))
815    return 0;
816  if (GET_CODE (op) == MEM
817      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
818      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
819    return 0;
820  return 1;
821}
822
823/* Like memory_operand, but don't allow post-increments.  */
824
825int
826not_postinc_memory_operand (op, mode)
827     rtx op;
828     enum machine_mode mode;
829{
830  return (memory_operand (op, mode)
831	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
832}
833
834/* Return 1 if this is a comparison operator, which accepts an normal 8-bit
835   signed immediate operand.  */
836
837int
838normal_comparison_operator (op, mode)
839    register rtx op;
840    enum machine_mode mode;
841{
842  enum rtx_code code = GET_CODE (op);
843  return ((mode == VOIDmode || GET_MODE (op) == mode)
844	  && (code == EQ || code == NE
845	      || code == GT || code == LE || code == GTU || code == LEU));
846}
847
848/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
849   signed immediate operand.  */
850
851int
852adjusted_comparison_operator (op, mode)
853    register rtx op;
854    enum machine_mode mode;
855{
856  enum rtx_code code = GET_CODE (op);
857  return ((mode == VOIDmode || GET_MODE (op) == mode)
858	  && (code == LT || code == GE || code == LTU || code == GEU));
859}
860
861/* Return 1 if this is a signed inequality operator.  */
862
863int
864signed_inequality_operator (op, mode)
865    register rtx op;
866    enum machine_mode mode;
867{
868  enum rtx_code code = GET_CODE (op);
869  return ((mode == VOIDmode || GET_MODE (op) == mode)
870	  && (code == GE || code == GT
871	      || code == LE || code == LT));
872}
873
874/* Return 1 if this operator is valid for predication.  */
875
876int
877predicate_operator (op, mode)
878    register rtx op;
879    enum machine_mode mode;
880{
881  enum rtx_code code = GET_CODE (op);
882  return ((GET_MODE (op) == mode || mode == VOIDmode)
883	  && (code == EQ || code == NE));
884}
885
886/* Return 1 if this operator can be used in a conditional operation.  */
887
888int
889condop_operator (op, mode)
890    register rtx op;
891    enum machine_mode mode;
892{
893  enum rtx_code code = GET_CODE (op);
894  return ((GET_MODE (op) == mode || mode == VOIDmode)
895	  && (code == PLUS || code == MINUS || code == AND
896	      || code == IOR || code == XOR));
897}
898
899/* Return 1 if this is the ar.lc register.  */
900
901int
902ar_lc_reg_operand (op, mode)
903     register rtx op;
904     enum machine_mode mode;
905{
906  return (GET_MODE (op) == DImode
907	  && (mode == DImode || mode == VOIDmode)
908	  && GET_CODE (op) == REG
909	  && REGNO (op) == AR_LC_REGNUM);
910}
911
912/* Return 1 if this is the ar.ccv register.  */
913
914int
915ar_ccv_reg_operand (op, mode)
916     register rtx op;
917     enum machine_mode mode;
918{
919  return ((GET_MODE (op) == mode || mode == VOIDmode)
920	  && GET_CODE (op) == REG
921	  && REGNO (op) == AR_CCV_REGNUM);
922}
923
924/* Return 1 if this is the ar.pfs register.  */
925
926int
927ar_pfs_reg_operand (op, mode)
928     register rtx op;
929     enum machine_mode mode;
930{
931  return ((GET_MODE (op) == mode || mode == VOIDmode)
932	  && GET_CODE (op) == REG
933	  && REGNO (op) == AR_PFS_REGNUM);
934}
935
936/* Like general_operand, but don't allow (mem (addressof)).  */
937
938int
939general_tfmode_operand (op, mode)
940     rtx op;
941     enum machine_mode mode;
942{
943  if (! general_operand (op, mode))
944    return 0;
945  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
946    return 0;
947  return 1;
948}
949
950/* Similarly.  */
951
952int
953destination_tfmode_operand (op, mode)
954     rtx op;
955     enum machine_mode mode;
956{
957  if (! destination_operand (op, mode))
958    return 0;
959  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
960    return 0;
961  return 1;
962}
963
964/* Similarly.  */
965
966int
967tfreg_or_fp01_operand (op, mode)
968     rtx op;
969     enum machine_mode mode;
970{
971  if (GET_CODE (op) == SUBREG)
972    return 0;
973  return fr_reg_or_fp01_operand (op, mode);
974}
975
976/* Return 1 if OP is valid as a base register in a reg + offset address.  */
977
978int
979basereg_operand (op, mode)
980     rtx op;
981     enum machine_mode mode;
982{
983  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
984     checks from pa.c basereg_operand as well?  Seems to be OK without them
985     in test runs.  */
986
987  return (register_operand (op, mode) &&
988	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
989}
990
991/* Return 1 if the operands of a move are ok.  */
992
993int
994ia64_move_ok (dst, src)
995     rtx dst, src;
996{
997  /* If we're under init_recog_no_volatile, we'll not be able to use
998     memory_operand.  So check the code directly and don't worry about
999     the validity of the underlying address, which should have been
1000     checked elsewhere anyway.  */
1001  if (GET_CODE (dst) != MEM)
1002    return 1;
1003  if (GET_CODE (src) == MEM)
1004    return 0;
1005  if (register_operand (src, VOIDmode))
1006    return 1;
1007
1008  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
1009  if (INTEGRAL_MODE_P (GET_MODE (dst)))
1010    return src == const0_rtx;
1011  else
1012    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1013}
1014
1015/* Return 0 if we are doing C++ code.  This optimization fails with
1016   C++ because of GNAT c++/6685.  */
1017
1018int
1019addp4_optimize_ok (op1, op2)
1020     rtx op1, op2;
1021{
1022
1023  if (!strcmp (lang_hooks.name, "GNU C++"))
1024    return 0;
1025
1026  return (basereg_operand (op1, GET_MODE(op1)) !=
1027	  basereg_operand (op2, GET_MODE(op2)));
1028}
1029
1030/* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
1031   Return the length of the field, or <= 0 on failure.  */
1032
1033int
1034ia64_depz_field_mask (rop, rshift)
1035     rtx rop, rshift;
1036{
1037  unsigned HOST_WIDE_INT op = INTVAL (rop);
1038  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1039
1040  /* Get rid of the zero bits we're shifting in.  */
1041  op >>= shift;
1042
1043  /* We must now have a solid block of 1's at bit 0.  */
1044  return exact_log2 (op + 1);
1045}
1046
1047/* Expand a symbolic constant load.  */
1048/* ??? Should generalize this, so that we can also support 32 bit pointers.  */
1049
1050void
1051ia64_expand_load_address (dest, src, scratch)
1052      rtx dest, src, scratch;
1053{
1054  rtx temp;
1055
1056  /* The destination could be a MEM during initial rtl generation,
1057     which isn't a valid destination for the PIC load address patterns.  */
1058  if (! register_operand (dest, DImode))
1059    if (! scratch || ! register_operand (scratch, DImode))
1060      temp = gen_reg_rtx (DImode);
1061    else
1062      temp = scratch;
1063  else
1064    temp = dest;
1065
1066  if (tls_symbolic_operand (src, Pmode))
1067    abort ();
1068
1069  if (TARGET_AUTO_PIC)
1070    emit_insn (gen_load_gprel64 (temp, src));
1071  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
1072    emit_insn (gen_load_fptr (temp, src));
1073  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
1074           && sdata_symbolic_operand (src, VOIDmode))
1075    emit_insn (gen_load_gprel (temp, src));
1076  else if (GET_CODE (src) == CONST
1077	   && GET_CODE (XEXP (src, 0)) == PLUS
1078	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1079	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1080    {
1081      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1082      rtx sym = XEXP (XEXP (src, 0), 0);
1083      HOST_WIDE_INT ofs, hi, lo;
1084
1085      /* Split the offset into a sign extended 14-bit low part
1086	 and a complementary high part.  */
1087      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1088      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1089      hi = ofs - lo;
1090
1091      if (! scratch)
1092	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
1093
1094      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
1095				  scratch));
1096      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
1097    }
1098  else
1099    {
1100      rtx insn;
1101      if (! scratch)
1102	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1103
1104      insn = emit_insn (gen_load_symptr (temp, src, scratch));
1105#ifdef POINTERS_EXTEND_UNSIGNED
1106      if (GET_MODE (temp) != GET_MODE (src))
1107	src = convert_memory_address (GET_MODE (temp), src);
1108#endif
1109      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
1110    }
1111
1112  if (temp != dest)
1113    {
1114      if (GET_MODE (dest) != GET_MODE (temp))
1115	temp = convert_to_mode (GET_MODE (dest), temp, 0);
1116      emit_move_insn (dest, temp);
1117    }
1118}
1119
1120static GTY(()) rtx gen_tls_tga;
1121static rtx
1122gen_tls_get_addr ()
1123{
1124  if (!gen_tls_tga)
1125    {
1126      gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1127     }
1128  return gen_tls_tga;
1129}
1130
1131static GTY(()) rtx thread_pointer_rtx;
1132static rtx
1133gen_thread_pointer ()
1134{
1135  if (!thread_pointer_rtx)
1136    {
1137      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1138      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1139    }
1140  return thread_pointer_rtx;
1141}
1142
1143rtx
1144ia64_expand_move (op0, op1)
1145     rtx op0, op1;
1146{
1147  enum machine_mode mode = GET_MODE (op0);
1148
1149  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1150    op1 = force_reg (mode, op1);
1151
1152  if (mode == Pmode || mode == ptr_mode)
1153    {
1154      enum tls_model tls_kind;
1155      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1156	{
1157	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1158	  rtx orig_op0 = op0;
1159
1160	  switch (tls_kind)
1161	    {
1162	    case TLS_MODEL_GLOBAL_DYNAMIC:
1163	      start_sequence ();
1164
1165	      tga_op1 = gen_reg_rtx (Pmode);
1166	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1167	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1168	      RTX_UNCHANGING_P (tga_op1) = 1;
1169
1170	      tga_op2 = gen_reg_rtx (Pmode);
1171	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1172	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1173	      RTX_UNCHANGING_P (tga_op2) = 1;
1174
1175	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1176						 LCT_CONST, Pmode, 2, tga_op1,
1177						 Pmode, tga_op2, Pmode);
1178
1179	      insns = get_insns ();
1180	      end_sequence ();
1181
1182	      if (GET_MODE (op0) != Pmode)
1183		op0 = tga_ret;
1184	      emit_libcall_block (insns, op0, tga_ret, op1);
1185	      break;
1186
1187	    case TLS_MODEL_LOCAL_DYNAMIC:
1188	      /* ??? This isn't the completely proper way to do local-dynamic
1189		 If the call to __tls_get_addr is used only by a single symbol,
1190		 then we should (somehow) move the dtprel to the second arg
1191		 to avoid the extra add.  */
1192	      start_sequence ();
1193
1194	      tga_op1 = gen_reg_rtx (Pmode);
1195	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1196	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1197	      RTX_UNCHANGING_P (tga_op1) = 1;
1198
1199	      tga_op2 = const0_rtx;
1200
1201	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1202						 LCT_CONST, Pmode, 2, tga_op1,
1203						 Pmode, tga_op2, Pmode);
1204
1205	      insns = get_insns ();
1206	      end_sequence ();
1207
1208	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1209					UNSPEC_LD_BASE);
1210	      tmp = gen_reg_rtx (Pmode);
1211	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1212
1213	      if (!register_operand (op0, Pmode))
1214		op0 = gen_reg_rtx (Pmode);
1215	      if (TARGET_TLS64)
1216		{
1217		  emit_insn (gen_load_dtprel (op0, op1));
1218		  emit_insn (gen_adddi3 (op0, tmp, op0));
1219		}
1220	      else
1221		emit_insn (gen_add_dtprel (op0, tmp, op1));
1222	      break;
1223
1224	    case TLS_MODEL_INITIAL_EXEC:
1225	      tmp = gen_reg_rtx (Pmode);
1226	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
1227	      tmp = gen_rtx_MEM (Pmode, tmp);
1228	      RTX_UNCHANGING_P (tmp) = 1;
1229	      tmp = force_reg (Pmode, tmp);
1230
1231	      if (!register_operand (op0, Pmode))
1232		op0 = gen_reg_rtx (Pmode);
1233	      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1234	      break;
1235
1236	    case TLS_MODEL_LOCAL_EXEC:
1237	      if (!register_operand (op0, Pmode))
1238		op0 = gen_reg_rtx (Pmode);
1239	      if (TARGET_TLS64)
1240		{
1241		  emit_insn (gen_load_tprel (op0, op1));
1242		  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
1243		}
1244	      else
1245		emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
1246	      break;
1247
1248	    default:
1249	      abort ();
1250	    }
1251
1252	  if (orig_op0 == op0)
1253	    return NULL_RTX;
1254	  if (GET_MODE (orig_op0) == Pmode)
1255	    return op0;
1256	  return gen_lowpart (GET_MODE (orig_op0), op0);
1257	}
1258      else if (!TARGET_NO_PIC &&
1259	       (symbolic_operand (op1, Pmode) ||
1260		symbolic_operand (op1, ptr_mode)))
1261	{
1262	  /* Before optimization starts, delay committing to any particular
1263	     type of PIC address load.  If this function gets deferred, we
1264	     may acquire information that changes the value of the
1265	     sdata_symbolic_operand predicate.
1266
1267	     But don't delay for function pointers.  Loading a function address
1268	     actually loads the address of the descriptor not the function.
1269	     If we represent these as SYMBOL_REFs, then they get cse'd with
1270	     calls, and we end up with calls to the descriptor address instead
1271	     of calls to the function address.  Functions are not candidates
1272	     for sdata anyways.
1273
1274	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
1275	     notes.  Don't delay for pool addresses on general principals;
1276	     they'll never become non-local behind our back.  */
1277
1278	  if (rtx_equal_function_value_matters
1279	      && GET_CODE (op1) != LABEL_REF
1280	      && ! (GET_CODE (op1) == SYMBOL_REF
1281		    && (SYMBOL_REF_FLAG (op1)
1282			|| CONSTANT_POOL_ADDRESS_P (op1)
1283			|| STRING_POOL_ADDRESS_P (op1))))
1284	    if (GET_MODE (op1) == DImode)
1285	      emit_insn (gen_movdi_symbolic (op0, op1));
1286	    else
1287	      emit_insn (gen_movsi_symbolic (op0, op1));
1288	  else
1289	    ia64_expand_load_address (op0, op1, NULL_RTX);
1290	  return NULL_RTX;
1291	}
1292    }
1293
1294  return op1;
1295}
1296
1297/* Split a post-reload TImode reference into two DImode components.  */
1298
1299rtx
1300ia64_split_timode (out, in, scratch)
1301     rtx out[2];
1302     rtx in, scratch;
1303{
1304  switch (GET_CODE (in))
1305    {
1306    case REG:
1307      out[0] = gen_rtx_REG (DImode, REGNO (in));
1308      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1309      return NULL_RTX;
1310
1311    case MEM:
1312      {
1313	rtx base = XEXP (in, 0);
1314
1315	switch (GET_CODE (base))
1316	  {
1317	  case REG:
1318	    out[0] = adjust_address (in, DImode, 0);
1319	    break;
1320	  case POST_MODIFY:
1321	    base = XEXP (base, 0);
1322	    out[0] = adjust_address (in, DImode, 0);
1323	    break;
1324
1325	  /* Since we're changing the mode, we need to change to POST_MODIFY
1326	     as well to preserve the size of the increment.  Either that or
1327	     do the update in two steps, but we've already got this scratch
1328	     register handy so let's use it.  */
1329	  case POST_INC:
1330	    base = XEXP (base, 0);
1331	    out[0]
1332	      = change_address (in, DImode,
1333				gen_rtx_POST_MODIFY
1334				(Pmode, base, plus_constant (base, 16)));
1335	    break;
1336	  case POST_DEC:
1337	    base = XEXP (base, 0);
1338	    out[0]
1339	      = change_address (in, DImode,
1340				gen_rtx_POST_MODIFY
1341				(Pmode, base, plus_constant (base, -16)));
1342	    break;
1343	  default:
1344	    abort ();
1345	  }
1346
1347	if (scratch == NULL_RTX)
1348	  abort ();
1349	out[1] = change_address (in, DImode, scratch);
1350	return gen_adddi3 (scratch, base, GEN_INT (8));
1351      }
1352
1353    case CONST_INT:
1354    case CONST_DOUBLE:
1355      split_double (in, &out[0], &out[1]);
1356      return NULL_RTX;
1357
1358    default:
1359      abort ();
1360    }
1361}
1362
1363/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
1364   through memory plus an extra GR scratch register.  Except that you can
1365   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1366   SECONDARY_RELOAD_CLASS, but not both.
1367
1368   We got into problems in the first place by allowing a construct like
1369   (subreg:TF (reg:TI)), which we got from a union containing a long double.
1370   This solution attempts to prevent this situation from occurring.  When
1371   we see something like the above, we spill the inner register to memory.  */
1372
1373rtx
1374spill_tfmode_operand (in, force)
1375     rtx in;
1376     int force;
1377{
1378  if (GET_CODE (in) == SUBREG
1379      && GET_MODE (SUBREG_REG (in)) == TImode
1380      && GET_CODE (SUBREG_REG (in)) == REG)
1381    {
1382      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
1383      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1384    }
1385  else if (force && GET_CODE (in) == REG)
1386    {
1387      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
1388      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1389    }
1390  else if (GET_CODE (in) == MEM
1391	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1392    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1393  else
1394    return in;
1395}
1396
1397/* Emit comparison instruction if necessary, returning the expression
1398   that holds the compare result in the proper mode.  */
1399
1400rtx
1401ia64_expand_compare (code, mode)
1402     enum rtx_code code;
1403     enum machine_mode mode;
1404{
1405  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1406  rtx cmp;
1407
1408  /* If we have a BImode input, then we already have a compare result, and
1409     do not need to emit another comparison.  */
1410  if (GET_MODE (op0) == BImode)
1411    {
1412      if ((code == NE || code == EQ) && op1 == const0_rtx)
1413	cmp = op0;
1414      else
1415	abort ();
1416    }
1417  else
1418    {
1419      cmp = gen_reg_rtx (BImode);
1420      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1421			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1422      code = NE;
1423    }
1424
1425  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1426}
1427
1428/* Emit the appropriate sequence for a call.  */
1429void
1430ia64_expand_call (retval, addr, nextarg, sibcall_p)
1431     rtx retval;
1432     rtx addr;
1433     rtx nextarg ATTRIBUTE_UNUSED;
1434     int sibcall_p;
1435{
1436  rtx insn, b0;
1437
1438  addr = XEXP (addr, 0);
1439  b0 = gen_rtx_REG (DImode, R_BR (0));
1440
1441  /* ??? Should do this for functions known to bind local too.  */
1442  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1443    {
1444      if (sibcall_p)
1445	insn = gen_sibcall_nogp (addr);
1446      else if (! retval)
1447	insn = gen_call_nogp (addr, b0);
1448      else
1449	insn = gen_call_value_nogp (retval, addr, b0);
1450      insn = emit_call_insn (insn);
1451    }
1452  else
1453    {
1454      if (sibcall_p)
1455	insn = gen_sibcall_gp (addr);
1456      else if (! retval)
1457	insn = gen_call_gp (addr, b0);
1458      else
1459	insn = gen_call_value_gp (retval, addr, b0);
1460      insn = emit_call_insn (insn);
1461
1462      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1463    }
1464
1465  if (sibcall_p)
1466    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1467}
1468void
1469ia64_reload_gp ()
1470{
1471  rtx tmp;
1472
1473  if (current_frame_info.reg_save_gp)
1474    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1475  else
1476    {
1477      HOST_WIDE_INT offset;
1478
1479      offset = (current_frame_info.spill_cfa_off
1480	        + current_frame_info.spill_size);
1481      if (frame_pointer_needed)
1482        {
1483          tmp = hard_frame_pointer_rtx;
1484          offset = -offset;
1485        }
1486      else
1487        {
1488          tmp = stack_pointer_rtx;
1489          offset = current_frame_info.total_size - offset;
1490        }
1491
1492      if (CONST_OK_FOR_I (offset))
1493        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1494			       tmp, GEN_INT (offset)));
1495      else
1496        {
1497          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1498          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1499			         pic_offset_table_rtx, tmp));
1500        }
1501
1502      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1503    }
1504
1505  emit_move_insn (pic_offset_table_rtx, tmp);
1506}
1507
1508void
1509ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1510		 noreturn_p, sibcall_p)
1511     rtx retval, addr, retaddr, scratch_r, scratch_b;
1512     int noreturn_p, sibcall_p;
1513{
1514  rtx insn;
1515  bool is_desc = false;
1516
1517  /* If we find we're calling through a register, then we're actually
1518     calling through a descriptor, so load up the values.  */
1519  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1520    {
1521      rtx tmp;
1522      bool addr_dead_p;
1523
1524      /* ??? We are currently constrained to *not* use peep2, because
1525	 we can legitimiately change the global lifetime of the GP
1526	 (in the form of killing where previously live).  This is
1527	 because a call through a descriptor doesn't use the previous
1528	 value of the GP, while a direct call does, and we do not
1529	 commit to either form until the split here.
1530
1531	 That said, this means that we lack precise life info for
1532	 whether ADDR is dead after this call.  This is not terribly
1533	 important, since we can fix things up essentially for free
1534	 with the POST_DEC below, but it's nice to not use it when we
1535	 can immediately tell it's not necessary.  */
1536      addr_dead_p = ((noreturn_p || sibcall_p
1537		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1538					    REGNO (addr)))
1539		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1540
1541      /* Load the code address into scratch_b.  */
1542      tmp = gen_rtx_POST_INC (Pmode, addr);
1543      tmp = gen_rtx_MEM (Pmode, tmp);
1544      emit_move_insn (scratch_r, tmp);
1545      emit_move_insn (scratch_b, scratch_r);
1546
1547      /* Load the GP address.  If ADDR is not dead here, then we must
1548	 revert the change made above via the POST_INCREMENT.  */
1549      if (!addr_dead_p)
1550	tmp = gen_rtx_POST_DEC (Pmode, addr);
1551      else
1552	tmp = addr;
1553      tmp = gen_rtx_MEM (Pmode, tmp);
1554      emit_move_insn (pic_offset_table_rtx, tmp);
1555
1556      is_desc = true;
1557      addr = scratch_b;
1558    }
1559
1560  if (sibcall_p)
1561    insn = gen_sibcall_nogp (addr);
1562  else if (retval)
1563    insn = gen_call_value_nogp (retval, addr, retaddr);
1564  else
1565    insn = gen_call_nogp (addr, retaddr);
1566  emit_call_insn (insn);
1567
1568  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1569    ia64_reload_gp ();
1570}
1571
1572/* Begin the assembly file.  */
1573
1574void
1575emit_safe_across_calls (f)
1576     FILE *f;
1577{
1578  unsigned int rs, re;
1579  int out_state;
1580
1581  rs = 1;
1582  out_state = 0;
1583  while (1)
1584    {
1585      while (rs < 64 && call_used_regs[PR_REG (rs)])
1586	rs++;
1587      if (rs >= 64)
1588	break;
1589      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1590	continue;
1591      if (out_state == 0)
1592	{
1593	  fputs ("\t.pred.safe_across_calls ", f);
1594	  out_state = 1;
1595	}
1596      else
1597	fputc (',', f);
1598      if (re == rs + 1)
1599	fprintf (f, "p%u", rs);
1600      else
1601	fprintf (f, "p%u-p%u", rs, re - 1);
1602      rs = re + 1;
1603    }
1604  if (out_state)
1605    fputc ('\n', f);
1606}
1607
1608/* Helper function for ia64_compute_frame_size: find an appropriate general
1609   register to spill some special register to.  SPECIAL_SPILL_MASK contains
1610   bits in GR0 to GR31 that have already been allocated by this routine.
1611   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
1612
1613static int
1614find_gr_spill (try_locals)
1615     int try_locals;
1616{
1617  int regno;
1618
1619  /* If this is a leaf function, first try an otherwise unused
1620     call-clobbered register.  */
1621  if (current_function_is_leaf)
1622    {
1623      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1624	if (! regs_ever_live[regno]
1625	    && call_used_regs[regno]
1626	    && ! fixed_regs[regno]
1627	    && ! global_regs[regno]
1628	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1629	  {
1630	    current_frame_info.gr_used_mask |= 1 << regno;
1631	    return regno;
1632	  }
1633    }
1634
1635  if (try_locals)
1636    {
1637      regno = current_frame_info.n_local_regs;
1638      /* If there is a frame pointer, then we can't use loc79, because
1639	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
1640	 reg_name switching code in ia64_expand_prologue.  */
1641      if (regno < (80 - frame_pointer_needed))
1642	{
1643	  current_frame_info.n_local_regs = regno + 1;
1644	  return LOC_REG (0) + regno;
1645	}
1646    }
1647
1648  /* Failed to find a general register to spill to.  Must use stack.  */
1649  return 0;
1650}
1651
1652/* In order to make for nice schedules, we try to allocate every temporary
1653   to a different register.  We must of course stay away from call-saved,
1654   fixed, and global registers.  We must also stay away from registers
1655   allocated in current_frame_info.gr_used_mask, since those include regs
1656   used all through the prologue.
1657
1658   Any register allocated here must be used immediately.  The idea is to
1659   aid scheduling, not to solve data flow problems.  */
1660
1661static int last_scratch_gr_reg;
1662
1663static int
1664next_scratch_gr_reg ()
1665{
1666  int i, regno;
1667
1668  for (i = 0; i < 32; ++i)
1669    {
1670      regno = (last_scratch_gr_reg + i + 1) & 31;
1671      if (call_used_regs[regno]
1672	  && ! fixed_regs[regno]
1673	  && ! global_regs[regno]
1674	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1675	{
1676	  last_scratch_gr_reg = regno;
1677	  return regno;
1678	}
1679    }
1680
1681  /* There must be _something_ available.  */
1682  abort ();
1683}
1684
1685/* Helper function for ia64_compute_frame_size, called through
1686   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
1687
1688static void
1689mark_reg_gr_used_mask (reg, data)
1690     rtx reg;
1691     void *data ATTRIBUTE_UNUSED;
1692{
1693  unsigned int regno = REGNO (reg);
1694  if (regno < 32)
1695    {
1696      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1697      for (i = 0; i < n; ++i)
1698	current_frame_info.gr_used_mask |= 1 << (regno + i);
1699    }
1700}
1701
1702/* Returns the number of bytes offset between the frame pointer and the stack
1703   pointer for the current function.  SIZE is the number of bytes of space
1704   needed for local variables.  */
1705
1706static void
1707ia64_compute_frame_size (size)
1708     HOST_WIDE_INT size;
1709{
1710  HOST_WIDE_INT total_size;
1711  HOST_WIDE_INT spill_size = 0;
1712  HOST_WIDE_INT extra_spill_size = 0;
1713  HOST_WIDE_INT pretend_args_size;
1714  HARD_REG_SET mask;
1715  int n_spilled = 0;
1716  int spilled_gr_p = 0;
1717  int spilled_fr_p = 0;
1718  unsigned int regno;
1719  int i;
1720
1721  if (current_frame_info.initialized)
1722    return;
1723
1724  memset (&current_frame_info, 0, sizeof current_frame_info);
1725  CLEAR_HARD_REG_SET (mask);
1726
1727  /* Don't allocate scratches to the return register.  */
1728  diddle_return_value (mark_reg_gr_used_mask, NULL);
1729
1730  /* Don't allocate scratches to the EH scratch registers.  */
1731  if (cfun->machine->ia64_eh_epilogue_sp)
1732    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1733  if (cfun->machine->ia64_eh_epilogue_bsp)
1734    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1735
1736  /* Find the size of the register stack frame.  We have only 80 local
1737     registers, because we reserve 8 for the inputs and 8 for the
1738     outputs.  */
1739
1740  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1741     since we'll be adjusting that down later.  */
1742  regno = LOC_REG (78) + ! frame_pointer_needed;
1743  for (; regno >= LOC_REG (0); regno--)
1744    if (regs_ever_live[regno])
1745      break;
1746  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1747
1748  /* For functions marked with the syscall_linkage attribute, we must mark
1749     all eight input registers as in use, so that locals aren't visible to
1750     the caller.  */
1751
1752  if (cfun->machine->n_varargs > 0
1753      || lookup_attribute ("syscall_linkage",
1754			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1755    current_frame_info.n_input_regs = 8;
1756  else
1757    {
1758      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1759	if (regs_ever_live[regno])
1760	  break;
1761      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1762    }
1763
1764  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1765    if (regs_ever_live[regno])
1766      break;
1767  i = regno - OUT_REG (0) + 1;
1768
1769  /* When -p profiling, we need one output register for the mcount argument.
1770     Likwise for -a profiling for the bb_init_func argument.  For -ax
1771     profiling, we need two output registers for the two bb_init_trace_func
1772     arguments.  */
1773  if (current_function_profile)
1774    i = MAX (i, 1);
1775  current_frame_info.n_output_regs = i;
1776
1777  /* ??? No rotating register support yet.  */
1778  current_frame_info.n_rotate_regs = 0;
1779
1780  /* Discover which registers need spilling, and how much room that
1781     will take.  Begin with floating point and general registers,
1782     which will always wind up on the stack.  */
1783
1784  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1785    if (regs_ever_live[regno] && ! call_used_regs[regno])
1786      {
1787	SET_HARD_REG_BIT (mask, regno);
1788	spill_size += 16;
1789	n_spilled += 1;
1790	spilled_fr_p = 1;
1791      }
1792
1793  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1794    if (regs_ever_live[regno] && ! call_used_regs[regno])
1795      {
1796	SET_HARD_REG_BIT (mask, regno);
1797	spill_size += 8;
1798	n_spilled += 1;
1799	spilled_gr_p = 1;
1800      }
1801
1802  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1803    if (regs_ever_live[regno] && ! call_used_regs[regno])
1804      {
1805	SET_HARD_REG_BIT (mask, regno);
1806	spill_size += 8;
1807	n_spilled += 1;
1808      }
1809
1810  /* Now come all special registers that might get saved in other
1811     general registers.  */
1812
1813  if (frame_pointer_needed)
1814    {
1815      current_frame_info.reg_fp = find_gr_spill (1);
1816      /* If we did not get a register, then we take LOC79.  This is guaranteed
1817	 to be free, even if regs_ever_live is already set, because this is
1818	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
1819	 as we don't count loc79 above.  */
1820      if (current_frame_info.reg_fp == 0)
1821	{
1822	  current_frame_info.reg_fp = LOC_REG (79);
1823	  current_frame_info.n_local_regs++;
1824	}
1825    }
1826
1827  if (! current_function_is_leaf)
1828    {
1829      /* Emit a save of BR0 if we call other functions.  Do this even
1830	 if this function doesn't return, as EH depends on this to be
1831	 able to unwind the stack.  */
1832      SET_HARD_REG_BIT (mask, BR_REG (0));
1833
1834      current_frame_info.reg_save_b0 = find_gr_spill (1);
1835      if (current_frame_info.reg_save_b0 == 0)
1836	{
1837	  spill_size += 8;
1838	  n_spilled += 1;
1839	}
1840
1841      /* Similarly for ar.pfs.  */
1842      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1843      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1844      if (current_frame_info.reg_save_ar_pfs == 0)
1845	{
1846	  extra_spill_size += 8;
1847	  n_spilled += 1;
1848	}
1849
1850      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
1851	 registers are clobbered, so we fall back to the stack.  */
1852      current_frame_info.reg_save_gp
1853	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1854      if (current_frame_info.reg_save_gp == 0)
1855	{
1856	  SET_HARD_REG_BIT (mask, GR_REG (1));
1857	  spill_size += 8;
1858	  n_spilled += 1;
1859	}
1860    }
1861  else
1862    {
1863      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1864	{
1865	  SET_HARD_REG_BIT (mask, BR_REG (0));
1866	  spill_size += 8;
1867	  n_spilled += 1;
1868	}
1869
1870      if (regs_ever_live[AR_PFS_REGNUM])
1871	{
1872	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1873	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1874	  if (current_frame_info.reg_save_ar_pfs == 0)
1875	    {
1876	      extra_spill_size += 8;
1877	      n_spilled += 1;
1878	    }
1879	}
1880    }
1881
1882  /* Unwind descriptor hackery: things are most efficient if we allocate
1883     consecutive GR save registers for RP, PFS, FP in that order. However,
1884     it is absolutely critical that FP get the only hard register that's
1885     guaranteed to be free, so we allocated it first.  If all three did
1886     happen to be allocated hard regs, and are consecutive, rearrange them
1887     into the preferred order now.  */
1888  if (current_frame_info.reg_fp != 0
1889      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1890      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1891    {
1892      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1893      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1894      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1895    }
1896
1897  /* See if we need to store the predicate register block.  */
1898  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1899    if (regs_ever_live[regno] && ! call_used_regs[regno])
1900      break;
1901  if (regno <= PR_REG (63))
1902    {
1903      SET_HARD_REG_BIT (mask, PR_REG (0));
1904      current_frame_info.reg_save_pr = find_gr_spill (1);
1905      if (current_frame_info.reg_save_pr == 0)
1906	{
1907	  extra_spill_size += 8;
1908	  n_spilled += 1;
1909	}
1910
1911      /* ??? Mark them all as used so that register renaming and such
1912	 are free to use them.  */
1913      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1914	regs_ever_live[regno] = 1;
1915    }
1916
1917  /* If we're forced to use st8.spill, we're forced to save and restore
1918     ar.unat as well.  The check for existing liveness allows inline asm
1919     to touch ar.unat.  */
1920  if (spilled_gr_p || cfun->machine->n_varargs
1921      || regs_ever_live[AR_UNAT_REGNUM])
1922    {
1923      regs_ever_live[AR_UNAT_REGNUM] = 1;
1924      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1925      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1926      if (current_frame_info.reg_save_ar_unat == 0)
1927	{
1928	  extra_spill_size += 8;
1929	  n_spilled += 1;
1930	}
1931    }
1932
1933  if (regs_ever_live[AR_LC_REGNUM])
1934    {
1935      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1936      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1937      if (current_frame_info.reg_save_ar_lc == 0)
1938	{
1939	  extra_spill_size += 8;
1940	  n_spilled += 1;
1941	}
1942    }
1943
1944  /* If we have an odd number of words of pretend arguments written to
1945     the stack, then the FR save area will be unaligned.  We round the
1946     size of this area up to keep things 16 byte aligned.  */
1947  if (spilled_fr_p)
1948    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1949  else
1950    pretend_args_size = current_function_pretend_args_size;
1951
1952  total_size = (spill_size + extra_spill_size + size + pretend_args_size
1953		+ current_function_outgoing_args_size);
1954  total_size = IA64_STACK_ALIGN (total_size);
1955
1956  /* We always use the 16-byte scratch area provided by the caller, but
1957     if we are a leaf function, there's no one to which we need to provide
1958     a scratch area.  */
1959  if (current_function_is_leaf)
1960    total_size = MAX (0, total_size - 16);
1961
1962  current_frame_info.total_size = total_size;
1963  current_frame_info.spill_cfa_off = pretend_args_size - 16;
1964  current_frame_info.spill_size = spill_size;
1965  current_frame_info.extra_spill_size = extra_spill_size;
1966  COPY_HARD_REG_SET (current_frame_info.mask, mask);
1967  current_frame_info.n_spilled = n_spilled;
1968  current_frame_info.initialized = reload_completed;
1969}
1970
1971/* Compute the initial difference between the specified pair of registers.  */
1972
1973HOST_WIDE_INT
1974ia64_initial_elimination_offset (from, to)
1975     int from, to;
1976{
1977  HOST_WIDE_INT offset;
1978
1979  ia64_compute_frame_size (get_frame_size ());
1980  switch (from)
1981    {
1982    case FRAME_POINTER_REGNUM:
1983      if (to == HARD_FRAME_POINTER_REGNUM)
1984	{
1985	  if (current_function_is_leaf)
1986	    offset = -current_frame_info.total_size;
1987	  else
1988	    offset = -(current_frame_info.total_size
1989		       - current_function_outgoing_args_size - 16);
1990	}
1991      else if (to == STACK_POINTER_REGNUM)
1992	{
1993	  if (current_function_is_leaf)
1994	    offset = 0;
1995	  else
1996	    offset = 16 + current_function_outgoing_args_size;
1997	}
1998      else
1999	abort ();
2000      break;
2001
2002    case ARG_POINTER_REGNUM:
2003      /* Arguments start above the 16 byte save area, unless stdarg
2004	 in which case we store through the 16 byte save area.  */
2005      if (to == HARD_FRAME_POINTER_REGNUM)
2006	offset = 16 - current_function_pretend_args_size;
2007      else if (to == STACK_POINTER_REGNUM)
2008	offset = (current_frame_info.total_size
2009		  + 16 - current_function_pretend_args_size);
2010      else
2011	abort ();
2012      break;
2013
2014    default:
2015      abort ();
2016    }
2017
2018  return offset;
2019}
2020
2021/* If there are more than a trivial number of register spills, we use
2022   two interleaved iterators so that we can get two memory references
2023   per insn group.
2024
2025   In order to simplify things in the prologue and epilogue expanders,
2026   we use helper functions to fix up the memory references after the
2027   fact with the appropriate offsets to a POST_MODIFY memory mode.
2028   The following data structure tracks the state of the two iterators
2029   while insns are being emitted.  */
2030
2031struct spill_fill_data
2032{
2033  rtx init_after;		/* point at which to emit initializations */
2034  rtx init_reg[2];		/* initial base register */
2035  rtx iter_reg[2];		/* the iterator registers */
2036  rtx *prev_addr[2];		/* address of last memory use */
2037  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2038  HOST_WIDE_INT prev_off[2];	/* last offset */
2039  int n_iter;			/* number of iterators in use */
2040  int next_iter;		/* next iterator to use */
2041  unsigned int save_gr_used_mask;
2042};
2043
2044static struct spill_fill_data spill_fill_data;
2045
2046static void
2047setup_spill_pointers (n_spills, init_reg, cfa_off)
2048     int n_spills;
2049     rtx init_reg;
2050     HOST_WIDE_INT cfa_off;
2051{
2052  int i;
2053
2054  spill_fill_data.init_after = get_last_insn ();
2055  spill_fill_data.init_reg[0] = init_reg;
2056  spill_fill_data.init_reg[1] = init_reg;
2057  spill_fill_data.prev_addr[0] = NULL;
2058  spill_fill_data.prev_addr[1] = NULL;
2059  spill_fill_data.prev_insn[0] = NULL;
2060  spill_fill_data.prev_insn[1] = NULL;
2061  spill_fill_data.prev_off[0] = cfa_off;
2062  spill_fill_data.prev_off[1] = cfa_off;
2063  spill_fill_data.next_iter = 0;
2064  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2065
2066  spill_fill_data.n_iter = 1 + (n_spills > 2);
2067  for (i = 0; i < spill_fill_data.n_iter; ++i)
2068    {
2069      int regno = next_scratch_gr_reg ();
2070      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2071      current_frame_info.gr_used_mask |= 1 << regno;
2072    }
2073}
2074
2075static void
2076finish_spill_pointers ()
2077{
2078  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2079}
2080
2081static rtx
2082spill_restore_mem (reg, cfa_off)
2083     rtx reg;
2084     HOST_WIDE_INT cfa_off;
2085{
2086  int iter = spill_fill_data.next_iter;
2087  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2088  rtx disp_rtx = GEN_INT (disp);
2089  rtx mem;
2090
2091  if (spill_fill_data.prev_addr[iter])
2092    {
2093      if (CONST_OK_FOR_N (disp))
2094	{
2095	  *spill_fill_data.prev_addr[iter]
2096	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2097				   gen_rtx_PLUS (DImode,
2098						 spill_fill_data.iter_reg[iter],
2099						 disp_rtx));
2100	  REG_NOTES (spill_fill_data.prev_insn[iter])
2101	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2102				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2103	}
2104      else
2105	{
2106	  /* ??? Could use register post_modify for loads.  */
2107	  if (! CONST_OK_FOR_I (disp))
2108	    {
2109	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2110	      emit_move_insn (tmp, disp_rtx);
2111	      disp_rtx = tmp;
2112	    }
2113	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2114				 spill_fill_data.iter_reg[iter], disp_rtx));
2115	}
2116    }
2117  /* Micro-optimization: if we've created a frame pointer, it's at
2118     CFA 0, which may allow the real iterator to be initialized lower,
2119     slightly increasing parallelism.  Also, if there are few saves
2120     it may eliminate the iterator entirely.  */
2121  else if (disp == 0
2122	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2123	   && frame_pointer_needed)
2124    {
2125      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2126      set_mem_alias_set (mem, get_varargs_alias_set ());
2127      return mem;
2128    }
2129  else
2130    {
2131      rtx seq, insn;
2132
2133      if (disp == 0)
2134	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2135			 spill_fill_data.init_reg[iter]);
2136      else
2137	{
2138	  start_sequence ();
2139
2140	  if (! CONST_OK_FOR_I (disp))
2141	    {
2142	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2143	      emit_move_insn (tmp, disp_rtx);
2144	      disp_rtx = tmp;
2145	    }
2146
2147	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2148				 spill_fill_data.init_reg[iter],
2149				 disp_rtx));
2150
2151	  seq = get_insns ();
2152	  end_sequence ();
2153	}
2154
2155      /* Careful for being the first insn in a sequence.  */
2156      if (spill_fill_data.init_after)
2157	insn = emit_insn_after (seq, spill_fill_data.init_after);
2158      else
2159	{
2160	  rtx first = get_insns ();
2161	  if (first)
2162	    insn = emit_insn_before (seq, first);
2163	  else
2164	    insn = emit_insn (seq);
2165	}
2166      spill_fill_data.init_after = insn;
2167
2168      /* If DISP is 0, we may or may not have a further adjustment
2169	 afterward.  If we do, then the load/store insn may be modified
2170	 to be a post-modify.  If we don't, then this copy may be
2171	 eliminated by copyprop_hardreg_forward, which makes this
2172	 insn garbage, which runs afoul of the sanity check in
2173	 propagate_one_insn.  So mark this insn as legal to delete.  */
2174      if (disp == 0)
2175	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2176					     REG_NOTES (insn));
2177    }
2178
2179  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2180
2181  /* ??? Not all of the spills are for varargs, but some of them are.
2182     The rest of the spills belong in an alias set of their own.  But
2183     it doesn't actually hurt to include them here.  */
2184  set_mem_alias_set (mem, get_varargs_alias_set ());
2185
2186  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2187  spill_fill_data.prev_off[iter] = cfa_off;
2188
2189  if (++iter >= spill_fill_data.n_iter)
2190    iter = 0;
2191  spill_fill_data.next_iter = iter;
2192
2193  return mem;
2194}
2195
2196static void
2197do_spill (move_fn, reg, cfa_off, frame_reg)
2198     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2199     rtx reg, frame_reg;
2200     HOST_WIDE_INT cfa_off;
2201{
2202  int iter = spill_fill_data.next_iter;
2203  rtx mem, insn;
2204
2205  mem = spill_restore_mem (reg, cfa_off);
2206  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2207  spill_fill_data.prev_insn[iter] = insn;
2208
2209  if (frame_reg)
2210    {
2211      rtx base;
2212      HOST_WIDE_INT off;
2213
2214      RTX_FRAME_RELATED_P (insn) = 1;
2215
2216      /* Don't even pretend that the unwind code can intuit its way
2217	 through a pair of interleaved post_modify iterators.  Just
2218	 provide the correct answer.  */
2219
2220      if (frame_pointer_needed)
2221	{
2222	  base = hard_frame_pointer_rtx;
2223	  off = - cfa_off;
2224	}
2225      else
2226	{
2227	  base = stack_pointer_rtx;
2228	  off = current_frame_info.total_size - cfa_off;
2229	}
2230
2231      REG_NOTES (insn)
2232	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2233		gen_rtx_SET (VOIDmode,
2234			     gen_rtx_MEM (GET_MODE (reg),
2235					  plus_constant (base, off)),
2236			     frame_reg),
2237		REG_NOTES (insn));
2238    }
2239}
2240
2241static void
2242do_restore (move_fn, reg, cfa_off)
2243     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2244     rtx reg;
2245     HOST_WIDE_INT cfa_off;
2246{
2247  int iter = spill_fill_data.next_iter;
2248  rtx insn;
2249
2250  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2251				GEN_INT (cfa_off)));
2252  spill_fill_data.prev_insn[iter] = insn;
2253}
2254
2255/* Wrapper functions that discards the CONST_INT spill offset.  These
2256   exist so that we can give gr_spill/gr_fill the offset they need and
2257   use a consistant function interface.  */
2258
2259static rtx
2260gen_movdi_x (dest, src, offset)
2261     rtx dest, src;
2262     rtx offset ATTRIBUTE_UNUSED;
2263{
2264  return gen_movdi (dest, src);
2265}
2266
2267static rtx
2268gen_fr_spill_x (dest, src, offset)
2269     rtx dest, src;
2270     rtx offset ATTRIBUTE_UNUSED;
2271{
2272  return gen_fr_spill (dest, src);
2273}
2274
2275static rtx
2276gen_fr_restore_x (dest, src, offset)
2277     rtx dest, src;
2278     rtx offset ATTRIBUTE_UNUSED;
2279{
2280  return gen_fr_restore (dest, src);
2281}
2282
2283/* Called after register allocation to add any instructions needed for the
2284   prologue.  Using a prologue insn is favored compared to putting all of the
2285   instructions in output_function_prologue(), since it allows the scheduler
2286   to intermix instructions with the saves of the caller saved registers.  In
2287   some cases, it might be necessary to emit a barrier instruction as the last
2288   insn to prevent such scheduling.
2289
2290   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2291   so that the debug info generation code can handle them properly.
2292
2293   The register save area is layed out like so:
2294   cfa+16
2295	[ varargs spill area ]
2296	[ fr register spill area ]
2297	[ br register spill area ]
2298	[ ar register spill area ]
2299	[ pr register spill area ]
2300	[ gr register spill area ] */
2301
2302/* ??? Get inefficient code when the frame size is larger than can fit in an
2303   adds instruction.  */
2304
2305void
2306ia64_expand_prologue ()
2307{
2308  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2309  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2310  rtx reg, alt_reg;
2311
2312  ia64_compute_frame_size (get_frame_size ());
2313  last_scratch_gr_reg = 15;
2314
2315  /* If there is no epilogue, then we don't need some prologue insns.
2316     We need to avoid emitting the dead prologue insns, because flow
2317     will complain about them.  */
2318  if (optimize)
2319    {
2320      edge e;
2321
2322      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2323	if ((e->flags & EDGE_FAKE) == 0
2324	    && (e->flags & EDGE_FALLTHRU) != 0)
2325	  break;
2326      epilogue_p = (e != NULL);
2327    }
2328  else
2329    epilogue_p = 1;
2330
2331  /* Set the local, input, and output register names.  We need to do this
2332     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2333     half.  If we use in/loc/out register names, then we get assembler errors
2334     in crtn.S because there is no alloc insn or regstk directive in there.  */
2335  if (! TARGET_REG_NAMES)
2336    {
2337      int inputs = current_frame_info.n_input_regs;
2338      int locals = current_frame_info.n_local_regs;
2339      int outputs = current_frame_info.n_output_regs;
2340
2341      for (i = 0; i < inputs; i++)
2342	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2343      for (i = 0; i < locals; i++)
2344	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2345      for (i = 0; i < outputs; i++)
2346	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2347    }
2348
2349  /* Set the frame pointer register name.  The regnum is logically loc79,
2350     but of course we'll not have allocated that many locals.  Rather than
2351     worrying about renumbering the existing rtxs, we adjust the name.  */
2352  /* ??? This code means that we can never use one local register when
2353     there is a frame pointer.  loc79 gets wasted in this case, as it is
2354     renamed to a register that will never be used.  See also the try_locals
2355     code in find_gr_spill.  */
2356  if (current_frame_info.reg_fp)
2357    {
2358      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2359      reg_names[HARD_FRAME_POINTER_REGNUM]
2360	= reg_names[current_frame_info.reg_fp];
2361      reg_names[current_frame_info.reg_fp] = tmp;
2362    }
2363
2364  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2365  if (current_frame_info.n_local_regs == 0
2366      && current_frame_info.n_output_regs == 0
2367      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2368      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2369    {
2370      /* If there is no alloc, but there are input registers used, then we
2371	 need a .regstk directive.  */
2372      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2373      ar_pfs_save_reg = NULL_RTX;
2374    }
2375  else
2376    {
2377      current_frame_info.need_regstk = 0;
2378
2379      if (current_frame_info.reg_save_ar_pfs)
2380	regno = current_frame_info.reg_save_ar_pfs;
2381      else
2382	regno = next_scratch_gr_reg ();
2383      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2384
2385      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2386				   GEN_INT (current_frame_info.n_input_regs),
2387				   GEN_INT (current_frame_info.n_local_regs),
2388				   GEN_INT (current_frame_info.n_output_regs),
2389				   GEN_INT (current_frame_info.n_rotate_regs)));
2390      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2391    }
2392
2393  /* Set up frame pointer, stack pointer, and spill iterators.  */
2394
2395  n_varargs = cfun->machine->n_varargs;
2396  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2397			stack_pointer_rtx, 0);
2398
2399  if (frame_pointer_needed)
2400    {
2401      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2402      RTX_FRAME_RELATED_P (insn) = 1;
2403    }
2404
2405  if (current_frame_info.total_size != 0)
2406    {
2407      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2408      rtx offset;
2409
2410      if (CONST_OK_FOR_I (- current_frame_info.total_size))
2411	offset = frame_size_rtx;
2412      else
2413	{
2414	  regno = next_scratch_gr_reg ();
2415 	  offset = gen_rtx_REG (DImode, regno);
2416	  emit_move_insn (offset, frame_size_rtx);
2417	}
2418
2419      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2420				    stack_pointer_rtx, offset));
2421
2422      if (! frame_pointer_needed)
2423	{
2424	  RTX_FRAME_RELATED_P (insn) = 1;
2425	  if (GET_CODE (offset) != CONST_INT)
2426	    {
2427	      REG_NOTES (insn)
2428		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2429			gen_rtx_SET (VOIDmode,
2430				     stack_pointer_rtx,
2431				     gen_rtx_PLUS (DImode,
2432						   stack_pointer_rtx,
2433						   frame_size_rtx)),
2434			REG_NOTES (insn));
2435	    }
2436	}
2437
2438      /* ??? At this point we must generate a magic insn that appears to
2439	 modify the stack pointer, the frame pointer, and all spill
2440	 iterators.  This would allow the most scheduling freedom.  For
2441	 now, just hard stop.  */
2442      emit_insn (gen_blockage ());
2443    }
2444
2445  /* Must copy out ar.unat before doing any integer spills.  */
2446  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2447    {
2448      if (current_frame_info.reg_save_ar_unat)
2449	ar_unat_save_reg
2450	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2451      else
2452	{
2453	  alt_regno = next_scratch_gr_reg ();
2454	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2455	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2456	}
2457
2458      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2459      insn = emit_move_insn (ar_unat_save_reg, reg);
2460      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2461
2462      /* Even if we're not going to generate an epilogue, we still
2463	 need to save the register so that EH works.  */
2464      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2465	emit_insn (gen_prologue_use (ar_unat_save_reg));
2466    }
2467  else
2468    ar_unat_save_reg = NULL_RTX;
2469
2470  /* Spill all varargs registers.  Do this before spilling any GR registers,
2471     since we want the UNAT bits for the GR registers to override the UNAT
2472     bits from varargs, which we don't care about.  */
2473
2474  cfa_off = -16;
2475  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2476    {
2477      reg = gen_rtx_REG (DImode, regno);
2478      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2479    }
2480
2481  /* Locate the bottom of the register save area.  */
2482  cfa_off = (current_frame_info.spill_cfa_off
2483	     + current_frame_info.spill_size
2484	     + current_frame_info.extra_spill_size);
2485
2486  /* Save the predicate register block either in a register or in memory.  */
2487  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2488    {
2489      reg = gen_rtx_REG (DImode, PR_REG (0));
2490      if (current_frame_info.reg_save_pr != 0)
2491	{
2492	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2493	  insn = emit_move_insn (alt_reg, reg);
2494
2495	  /* ??? Denote pr spill/fill by a DImode move that modifies all
2496	     64 hard registers.  */
2497	  RTX_FRAME_RELATED_P (insn) = 1;
2498	  REG_NOTES (insn)
2499	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2500			gen_rtx_SET (VOIDmode, alt_reg, reg),
2501			REG_NOTES (insn));
2502
2503	  /* Even if we're not going to generate an epilogue, we still
2504	     need to save the register so that EH works.  */
2505	  if (! epilogue_p)
2506	    emit_insn (gen_prologue_use (alt_reg));
2507	}
2508      else
2509	{
2510	  alt_regno = next_scratch_gr_reg ();
2511	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2512	  insn = emit_move_insn (alt_reg, reg);
2513	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2514	  cfa_off -= 8;
2515	}
2516    }
2517
2518  /* Handle AR regs in numerical order.  All of them get special handling.  */
2519  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2520      && current_frame_info.reg_save_ar_unat == 0)
2521    {
2522      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2523      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2524      cfa_off -= 8;
2525    }
2526
2527  /* The alloc insn already copied ar.pfs into a general register.  The
2528     only thing we have to do now is copy that register to a stack slot
2529     if we'd not allocated a local register for the job.  */
2530  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2531      && current_frame_info.reg_save_ar_pfs == 0)
2532    {
2533      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2534      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2535      cfa_off -= 8;
2536    }
2537
2538  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2539    {
2540      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2541      if (current_frame_info.reg_save_ar_lc != 0)
2542	{
2543	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2544	  insn = emit_move_insn (alt_reg, reg);
2545	  RTX_FRAME_RELATED_P (insn) = 1;
2546
2547	  /* Even if we're not going to generate an epilogue, we still
2548	     need to save the register so that EH works.  */
2549	  if (! epilogue_p)
2550	    emit_insn (gen_prologue_use (alt_reg));
2551	}
2552      else
2553	{
2554	  alt_regno = next_scratch_gr_reg ();
2555	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2556	  emit_move_insn (alt_reg, reg);
2557	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2558	  cfa_off -= 8;
2559	}
2560    }
2561
2562  if (current_frame_info.reg_save_gp)
2563    {
2564      insn = emit_move_insn (gen_rtx_REG (DImode,
2565					  current_frame_info.reg_save_gp),
2566			     pic_offset_table_rtx);
2567      /* We don't know for sure yet if this is actually needed, since
2568	 we've not split the PIC call patterns.  If all of the calls
2569	 are indirect, and not followed by any uses of the gp, then
2570	 this save is dead.  Allow it to go away.  */
2571      REG_NOTES (insn)
2572	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2573    }
2574
2575  /* We should now be at the base of the gr/br/fr spill area.  */
2576  if (cfa_off != (current_frame_info.spill_cfa_off
2577		  + current_frame_info.spill_size))
2578    abort ();
2579
2580  /* Spill all general registers.  */
2581  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2582    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2583      {
2584	reg = gen_rtx_REG (DImode, regno);
2585	do_spill (gen_gr_spill, reg, cfa_off, reg);
2586	cfa_off -= 8;
2587      }
2588
2589  /* Handle BR0 specially -- it may be getting stored permanently in
2590     some GR register.  */
2591  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2592    {
2593      reg = gen_rtx_REG (DImode, BR_REG (0));
2594      if (current_frame_info.reg_save_b0 != 0)
2595	{
2596	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2597	  insn = emit_move_insn (alt_reg, reg);
2598	  RTX_FRAME_RELATED_P (insn) = 1;
2599
2600	  /* Even if we're not going to generate an epilogue, we still
2601	     need to save the register so that EH works.  */
2602	  if (! epilogue_p)
2603	    emit_insn (gen_prologue_use (alt_reg));
2604	}
2605      else
2606	{
2607	  alt_regno = next_scratch_gr_reg ();
2608	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2609	  emit_move_insn (alt_reg, reg);
2610	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2611	  cfa_off -= 8;
2612	}
2613    }
2614
2615  /* Spill the rest of the BR registers.  */
2616  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2617    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2618      {
2619	alt_regno = next_scratch_gr_reg ();
2620	alt_reg = gen_rtx_REG (DImode, alt_regno);
2621	reg = gen_rtx_REG (DImode, regno);
2622	emit_move_insn (alt_reg, reg);
2623	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2624	cfa_off -= 8;
2625      }
2626
2627  /* Align the frame and spill all FR registers.  */
2628  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2629    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2630      {
2631        if (cfa_off & 15)
2632	  abort ();
2633	reg = gen_rtx_REG (TFmode, regno);
2634	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2635	cfa_off -= 16;
2636      }
2637
2638  if (cfa_off != current_frame_info.spill_cfa_off)
2639    abort ();
2640
2641  finish_spill_pointers ();
2642}
2643
2644/* Called after register allocation to add any instructions needed for the
2645   epilogue.  Using an epilogue insn is favored compared to putting all of the
2646   instructions in output_function_prologue(), since it allows the scheduler
2647   to intermix instructions with the saves of the caller saved registers.  In
2648   some cases, it might be necessary to emit a barrier instruction as the last
2649   insn to prevent such scheduling.  */
2650
2651void
2652ia64_expand_epilogue (sibcall_p)
2653     int sibcall_p;
2654{
2655  rtx insn, reg, alt_reg, ar_unat_save_reg;
2656  int regno, alt_regno, cfa_off;
2657
2658  ia64_compute_frame_size (get_frame_size ());
2659
2660  /* If there is a frame pointer, then we use it instead of the stack
2661     pointer, so that the stack pointer does not need to be valid when
2662     the epilogue starts.  See EXIT_IGNORE_STACK.  */
2663  if (frame_pointer_needed)
2664    setup_spill_pointers (current_frame_info.n_spilled,
2665			  hard_frame_pointer_rtx, 0);
2666  else
2667    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2668			  current_frame_info.total_size);
2669
2670  if (current_frame_info.total_size != 0)
2671    {
2672      /* ??? At this point we must generate a magic insn that appears to
2673         modify the spill iterators and the frame pointer.  This would
2674	 allow the most scheduling freedom.  For now, just hard stop.  */
2675      emit_insn (gen_blockage ());
2676    }
2677
2678  /* Locate the bottom of the register save area.  */
2679  cfa_off = (current_frame_info.spill_cfa_off
2680	     + current_frame_info.spill_size
2681	     + current_frame_info.extra_spill_size);
2682
2683  /* Restore the predicate registers.  */
2684  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2685    {
2686      if (current_frame_info.reg_save_pr != 0)
2687	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2688      else
2689	{
2690	  alt_regno = next_scratch_gr_reg ();
2691	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2692	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2693	  cfa_off -= 8;
2694	}
2695      reg = gen_rtx_REG (DImode, PR_REG (0));
2696      emit_move_insn (reg, alt_reg);
2697    }
2698
2699  /* Restore the application registers.  */
2700
2701  /* Load the saved unat from the stack, but do not restore it until
2702     after the GRs have been restored.  */
2703  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2704    {
2705      if (current_frame_info.reg_save_ar_unat != 0)
2706        ar_unat_save_reg
2707	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2708      else
2709	{
2710	  alt_regno = next_scratch_gr_reg ();
2711	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2712	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2713	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2714	  cfa_off -= 8;
2715	}
2716    }
2717  else
2718    ar_unat_save_reg = NULL_RTX;
2719
2720  if (current_frame_info.reg_save_ar_pfs != 0)
2721    {
2722      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2723      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2724      emit_move_insn (reg, alt_reg);
2725    }
2726  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2727    {
2728      alt_regno = next_scratch_gr_reg ();
2729      alt_reg = gen_rtx_REG (DImode, alt_regno);
2730      do_restore (gen_movdi_x, alt_reg, cfa_off);
2731      cfa_off -= 8;
2732      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2733      emit_move_insn (reg, alt_reg);
2734    }
2735
2736  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2737    {
2738      if (current_frame_info.reg_save_ar_lc != 0)
2739	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2740      else
2741	{
2742	  alt_regno = next_scratch_gr_reg ();
2743	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2744	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2745	  cfa_off -= 8;
2746	}
2747      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2748      emit_move_insn (reg, alt_reg);
2749    }
2750
2751  /* We should now be at the base of the gr/br/fr spill area.  */
2752  if (cfa_off != (current_frame_info.spill_cfa_off
2753		  + current_frame_info.spill_size))
2754    abort ();
2755
2756  /* The GP may be stored on the stack in the prologue, but it's
2757     never restored in the epilogue.  Skip the stack slot.  */
2758  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2759    cfa_off -= 8;
2760
2761  /* Restore all general registers.  */
2762  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2763    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2764      {
2765	reg = gen_rtx_REG (DImode, regno);
2766	do_restore (gen_gr_restore, reg, cfa_off);
2767	cfa_off -= 8;
2768      }
2769
2770  /* Restore the branch registers.  Handle B0 specially, as it may
2771     have gotten stored in some GR register.  */
2772  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2773    {
2774      if (current_frame_info.reg_save_b0 != 0)
2775	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2776      else
2777	{
2778	  alt_regno = next_scratch_gr_reg ();
2779	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2780	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2781	  cfa_off -= 8;
2782	}
2783      reg = gen_rtx_REG (DImode, BR_REG (0));
2784      emit_move_insn (reg, alt_reg);
2785    }
2786
2787  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2788    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2789      {
2790	alt_regno = next_scratch_gr_reg ();
2791	alt_reg = gen_rtx_REG (DImode, alt_regno);
2792	do_restore (gen_movdi_x, alt_reg, cfa_off);
2793	cfa_off -= 8;
2794	reg = gen_rtx_REG (DImode, regno);
2795	emit_move_insn (reg, alt_reg);
2796      }
2797
2798  /* Restore floating point registers.  */
2799  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2800    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2801      {
2802        if (cfa_off & 15)
2803	  abort ();
2804	reg = gen_rtx_REG (TFmode, regno);
2805	do_restore (gen_fr_restore_x, reg, cfa_off);
2806	cfa_off -= 16;
2807      }
2808
2809  /* Restore ar.unat for real.  */
2810  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2811    {
2812      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2813      emit_move_insn (reg, ar_unat_save_reg);
2814    }
2815
2816  if (cfa_off != current_frame_info.spill_cfa_off)
2817    abort ();
2818
2819  finish_spill_pointers ();
2820
2821  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2822    {
2823      /* ??? At this point we must generate a magic insn that appears to
2824         modify the spill iterators, the stack pointer, and the frame
2825	 pointer.  This would allow the most scheduling freedom.  For now,
2826	 just hard stop.  */
2827      emit_insn (gen_blockage ());
2828    }
2829
2830  if (cfun->machine->ia64_eh_epilogue_sp)
2831    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2832  else if (frame_pointer_needed)
2833    {
2834      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2835      RTX_FRAME_RELATED_P (insn) = 1;
2836    }
2837  else if (current_frame_info.total_size)
2838    {
2839      rtx offset, frame_size_rtx;
2840
2841      frame_size_rtx = GEN_INT (current_frame_info.total_size);
2842      if (CONST_OK_FOR_I (current_frame_info.total_size))
2843	offset = frame_size_rtx;
2844      else
2845	{
2846	  regno = next_scratch_gr_reg ();
2847	  offset = gen_rtx_REG (DImode, regno);
2848	  emit_move_insn (offset, frame_size_rtx);
2849	}
2850
2851      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2852				    offset));
2853
2854      RTX_FRAME_RELATED_P (insn) = 1;
2855      if (GET_CODE (offset) != CONST_INT)
2856	{
2857	  REG_NOTES (insn)
2858	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2859			gen_rtx_SET (VOIDmode,
2860				     stack_pointer_rtx,
2861				     gen_rtx_PLUS (DImode,
2862						   stack_pointer_rtx,
2863						   frame_size_rtx)),
2864			REG_NOTES (insn));
2865	}
2866    }
2867
2868  if (cfun->machine->ia64_eh_epilogue_bsp)
2869    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2870
2871  if (! sibcall_p)
2872    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2873  else
2874    {
2875      int fp = GR_REG (2);
2876      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2877	 first available call clobbered register.  If there was a frame_pointer
2878	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2879	 so we have to make sure we're using the string "r2" when emitting
2880	 the register name for the assmbler.  */
2881      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2882	fp = HARD_FRAME_POINTER_REGNUM;
2883
2884      /* We must emit an alloc to force the input registers to become output
2885	 registers.  Otherwise, if the callee tries to pass its parameters
2886	 through to another call without an intervening alloc, then these
2887	 values get lost.  */
2888      /* ??? We don't need to preserve all input registers.  We only need to
2889	 preserve those input registers used as arguments to the sibling call.
2890	 It is unclear how to compute that number here.  */
2891      if (current_frame_info.n_input_regs != 0)
2892	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2893			      GEN_INT (0), GEN_INT (0),
2894			      GEN_INT (current_frame_info.n_input_regs),
2895			      GEN_INT (0)));
2896    }
2897}
2898
2899/* Return 1 if br.ret can do all the work required to return from a
2900   function.  */
2901
2902int
2903ia64_direct_return ()
2904{
2905  if (reload_completed && ! frame_pointer_needed)
2906    {
2907      ia64_compute_frame_size (get_frame_size ());
2908
2909      return (current_frame_info.total_size == 0
2910	      && current_frame_info.n_spilled == 0
2911	      && current_frame_info.reg_save_b0 == 0
2912	      && current_frame_info.reg_save_pr == 0
2913	      && current_frame_info.reg_save_ar_pfs == 0
2914	      && current_frame_info.reg_save_ar_unat == 0
2915	      && current_frame_info.reg_save_ar_lc == 0);
2916    }
2917  return 0;
2918}
2919
2920/* Return the magic cookie that we use to hold the return address
2921   during early compilation.  */
2922
2923rtx
2924ia64_return_addr_rtx (count, frame)
2925     HOST_WIDE_INT count;
2926     rtx frame ATTRIBUTE_UNUSED;
2927{
2928  if (count != 0)
2929    return NULL;
2930  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2931}
2932
2933/* Split this value after reload, now that we know where the return
2934   address is saved.  */
2935
2936void
2937ia64_split_return_addr_rtx (dest)
2938     rtx dest;
2939{
2940  rtx src;
2941
2942  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2943    {
2944      if (current_frame_info.reg_save_b0 != 0)
2945	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2946      else
2947	{
2948	  HOST_WIDE_INT off;
2949	  unsigned int regno;
2950
2951	  /* Compute offset from CFA for BR0.  */
2952	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
2953	  off = (current_frame_info.spill_cfa_off
2954		 + current_frame_info.spill_size);
2955	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2956	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2957	      off -= 8;
2958
2959	  /* Convert CFA offset to a register based offset.  */
2960	  if (frame_pointer_needed)
2961	    src = hard_frame_pointer_rtx;
2962	  else
2963	    {
2964	      src = stack_pointer_rtx;
2965	      off += current_frame_info.total_size;
2966	    }
2967
2968	  /* Load address into scratch register.  */
2969	  if (CONST_OK_FOR_I (off))
2970	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2971	  else
2972	    {
2973	      emit_move_insn (dest, GEN_INT (off));
2974	      emit_insn (gen_adddi3 (dest, src, dest));
2975	    }
2976
2977	  src = gen_rtx_MEM (Pmode, dest);
2978	}
2979    }
2980  else
2981    src = gen_rtx_REG (DImode, BR_REG (0));
2982
2983  emit_move_insn (dest, src);
2984}
2985
2986int
2987ia64_hard_regno_rename_ok (from, to)
2988     int from;
2989     int to;
2990{
2991  /* Don't clobber any of the registers we reserved for the prologue.  */
2992  if (to == current_frame_info.reg_fp
2993      || to == current_frame_info.reg_save_b0
2994      || to == current_frame_info.reg_save_pr
2995      || to == current_frame_info.reg_save_ar_pfs
2996      || to == current_frame_info.reg_save_ar_unat
2997      || to == current_frame_info.reg_save_ar_lc)
2998    return 0;
2999
3000  if (from == current_frame_info.reg_fp
3001      || from == current_frame_info.reg_save_b0
3002      || from == current_frame_info.reg_save_pr
3003      || from == current_frame_info.reg_save_ar_pfs
3004      || from == current_frame_info.reg_save_ar_unat
3005      || from == current_frame_info.reg_save_ar_lc)
3006    return 0;
3007
3008  /* Don't use output registers outside the register frame.  */
3009  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3010    return 0;
3011
3012  /* Retain even/oddness on predicate register pairs.  */
3013  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3014    return (from & 1) == (to & 1);
3015
3016  return 1;
3017}
3018
3019/* Target hook for assembling integer objects.  Handle word-sized
3020   aligned objects and detect the cases when @fptr is needed.  */
3021
3022static bool
3023ia64_assemble_integer (x, size, aligned_p)
3024     rtx x;
3025     unsigned int size;
3026     int aligned_p;
3027{
3028  if (size == (TARGET_ILP32 ? 4 : 8)
3029      && aligned_p
3030      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3031      && GET_CODE (x) == SYMBOL_REF
3032      && SYMBOL_REF_FLAG (x))
3033    {
3034      if (TARGET_ILP32)
3035	fputs ("\tdata4\t@fptr(", asm_out_file);
3036      else
3037	fputs ("\tdata8\t@fptr(", asm_out_file);
3038      output_addr_const (asm_out_file, x);
3039      fputs (")\n", asm_out_file);
3040      return true;
3041    }
3042  return default_assemble_integer (x, size, aligned_p);
3043}
3044
3045/* Emit the function prologue.  */
3046
3047static void
3048ia64_output_function_prologue (file, size)
3049     FILE *file;
3050     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3051{
3052  int mask, grsave, grsave_prev;
3053
3054  if (current_frame_info.need_regstk)
3055    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3056	     current_frame_info.n_input_regs,
3057	     current_frame_info.n_local_regs,
3058	     current_frame_info.n_output_regs,
3059	     current_frame_info.n_rotate_regs);
3060
3061  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3062    return;
3063
3064  /* Emit the .prologue directive.  */
3065
3066  mask = 0;
3067  grsave = grsave_prev = 0;
3068  if (current_frame_info.reg_save_b0 != 0)
3069    {
3070      mask |= 8;
3071      grsave = grsave_prev = current_frame_info.reg_save_b0;
3072    }
3073  if (current_frame_info.reg_save_ar_pfs != 0
3074      && (grsave_prev == 0
3075	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3076    {
3077      mask |= 4;
3078      if (grsave_prev == 0)
3079	grsave = current_frame_info.reg_save_ar_pfs;
3080      grsave_prev = current_frame_info.reg_save_ar_pfs;
3081    }
3082  if (current_frame_info.reg_fp != 0
3083      && (grsave_prev == 0
3084	  || current_frame_info.reg_fp == grsave_prev + 1))
3085    {
3086      mask |= 2;
3087      if (grsave_prev == 0)
3088	grsave = HARD_FRAME_POINTER_REGNUM;
3089      grsave_prev = current_frame_info.reg_fp;
3090    }
3091  if (current_frame_info.reg_save_pr != 0
3092      && (grsave_prev == 0
3093	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3094    {
3095      mask |= 1;
3096      if (grsave_prev == 0)
3097	grsave = current_frame_info.reg_save_pr;
3098    }
3099
3100  if (mask)
3101    fprintf (file, "\t.prologue %d, %d\n", mask,
3102	     ia64_dbx_register_number (grsave));
3103  else
3104    fputs ("\t.prologue\n", file);
3105
3106  /* Emit a .spill directive, if necessary, to relocate the base of
3107     the register spill area.  */
3108  if (current_frame_info.spill_cfa_off != -16)
3109    fprintf (file, "\t.spill %ld\n",
3110	     (long) (current_frame_info.spill_cfa_off
3111		     + current_frame_info.spill_size));
3112}
3113
3114/* Emit the .body directive at the scheduled end of the prologue.  */
3115
3116static void
3117ia64_output_function_end_prologue (file)
3118     FILE *file;
3119{
3120  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3121    return;
3122
3123  fputs ("\t.body\n", file);
3124}
3125
3126/* Emit the function epilogue.  */
3127
3128static void
3129ia64_output_function_epilogue (file, size)
3130     FILE *file ATTRIBUTE_UNUSED;
3131     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3132{
3133  int i;
3134
3135  if (current_frame_info.reg_fp)
3136    {
3137      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3138      reg_names[HARD_FRAME_POINTER_REGNUM]
3139	= reg_names[current_frame_info.reg_fp];
3140      reg_names[current_frame_info.reg_fp] = tmp;
3141    }
3142  if (! TARGET_REG_NAMES)
3143    {
3144      for (i = 0; i < current_frame_info.n_input_regs; i++)
3145	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3146      for (i = 0; i < current_frame_info.n_local_regs; i++)
3147	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3148      for (i = 0; i < current_frame_info.n_output_regs; i++)
3149	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3150    }
3151
3152  current_frame_info.initialized = 0;
3153}
3154
3155int
3156ia64_dbx_register_number (regno)
3157     int regno;
3158{
3159  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3160     from its home at loc79 to something inside the register frame.  We
3161     must perform the same renumbering here for the debug info.  */
3162  if (current_frame_info.reg_fp)
3163    {
3164      if (regno == HARD_FRAME_POINTER_REGNUM)
3165	regno = current_frame_info.reg_fp;
3166      else if (regno == current_frame_info.reg_fp)
3167	regno = HARD_FRAME_POINTER_REGNUM;
3168    }
3169
3170  if (IN_REGNO_P (regno))
3171    return 32 + regno - IN_REG (0);
3172  else if (LOC_REGNO_P (regno))
3173    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3174  else if (OUT_REGNO_P (regno))
3175    return (32 + current_frame_info.n_input_regs
3176	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3177  else
3178    return regno;
3179}
3180
3181void
3182ia64_initialize_trampoline (addr, fnaddr, static_chain)
3183     rtx addr, fnaddr, static_chain;
3184{
3185  rtx addr_reg, eight = GEN_INT (8);
3186
3187  /* Load up our iterator.  */
3188  addr_reg = gen_reg_rtx (Pmode);
3189  emit_move_insn (addr_reg, addr);
3190
3191  /* The first two words are the fake descriptor:
3192     __ia64_trampoline, ADDR+16.  */
3193  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3194		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3195  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3196
3197  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3198		  copy_to_reg (plus_constant (addr, 16)));
3199  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3200
3201  /* The third word is the target descriptor.  */
3202  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3203  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3204
3205  /* The fourth word is the static chain.  */
3206  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3207}
3208
3209/* Do any needed setup for a variadic function.  CUM has not been updated
3210   for the last named argument which has type TYPE and mode MODE.
3211
3212   We generate the actual spill instructions during prologue generation.  */
3213
3214void
3215ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3216     CUMULATIVE_ARGS cum;
3217     int             int_mode;
3218     tree            type;
3219     int *           pretend_size;
3220     int	     second_time ATTRIBUTE_UNUSED;
3221{
3222  /* Skip the current argument.  */
3223  ia64_function_arg_advance (&cum, int_mode, type, 1);
3224
3225  if (cum.words < MAX_ARGUMENT_SLOTS)
3226    {
3227      int n = MAX_ARGUMENT_SLOTS - cum.words;
3228      *pretend_size = n * UNITS_PER_WORD;
3229      cfun->machine->n_varargs = n;
3230    }
3231}
3232
3233/* Check whether TYPE is a homogeneous floating point aggregate.  If
3234   it is, return the mode of the floating point type that appears
3235   in all leafs.  If it is not, return VOIDmode.
3236
3237   An aggregate is a homogeneous floating point aggregate is if all
3238   fields/elements in it have the same floating point type (e.g,
3239   SFmode).  128-bit quad-precision floats are excluded.  */
3240
3241static enum machine_mode
3242hfa_element_mode (type, nested)
3243     tree type;
3244     int nested;
3245{
3246  enum machine_mode element_mode = VOIDmode;
3247  enum machine_mode mode;
3248  enum tree_code code = TREE_CODE (type);
3249  int know_element_mode = 0;
3250  tree t;
3251
3252  switch (code)
3253    {
3254    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3255    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3256    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3257    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
3258    case FUNCTION_TYPE:
3259      return VOIDmode;
3260
3261      /* Fortran complex types are supposed to be HFAs, so we need to handle
3262	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3263	 types though.  */
3264    case COMPLEX_TYPE:
3265      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3266	  && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3267	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3268			      * BITS_PER_UNIT, MODE_FLOAT, 0);
3269      else
3270	return VOIDmode;
3271
3272    case REAL_TYPE:
3273      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3274	 mode if this is contained within an aggregate.  */
3275      if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3276	return TYPE_MODE (type);
3277      else
3278	return VOIDmode;
3279
3280    case ARRAY_TYPE:
3281      return hfa_element_mode (TREE_TYPE (type), 1);
3282
3283    case RECORD_TYPE:
3284    case UNION_TYPE:
3285    case QUAL_UNION_TYPE:
3286      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3287	{
3288	  if (TREE_CODE (t) != FIELD_DECL)
3289	    continue;
3290
3291	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3292	  if (know_element_mode)
3293	    {
3294	      if (mode != element_mode)
3295		return VOIDmode;
3296	    }
3297	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3298	    return VOIDmode;
3299	  else
3300	    {
3301	      know_element_mode = 1;
3302	      element_mode = mode;
3303	    }
3304	}
3305      return element_mode;
3306
3307    default:
3308      /* If we reach here, we probably have some front-end specific type
3309	 that the backend doesn't know about.  This can happen via the
3310	 aggregate_value_p call in init_function_start.  All we can do is
3311	 ignore unknown tree types.  */
3312      return VOIDmode;
3313    }
3314
3315  return VOIDmode;
3316}
3317
3318/* Return rtx for register where argument is passed, or zero if it is passed
3319   on the stack.  */
3320
3321/* ??? 128-bit quad-precision floats are always passed in general
3322   registers.  */
3323
3324rtx
3325ia64_function_arg (cum, mode, type, named, incoming)
3326     CUMULATIVE_ARGS *cum;
3327     enum machine_mode mode;
3328     tree type;
3329     int named;
3330     int incoming;
3331{
3332  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3333  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3334		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3335	       / UNITS_PER_WORD);
3336  int offset = 0;
3337  enum machine_mode hfa_mode = VOIDmode;
3338
3339  /* Integer and float arguments larger than 8 bytes start at the next even
3340     boundary.  Aggregates larger than 8 bytes start at the next even boundary
3341     if the aggregate has 16 byte alignment.  Net effect is that types with
3342     alignment greater than 8 start at the next even boundary.  */
3343  /* ??? The ABI does not specify how to handle aggregates with alignment from
3344     9 to 15 bytes, or greater than 16.   We handle them all as if they had
3345     16 byte alignment.  Such aggregates can occur only if gcc extensions are
3346     used.  */
3347  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3348       : (words > 1))
3349      && (cum->words & 1))
3350    offset = 1;
3351
3352  /* If all argument slots are used, then it must go on the stack.  */
3353  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3354    return 0;
3355
3356  /* Check for and handle homogeneous FP aggregates.  */
3357  if (type)
3358    hfa_mode = hfa_element_mode (type, 0);
3359
3360  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3361     and unprototyped hfas are passed specially.  */
3362  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3363    {
3364      rtx loc[16];
3365      int i = 0;
3366      int fp_regs = cum->fp_regs;
3367      int int_regs = cum->words + offset;
3368      int hfa_size = GET_MODE_SIZE (hfa_mode);
3369      int byte_size;
3370      int args_byte_size;
3371
3372      /* If prototyped, pass it in FR regs then GR regs.
3373	 If not prototyped, pass it in both FR and GR regs.
3374
3375	 If this is an SFmode aggregate, then it is possible to run out of
3376	 FR regs while GR regs are still left.  In that case, we pass the
3377	 remaining part in the GR regs.  */
3378
3379      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3380	 of the argument, the last FP register, or the last argument slot.  */
3381
3382      byte_size = ((mode == BLKmode)
3383		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3384      args_byte_size = int_regs * UNITS_PER_WORD;
3385      offset = 0;
3386      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3387	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3388	{
3389	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3390				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3391							      + fp_regs)),
3392				      GEN_INT (offset));
3393	  offset += hfa_size;
3394	  args_byte_size += hfa_size;
3395	  fp_regs++;
3396	}
3397
3398      /* If no prototype, then the whole thing must go in GR regs.  */
3399      if (! cum->prototype)
3400	offset = 0;
3401      /* If this is an SFmode aggregate, then we might have some left over
3402	 that needs to go in GR regs.  */
3403      else if (byte_size != offset)
3404	int_regs += offset / UNITS_PER_WORD;
3405
3406      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
3407
3408      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3409	{
3410	  enum machine_mode gr_mode = DImode;
3411
3412	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
3413	     then this goes in a GR reg left adjusted/little endian, right
3414	     adjusted/big endian.  */
3415	  /* ??? Currently this is handled wrong, because 4-byte hunks are
3416	     always right adjusted/little endian.  */
3417	  if (offset & 0x4)
3418	    gr_mode = SImode;
3419	  /* If we have an even 4 byte hunk because the aggregate is a
3420	     multiple of 4 bytes in size, then this goes in a GR reg right
3421	     adjusted/little endian.  */
3422	  else if (byte_size - offset == 4)
3423	    gr_mode = SImode;
3424	  /* Complex floats need to have float mode.  */
3425	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3426	    gr_mode = hfa_mode;
3427
3428	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3429				      gen_rtx_REG (gr_mode, (basereg
3430							     + int_regs)),
3431				      GEN_INT (offset));
3432	  offset += GET_MODE_SIZE (gr_mode);
3433	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3434		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3435	}
3436
3437      /* If we ended up using just one location, just return that one loc.  */
3438      if (i == 1)
3439	return XEXP (loc[0], 0);
3440      else
3441	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3442    }
3443
3444  /* Integral and aggregates go in general registers.  If we have run out of
3445     FR registers, then FP values must also go in general registers.  This can
3446     happen when we have a SFmode HFA.  */
3447  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3448          || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3449    {
3450      int byte_size = ((mode == BLKmode)
3451                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3452      if (BYTES_BIG_ENDIAN
3453	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3454	&& byte_size < UNITS_PER_WORD
3455	&& byte_size > 0)
3456	{
3457	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3458					  gen_rtx_REG (DImode,
3459						       (basereg + cum->words
3460							+ offset)),
3461					  const0_rtx);
3462	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3463	}
3464      else
3465	return gen_rtx_REG (mode, basereg + cum->words + offset);
3466
3467    }
3468
3469  /* If there is a prototype, then FP values go in a FR register when
3470     named, and in a GR registeer when unnamed.  */
3471  else if (cum->prototype)
3472    {
3473      if (! named)
3474	return gen_rtx_REG (mode, basereg + cum->words + offset);
3475      else
3476	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3477    }
3478  /* If there is no prototype, then FP values go in both FR and GR
3479     registers.  */
3480  else
3481    {
3482      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3483				      gen_rtx_REG (mode, (FR_ARG_FIRST
3484							  + cum->fp_regs)),
3485				      const0_rtx);
3486      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3487				      gen_rtx_REG (mode,
3488						   (basereg + cum->words
3489						    + offset)),
3490				      const0_rtx);
3491
3492      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3493    }
3494}
3495
3496/* Return number of words, at the beginning of the argument, that must be
3497   put in registers.  0 is the argument is entirely in registers or entirely
3498   in memory.  */
3499
3500int
3501ia64_function_arg_partial_nregs (cum, mode, type, named)
3502     CUMULATIVE_ARGS *cum;
3503     enum machine_mode mode;
3504     tree type;
3505     int named ATTRIBUTE_UNUSED;
3506{
3507  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3508		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3509	       / UNITS_PER_WORD);
3510  int offset = 0;
3511
3512  /* Arguments with alignment larger than 8 bytes start at the next even
3513     boundary.  */
3514  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3515       : (words > 1))
3516      && (cum->words & 1))
3517    offset = 1;
3518
3519  /* If all argument slots are used, then it must go on the stack.  */
3520  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3521    return 0;
3522
3523  /* It doesn't matter whether the argument goes in FR or GR regs.  If
3524     it fits within the 8 argument slots, then it goes entirely in
3525     registers.  If it extends past the last argument slot, then the rest
3526     goes on the stack.  */
3527
3528  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3529    return 0;
3530
3531  return MAX_ARGUMENT_SLOTS - cum->words - offset;
3532}
3533
3534/* Update CUM to point after this argument.  This is patterned after
3535   ia64_function_arg.  */
3536
3537void
3538ia64_function_arg_advance (cum, mode, type, named)
3539     CUMULATIVE_ARGS *cum;
3540     enum machine_mode mode;
3541     tree type;
3542     int named;
3543{
3544  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3545		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3546	       / UNITS_PER_WORD);
3547  int offset = 0;
3548  enum machine_mode hfa_mode = VOIDmode;
3549
3550  /* If all arg slots are already full, then there is nothing to do.  */
3551  if (cum->words >= MAX_ARGUMENT_SLOTS)
3552    return;
3553
3554  /* Arguments with alignment larger than 8 bytes start at the next even
3555     boundary.  */
3556  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3557       : (words > 1))
3558      && (cum->words & 1))
3559    offset = 1;
3560
3561  cum->words += words + offset;
3562
3563  /* Check for and handle homogeneous FP aggregates.  */
3564  if (type)
3565    hfa_mode = hfa_element_mode (type, 0);
3566
3567  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3568     and unprototyped hfas are passed specially.  */
3569  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3570    {
3571      int fp_regs = cum->fp_regs;
3572      /* This is the original value of cum->words + offset.  */
3573      int int_regs = cum->words - words;
3574      int hfa_size = GET_MODE_SIZE (hfa_mode);
3575      int byte_size;
3576      int args_byte_size;
3577
3578      /* If prototyped, pass it in FR regs then GR regs.
3579	 If not prototyped, pass it in both FR and GR regs.
3580
3581	 If this is an SFmode aggregate, then it is possible to run out of
3582	 FR regs while GR regs are still left.  In that case, we pass the
3583	 remaining part in the GR regs.  */
3584
3585      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3586	 of the argument, the last FP register, or the last argument slot.  */
3587
3588      byte_size = ((mode == BLKmode)
3589		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3590      args_byte_size = int_regs * UNITS_PER_WORD;
3591      offset = 0;
3592      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3593	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3594	{
3595	  offset += hfa_size;
3596	  args_byte_size += hfa_size;
3597	  fp_regs++;
3598	}
3599
3600      cum->fp_regs = fp_regs;
3601    }
3602
3603  /* Integral and aggregates go in general registers.  If we have run out of
3604     FR registers, then FP values must also go in general registers.  This can
3605     happen when we have a SFmode HFA.  */
3606  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3607    cum->int_regs = cum->words;
3608
3609  /* If there is a prototype, then FP values go in a FR register when
3610     named, and in a GR registeer when unnamed.  */
3611  else if (cum->prototype)
3612    {
3613      if (! named)
3614	cum->int_regs = cum->words;
3615      else
3616	/* ??? Complex types should not reach here.  */
3617	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3618    }
3619  /* If there is no prototype, then FP values go in both FR and GR
3620     registers.  */
3621  else
3622    {
3623      /* ??? Complex types should not reach here.  */
3624      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3625      cum->int_regs = cum->words;
3626    }
3627}
3628
3629/* Variable sized types are passed by reference.  */
3630/* ??? At present this is a GCC extension to the IA-64 ABI.  */
3631
3632int
3633ia64_function_arg_pass_by_reference (cum, mode, type, named)
3634     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3635     enum machine_mode mode ATTRIBUTE_UNUSED;
3636     tree type;
3637     int named ATTRIBUTE_UNUSED;
3638{
3639  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3640}
3641
3642
3643/* Implement va_arg.  */
3644
3645rtx
3646ia64_va_arg (valist, type)
3647     tree valist, type;
3648{
3649  tree t;
3650
3651  /* Variable sized types are passed by reference.  */
3652  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3653    {
3654      rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3655      return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3656    }
3657
3658  /* Arguments with alignment larger than 8 bytes start at the next even
3659     boundary.  */
3660  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3661    {
3662      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3663		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3664      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3665		 build_int_2 (-2 * UNITS_PER_WORD, -1));
3666      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3667      TREE_SIDE_EFFECTS (t) = 1;
3668      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3669    }
3670
3671  return std_expand_builtin_va_arg (valist, type);
3672}
3673
3674/* Return 1 if function return value returned in memory.  Return 0 if it is
3675   in a register.  */
3676
3677int
3678ia64_return_in_memory (valtype)
3679     tree valtype;
3680{
3681  enum machine_mode mode;
3682  enum machine_mode hfa_mode;
3683  HOST_WIDE_INT byte_size;
3684
3685  mode = TYPE_MODE (valtype);
3686  byte_size = GET_MODE_SIZE (mode);
3687  if (mode == BLKmode)
3688    {
3689      byte_size = int_size_in_bytes (valtype);
3690      if (byte_size < 0)
3691	return 1;
3692    }
3693
3694  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
3695
3696  hfa_mode = hfa_element_mode (valtype, 0);
3697  if (hfa_mode != VOIDmode)
3698    {
3699      int hfa_size = GET_MODE_SIZE (hfa_mode);
3700
3701      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3702	return 1;
3703      else
3704	return 0;
3705    }
3706  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3707    return 1;
3708  else
3709    return 0;
3710}
3711
3712/* Return rtx for register that holds the function return value.  */
3713
3714rtx
3715ia64_function_value (valtype, func)
3716     tree valtype;
3717     tree func ATTRIBUTE_UNUSED;
3718{
3719  enum machine_mode mode;
3720  enum machine_mode hfa_mode;
3721
3722  mode = TYPE_MODE (valtype);
3723  hfa_mode = hfa_element_mode (valtype, 0);
3724
3725  if (hfa_mode != VOIDmode)
3726    {
3727      rtx loc[8];
3728      int i;
3729      int hfa_size;
3730      int byte_size;
3731      int offset;
3732
3733      hfa_size = GET_MODE_SIZE (hfa_mode);
3734      byte_size = ((mode == BLKmode)
3735		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3736      offset = 0;
3737      for (i = 0; offset < byte_size; i++)
3738	{
3739	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3740				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3741				      GEN_INT (offset));
3742	  offset += hfa_size;
3743	}
3744
3745      if (i == 1)
3746	return XEXP (loc[0], 0);
3747      else
3748	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3749    }
3750  else if (FLOAT_TYPE_P (valtype) &&
3751           ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3752    return gen_rtx_REG (mode, FR_ARG_FIRST);
3753  else
3754    {
3755      if (BYTES_BIG_ENDIAN
3756	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3757	{
3758	  rtx loc[8];
3759	  int offset;
3760	  int bytesize;
3761	  int i;
3762
3763	  offset = 0;
3764	  bytesize = int_size_in_bytes (valtype);
3765	  for (i = 0; offset < bytesize; i++)
3766	    {
3767	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3768					  gen_rtx_REG (DImode,
3769						       GR_RET_FIRST + i),
3770					  GEN_INT (offset));
3771	      offset += UNITS_PER_WORD;
3772	    }
3773	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3774	}
3775      else
3776	return gen_rtx_REG (mode, GR_RET_FIRST);
3777    }
3778}
3779
3780/* Print a memory address as an operand to reference that memory location.  */
3781
3782/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
3783   also call this from ia64_print_operand for memory addresses.  */
3784
3785void
3786ia64_print_operand_address (stream, address)
3787     FILE * stream ATTRIBUTE_UNUSED;
3788     rtx    address ATTRIBUTE_UNUSED;
3789{
3790}
3791
3792/* Print an operand to an assembler instruction.
3793   C	Swap and print a comparison operator.
3794   D	Print an FP comparison operator.
3795   E    Print 32 - constant, for SImode shifts as extract.
3796   e    Print 64 - constant, for DImode rotates.
3797   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3798        a floating point register emitted normally.
3799   I	Invert a predicate register by adding 1.
3800   J    Select the proper predicate register for a condition.
3801   j    Select the inverse predicate register for a condition.
3802   O	Append .acq for volatile load.
3803   P	Postincrement of a MEM.
3804   Q	Append .rel for volatile store.
3805   S	Shift amount for shladd instruction.
3806   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3807	for Intel assembler.
3808   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3809	for Intel assembler.
3810   r	Print register name, or constant 0 as r0.  HP compatibility for
3811	Linux kernel.  */
3812void
3813ia64_print_operand (file, x, code)
3814     FILE * file;
3815     rtx    x;
3816     int    code;
3817{
3818  const char *str;
3819
3820  switch (code)
3821    {
3822    case 0:
3823      /* Handled below.  */
3824      break;
3825
3826    case 'C':
3827      {
3828	enum rtx_code c = swap_condition (GET_CODE (x));
3829	fputs (GET_RTX_NAME (c), file);
3830	return;
3831      }
3832
3833    case 'D':
3834      switch (GET_CODE (x))
3835	{
3836	case NE:
3837	  str = "neq";
3838	  break;
3839	case UNORDERED:
3840	  str = "unord";
3841	  break;
3842	case ORDERED:
3843	  str = "ord";
3844	  break;
3845	default:
3846	  str = GET_RTX_NAME (GET_CODE (x));
3847	  break;
3848	}
3849      fputs (str, file);
3850      return;
3851
3852    case 'E':
3853      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3854      return;
3855
3856    case 'e':
3857      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3858      return;
3859
3860    case 'F':
3861      if (x == CONST0_RTX (GET_MODE (x)))
3862	str = reg_names [FR_REG (0)];
3863      else if (x == CONST1_RTX (GET_MODE (x)))
3864	str = reg_names [FR_REG (1)];
3865      else if (GET_CODE (x) == REG)
3866	str = reg_names [REGNO (x)];
3867      else
3868	abort ();
3869      fputs (str, file);
3870      return;
3871
3872    case 'I':
3873      fputs (reg_names [REGNO (x) + 1], file);
3874      return;
3875
3876    case 'J':
3877    case 'j':
3878      {
3879	unsigned int regno = REGNO (XEXP (x, 0));
3880	if (GET_CODE (x) == EQ)
3881	  regno += 1;
3882	if (code == 'j')
3883	  regno ^= 1;
3884        fputs (reg_names [regno], file);
3885      }
3886      return;
3887
3888    case 'O':
3889      if (MEM_VOLATILE_P (x))
3890	fputs(".acq", file);
3891      return;
3892
3893    case 'P':
3894      {
3895	HOST_WIDE_INT value;
3896
3897	switch (GET_CODE (XEXP (x, 0)))
3898	  {
3899	  default:
3900	    return;
3901
3902	  case POST_MODIFY:
3903	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3904	    if (GET_CODE (x) == CONST_INT)
3905	      value = INTVAL (x);
3906	    else if (GET_CODE (x) == REG)
3907	      {
3908		fprintf (file, ", %s", reg_names[REGNO (x)]);
3909		return;
3910	      }
3911	    else
3912	      abort ();
3913	    break;
3914
3915	  case POST_INC:
3916	    value = GET_MODE_SIZE (GET_MODE (x));
3917	    break;
3918
3919	  case POST_DEC:
3920	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3921	    break;
3922	  }
3923
3924	putc (',', file);
3925	putc (' ', file);
3926	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3927	return;
3928      }
3929
3930    case 'Q':
3931      if (MEM_VOLATILE_P (x))
3932	fputs(".rel", file);
3933      return;
3934
3935    case 'S':
3936      fprintf (file, "%d", exact_log2 (INTVAL (x)));
3937      return;
3938
3939    case 'T':
3940      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3941	{
3942	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3943	  return;
3944	}
3945      break;
3946
3947    case 'U':
3948      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3949	{
3950	  const char *prefix = "0x";
3951	  if (INTVAL (x) & 0x80000000)
3952	    {
3953	      fprintf (file, "0xffffffff");
3954	      prefix = "";
3955	    }
3956	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3957	  return;
3958	}
3959      break;
3960
3961    case 'r':
3962      /* If this operand is the constant zero, write it as register zero.
3963	 Any register, zero, or CONST_INT value is OK here.  */
3964      if (GET_CODE (x) == REG)
3965	fputs (reg_names[REGNO (x)], file);
3966      else if (x == CONST0_RTX (GET_MODE (x)))
3967	fputs ("r0", file);
3968      else if (GET_CODE (x) == CONST_INT)
3969	output_addr_const (file, x);
3970      else
3971	output_operand_lossage ("invalid %%r value");
3972      return;
3973
3974    case '+':
3975      {
3976	const char *which;
3977
3978	/* For conditional branches, returns or calls, substitute
3979	   sptk, dptk, dpnt, or spnt for %s.  */
3980	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3981	if (x)
3982	  {
3983	    int pred_val = INTVAL (XEXP (x, 0));
3984
3985	    /* Guess top and bottom 10% statically predicted.  */
3986	    if (pred_val < REG_BR_PROB_BASE / 50)
3987	      which = ".spnt";
3988	    else if (pred_val < REG_BR_PROB_BASE / 2)
3989	      which = ".dpnt";
3990	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3991	      which = ".dptk";
3992	    else
3993	      which = ".sptk";
3994	  }
3995	else if (GET_CODE (current_output_insn) == CALL_INSN)
3996	  which = ".sptk";
3997	else
3998	  which = ".dptk";
3999
4000	fputs (which, file);
4001	return;
4002      }
4003
4004    case ',':
4005      x = current_insn_predicate;
4006      if (x)
4007	{
4008	  unsigned int regno = REGNO (XEXP (x, 0));
4009	  if (GET_CODE (x) == EQ)
4010	    regno += 1;
4011          fprintf (file, "(%s) ", reg_names [regno]);
4012	}
4013      return;
4014
4015    default:
4016      output_operand_lossage ("ia64_print_operand: unknown code");
4017      return;
4018    }
4019
4020  switch (GET_CODE (x))
4021    {
4022      /* This happens for the spill/restore instructions.  */
4023    case POST_INC:
4024    case POST_DEC:
4025    case POST_MODIFY:
4026      x = XEXP (x, 0);
4027      /* ... fall through ...  */
4028
4029    case REG:
4030      fputs (reg_names [REGNO (x)], file);
4031      break;
4032
4033    case MEM:
4034      {
4035	rtx addr = XEXP (x, 0);
4036	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4037	  addr = XEXP (addr, 0);
4038	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4039	break;
4040      }
4041
4042    default:
4043      output_addr_const (file, x);
4044      break;
4045    }
4046
4047  return;
4048}
4049
4050/* Calulate the cost of moving data from a register in class FROM to
4051   one in class TO, using MODE.  */
4052
4053int
4054ia64_register_move_cost (mode, from, to)
4055     enum machine_mode mode;
4056     enum reg_class from, to;
4057{
4058  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4059  if (to == ADDL_REGS)
4060    to = GR_REGS;
4061  if (from == ADDL_REGS)
4062    from = GR_REGS;
4063
4064  /* All costs are symmetric, so reduce cases by putting the
4065     lower number class as the destination.  */
4066  if (from < to)
4067    {
4068      enum reg_class tmp = to;
4069      to = from, from = tmp;
4070    }
4071
4072  /* Moving from FR<->GR in TFmode must be more expensive than 2,
4073     so that we get secondary memory reloads.  Between FR_REGS,
4074     we have to make this at least as expensive as MEMORY_MOVE_COST
4075     to avoid spectacularly poor register class preferencing.  */
4076  if (mode == TFmode)
4077    {
4078      if (to != GR_REGS || from != GR_REGS)
4079        return MEMORY_MOVE_COST (mode, to, 0);
4080      else
4081	return 3;
4082    }
4083
4084  switch (to)
4085    {
4086    case PR_REGS:
4087      /* Moving between PR registers takes two insns.  */
4088      if (from == PR_REGS)
4089	return 3;
4090      /* Moving between PR and anything but GR is impossible.  */
4091      if (from != GR_REGS)
4092	return MEMORY_MOVE_COST (mode, to, 0);
4093      break;
4094
4095    case BR_REGS:
4096      /* Moving between BR and anything but GR is impossible.  */
4097      if (from != GR_REGS && from != GR_AND_BR_REGS)
4098	return MEMORY_MOVE_COST (mode, to, 0);
4099      break;
4100
4101    case AR_I_REGS:
4102    case AR_M_REGS:
4103      /* Moving between AR and anything but GR is impossible.  */
4104      if (from != GR_REGS)
4105	return MEMORY_MOVE_COST (mode, to, 0);
4106      break;
4107
4108    case GR_REGS:
4109    case FR_REGS:
4110    case GR_AND_FR_REGS:
4111    case GR_AND_BR_REGS:
4112    case ALL_REGS:
4113      break;
4114
4115    default:
4116      abort ();
4117    }
4118
4119  return 2;
4120}
4121
4122/* This function returns the register class required for a secondary
4123   register when copying between one of the registers in CLASS, and X,
4124   using MODE.  A return value of NO_REGS means that no secondary register
4125   is required.  */
4126
4127enum reg_class
4128ia64_secondary_reload_class (class, mode, x)
4129     enum reg_class class;
4130     enum machine_mode mode ATTRIBUTE_UNUSED;
4131     rtx x;
4132{
4133  int regno = -1;
4134
4135  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4136    regno = true_regnum (x);
4137
4138  switch (class)
4139    {
4140    case BR_REGS:
4141    case AR_M_REGS:
4142    case AR_I_REGS:
4143      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4144	 interaction.  We end up with two pseudos with overlapping lifetimes
4145	 both of which are equiv to the same constant, and both which need
4146	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4147	 changes depending on the path length, which means the qty_first_reg
4148	 check in make_regs_eqv can give different answers at different times.
4149	 At some point I'll probably need a reload_indi pattern to handle
4150	 this.
4151
4152	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4153	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4154	 non-general registers for good measure.  */
4155      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4156	return GR_REGS;
4157
4158      /* This is needed if a pseudo used as a call_operand gets spilled to a
4159	 stack slot.  */
4160      if (GET_CODE (x) == MEM)
4161	return GR_REGS;
4162      break;
4163
4164    case FR_REGS:
4165      /* Need to go through general regsters to get to other class regs.  */
4166      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4167	return GR_REGS;
4168
4169      /* This can happen when a paradoxical subreg is an operand to the
4170	 muldi3 pattern.  */
4171      /* ??? This shouldn't be necessary after instruction scheduling is
4172	 enabled, because paradoxical subregs are not accepted by
4173	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4174	 stop the paradoxical subreg stupidity in the *_operand functions
4175	 in recog.c.  */
4176      if (GET_CODE (x) == MEM
4177	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4178	      || GET_MODE (x) == QImode))
4179	return GR_REGS;
4180
4181      /* This can happen because of the ior/and/etc patterns that accept FP
4182	 registers as operands.  If the third operand is a constant, then it
4183	 needs to be reloaded into a FP register.  */
4184      if (GET_CODE (x) == CONST_INT)
4185	return GR_REGS;
4186
4187      /* This can happen because of register elimination in a muldi3 insn.
4188	 E.g. `26107 * (unsigned long)&u'.  */
4189      if (GET_CODE (x) == PLUS)
4190	return GR_REGS;
4191      break;
4192
4193    case PR_REGS:
4194      /* ??? This happens if we cse/gcse a BImode value across a call,
4195	 and the function has a nonlocal goto.  This is because global
4196	 does not allocate call crossing pseudos to hard registers when
4197	 current_function_has_nonlocal_goto is true.  This is relatively
4198	 common for C++ programs that use exceptions.  To reproduce,
4199	 return NO_REGS and compile libstdc++.  */
4200      if (GET_CODE (x) == MEM)
4201	return GR_REGS;
4202
4203      /* This can happen when we take a BImode subreg of a DImode value,
4204	 and that DImode value winds up in some non-GR register.  */
4205      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4206	return GR_REGS;
4207      break;
4208
4209    case GR_REGS:
4210      /* Since we have no offsettable memory addresses, we need a temporary
4211	 to hold the address of the second word.  */
4212      if (mode == TImode)
4213	return GR_REGS;
4214      break;
4215
4216    default:
4217      break;
4218    }
4219
4220  return NO_REGS;
4221}
4222
4223/* Emit text to declare externally defined variables and functions, because
4224   the Intel assembler does not support undefined externals.  */
4225
4226void
4227ia64_asm_output_external (file, decl, name)
4228     FILE *file;
4229     tree decl;
4230     const char *name;
4231{
4232  int save_referenced;
4233
4234  /* GNU as does not need anything here, but the HP linker does need
4235     something for external functions.  */
4236
4237  if (TARGET_GNU_AS
4238      && (!TARGET_HPUX_LD
4239	  || TREE_CODE (decl) != FUNCTION_DECL
4240	  || strstr(name, "__builtin_") == name))
4241    return;
4242
4243  /* ??? The Intel assembler creates a reference that needs to be satisfied by
4244     the linker when we do this, so we need to be careful not to do this for
4245     builtin functions which have no library equivalent.  Unfortunately, we
4246     can't tell here whether or not a function will actually be called by
4247     expand_expr, so we pull in library functions even if we may not need
4248     them later.  */
4249  if (! strcmp (name, "__builtin_next_arg")
4250      || ! strcmp (name, "alloca")
4251      || ! strcmp (name, "__builtin_constant_p")
4252      || ! strcmp (name, "__builtin_args_info"))
4253    return;
4254
4255  if (TARGET_HPUX_LD)
4256    ia64_hpux_add_extern_decl (name);
4257  else
4258    {
4259      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4260         restore it.  */
4261      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4262      if (TREE_CODE (decl) == FUNCTION_DECL)
4263        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4264      (*targetm.asm_out.globalize_label) (file, name);
4265      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4266    }
4267}
4268
4269/* Parse the -mfixed-range= option string.  */
4270
4271static void
4272fix_range (const_str)
4273     const char *const_str;
4274{
4275  int i, first, last;
4276  char *str, *dash, *comma;
4277
4278  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4279     REG2 are either register names or register numbers.  The effect
4280     of this option is to mark the registers in the range from REG1 to
4281     REG2 as ``fixed'' so they won't be used by the compiler.  This is
4282     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
4283
4284  i = strlen (const_str);
4285  str = (char *) alloca (i + 1);
4286  memcpy (str, const_str, i + 1);
4287
4288  while (1)
4289    {
4290      dash = strchr (str, '-');
4291      if (!dash)
4292	{
4293	  warning ("value of -mfixed-range must have form REG1-REG2");
4294	  return;
4295	}
4296      *dash = '\0';
4297
4298      comma = strchr (dash + 1, ',');
4299      if (comma)
4300	*comma = '\0';
4301
4302      first = decode_reg_name (str);
4303      if (first < 0)
4304	{
4305	  warning ("unknown register name: %s", str);
4306	  return;
4307	}
4308
4309      last = decode_reg_name (dash + 1);
4310      if (last < 0)
4311	{
4312	  warning ("unknown register name: %s", dash + 1);
4313	  return;
4314	}
4315
4316      *dash = '-';
4317
4318      if (first > last)
4319	{
4320	  warning ("%s-%s is an empty range", str, dash + 1);
4321	  return;
4322	}
4323
4324      for (i = first; i <= last; ++i)
4325	fixed_regs[i] = call_used_regs[i] = 1;
4326
4327      if (!comma)
4328	break;
4329
4330      *comma = ',';
4331      str = comma + 1;
4332    }
4333}
4334
4335static struct machine_function *
4336ia64_init_machine_status ()
4337{
4338  return ggc_alloc_cleared (sizeof (struct machine_function));
4339}
4340
4341/* Handle TARGET_OPTIONS switches.  */
4342
4343void
4344ia64_override_options ()
4345{
4346  if (TARGET_AUTO_PIC)
4347    target_flags |= MASK_CONST_GP;
4348
4349  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4350    {
4351      warning ("cannot optimize floating point division for both latency and throughput");
4352      target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4353    }
4354
4355  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4356    {
4357      warning ("cannot optimize integer division for both latency and throughput");
4358      target_flags &= ~MASK_INLINE_INT_DIV_THR;
4359    }
4360
4361  if (ia64_fixed_range_string)
4362    fix_range (ia64_fixed_range_string);
4363
4364  if (ia64_tls_size_string)
4365    {
4366      char *end;
4367      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4368      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4369	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4370      else
4371	ia64_tls_size = tmp;
4372    }
4373
4374  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4375  flag_schedule_insns_after_reload = 0;
4376
4377  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4378
4379  init_machine_status = ia64_init_machine_status;
4380
4381  /* Tell the compiler which flavor of TFmode we're using.  */
4382  if (INTEL_EXTENDED_IEEE_FORMAT)
4383    real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4384}
4385
4386static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4387static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4388static enum attr_type ia64_safe_type PARAMS((rtx));
4389
4390static enum attr_itanium_requires_unit0
4391ia64_safe_itanium_requires_unit0 (insn)
4392     rtx insn;
4393{
4394  if (recog_memoized (insn) >= 0)
4395    return get_attr_itanium_requires_unit0 (insn);
4396  else
4397    return ITANIUM_REQUIRES_UNIT0_NO;
4398}
4399
4400static enum attr_itanium_class
4401ia64_safe_itanium_class (insn)
4402     rtx insn;
4403{
4404  if (recog_memoized (insn) >= 0)
4405    return get_attr_itanium_class (insn);
4406  else
4407    return ITANIUM_CLASS_UNKNOWN;
4408}
4409
4410static enum attr_type
4411ia64_safe_type (insn)
4412     rtx insn;
4413{
4414  if (recog_memoized (insn) >= 0)
4415    return get_attr_type (insn);
4416  else
4417    return TYPE_UNKNOWN;
4418}
4419
4420/* The following collection of routines emit instruction group stop bits as
4421   necessary to avoid dependencies.  */
4422
4423/* Need to track some additional registers as far as serialization is
4424   concerned so we can properly handle br.call and br.ret.  We could
4425   make these registers visible to gcc, but since these registers are
4426   never explicitly used in gcc generated code, it seems wasteful to
4427   do so (plus it would make the call and return patterns needlessly
4428   complex).  */
4429#define REG_GP		(GR_REG (1))
4430#define REG_RP		(BR_REG (0))
4431#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
4432/* This is used for volatile asms which may require a stop bit immediately
4433   before and after them.  */
4434#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
4435#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
4436#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
4437
4438/* For each register, we keep track of how it has been written in the
4439   current instruction group.
4440
4441   If a register is written unconditionally (no qualifying predicate),
4442   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4443
4444   If a register is written if its qualifying predicate P is true, we
4445   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
4446   may be written again by the complement of P (P^1) and when this happens,
4447   WRITE_COUNT gets set to 2.
4448
4449   The result of this is that whenever an insn attempts to write a register
4450   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4451
4452   If a predicate register is written by a floating-point insn, we set
4453   WRITTEN_BY_FP to true.
4454
4455   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4456   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
4457
4458struct reg_write_state
4459{
4460  unsigned int write_count : 2;
4461  unsigned int first_pred : 16;
4462  unsigned int written_by_fp : 1;
4463  unsigned int written_by_and : 1;
4464  unsigned int written_by_or : 1;
4465};
4466
4467/* Cumulative info for the current instruction group.  */
4468struct reg_write_state rws_sum[NUM_REGS];
4469/* Info for the current instruction.  This gets copied to rws_sum after a
4470   stop bit is emitted.  */
4471struct reg_write_state rws_insn[NUM_REGS];
4472
4473/* Indicates whether this is the first instruction after a stop bit,
4474   in which case we don't need another stop bit.  Without this, we hit
4475   the abort in ia64_variable_issue when scheduling an alloc.  */
4476static int first_instruction;
4477
4478/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4479   RTL for one instruction.  */
4480struct reg_flags
4481{
4482  unsigned int is_write : 1;	/* Is register being written?  */
4483  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
4484  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
4485  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
4486  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
4487  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
4488};
4489
4490static void rws_update PARAMS ((struct reg_write_state *, int,
4491				struct reg_flags, int));
4492static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4493static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4494static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4495static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4496static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4497static void init_insn_group_barriers PARAMS ((void));
4498static int group_barrier_needed_p PARAMS ((rtx));
4499static int safe_group_barrier_needed_p PARAMS ((rtx));
4500
4501/* Update *RWS for REGNO, which is being written by the current instruction,
4502   with predicate PRED, and associated register flags in FLAGS.  */
4503
4504static void
4505rws_update (rws, regno, flags, pred)
4506     struct reg_write_state *rws;
4507     int regno;
4508     struct reg_flags flags;
4509     int pred;
4510{
4511  if (pred)
4512    rws[regno].write_count++;
4513  else
4514    rws[regno].write_count = 2;
4515  rws[regno].written_by_fp |= flags.is_fp;
4516  /* ??? Not tracking and/or across differing predicates.  */
4517  rws[regno].written_by_and = flags.is_and;
4518  rws[regno].written_by_or = flags.is_or;
4519  rws[regno].first_pred = pred;
4520}
4521
4522/* Handle an access to register REGNO of type FLAGS using predicate register
4523   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
4524   a dependency with an earlier instruction in the same group.  */
4525
4526static int
4527rws_access_regno (regno, flags, pred)
4528     int regno;
4529     struct reg_flags flags;
4530     int pred;
4531{
4532  int need_barrier = 0;
4533
4534  if (regno >= NUM_REGS)
4535    abort ();
4536
4537  if (! PR_REGNO_P (regno))
4538    flags.is_and = flags.is_or = 0;
4539
4540  if (flags.is_write)
4541    {
4542      int write_count;
4543
4544      /* One insn writes same reg multiple times?  */
4545      if (rws_insn[regno].write_count > 0)
4546	abort ();
4547
4548      /* Update info for current instruction.  */
4549      rws_update (rws_insn, regno, flags, pred);
4550      write_count = rws_sum[regno].write_count;
4551
4552      switch (write_count)
4553	{
4554	case 0:
4555	  /* The register has not been written yet.  */
4556	  rws_update (rws_sum, regno, flags, pred);
4557	  break;
4558
4559	case 1:
4560	  /* The register has been written via a predicate.  If this is
4561	     not a complementary predicate, then we need a barrier.  */
4562	  /* ??? This assumes that P and P+1 are always complementary
4563	     predicates for P even.  */
4564	  if (flags.is_and && rws_sum[regno].written_by_and)
4565	    ;
4566	  else if (flags.is_or && rws_sum[regno].written_by_or)
4567	    ;
4568	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
4569	    need_barrier = 1;
4570	  rws_update (rws_sum, regno, flags, pred);
4571	  break;
4572
4573	case 2:
4574	  /* The register has been unconditionally written already.  We
4575	     need a barrier.  */
4576	  if (flags.is_and && rws_sum[regno].written_by_and)
4577	    ;
4578	  else if (flags.is_or && rws_sum[regno].written_by_or)
4579	    ;
4580	  else
4581	    need_barrier = 1;
4582	  rws_sum[regno].written_by_and = flags.is_and;
4583	  rws_sum[regno].written_by_or = flags.is_or;
4584	  break;
4585
4586	default:
4587	  abort ();
4588	}
4589    }
4590  else
4591    {
4592      if (flags.is_branch)
4593	{
4594	  /* Branches have several RAW exceptions that allow to avoid
4595	     barriers.  */
4596
4597	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4598	    /* RAW dependencies on branch regs are permissible as long
4599	       as the writer is a non-branch instruction.  Since we
4600	       never generate code that uses a branch register written
4601	       by a branch instruction, handling this case is
4602	       easy.  */
4603	    return 0;
4604
4605	  if (REGNO_REG_CLASS (regno) == PR_REGS
4606	      && ! rws_sum[regno].written_by_fp)
4607	    /* The predicates of a branch are available within the
4608	       same insn group as long as the predicate was written by
4609	       something other than a floating-point instruction.  */
4610	    return 0;
4611	}
4612
4613      if (flags.is_and && rws_sum[regno].written_by_and)
4614	return 0;
4615      if (flags.is_or && rws_sum[regno].written_by_or)
4616	return 0;
4617
4618      switch (rws_sum[regno].write_count)
4619	{
4620	case 0:
4621	  /* The register has not been written yet.  */
4622	  break;
4623
4624	case 1:
4625	  /* The register has been written via a predicate.  If this is
4626	     not a complementary predicate, then we need a barrier.  */
4627	  /* ??? This assumes that P and P+1 are always complementary
4628	     predicates for P even.  */
4629	  if ((rws_sum[regno].first_pred ^ 1) != pred)
4630	    need_barrier = 1;
4631	  break;
4632
4633	case 2:
4634	  /* The register has been unconditionally written already.  We
4635	     need a barrier.  */
4636	  need_barrier = 1;
4637	  break;
4638
4639	default:
4640	  abort ();
4641	}
4642    }
4643
4644  return need_barrier;
4645}
4646
4647static int
4648rws_access_reg (reg, flags, pred)
4649     rtx reg;
4650     struct reg_flags flags;
4651     int pred;
4652{
4653  int regno = REGNO (reg);
4654  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4655
4656  if (n == 1)
4657    return rws_access_regno (regno, flags, pred);
4658  else
4659    {
4660      int need_barrier = 0;
4661      while (--n >= 0)
4662	need_barrier |= rws_access_regno (regno + n, flags, pred);
4663      return need_barrier;
4664    }
4665}
4666
4667/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4668   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
4669
4670static void
4671update_set_flags (x, pflags, ppred, pcond)
4672     rtx x;
4673     struct reg_flags *pflags;
4674     int *ppred;
4675     rtx *pcond;
4676{
4677  rtx src = SET_SRC (x);
4678
4679  *pcond = 0;
4680
4681  switch (GET_CODE (src))
4682    {
4683    case CALL:
4684      return;
4685
4686    case IF_THEN_ELSE:
4687      if (SET_DEST (x) == pc_rtx)
4688	/* X is a conditional branch.  */
4689	return;
4690      else
4691	{
4692	  int is_complemented = 0;
4693
4694	  /* X is a conditional move.  */
4695	  rtx cond = XEXP (src, 0);
4696	  if (GET_CODE (cond) == EQ)
4697	    is_complemented = 1;
4698	  cond = XEXP (cond, 0);
4699	  if (GET_CODE (cond) != REG
4700	      && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4701	    abort ();
4702	  *pcond = cond;
4703	  if (XEXP (src, 1) == SET_DEST (x)
4704	      || XEXP (src, 2) == SET_DEST (x))
4705	    {
4706	      /* X is a conditional move that conditionally writes the
4707		 destination.  */
4708
4709	      /* We need another complement in this case.  */
4710	      if (XEXP (src, 1) == SET_DEST (x))
4711		is_complemented = ! is_complemented;
4712
4713	      *ppred = REGNO (cond);
4714	      if (is_complemented)
4715		++*ppred;
4716	    }
4717
4718	  /* ??? If this is a conditional write to the dest, then this
4719	     instruction does not actually read one source.  This probably
4720	     doesn't matter, because that source is also the dest.  */
4721	  /* ??? Multiple writes to predicate registers are allowed
4722	     if they are all AND type compares, or if they are all OR
4723	     type compares.  We do not generate such instructions
4724	     currently.  */
4725	}
4726      /* ... fall through ...  */
4727
4728    default:
4729      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4730	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4731	/* Set pflags->is_fp to 1 so that we know we're dealing
4732	   with a floating point comparison when processing the
4733	   destination of the SET.  */
4734	pflags->is_fp = 1;
4735
4736      /* Discover if this is a parallel comparison.  We only handle
4737	 and.orcm and or.andcm at present, since we must retain a
4738	 strict inverse on the predicate pair.  */
4739      else if (GET_CODE (src) == AND)
4740	pflags->is_and = 1;
4741      else if (GET_CODE (src) == IOR)
4742	pflags->is_or = 1;
4743
4744      break;
4745    }
4746}
4747
4748/* Subroutine of rtx_needs_barrier; this function determines whether the
4749   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
4750   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
4751   for this insn.  */
4752
4753static int
4754set_src_needs_barrier (x, flags, pred, cond)
4755     rtx x;
4756     struct reg_flags flags;
4757     int pred;
4758     rtx cond;
4759{
4760  int need_barrier = 0;
4761  rtx dst;
4762  rtx src = SET_SRC (x);
4763
4764  if (GET_CODE (src) == CALL)
4765    /* We don't need to worry about the result registers that
4766       get written by subroutine call.  */
4767    return rtx_needs_barrier (src, flags, pred);
4768  else if (SET_DEST (x) == pc_rtx)
4769    {
4770      /* X is a conditional branch.  */
4771      /* ??? This seems redundant, as the caller sets this bit for
4772	 all JUMP_INSNs.  */
4773      flags.is_branch = 1;
4774      return rtx_needs_barrier (src, flags, pred);
4775    }
4776
4777  need_barrier = rtx_needs_barrier (src, flags, pred);
4778
4779  /* This instruction unconditionally uses a predicate register.  */
4780  if (cond)
4781    need_barrier |= rws_access_reg (cond, flags, 0);
4782
4783  dst = SET_DEST (x);
4784  if (GET_CODE (dst) == ZERO_EXTRACT)
4785    {
4786      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4787      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4788      dst = XEXP (dst, 0);
4789    }
4790  return need_barrier;
4791}
4792
4793/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4794   Return 1 is this access creates a dependency with an earlier instruction
4795   in the same group.  */
4796
4797static int
4798rtx_needs_barrier (x, flags, pred)
4799     rtx x;
4800     struct reg_flags flags;
4801     int pred;
4802{
4803  int i, j;
4804  int is_complemented = 0;
4805  int need_barrier = 0;
4806  const char *format_ptr;
4807  struct reg_flags new_flags;
4808  rtx cond = 0;
4809
4810  if (! x)
4811    return 0;
4812
4813  new_flags = flags;
4814
4815  switch (GET_CODE (x))
4816    {
4817    case SET:
4818      update_set_flags (x, &new_flags, &pred, &cond);
4819      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4820      if (GET_CODE (SET_SRC (x)) != CALL)
4821	{
4822	  new_flags.is_write = 1;
4823	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4824	}
4825      break;
4826
4827    case CALL:
4828      new_flags.is_write = 0;
4829      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4830
4831      /* Avoid multiple register writes, in case this is a pattern with
4832	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
4833      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4834	{
4835	  new_flags.is_write = 1;
4836	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4837	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4838	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4839	}
4840      break;
4841
4842    case COND_EXEC:
4843      /* X is a predicated instruction.  */
4844
4845      cond = COND_EXEC_TEST (x);
4846      if (pred)
4847	abort ();
4848      need_barrier = rtx_needs_barrier (cond, flags, 0);
4849
4850      if (GET_CODE (cond) == EQ)
4851	is_complemented = 1;
4852      cond = XEXP (cond, 0);
4853      if (GET_CODE (cond) != REG
4854	  && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4855	abort ();
4856      pred = REGNO (cond);
4857      if (is_complemented)
4858	++pred;
4859
4860      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4861      return need_barrier;
4862
4863    case CLOBBER:
4864    case USE:
4865      /* Clobber & use are for earlier compiler-phases only.  */
4866      break;
4867
4868    case ASM_OPERANDS:
4869    case ASM_INPUT:
4870      /* We always emit stop bits for traditional asms.  We emit stop bits
4871	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
4872      if (GET_CODE (x) != ASM_OPERANDS
4873	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4874	{
4875	  /* Avoid writing the register multiple times if we have multiple
4876	     asm outputs.  This avoids an abort in rws_access_reg.  */
4877	  if (! rws_insn[REG_VOLATILE].write_count)
4878	    {
4879	      new_flags.is_write = 1;
4880	      rws_access_regno (REG_VOLATILE, new_flags, pred);
4881	    }
4882	  return 1;
4883	}
4884
4885      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4886	 We can not just fall through here since then we would be confused
4887	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4888	 traditional asms unlike their normal usage.  */
4889
4890      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4891	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4892	  need_barrier = 1;
4893      break;
4894
4895    case PARALLEL:
4896      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4897	{
4898	  rtx pat = XVECEXP (x, 0, i);
4899	  if (GET_CODE (pat) == SET)
4900	    {
4901	      update_set_flags (pat, &new_flags, &pred, &cond);
4902	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4903	    }
4904	  else if (GET_CODE (pat) == USE
4905		   || GET_CODE (pat) == CALL
4906		   || GET_CODE (pat) == ASM_OPERANDS)
4907	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4908	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4909	    abort ();
4910	}
4911      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4912	{
4913	  rtx pat = XVECEXP (x, 0, i);
4914	  if (GET_CODE (pat) == SET)
4915	    {
4916	      if (GET_CODE (SET_SRC (pat)) != CALL)
4917		{
4918		  new_flags.is_write = 1;
4919		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4920						     pred);
4921		}
4922	    }
4923	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4924	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4925	}
4926      break;
4927
4928    case SUBREG:
4929      x = SUBREG_REG (x);
4930      /* FALLTHRU */
4931    case REG:
4932      if (REGNO (x) == AR_UNAT_REGNUM)
4933	{
4934	  for (i = 0; i < 64; ++i)
4935	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4936	}
4937      else
4938	need_barrier = rws_access_reg (x, flags, pred);
4939      break;
4940
4941    case MEM:
4942      /* Find the regs used in memory address computation.  */
4943      new_flags.is_write = 0;
4944      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4945      break;
4946
4947    case CONST_INT:   case CONST_DOUBLE:
4948    case SYMBOL_REF:  case LABEL_REF:     case CONST:
4949      break;
4950
4951      /* Operators with side-effects.  */
4952    case POST_INC:    case POST_DEC:
4953      if (GET_CODE (XEXP (x, 0)) != REG)
4954	abort ();
4955
4956      new_flags.is_write = 0;
4957      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4958      new_flags.is_write = 1;
4959      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4960      break;
4961
4962    case POST_MODIFY:
4963      if (GET_CODE (XEXP (x, 0)) != REG)
4964	abort ();
4965
4966      new_flags.is_write = 0;
4967      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4968      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4969      new_flags.is_write = 1;
4970      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4971      break;
4972
4973      /* Handle common unary and binary ops for efficiency.  */
4974    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
4975    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
4976    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
4977    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
4978    case NE:       case EQ:      case GE:      case GT:        case LE:
4979    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
4980      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4981      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4982      break;
4983
4984    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
4985    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
4986    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
4987    case SQRT:     case FFS:
4988      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4989      break;
4990
4991    case UNSPEC:
4992      switch (XINT (x, 1))
4993	{
4994	case UNSPEC_LTOFF_DTPMOD:
4995	case UNSPEC_LTOFF_DTPREL:
4996	case UNSPEC_DTPREL:
4997	case UNSPEC_LTOFF_TPREL:
4998	case UNSPEC_TPREL:
4999	case UNSPEC_PRED_REL_MUTEX:
5000	case UNSPEC_PIC_CALL:
5001        case UNSPEC_MF:
5002        case UNSPEC_FETCHADD_ACQ:
5003	case UNSPEC_BSP_VALUE:
5004	case UNSPEC_FLUSHRS:
5005	case UNSPEC_BUNDLE_SELECTOR:
5006          break;
5007
5008	case UNSPEC_GR_SPILL:
5009	case UNSPEC_GR_RESTORE:
5010	  {
5011	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5012	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5013
5014	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5015	    new_flags.is_write = (XINT (x, 1) == 1);
5016	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5017					      new_flags, pred);
5018	    break;
5019	  }
5020
5021	case UNSPEC_FR_SPILL:
5022	case UNSPEC_FR_RESTORE:
5023	case UNSPEC_POPCNT:
5024	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5025	  break;
5026
5027        case UNSPEC_ADDP4:
5028	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5029	  break;
5030
5031	case UNSPEC_FR_RECIP_APPROX:
5032	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5033	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5034	  break;
5035
5036        case UNSPEC_CMPXCHG_ACQ:
5037	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5038	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5039	  break;
5040
5041	default:
5042	  abort ();
5043	}
5044      break;
5045
5046    case UNSPEC_VOLATILE:
5047      switch (XINT (x, 1))
5048	{
5049	case UNSPECV_ALLOC:
5050	  /* Alloc must always be the first instruction of a group.
5051	     We force this by always returning true.  */
5052	  /* ??? We might get better scheduling if we explicitly check for
5053	     input/local/output register dependencies, and modify the
5054	     scheduler so that alloc is always reordered to the start of
5055	     the current group.  We could then eliminate all of the
5056	     first_instruction code.  */
5057	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5058
5059	  new_flags.is_write = 1;
5060	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5061	  return 1;
5062
5063	case UNSPECV_SET_BSP:
5064	  need_barrier = 1;
5065          break;
5066
5067	case UNSPECV_BLOCKAGE:
5068	case UNSPECV_INSN_GROUP_BARRIER:
5069	case UNSPECV_BREAK:
5070	case UNSPECV_PSAC_ALL:
5071	case UNSPECV_PSAC_NORMAL:
5072	  return 0;
5073
5074	default:
5075	  abort ();
5076	}
5077      break;
5078
5079    case RETURN:
5080      new_flags.is_write = 0;
5081      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5082      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5083
5084      new_flags.is_write = 1;
5085      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5086      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5087      break;
5088
5089    default:
5090      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5091      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5092	switch (format_ptr[i])
5093	  {
5094	  case '0':	/* unused field */
5095	  case 'i':	/* integer */
5096	  case 'n':	/* note */
5097	  case 'w':	/* wide integer */
5098	  case 's':	/* pointer to string */
5099	  case 'S':	/* optional pointer to string */
5100	    break;
5101
5102	  case 'e':
5103	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5104	      need_barrier = 1;
5105	    break;
5106
5107	  case 'E':
5108	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5109	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5110		need_barrier = 1;
5111	    break;
5112
5113	  default:
5114	    abort ();
5115	  }
5116      break;
5117    }
5118  return need_barrier;
5119}
5120
5121/* Clear out the state for group_barrier_needed_p at the start of a
5122   sequence of insns.  */
5123
5124static void
5125init_insn_group_barriers ()
5126{
5127  memset (rws_sum, 0, sizeof (rws_sum));
5128  first_instruction = 1;
5129}
5130
5131/* Given the current state, recorded by previous calls to this function,
5132   determine whether a group barrier (a stop bit) is necessary before INSN.
5133   Return nonzero if so.  */
5134
5135static int
5136group_barrier_needed_p (insn)
5137     rtx insn;
5138{
5139  rtx pat;
5140  int need_barrier = 0;
5141  struct reg_flags flags;
5142
5143  memset (&flags, 0, sizeof (flags));
5144  switch (GET_CODE (insn))
5145    {
5146    case NOTE:
5147      break;
5148
5149    case BARRIER:
5150      /* A barrier doesn't imply an instruction group boundary.  */
5151      break;
5152
5153    case CODE_LABEL:
5154      memset (rws_insn, 0, sizeof (rws_insn));
5155      return 1;
5156
5157    case CALL_INSN:
5158      flags.is_branch = 1;
5159      flags.is_sibcall = SIBLING_CALL_P (insn);
5160      memset (rws_insn, 0, sizeof (rws_insn));
5161
5162      /* Don't bundle a call following another call.  */
5163      if ((pat = prev_active_insn (insn))
5164	  && GET_CODE (pat) == CALL_INSN)
5165	{
5166	  need_barrier = 1;
5167	  break;
5168	}
5169
5170      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5171      break;
5172
5173    case JUMP_INSN:
5174      flags.is_branch = 1;
5175
5176      /* Don't bundle a jump following a call.  */
5177      if ((pat = prev_active_insn (insn))
5178	  && GET_CODE (pat) == CALL_INSN)
5179	{
5180	  need_barrier = 1;
5181	  break;
5182	}
5183      /* FALLTHRU */
5184
5185    case INSN:
5186      if (GET_CODE (PATTERN (insn)) == USE
5187	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5188	/* Don't care about USE and CLOBBER "insns"---those are used to
5189	   indicate to the optimizer that it shouldn't get rid of
5190	   certain operations.  */
5191	break;
5192
5193      pat = PATTERN (insn);
5194
5195      /* Ug.  Hack hacks hacked elsewhere.  */
5196      switch (recog_memoized (insn))
5197	{
5198	  /* We play dependency tricks with the epilogue in order
5199	     to get proper schedules.  Undo this for dv analysis.  */
5200	case CODE_FOR_epilogue_deallocate_stack:
5201	case CODE_FOR_prologue_allocate_stack:
5202	  pat = XVECEXP (pat, 0, 0);
5203	  break;
5204
5205	  /* The pattern we use for br.cloop confuses the code above.
5206	     The second element of the vector is representative.  */
5207	case CODE_FOR_doloop_end_internal:
5208	  pat = XVECEXP (pat, 0, 1);
5209	  break;
5210
5211	  /* Doesn't generate code.  */
5212	case CODE_FOR_pred_rel_mutex:
5213	case CODE_FOR_prologue_use:
5214	  return 0;
5215
5216	default:
5217	  break;
5218	}
5219
5220      memset (rws_insn, 0, sizeof (rws_insn));
5221      need_barrier = rtx_needs_barrier (pat, flags, 0);
5222
5223      /* Check to see if the previous instruction was a volatile
5224	 asm.  */
5225      if (! need_barrier)
5226	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5227      break;
5228
5229    default:
5230      abort ();
5231    }
5232
5233  if (first_instruction)
5234    {
5235      need_barrier = 0;
5236      first_instruction = 0;
5237    }
5238
5239  return need_barrier;
5240}
5241
5242/* Like group_barrier_needed_p, but do not clobber the current state.  */
5243
5244static int
5245safe_group_barrier_needed_p (insn)
5246     rtx insn;
5247{
5248  struct reg_write_state rws_saved[NUM_REGS];
5249  int saved_first_instruction;
5250  int t;
5251
5252  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5253  saved_first_instruction = first_instruction;
5254
5255  t = group_barrier_needed_p (insn);
5256
5257  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5258  first_instruction = saved_first_instruction;
5259
5260  return t;
5261}
5262
5263/* INSNS is an chain of instructions.  Scan the chain, and insert stop bits
5264   as necessary to eliminate dependendencies.  This function assumes that
5265   a final instruction scheduling pass has been run which has already
5266   inserted most of the necessary stop bits.  This function only inserts
5267   new ones at basic block boundaries, since these are invisible to the
5268   scheduler.  */
5269
5270static void
5271emit_insn_group_barriers (dump, insns)
5272     FILE *dump;
5273     rtx insns;
5274{
5275  rtx insn;
5276  rtx last_label = 0;
5277  int insns_since_last_label = 0;
5278
5279  init_insn_group_barriers ();
5280
5281  for (insn = insns; insn; insn = NEXT_INSN (insn))
5282    {
5283      if (GET_CODE (insn) == CODE_LABEL)
5284	{
5285	  if (insns_since_last_label)
5286	    last_label = insn;
5287	  insns_since_last_label = 0;
5288	}
5289      else if (GET_CODE (insn) == NOTE
5290	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5291	{
5292	  if (insns_since_last_label)
5293	    last_label = insn;
5294	  insns_since_last_label = 0;
5295	}
5296      else if (GET_CODE (insn) == INSN
5297	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5298	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5299	{
5300	  init_insn_group_barriers ();
5301	  last_label = 0;
5302	}
5303      else if (INSN_P (insn))
5304	{
5305	  insns_since_last_label = 1;
5306
5307	  if (group_barrier_needed_p (insn))
5308	    {
5309	      if (last_label)
5310		{
5311		  if (dump)
5312		    fprintf (dump, "Emitting stop before label %d\n",
5313			     INSN_UID (last_label));
5314		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5315		  insn = last_label;
5316
5317		  init_insn_group_barriers ();
5318		  last_label = 0;
5319		}
5320	    }
5321	}
5322    }
5323}
5324
5325/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5326   This function has to emit all necessary group barriers.  */
5327
5328static void
5329emit_all_insn_group_barriers (dump, insns)
5330     FILE *dump ATTRIBUTE_UNUSED;
5331     rtx insns;
5332{
5333  rtx insn;
5334
5335  init_insn_group_barriers ();
5336
5337  for (insn = insns; insn; insn = NEXT_INSN (insn))
5338    {
5339      if (GET_CODE (insn) == BARRIER)
5340	{
5341	  rtx last = prev_active_insn (insn);
5342
5343	  if (! last)
5344	    continue;
5345	  if (GET_CODE (last) == JUMP_INSN
5346	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5347	    last = prev_active_insn (last);
5348	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5349	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5350
5351	  init_insn_group_barriers ();
5352	}
5353      else if (INSN_P (insn))
5354	{
5355	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5356	    init_insn_group_barriers ();
5357	  else if (group_barrier_needed_p (insn))
5358	    {
5359	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5360	      init_insn_group_barriers ();
5361	      group_barrier_needed_p (insn);
5362	    }
5363	}
5364    }
5365}
5366
5367static int errata_find_address_regs PARAMS ((rtx *, void *));
5368static void errata_emit_nops PARAMS ((rtx));
5369static void fixup_errata PARAMS ((void));
5370
5371/* This structure is used to track some details about the previous insns
5372   groups so we can determine if it may be necessary to insert NOPs to
5373   workaround hardware errata.  */
5374static struct group
5375{
5376  HARD_REG_SET p_reg_set;
5377  HARD_REG_SET gr_reg_conditionally_set;
5378} last_group[2];
5379
5380/* Index into the last_group array.  */
5381static int group_idx;
5382
5383/* Called through for_each_rtx; determines if a hard register that was
5384   conditionally set in the previous group is used as an address register.
5385   It ensures that for_each_rtx returns 1 in that case.  */
5386static int
5387errata_find_address_regs (xp, data)
5388     rtx *xp;
5389     void *data ATTRIBUTE_UNUSED;
5390{
5391  rtx x = *xp;
5392  if (GET_CODE (x) != MEM)
5393    return 0;
5394  x = XEXP (x, 0);
5395  if (GET_CODE (x) == POST_MODIFY)
5396    x = XEXP (x, 0);
5397  if (GET_CODE (x) == REG)
5398    {
5399      struct group *prev_group = last_group + (group_idx ^ 1);
5400      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5401			     REGNO (x)))
5402	return 1;
5403      return -1;
5404    }
5405  return 0;
5406}
5407
5408/* Called for each insn; this function keeps track of the state in
5409   last_group and emits additional NOPs if necessary to work around
5410   an Itanium A/B step erratum.  */
5411static void
5412errata_emit_nops (insn)
5413     rtx insn;
5414{
5415  struct group *this_group = last_group + group_idx;
5416  struct group *prev_group = last_group + (group_idx ^ 1);
5417  rtx pat = PATTERN (insn);
5418  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5419  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5420  enum attr_type type;
5421  rtx set = real_pat;
5422
5423  if (GET_CODE (real_pat) == USE
5424      || GET_CODE (real_pat) == CLOBBER
5425      || GET_CODE (real_pat) == ASM_INPUT
5426      || GET_CODE (real_pat) == ADDR_VEC
5427      || GET_CODE (real_pat) == ADDR_DIFF_VEC
5428      || asm_noperands (PATTERN (insn)) >= 0)
5429    return;
5430
5431  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5432     parts of it.  */
5433
5434  if (GET_CODE (set) == PARALLEL)
5435    {
5436      int i;
5437      set = XVECEXP (real_pat, 0, 0);
5438      for (i = 1; i < XVECLEN (real_pat, 0); i++)
5439	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5440	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5441	  {
5442	    set = 0;
5443	    break;
5444	  }
5445    }
5446
5447  if (set && GET_CODE (set) != SET)
5448    set = 0;
5449
5450  type  = get_attr_type (insn);
5451
5452  if (type == TYPE_F
5453      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5454    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5455
5456  if ((type == TYPE_M || type == TYPE_A) && cond && set
5457      && REG_P (SET_DEST (set))
5458      && GET_CODE (SET_SRC (set)) != PLUS
5459      && GET_CODE (SET_SRC (set)) != MINUS
5460      && (GET_CODE (SET_SRC (set)) != ASHIFT
5461	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5462      && (GET_CODE (SET_SRC (set)) != MEM
5463	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5464      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5465    {
5466      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5467	  || ! REG_P (XEXP (cond, 0)))
5468	abort ();
5469
5470      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5471	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5472    }
5473  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5474    {
5475      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5476      emit_insn_before (gen_nop (), insn);
5477      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5478      group_idx = 0;
5479      memset (last_group, 0, sizeof last_group);
5480    }
5481}
5482
5483/* Emit extra nops if they are required to work around hardware errata.  */
5484
5485static void
5486fixup_errata ()
5487{
5488  rtx insn;
5489
5490  if (! TARGET_B_STEP)
5491    return;
5492
5493  group_idx = 0;
5494  memset (last_group, 0, sizeof last_group);
5495
5496  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5497    {
5498      if (!INSN_P (insn))
5499	continue;
5500
5501      if (ia64_safe_type (insn) == TYPE_S)
5502	{
5503	  group_idx ^= 1;
5504	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5505	}
5506      else
5507	errata_emit_nops (insn);
5508    }
5509}
5510
5511/* Instruction scheduling support.  */
5512/* Describe one bundle.  */
5513
5514struct bundle
5515{
5516  /* Zero if there's no possibility of a stop in this bundle other than
5517     at the end, otherwise the position of the optional stop bit.  */
5518  int possible_stop;
5519  /* The types of the three slots.  */
5520  enum attr_type t[3];
5521  /* The pseudo op to be emitted into the assembler output.  */
5522  const char *name;
5523};
5524
5525#define NR_BUNDLES 10
5526
5527/* A list of all available bundles.  */
5528
5529static const struct bundle bundle[NR_BUNDLES] =
5530{
5531  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5532  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5533  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5534  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5535#if NR_BUNDLES == 10
5536  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5537  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5538#endif
5539  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5540  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5541  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5542  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5543     it matches an L type insn.  Otherwise we'll try to generate L type
5544     nops.  */
5545  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5546};
5547
5548/* Describe a packet of instructions.  Packets consist of two bundles that
5549   are visible to the hardware in one scheduling window.  */
5550
5551struct ia64_packet
5552{
5553  const struct bundle *t1, *t2;
5554  /* Precomputed value of the first split issue in this packet if a cycle
5555     starts at its beginning.  */
5556  int first_split;
5557  /* For convenience, the insn types are replicated here so we don't have
5558     to go through T1 and T2 all the time.  */
5559  enum attr_type t[6];
5560};
5561
5562/* An array containing all possible packets.  */
5563#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5564static struct ia64_packet packets[NR_PACKETS];
5565
5566/* Map attr_type to a string with the name.  */
5567
5568static const char *const type_names[] =
5569{
5570  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5571};
5572
5573/* Nonzero if we should insert stop bits into the schedule.  */
5574int ia64_final_schedule = 0;
5575
5576static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5577static rtx ia64_single_set PARAMS ((rtx));
5578static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5579static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5580static void maybe_rotate PARAMS ((FILE *));
5581static void finish_last_head PARAMS ((FILE *, int));
5582static void rotate_one_bundle PARAMS ((FILE *));
5583static void rotate_two_bundles PARAMS ((FILE *));
5584static void nop_cycles_until PARAMS ((int, FILE *));
5585static void cycle_end_fill_slots PARAMS ((FILE *));
5586static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5587static int get_split PARAMS ((const struct ia64_packet *, int));
5588static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5589				   const struct ia64_packet *, int));
5590static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5591				      rtx *, enum attr_type *, int));
5592static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5593static void dump_current_packet PARAMS ((FILE *));
5594static void schedule_stop PARAMS ((FILE *));
5595static rtx gen_nop_type PARAMS ((enum attr_type));
5596static void ia64_emit_nops PARAMS ((void));
5597
5598/* Map a bundle number to its pseudo-op.  */
5599
5600const char *
5601get_bundle_name (b)
5602     int b;
5603{
5604  return bundle[b].name;
5605}
5606
5607/* Compute the slot which will cause a split issue in packet P if the
5608   current cycle begins at slot BEGIN.  */
5609
5610static int
5611itanium_split_issue (p, begin)
5612     const struct ia64_packet *p;
5613     int begin;
5614{
5615  int type_count[TYPE_S];
5616  int i;
5617  int split = 6;
5618
5619  if (begin < 3)
5620    {
5621      /* Always split before and after MMF.  */
5622      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5623	return 3;
5624      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5625	return 3;
5626      /* Always split after MBB and BBB.  */
5627      if (p->t[1] == TYPE_B)
5628	return 3;
5629      /* Split after first bundle in MIB BBB combination.  */
5630      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5631	return 3;
5632    }
5633
5634  memset (type_count, 0, sizeof type_count);
5635  for (i = begin; i < split; i++)
5636    {
5637      enum attr_type t0 = p->t[i];
5638      /* An MLX bundle reserves the same units as an MFI bundle.  */
5639      enum attr_type t = (t0 == TYPE_L ? TYPE_F
5640			  : t0 == TYPE_X ? TYPE_I
5641			  : t0);
5642
5643      /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5644	 2 integer per cycle.  */
5645      int max = (t == TYPE_B ? 3 : 2);
5646      if (type_count[t] == max)
5647	return i;
5648
5649      type_count[t]++;
5650    }
5651  return split;
5652}
5653
5654/* Return the maximum number of instructions a cpu can issue.  */
5655
5656static int
5657ia64_issue_rate ()
5658{
5659  return 6;
5660}
5661
5662/* Helper function - like single_set, but look inside COND_EXEC.  */
5663
5664static rtx
5665ia64_single_set (insn)
5666     rtx insn;
5667{
5668  rtx x = PATTERN (insn), ret;
5669  if (GET_CODE (x) == COND_EXEC)
5670    x = COND_EXEC_CODE (x);
5671  if (GET_CODE (x) == SET)
5672    return x;
5673
5674  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5675     Although they are not classical single set, the second set is there just
5676     to protect it from moving past FP-relative stack accesses.  */
5677  switch (recog_memoized (insn))
5678    {
5679    case CODE_FOR_prologue_allocate_stack:
5680    case CODE_FOR_epilogue_deallocate_stack:
5681      ret = XVECEXP (x, 0, 0);
5682      break;
5683
5684    default:
5685      ret = single_set_2 (insn, x);
5686      break;
5687    }
5688
5689  return ret;
5690}
5691
5692/* Adjust the cost of a scheduling dependency.  Return the new cost of
5693   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
5694
5695static int
5696ia64_adjust_cost (insn, link, dep_insn, cost)
5697     rtx insn, link, dep_insn;
5698     int cost;
5699{
5700  enum attr_type dep_type;
5701  enum attr_itanium_class dep_class;
5702  enum attr_itanium_class insn_class;
5703  rtx dep_set, set, src, addr;
5704
5705  if (GET_CODE (PATTERN (insn)) == CLOBBER
5706      || GET_CODE (PATTERN (insn)) == USE
5707      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5708      || GET_CODE (PATTERN (dep_insn)) == USE
5709      /* @@@ Not accurate for indirect calls.  */
5710      || GET_CODE (insn) == CALL_INSN
5711      || ia64_safe_type (insn) == TYPE_S)
5712    return 0;
5713
5714  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5715      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5716    return 0;
5717
5718  dep_type = ia64_safe_type (dep_insn);
5719  dep_class = ia64_safe_itanium_class (dep_insn);
5720  insn_class = ia64_safe_itanium_class (insn);
5721
5722  /* Compares that feed a conditional branch can execute in the same
5723     cycle.  */
5724  dep_set = ia64_single_set (dep_insn);
5725  set = ia64_single_set (insn);
5726
5727  if (dep_type != TYPE_F
5728      && dep_set
5729      && GET_CODE (SET_DEST (dep_set)) == REG
5730      && PR_REG (REGNO (SET_DEST (dep_set)))
5731      && GET_CODE (insn) == JUMP_INSN)
5732    return 0;
5733
5734  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5735    {
5736      /* ??? Can't find any information in the documenation about whether
5737	 a sequence
5738	   st [rx] = ra
5739	   ld rb = [ry]
5740	 splits issue.  Assume it doesn't.  */
5741      return 0;
5742    }
5743
5744  src = set ? SET_SRC (set) : 0;
5745  addr = 0;
5746  if (set)
5747    {
5748      if (GET_CODE (SET_DEST (set)) == MEM)
5749	addr = XEXP (SET_DEST (set), 0);
5750      else if (GET_CODE (SET_DEST (set)) == SUBREG
5751	       && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5752	addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5753      else
5754	{
5755	  addr = src;
5756	  if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5757	    addr = XVECEXP (addr, 0, 0);
5758	  while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5759	    addr = XEXP (addr, 0);
5760
5761	  /* Note that LO_SUM is used for GOT loads.  */
5762	  if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
5763	    addr = XEXP (addr, 0);
5764	  else
5765	    addr = 0;
5766	}
5767    }
5768
5769  if (addr && GET_CODE (addr) == POST_MODIFY)
5770    addr = XEXP (addr, 0);
5771
5772  set = ia64_single_set (dep_insn);
5773
5774  if ((dep_class == ITANIUM_CLASS_IALU
5775       || dep_class == ITANIUM_CLASS_ILOG
5776       || dep_class == ITANIUM_CLASS_LD)
5777      && (insn_class == ITANIUM_CLASS_LD
5778	  || insn_class == ITANIUM_CLASS_ST))
5779    {
5780      if (! addr || ! set)
5781	abort ();
5782      /* This isn't completely correct - an IALU that feeds an address has
5783	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5784	 otherwise.  Unfortunately there's no good way to describe this.  */
5785      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5786	return cost + 1;
5787    }
5788
5789  if ((dep_class == ITANIUM_CLASS_IALU
5790       || dep_class == ITANIUM_CLASS_ILOG
5791       || dep_class == ITANIUM_CLASS_LD)
5792      && (insn_class == ITANIUM_CLASS_MMMUL
5793	  || insn_class == ITANIUM_CLASS_MMSHF
5794	  || insn_class == ITANIUM_CLASS_MMSHFI))
5795    return 3;
5796
5797  if (dep_class == ITANIUM_CLASS_FMAC
5798      && (insn_class == ITANIUM_CLASS_FMISC
5799	  || insn_class == ITANIUM_CLASS_FCVTFX
5800	  || insn_class == ITANIUM_CLASS_XMPY))
5801    return 7;
5802
5803  if ((dep_class == ITANIUM_CLASS_FMAC
5804       || dep_class == ITANIUM_CLASS_FMISC
5805       || dep_class == ITANIUM_CLASS_FCVTFX
5806       || dep_class == ITANIUM_CLASS_XMPY)
5807      && insn_class == ITANIUM_CLASS_STF)
5808    return 8;
5809
5810  /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5811     but HP engineers say any non-MM operation.  */
5812  if ((dep_class == ITANIUM_CLASS_MMMUL
5813       || dep_class == ITANIUM_CLASS_MMSHF
5814       || dep_class == ITANIUM_CLASS_MMSHFI)
5815      && insn_class != ITANIUM_CLASS_MMMUL
5816      && insn_class != ITANIUM_CLASS_MMSHF
5817      && insn_class != ITANIUM_CLASS_MMSHFI)
5818    return 4;
5819
5820  return cost;
5821}
5822
5823/* Describe the current state of the Itanium pipeline.  */
5824static struct
5825{
5826  /* The first slot that is used in the current cycle.  */
5827  int first_slot;
5828  /* The next slot to fill.  */
5829  int cur;
5830  /* The packet we have selected for the current issue window.  */
5831  const struct ia64_packet *packet;
5832  /* The position of the split issue that occurs due to issue width
5833     limitations (6 if there's no split issue).  */
5834  int split;
5835  /* Record data about the insns scheduled so far in the same issue
5836     window.  The elements up to but not including FIRST_SLOT belong
5837     to the previous cycle, the ones starting with FIRST_SLOT belong
5838     to the current cycle.  */
5839  enum attr_type types[6];
5840  rtx insns[6];
5841  int stopbit[6];
5842  /* Nonzero if we decided to schedule a stop bit.  */
5843  int last_was_stop;
5844} sched_data;
5845
5846/* Temporary arrays; they have enough elements to hold all insns that
5847   can be ready at the same time while scheduling of the current block.
5848   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
5849static rtx *sched_ready;
5850static enum attr_type *sched_types;
5851
5852/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5853   of packet P.  */
5854
5855static int
5856insn_matches_slot (p, itype, slot, insn)
5857     const struct ia64_packet *p;
5858     enum attr_type itype;
5859     int slot;
5860     rtx insn;
5861{
5862  enum attr_itanium_requires_unit0 u0;
5863  enum attr_type stype = p->t[slot];
5864
5865  if (insn)
5866    {
5867      u0 = ia64_safe_itanium_requires_unit0 (insn);
5868      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5869	{
5870	  int i;
5871	  for (i = sched_data.first_slot; i < slot; i++)
5872	    if (p->t[i] == stype
5873		|| (stype == TYPE_F && p->t[i] == TYPE_L)
5874		|| (stype == TYPE_I && p->t[i] == TYPE_X))
5875	      return 0;
5876	}
5877      if (GET_CODE (insn) == CALL_INSN)
5878	{
5879	  /* Reject calls in multiway branch packets.  We want to limit
5880	     the number of multiway branches we generate (since the branch
5881	     predictor is limited), and this seems to work fairly well.
5882	     (If we didn't do this, we'd have to add another test here to
5883	     force calls into the third slot of the bundle.)  */
5884	  if (slot < 3)
5885	    {
5886	      if (p->t[1] == TYPE_B)
5887		return 0;
5888	    }
5889	  else
5890	    {
5891	      if (p->t[4] == TYPE_B)
5892		return 0;
5893	    }
5894	}
5895    }
5896
5897  if (itype == stype)
5898    return 1;
5899  if (itype == TYPE_A)
5900    return stype == TYPE_M || stype == TYPE_I;
5901  return 0;
5902}
5903
5904/* Like emit_insn_before, but skip cycle_display notes.
5905   ??? When cycle display notes are implemented, update this.  */
5906
5907static void
5908ia64_emit_insn_before (insn, before)
5909     rtx insn, before;
5910{
5911  emit_insn_before (insn, before);
5912}
5913
5914/* When rotating a bundle out of the issue window, insert a bundle selector
5915   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
5916   is either 0 or 3, depending on whether we want to emit a bundle selector
5917   for the first bundle or the second bundle in the current issue window.
5918
5919   The selector insns are emitted this late because the selected packet can
5920   be changed until parts of it get rotated out.  */
5921
5922static void
5923finish_last_head (dump, start)
5924     FILE *dump;
5925     int start;
5926{
5927  const struct ia64_packet *p = sched_data.packet;
5928  const struct bundle *b = start == 0 ? p->t1 : p->t2;
5929  int bundle_type = b - bundle;
5930  rtx insn;
5931  int i;
5932
5933  if (! ia64_final_schedule)
5934    return;
5935
5936  for (i = start; sched_data.insns[i] == 0; i++)
5937    if (i == start + 3)
5938      abort ();
5939  insn = sched_data.insns[i];
5940
5941  if (dump)
5942    fprintf (dump, "//    Emitting template before %d: %s\n",
5943	     INSN_UID (insn), b->name);
5944
5945  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5946}
5947
5948/* We can't schedule more insns this cycle.  Fix up the scheduling state
5949   and advance FIRST_SLOT and CUR.
5950   We have to distribute the insns that are currently found between
5951   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
5952   far, they are stored successively in the fields starting at FIRST_SLOT;
5953   now they must be moved to the correct slots.
5954   DUMP is the current scheduling dump file, or NULL.  */
5955
5956static void
5957cycle_end_fill_slots (dump)
5958     FILE *dump;
5959{
5960  const struct ia64_packet *packet = sched_data.packet;
5961  int slot, i;
5962  enum attr_type tmp_types[6];
5963  rtx tmp_insns[6];
5964
5965  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5966  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5967
5968  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5969    {
5970      enum attr_type t = tmp_types[i];
5971      if (t != ia64_safe_type (tmp_insns[i]))
5972	abort ();
5973      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5974	{
5975	  if (slot > sched_data.split)
5976	    abort ();
5977	  if (dump)
5978	    fprintf (dump, "// Packet needs %s, have %s\n",
5979		     type_names[packet->t[slot]], type_names[t]);
5980	  sched_data.types[slot] = packet->t[slot];
5981	  sched_data.insns[slot] = 0;
5982	  sched_data.stopbit[slot] = 0;
5983
5984	  /* ??? TYPE_L instructions always fill up two slots, but we don't
5985	     support TYPE_L nops.  */
5986	  if (packet->t[slot] == TYPE_L)
5987	    abort ();
5988
5989	  slot++;
5990	}
5991
5992      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
5993	 actual slot type later.  */
5994      sched_data.types[slot] = packet->t[slot];
5995      sched_data.insns[slot] = tmp_insns[i];
5996      sched_data.stopbit[slot] = 0;
5997      slot++;
5998
5999      /* TYPE_L instructions always fill up two slots.  */
6000      if (t == TYPE_L)
6001	{
6002	  sched_data.types[slot] = packet->t[slot];
6003	  sched_data.insns[slot] = 0;
6004	  sched_data.stopbit[slot] = 0;
6005	  slot++;
6006	}
6007    }
6008
6009  /* This isn't right - there's no need to pad out until the forced split;
6010     the CPU will automatically split if an insn isn't ready.  */
6011#if 0
6012  while (slot < sched_data.split)
6013    {
6014      sched_data.types[slot] = packet->t[slot];
6015      sched_data.insns[slot] = 0;
6016      sched_data.stopbit[slot] = 0;
6017      slot++;
6018    }
6019#endif
6020
6021  sched_data.first_slot = sched_data.cur = slot;
6022}
6023
6024/* Bundle rotations, as described in the Itanium optimization manual.
6025   We can rotate either one or both bundles out of the issue window.
6026   DUMP is the current scheduling dump file, or NULL.  */
6027
6028static void
6029rotate_one_bundle (dump)
6030     FILE *dump;
6031{
6032  if (dump)
6033    fprintf (dump, "// Rotating one bundle.\n");
6034
6035  finish_last_head (dump, 0);
6036  if (sched_data.cur > 3)
6037    {
6038      sched_data.cur -= 3;
6039      sched_data.first_slot -= 3;
6040      memmove (sched_data.types,
6041	       sched_data.types + 3,
6042	       sched_data.cur * sizeof *sched_data.types);
6043      memmove (sched_data.stopbit,
6044	       sched_data.stopbit + 3,
6045	       sched_data.cur * sizeof *sched_data.stopbit);
6046      memmove (sched_data.insns,
6047	       sched_data.insns + 3,
6048	       sched_data.cur * sizeof *sched_data.insns);
6049      sched_data.packet
6050	= &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
6051    }
6052  else
6053    {
6054      sched_data.cur = 0;
6055      sched_data.first_slot = 0;
6056    }
6057}
6058
6059static void
6060rotate_two_bundles (dump)
6061     FILE *dump;
6062{
6063  if (dump)
6064    fprintf (dump, "// Rotating two bundles.\n");
6065
6066  if (sched_data.cur == 0)
6067    return;
6068
6069  finish_last_head (dump, 0);
6070  if (sched_data.cur > 3)
6071    finish_last_head (dump, 3);
6072  sched_data.cur = 0;
6073  sched_data.first_slot = 0;
6074}
6075
6076/* We're beginning a new block.  Initialize data structures as necessary.  */
6077
6078static void
6079ia64_sched_init (dump, sched_verbose, max_ready)
6080     FILE *dump ATTRIBUTE_UNUSED;
6081     int sched_verbose ATTRIBUTE_UNUSED;
6082     int max_ready;
6083{
6084  static int initialized = 0;
6085
6086  if (! initialized)
6087    {
6088      int b1, b2, i;
6089
6090      initialized = 1;
6091
6092      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
6093	{
6094	  const struct bundle *t1 = bundle + b1;
6095	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
6096	    {
6097	      const struct bundle *t2 = bundle + b2;
6098
6099	      packets[i].t1 = t1;
6100	      packets[i].t2 = t2;
6101	    }
6102	}
6103      for (i = 0; i < NR_PACKETS; i++)
6104	{
6105	  int j;
6106	  for (j = 0; j < 3; j++)
6107	    packets[i].t[j] = packets[i].t1->t[j];
6108	  for (j = 0; j < 3; j++)
6109	    packets[i].t[j + 3] = packets[i].t2->t[j];
6110	  packets[i].first_split = itanium_split_issue (packets + i, 0);
6111	}
6112
6113    }
6114
6115  init_insn_group_barriers ();
6116
6117  memset (&sched_data, 0, sizeof sched_data);
6118  sched_types = (enum attr_type *) xmalloc (max_ready
6119					    * sizeof (enum attr_type));
6120  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
6121}
6122
6123/* See if the packet P can match the insns we have already scheduled.  Return
6124   nonzero if so.  In *PSLOT, we store the first slot that is available for
6125   more instructions if we choose this packet.
6126   SPLIT holds the last slot we can use, there's a split issue after it so
6127   scheduling beyond it would cause us to use more than one cycle.  */
6128
6129static int
6130packet_matches_p (p, split, pslot)
6131     const struct ia64_packet *p;
6132     int split;
6133     int *pslot;
6134{
6135  int filled = sched_data.cur;
6136  int first = sched_data.first_slot;
6137  int i, slot;
6138
6139  /* First, check if the first of the two bundles must be a specific one (due
6140     to stop bits).  */
6141  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
6142    return 0;
6143  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
6144    return 0;
6145
6146  for (i = 0; i < first; i++)
6147    if (! insn_matches_slot (p, sched_data.types[i], i,
6148			     sched_data.insns[i]))
6149      return 0;
6150  for (i = slot = first; i < filled; i++)
6151    {
6152      while (slot < split)
6153	{
6154	  if (insn_matches_slot (p, sched_data.types[i], slot,
6155				 sched_data.insns[i]))
6156	    break;
6157	  slot++;
6158	}
6159      if (slot == split)
6160	return 0;
6161      slot++;
6162    }
6163
6164  if (pslot)
6165    *pslot = slot;
6166  return 1;
6167}
6168
6169/* A frontend for itanium_split_issue.  For a packet P and a slot
6170   number FIRST that describes the start of the current clock cycle,
6171   return the slot number of the first split issue.  This function
6172   uses the cached number found in P if possible.  */
6173
6174static int
6175get_split (p, first)
6176     const struct ia64_packet *p;
6177     int first;
6178{
6179  if (first == 0)
6180    return p->first_split;
6181  return itanium_split_issue (p, first);
6182}
6183
6184/* Given N_READY insns in the array READY, whose types are found in the
6185   corresponding array TYPES, return the insn that is best suited to be
6186   scheduled in slot SLOT of packet P.  */
6187
6188static int
6189find_best_insn (ready, types, n_ready, p, slot)
6190     rtx *ready;
6191     enum attr_type *types;
6192     int n_ready;
6193     const struct ia64_packet *p;
6194     int slot;
6195{
6196  int best = -1;
6197  int best_pri = 0;
6198  while (n_ready-- > 0)
6199    {
6200      rtx insn = ready[n_ready];
6201      if (! insn)
6202	continue;
6203      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6204	break;
6205      /* If we have equally good insns, one of which has a stricter
6206	 slot requirement, prefer the one with the stricter requirement.  */
6207      if (best >= 0 && types[n_ready] == TYPE_A)
6208	continue;
6209      if (insn_matches_slot (p, types[n_ready], slot, insn))
6210	{
6211	  best = n_ready;
6212	  best_pri = INSN_PRIORITY (ready[best]);
6213
6214	  /* If there's no way we could get a stricter requirement, stop
6215	     looking now.  */
6216	  if (types[n_ready] != TYPE_A
6217	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6218	    break;
6219	  break;
6220	}
6221    }
6222  return best;
6223}
6224
6225/* Select the best packet to use given the current scheduler state and the
6226   current ready list.
6227   READY is an array holding N_READY ready insns; TYPES is a corresponding
6228   array that holds their types.  Store the best packet in *PPACKET and the
6229   number of insns that can be scheduled in the current cycle in *PBEST.  */
6230
6231static void
6232find_best_packet (pbest, ppacket, ready, types, n_ready)
6233     int *pbest;
6234     const struct ia64_packet **ppacket;
6235     rtx *ready;
6236     enum attr_type *types;
6237     int n_ready;
6238{
6239  int first = sched_data.first_slot;
6240  int best = 0;
6241  int lowest_end = 6;
6242  const struct ia64_packet *best_packet = NULL;
6243  int i;
6244
6245  for (i = 0; i < NR_PACKETS; i++)
6246    {
6247      const struct ia64_packet *p = packets + i;
6248      int slot;
6249      int split = get_split (p, first);
6250      int win = 0;
6251      int first_slot, last_slot;
6252      int b_nops = 0;
6253
6254      if (! packet_matches_p (p, split, &first_slot))
6255	continue;
6256
6257      memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6258
6259      win = 0;
6260      last_slot = 6;
6261      for (slot = first_slot; slot < split; slot++)
6262	{
6263	  int insn_nr;
6264
6265	  /* Disallow a degenerate case where the first bundle doesn't
6266	     contain anything but NOPs!  */
6267	  if (first_slot == 0 && win == 0 && slot == 3)
6268	    {
6269	      win = -1;
6270	      break;
6271	    }
6272
6273	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6274	  if (insn_nr >= 0)
6275	    {
6276	      sched_ready[insn_nr] = 0;
6277	      last_slot = slot;
6278	      win++;
6279	    }
6280	  else if (p->t[slot] == TYPE_B)
6281	    b_nops++;
6282	}
6283      /* We must disallow MBB/BBB packets if any of their B slots would be
6284	 filled with nops.  */
6285      if (last_slot < 3)
6286	{
6287	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6288	    win = -1;
6289	}
6290      else
6291	{
6292	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6293	    win = -1;
6294	}
6295
6296      if (win > best
6297	  || (win == best && last_slot < lowest_end))
6298	{
6299	  best = win;
6300	  lowest_end = last_slot;
6301	  best_packet = p;
6302	}
6303    }
6304  *pbest = best;
6305  *ppacket = best_packet;
6306}
6307
6308/* Reorder the ready list so that the insns that can be issued in this cycle
6309   are found in the correct order at the end of the list.
6310   DUMP is the scheduling dump file, or NULL.  READY points to the start,
6311   E_READY to the end of the ready list.  MAY_FAIL determines what should be
6312   done if no insns can be scheduled in this cycle: if it is zero, we abort,
6313   otherwise we return 0.
6314   Return 1 if any insns can be scheduled in this cycle.  */
6315
6316static int
6317itanium_reorder (dump, ready, e_ready, may_fail)
6318     FILE *dump;
6319     rtx *ready;
6320     rtx *e_ready;
6321     int may_fail;
6322{
6323  const struct ia64_packet *best_packet;
6324  int n_ready = e_ready - ready;
6325  int first = sched_data.first_slot;
6326  int i, best, best_split, filled;
6327
6328  for (i = 0; i < n_ready; i++)
6329    sched_types[i] = ia64_safe_type (ready[i]);
6330
6331  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6332
6333  if (best == 0)
6334    {
6335      if (may_fail)
6336	return 0;
6337      abort ();
6338    }
6339
6340  if (dump)
6341    {
6342      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6343	       best_packet->t1->name,
6344	       best_packet->t2 ? best_packet->t2->name : NULL, best);
6345    }
6346
6347  best_split = itanium_split_issue (best_packet, first);
6348  packet_matches_p (best_packet, best_split, &filled);
6349
6350  for (i = filled; i < best_split; i++)
6351    {
6352      int insn_nr;
6353
6354      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6355      if (insn_nr >= 0)
6356	{
6357	  rtx insn = ready[insn_nr];
6358	  memmove (ready + insn_nr, ready + insn_nr + 1,
6359		   (n_ready - insn_nr - 1) * sizeof (rtx));
6360	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6361		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6362	  ready[--n_ready] = insn;
6363	}
6364    }
6365
6366  sched_data.packet = best_packet;
6367  sched_data.split = best_split;
6368  return 1;
6369}
6370
6371/* Dump information about the current scheduling state to file DUMP.  */
6372
6373static void
6374dump_current_packet (dump)
6375     FILE *dump;
6376{
6377  int i;
6378  fprintf (dump, "//    %d slots filled:", sched_data.cur);
6379  for (i = 0; i < sched_data.first_slot; i++)
6380    {
6381      rtx insn = sched_data.insns[i];
6382      fprintf (dump, " %s", type_names[sched_data.types[i]]);
6383      if (insn)
6384	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6385      if (sched_data.stopbit[i])
6386	fprintf (dump, " ;;");
6387    }
6388  fprintf (dump, " :::");
6389  for (i = sched_data.first_slot; i < sched_data.cur; i++)
6390    {
6391      rtx insn = sched_data.insns[i];
6392      enum attr_type t = ia64_safe_type (insn);
6393      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6394    }
6395  fprintf (dump, "\n");
6396}
6397
6398/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
6399   NULL.  */
6400
6401static void
6402schedule_stop (dump)
6403     FILE *dump;
6404{
6405  const struct ia64_packet *best = sched_data.packet;
6406  int i;
6407  int best_stop = 6;
6408
6409  if (dump)
6410    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6411
6412  if (sched_data.cur == 0)
6413    {
6414      if (dump)
6415	fprintf (dump, "//   At start of bundle, so nothing to do.\n");
6416
6417      rotate_two_bundles (NULL);
6418      return;
6419    }
6420
6421  for (i = -1; i < NR_PACKETS; i++)
6422    {
6423      /* This is a slight hack to give the current packet the first chance.
6424	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
6425      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6426      int split = get_split (p, sched_data.first_slot);
6427      const struct bundle *compare;
6428      int next, stoppos;
6429
6430      if (! packet_matches_p (p, split, &next))
6431	continue;
6432
6433      compare = next > 3 ? p->t2 : p->t1;
6434
6435      stoppos = 3;
6436      if (compare->possible_stop)
6437	stoppos = compare->possible_stop;
6438      if (next > 3)
6439	stoppos += 3;
6440
6441      if (stoppos < next || stoppos >= best_stop)
6442	{
6443	  if (compare->possible_stop == 0)
6444	    continue;
6445	  stoppos = (next > 3 ? 6 : 3);
6446	}
6447      if (stoppos < next || stoppos >= best_stop)
6448	continue;
6449
6450      if (dump)
6451	fprintf (dump, "//   switching from %s %s to %s %s (stop at %d)\n",
6452		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6453		 stoppos);
6454
6455      best_stop = stoppos;
6456      best = p;
6457    }
6458
6459  sched_data.packet = best;
6460  cycle_end_fill_slots (dump);
6461  while (sched_data.cur < best_stop)
6462    {
6463      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6464      sched_data.insns[sched_data.cur] = 0;
6465      sched_data.stopbit[sched_data.cur] = 0;
6466      sched_data.cur++;
6467    }
6468  sched_data.stopbit[sched_data.cur - 1] = 1;
6469  sched_data.first_slot = best_stop;
6470
6471  if (dump)
6472    dump_current_packet (dump);
6473}
6474
6475/* If necessary, perform one or two rotations on the scheduling state.
6476   This should only be called if we are starting a new cycle.  */
6477
6478static void
6479maybe_rotate (dump)
6480     FILE *dump;
6481{
6482  cycle_end_fill_slots (dump);
6483  if (sched_data.cur == 6)
6484    rotate_two_bundles (dump);
6485  else if (sched_data.cur >= 3)
6486    rotate_one_bundle (dump);
6487  sched_data.first_slot = sched_data.cur;
6488}
6489
6490/* The clock cycle when ia64_sched_reorder was last called.  */
6491static int prev_cycle;
6492
6493/* The first insn scheduled in the previous cycle.  This is the saved
6494   value of sched_data.first_slot.  */
6495static int prev_first;
6496
6497/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
6498   pad out the delay between MM (shifts, etc.) and integer operations.  */
6499
6500static void
6501nop_cycles_until (clock_var, dump)
6502     int clock_var;
6503     FILE *dump;
6504{
6505  int prev_clock = prev_cycle;
6506  int cycles_left = clock_var - prev_clock;
6507  bool did_stop = false;
6508
6509  /* Finish the previous cycle; pad it out with NOPs.  */
6510  if (sched_data.cur == 3)
6511    {
6512      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6513      did_stop = true;
6514      maybe_rotate (dump);
6515    }
6516  else if (sched_data.cur > 0)
6517    {
6518      int need_stop = 0;
6519      int split = itanium_split_issue (sched_data.packet, prev_first);
6520
6521      if (sched_data.cur < 3 && split > 3)
6522	{
6523	  split = 3;
6524	  need_stop = 1;
6525	}
6526
6527      if (split > sched_data.cur)
6528	{
6529	  int i;
6530	  for (i = sched_data.cur; i < split; i++)
6531	    {
6532	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6533	      sched_data.types[i] = sched_data.packet->t[i];
6534	      sched_data.insns[i] = t;
6535	      sched_data.stopbit[i] = 0;
6536	    }
6537	  sched_data.cur = split;
6538	}
6539
6540      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6541	  && cycles_left > 1)
6542	{
6543	  int i;
6544	  for (i = sched_data.cur; i < 6; i++)
6545	    {
6546	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6547	      sched_data.types[i] = sched_data.packet->t[i];
6548	      sched_data.insns[i] = t;
6549	      sched_data.stopbit[i] = 0;
6550	    }
6551	  sched_data.cur = 6;
6552	  cycles_left--;
6553	  need_stop = 1;
6554	}
6555
6556      if (need_stop || sched_data.cur == 6)
6557	{
6558	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6559	  did_stop = true;
6560	}
6561      maybe_rotate (dump);
6562    }
6563
6564  cycles_left--;
6565  while (cycles_left > 0)
6566    {
6567      sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6568      sched_emit_insn (gen_nop_type (TYPE_M));
6569      sched_emit_insn (gen_nop_type (TYPE_I));
6570      if (cycles_left > 1)
6571	{
6572	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6573	  cycles_left--;
6574	}
6575      sched_emit_insn (gen_nop_type (TYPE_I));
6576      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6577      did_stop = true;
6578      cycles_left--;
6579    }
6580
6581  if (did_stop)
6582    init_insn_group_barriers ();
6583}
6584
6585/* We are about to being issuing insns for this clock cycle.
6586   Override the default sort algorithm to better slot instructions.  */
6587
6588static int
6589ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6590		    reorder_type, clock_var)
6591     FILE *dump ATTRIBUTE_UNUSED;
6592     int sched_verbose ATTRIBUTE_UNUSED;
6593     rtx *ready;
6594     int *pn_ready;
6595     int reorder_type, clock_var;
6596{
6597  int n_asms;
6598  int n_ready = *pn_ready;
6599  rtx *e_ready = ready + n_ready;
6600  rtx *insnp;
6601
6602  if (sched_verbose)
6603    {
6604      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6605      dump_current_packet (dump);
6606    }
6607
6608  /* Work around the pipeline flush that will occurr if the results of
6609     an MM instruction are accessed before the result is ready.  Intel
6610     documentation says this only happens with IALU, ISHF, ILOG, LD,
6611     and ST consumers, but experimental evidence shows that *any* non-MM
6612     type instruction will incurr the flush.  */
6613  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6614    {
6615      for (insnp = ready; insnp < e_ready; insnp++)
6616	{
6617	  rtx insn = *insnp, link;
6618	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6619
6620	  if (t == ITANIUM_CLASS_MMMUL
6621	      || t == ITANIUM_CLASS_MMSHF
6622	      || t == ITANIUM_CLASS_MMSHFI)
6623	    continue;
6624
6625	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6626	    if (REG_NOTE_KIND (link) == 0)
6627	      {
6628		rtx other = XEXP (link, 0);
6629		enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6630		if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6631		  {
6632		    nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6633		    goto out;
6634		  }
6635	      }
6636	}
6637    }
6638 out:
6639
6640  prev_first = sched_data.first_slot;
6641  prev_cycle = clock_var;
6642
6643  if (reorder_type == 0)
6644    maybe_rotate (sched_verbose ? dump : NULL);
6645
6646  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6647  n_asms = 0;
6648  for (insnp = ready; insnp < e_ready; insnp++)
6649    if (insnp < e_ready)
6650      {
6651	rtx insn = *insnp;
6652	enum attr_type t = ia64_safe_type (insn);
6653	if (t == TYPE_UNKNOWN)
6654	  {
6655	    if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6656		|| asm_noperands (PATTERN (insn)) >= 0)
6657	      {
6658		rtx lowest = ready[n_asms];
6659		ready[n_asms] = insn;
6660		*insnp = lowest;
6661		n_asms++;
6662	      }
6663	    else
6664	      {
6665		rtx highest = ready[n_ready - 1];
6666		ready[n_ready - 1] = insn;
6667		*insnp = highest;
6668		if (ia64_final_schedule && group_barrier_needed_p (insn))
6669		  {
6670		    schedule_stop (sched_verbose ? dump : NULL);
6671		    sched_data.last_was_stop = 1;
6672		    maybe_rotate (sched_verbose ? dump : NULL);
6673		  }
6674
6675		return 1;
6676	      }
6677	  }
6678      }
6679  if (n_asms < n_ready)
6680    {
6681      /* Some normal insns to process.  Skip the asms.  */
6682      ready += n_asms;
6683      n_ready -= n_asms;
6684    }
6685  else if (n_ready > 0)
6686    {
6687      /* Only asm insns left.  */
6688      if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6689	{
6690	  schedule_stop (sched_verbose ? dump : NULL);
6691	  sched_data.last_was_stop = 1;
6692	  maybe_rotate (sched_verbose ? dump : NULL);
6693	}
6694      cycle_end_fill_slots (sched_verbose ? dump : NULL);
6695      return 1;
6696    }
6697
6698  if (ia64_final_schedule)
6699    {
6700      int nr_need_stop = 0;
6701
6702      for (insnp = ready; insnp < e_ready; insnp++)
6703	if (safe_group_barrier_needed_p (*insnp))
6704	  nr_need_stop++;
6705
6706      /* Schedule a stop bit if
6707          - all insns require a stop bit, or
6708          - we are starting a new cycle and _any_ insns require a stop bit.
6709         The reason for the latter is that if our schedule is accurate, then
6710         the additional stop won't decrease performance at this point (since
6711	 there's a split issue at this point anyway), but it gives us more
6712         freedom when scheduling the currently ready insns.  */
6713      if ((reorder_type == 0 && nr_need_stop)
6714	  || (reorder_type == 1 && n_ready == nr_need_stop))
6715	{
6716	  schedule_stop (sched_verbose ? dump : NULL);
6717	  sched_data.last_was_stop = 1;
6718	  maybe_rotate (sched_verbose ? dump : NULL);
6719	  if (reorder_type == 1)
6720	    return 0;
6721	}
6722      else
6723	{
6724	  int deleted = 0;
6725	  insnp = e_ready;
6726	  /* Move down everything that needs a stop bit, preserving relative
6727	     order.  */
6728	  while (insnp-- > ready + deleted)
6729	    while (insnp >= ready + deleted)
6730	      {
6731		rtx insn = *insnp;
6732		if (! safe_group_barrier_needed_p (insn))
6733		  break;
6734		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6735		*ready = insn;
6736		deleted++;
6737	      }
6738	  n_ready -= deleted;
6739	  ready += deleted;
6740	  if (deleted != nr_need_stop)
6741	    abort ();
6742	}
6743    }
6744
6745  return itanium_reorder (sched_verbose ? dump : NULL,
6746			  ready, e_ready, reorder_type == 1);
6747}
6748
6749static int
6750ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6751     FILE *dump;
6752     int sched_verbose;
6753     rtx *ready;
6754     int *pn_ready;
6755     int clock_var;
6756{
6757  return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6758				      pn_ready, 0, clock_var);
6759}
6760
6761/* Like ia64_sched_reorder, but called after issuing each insn.
6762   Override the default sort algorithm to better slot instructions.  */
6763
6764static int
6765ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6766     FILE *dump ATTRIBUTE_UNUSED;
6767     int sched_verbose ATTRIBUTE_UNUSED;
6768     rtx *ready;
6769     int *pn_ready;
6770     int clock_var;
6771{
6772  if (sched_data.last_was_stop)
6773    return 0;
6774
6775  /* Detect one special case and try to optimize it.
6776     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6777     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
6778  if (sched_data.first_slot == 1
6779      && sched_data.stopbit[0]
6780      && ((sched_data.cur == 4
6781	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6782	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6783	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6784	  || (sched_data.cur == 3
6785	      && (sched_data.types[1] == TYPE_M
6786		  || sched_data.types[1] == TYPE_A)
6787	      && (sched_data.types[2] != TYPE_M
6788		  && sched_data.types[2] != TYPE_I
6789		  && sched_data.types[2] != TYPE_A))))
6790
6791    {
6792      int i, best;
6793      rtx stop = sched_data.insns[1];
6794
6795      /* Search backward for the stop bit that must be there.  */
6796      while (1)
6797	{
6798	  int insn_code;
6799
6800	  stop = PREV_INSN (stop);
6801	  if (GET_CODE (stop) != INSN)
6802	    abort ();
6803	  insn_code = recog_memoized (stop);
6804
6805	  /* Ignore .pred.rel.mutex.
6806
6807	     ??? Update this to ignore cycle display notes too
6808	     ??? once those are implemented  */
6809	  if (insn_code == CODE_FOR_pred_rel_mutex
6810	      || insn_code == CODE_FOR_prologue_use)
6811	    continue;
6812
6813	  if (insn_code == CODE_FOR_insn_group_barrier)
6814	    break;
6815	  abort ();
6816	}
6817
6818      /* Adjust the stop bit's slot selector.  */
6819      if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6820	abort ();
6821      XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6822
6823      sched_data.stopbit[0] = 0;
6824      sched_data.stopbit[2] = 1;
6825
6826      sched_data.types[5] = sched_data.types[3];
6827      sched_data.types[4] = sched_data.types[2];
6828      sched_data.types[3] = sched_data.types[1];
6829      sched_data.insns[5] = sched_data.insns[3];
6830      sched_data.insns[4] = sched_data.insns[2];
6831      sched_data.insns[3] = sched_data.insns[1];
6832      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6833      sched_data.cur += 2;
6834      sched_data.first_slot = 3;
6835      for (i = 0; i < NR_PACKETS; i++)
6836	{
6837	  const struct ia64_packet *p = packets + i;
6838	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6839	    {
6840	      sched_data.packet = p;
6841	      break;
6842	    }
6843	}
6844      rotate_one_bundle (sched_verbose ? dump : NULL);
6845
6846      best = 6;
6847      for (i = 0; i < NR_PACKETS; i++)
6848	{
6849	  const struct ia64_packet *p = packets + i;
6850	  int split = get_split (p, sched_data.first_slot);
6851	  int next;
6852
6853	  /* Disallow multiway branches here.  */
6854	  if (p->t[1] == TYPE_B)
6855	    continue;
6856
6857	  if (packet_matches_p (p, split, &next) && next < best)
6858	    {
6859	      best = next;
6860	      sched_data.packet = p;
6861	      sched_data.split = split;
6862	    }
6863	}
6864      if (best == 6)
6865	abort ();
6866    }
6867
6868  if (*pn_ready > 0)
6869    {
6870      int more = ia64_internal_sched_reorder (dump, sched_verbose,
6871					      ready, pn_ready, 1,
6872					      clock_var);
6873      if (more)
6874	return more;
6875      /* Did we schedule a stop?  If so, finish this cycle.  */
6876      if (sched_data.cur == sched_data.first_slot)
6877	return 0;
6878    }
6879
6880  if (sched_verbose)
6881    fprintf (dump, "//   Can't issue more this cycle; updating type array.\n");
6882
6883  cycle_end_fill_slots (sched_verbose ? dump : NULL);
6884  if (sched_verbose)
6885    dump_current_packet (dump);
6886  return 0;
6887}
6888
6889/* We are about to issue INSN.  Return the number of insns left on the
6890   ready queue that can be issued this cycle.  */
6891
6892static int
6893ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6894     FILE *dump;
6895     int sched_verbose;
6896     rtx insn;
6897     int can_issue_more ATTRIBUTE_UNUSED;
6898{
6899  enum attr_type t = ia64_safe_type (insn);
6900
6901  if (sched_data.last_was_stop)
6902    {
6903      int t = sched_data.first_slot;
6904      if (t == 0)
6905	t = 3;
6906      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6907      init_insn_group_barriers ();
6908      sched_data.last_was_stop = 0;
6909    }
6910
6911  if (t == TYPE_UNKNOWN)
6912    {
6913      if (sched_verbose)
6914	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6915      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6916	  || asm_noperands (PATTERN (insn)) >= 0)
6917	{
6918	  /* This must be some kind of asm.  Clear the scheduling state.  */
6919	  rotate_two_bundles (sched_verbose ? dump : NULL);
6920	  if (ia64_final_schedule)
6921	    group_barrier_needed_p (insn);
6922	}
6923      return 1;
6924    }
6925
6926  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
6927     important state info.  Don't delete this test.  */
6928  if (ia64_final_schedule
6929      && group_barrier_needed_p (insn))
6930    abort ();
6931
6932  sched_data.stopbit[sched_data.cur] = 0;
6933  sched_data.insns[sched_data.cur] = insn;
6934  sched_data.types[sched_data.cur] = t;
6935
6936  sched_data.cur++;
6937  if (sched_verbose)
6938    fprintf (dump, "// Scheduling insn %d of type %s\n",
6939	     INSN_UID (insn), type_names[t]);
6940
6941  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6942    {
6943      schedule_stop (sched_verbose ? dump : NULL);
6944      sched_data.last_was_stop = 1;
6945    }
6946
6947  return 1;
6948}
6949
6950/* Free data allocated by ia64_sched_init.  */
6951
6952static void
6953ia64_sched_finish (dump, sched_verbose)
6954     FILE *dump;
6955     int sched_verbose;
6956{
6957  if (sched_verbose)
6958    fprintf (dump, "// Finishing schedule.\n");
6959  rotate_two_bundles (NULL);
6960  free (sched_types);
6961  free (sched_ready);
6962}
6963
6964/* Emit pseudo-ops for the assembler to describe predicate relations.
6965   At present this assumes that we only consider predicate pairs to
6966   be mutex, and that the assembler can deduce proper values from
6967   straight-line code.  */
6968
6969static void
6970emit_predicate_relation_info ()
6971{
6972  basic_block bb;
6973
6974  FOR_EACH_BB_REVERSE (bb)
6975    {
6976      int r;
6977      rtx head = bb->head;
6978
6979      /* We only need such notes at code labels.  */
6980      if (GET_CODE (head) != CODE_LABEL)
6981	continue;
6982      if (GET_CODE (NEXT_INSN (head)) == NOTE
6983	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6984	head = NEXT_INSN (head);
6985
6986      for (r = PR_REG (0); r < PR_REG (64); r += 2)
6987	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6988	  {
6989	    rtx p = gen_rtx_REG (BImode, r);
6990	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6991	    if (head == bb->end)
6992	      bb->end = n;
6993	    head = n;
6994	  }
6995    }
6996
6997  /* Look for conditional calls that do not return, and protect predicate
6998     relations around them.  Otherwise the assembler will assume the call
6999     returns, and complain about uses of call-clobbered predicates after
7000     the call.  */
7001  FOR_EACH_BB_REVERSE (bb)
7002    {
7003      rtx insn = bb->head;
7004
7005      while (1)
7006	{
7007	  if (GET_CODE (insn) == CALL_INSN
7008	      && GET_CODE (PATTERN (insn)) == COND_EXEC
7009	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7010	    {
7011	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7012	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7013	      if (bb->head == insn)
7014		bb->head = b;
7015	      if (bb->end == insn)
7016		bb->end = a;
7017	    }
7018
7019	  if (insn == bb->end)
7020	    break;
7021	  insn = NEXT_INSN (insn);
7022	}
7023    }
7024}
7025
7026/* Generate a NOP instruction of type T.  We will never generate L type
7027   nops.  */
7028
7029static rtx
7030gen_nop_type (t)
7031     enum attr_type t;
7032{
7033  switch (t)
7034    {
7035    case TYPE_M:
7036      return gen_nop_m ();
7037    case TYPE_I:
7038      return gen_nop_i ();
7039    case TYPE_B:
7040      return gen_nop_b ();
7041    case TYPE_F:
7042      return gen_nop_f ();
7043    case TYPE_X:
7044      return gen_nop_x ();
7045    default:
7046      abort ();
7047    }
7048}
7049
7050/* After the last scheduling pass, fill in NOPs.  It's easier to do this
7051   here than while scheduling.  */
7052
7053static void
7054ia64_emit_nops ()
7055{
7056  rtx insn;
7057  const struct bundle *b = 0;
7058  int bundle_pos = 0;
7059
7060  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7061    {
7062      rtx pat;
7063      enum attr_type t;
7064      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
7065      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
7066	continue;
7067      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
7068	  || GET_CODE (insn) == CODE_LABEL)
7069	{
7070	  if (b)
7071	    while (bundle_pos < 3)
7072	      {
7073		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7074		bundle_pos++;
7075	      }
7076	  if (GET_CODE (insn) != CODE_LABEL)
7077	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
7078	  else
7079	    b = 0;
7080	  bundle_pos = 0;
7081	  continue;
7082	}
7083      else if (GET_CODE (pat) == UNSPEC_VOLATILE
7084	       && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
7085	{
7086	  int t = INTVAL (XVECEXP (pat, 0, 0));
7087	  if (b)
7088	    while (bundle_pos < t)
7089	      {
7090		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7091		bundle_pos++;
7092	      }
7093	  continue;
7094	}
7095
7096      if (bundle_pos == 3)
7097	b = 0;
7098
7099      if (b && INSN_P (insn))
7100	{
7101	  t = ia64_safe_type (insn);
7102	  if (asm_noperands (PATTERN (insn)) >= 0
7103	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
7104	    {
7105	      while (bundle_pos < 3)
7106		{
7107		  if (b->t[bundle_pos] != TYPE_L)
7108		    emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7109		  bundle_pos++;
7110		}
7111	      continue;
7112	    }
7113
7114	  if (t == TYPE_UNKNOWN)
7115	    continue;
7116	  while (bundle_pos < 3)
7117	    {
7118	      if (t == b->t[bundle_pos]
7119		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
7120				      || b->t[bundle_pos] == TYPE_I)))
7121		break;
7122
7123	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7124	      bundle_pos++;
7125	    }
7126	  if (bundle_pos < 3)
7127	    bundle_pos++;
7128	}
7129    }
7130}
7131
7132/* Perform machine dependent operations on the rtl chain INSNS.  */
7133
7134void
7135ia64_reorg (insns)
7136     rtx insns;
7137{
7138  /* We are freeing block_for_insn in the toplev to keep compatibility
7139     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7140  compute_bb_for_insn ();
7141
7142  /* If optimizing, we'll have split before scheduling.  */
7143  if (optimize == 0)
7144    split_all_insns (0);
7145
7146  /* ??? update_life_info_in_dirty_blocks fails to terminate during
7147     non-optimizing bootstrap.  */
7148  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7149
7150  if (ia64_flag_schedule_insns2)
7151    {
7152      timevar_push (TV_SCHED2);
7153      ia64_final_schedule = 1;
7154      schedule_ebbs (rtl_dump_file);
7155      ia64_final_schedule = 0;
7156      timevar_pop (TV_SCHED2);
7157
7158      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
7159	 place as they were during scheduling.  */
7160      emit_insn_group_barriers (rtl_dump_file, insns);
7161      ia64_emit_nops ();
7162    }
7163  else
7164    emit_all_insn_group_barriers (rtl_dump_file, insns);
7165
7166  /* A call must not be the last instruction in a function, so that the
7167     return address is still within the function, so that unwinding works
7168     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7169  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7170    {
7171      rtx insn;
7172      int saw_stop = 0;
7173
7174      insn = get_last_insn ();
7175      if (! INSN_P (insn))
7176        insn = prev_active_insn (insn);
7177      if (GET_CODE (insn) == INSN
7178	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7179	  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7180	{
7181	  saw_stop = 1;
7182	  insn = prev_active_insn (insn);
7183	}
7184      if (GET_CODE (insn) == CALL_INSN)
7185	{
7186	  if (! saw_stop)
7187	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7188	  emit_insn (gen_break_f ());
7189	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7190	}
7191    }
7192
7193  fixup_errata ();
7194  emit_predicate_relation_info ();
7195}
7196
7197/* Return true if REGNO is used by the epilogue.  */
7198
7199int
7200ia64_epilogue_uses (regno)
7201     int regno;
7202{
7203  switch (regno)
7204    {
7205    case R_GR (1):
7206      /* With a call to a function in another module, we will write a new
7207	 value to "gp".  After returning from such a call, we need to make
7208	 sure the function restores the original gp-value, even if the
7209	 function itself does not use the gp anymore.  */
7210      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7211
7212    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7213    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7214      /* For functions defined with the syscall_linkage attribute, all
7215	 input registers are marked as live at all function exits.  This
7216	 prevents the register allocator from using the input registers,
7217	 which in turn makes it possible to restart a system call after
7218	 an interrupt without having to save/restore the input registers.
7219	 This also prevents kernel data from leaking to application code.  */
7220      return lookup_attribute ("syscall_linkage",
7221	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7222
7223    case R_BR (0):
7224      /* Conditional return patterns can't represent the use of `b0' as
7225         the return address, so we force the value live this way.  */
7226      return 1;
7227
7228    case AR_PFS_REGNUM:
7229      /* Likewise for ar.pfs, which is used by br.ret.  */
7230      return 1;
7231
7232    default:
7233      return 0;
7234    }
7235}
7236
7237/* Return true if REGNO is used by the frame unwinder.  */
7238
7239int
7240ia64_eh_uses (regno)
7241     int regno;
7242{
7243  if (! reload_completed)
7244    return 0;
7245
7246  if (current_frame_info.reg_save_b0
7247      && regno == current_frame_info.reg_save_b0)
7248    return 1;
7249  if (current_frame_info.reg_save_pr
7250      && regno == current_frame_info.reg_save_pr)
7251    return 1;
7252  if (current_frame_info.reg_save_ar_pfs
7253      && regno == current_frame_info.reg_save_ar_pfs)
7254    return 1;
7255  if (current_frame_info.reg_save_ar_unat
7256      && regno == current_frame_info.reg_save_ar_unat)
7257    return 1;
7258  if (current_frame_info.reg_save_ar_lc
7259      && regno == current_frame_info.reg_save_ar_lc)
7260    return 1;
7261
7262  return 0;
7263}
7264
7265/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7266
7267   We add @ to the name if this goes in small data/bss.  We can only put
7268   a variable in small data/bss if it is defined in this module or a module
7269   that we are statically linked with.  We can't check the second condition,
7270   but TREE_STATIC gives us the first one.  */
7271
7272/* ??? If we had IPA, we could check the second condition.  We could support
7273   programmer added section attributes if the variable is not defined in this
7274   module.  */
7275
7276/* ??? See the v850 port for a cleaner way to do this.  */
7277
7278/* ??? We could also support own long data here.  Generating movl/add/ld8
7279   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
7280   code faster because there is one less load.  This also includes incomplete
7281   types which can't go in sdata/sbss.  */
7282
7283static bool
7284ia64_in_small_data_p (exp)
7285     tree exp;
7286{
7287  if (TARGET_NO_SDATA)
7288    return false;
7289
7290  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7291    {
7292      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7293      if (strcmp (section, ".sdata") == 0
7294	  || strcmp (section, ".sbss") == 0)
7295	return true;
7296    }
7297  else
7298    {
7299      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7300
7301      /* If this is an incomplete type with size 0, then we can't put it
7302	 in sdata because it might be too big when completed.  */
7303      if (size > 0 && size <= ia64_section_threshold)
7304	return true;
7305    }
7306
7307  return false;
7308}
7309
7310static void
7311ia64_encode_section_info (decl, first)
7312     tree decl;
7313     int first ATTRIBUTE_UNUSED;
7314{
7315  const char *symbol_str;
7316  bool is_local;
7317  rtx symbol;
7318  char encoding = 0;
7319
7320  if (TREE_CODE (decl) == FUNCTION_DECL)
7321    {
7322      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7323      return;
7324    }
7325
7326  /* Careful not to prod global register variables.  */
7327  if (TREE_CODE (decl) != VAR_DECL
7328      || GET_CODE (DECL_RTL (decl)) != MEM
7329      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
7330    return;
7331
7332  symbol = XEXP (DECL_RTL (decl), 0);
7333  symbol_str = XSTR (symbol, 0);
7334
7335  is_local = (*targetm.binds_local_p) (decl);
7336
7337  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7338    encoding = " GLil"[decl_tls_model (decl)];
7339  /* Determine if DECL will wind up in .sdata/.sbss.  */
7340  else if (is_local && ia64_in_small_data_p (decl))
7341    encoding = 's';
7342
7343  /* Finally, encode this into the symbol string.  */
7344  if (encoding)
7345    {
7346      char *newstr;
7347      size_t len;
7348
7349      if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7350	{
7351	  if (encoding == symbol_str[1])
7352	    return;
7353	  /* ??? Sdata became thread or thread becaome not thread.  Lose.  */
7354	  abort ();
7355	}
7356
7357      len = strlen (symbol_str);
7358      newstr = alloca (len + 3);
7359      newstr[0] = ENCODE_SECTION_INFO_CHAR;
7360      newstr[1] = encoding;
7361      memcpy (newstr + 2, symbol_str, len + 1);
7362
7363      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7364    }
7365
7366  /* This decl is marked as being in small data/bss but it shouldn't be;
7367     one likely explanation for this is that the decl has been moved into
7368     a different section from the one it was in when encode_section_info
7369     was first called.  Remove the encoding.  */
7370  else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7371    XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7372}
7373
7374static const char *
7375ia64_strip_name_encoding (str)
7376     const char *str;
7377{
7378  if (str[0] == ENCODE_SECTION_INFO_CHAR)
7379    str += 2;
7380  if (str[0] == '*')
7381    str++;
7382  return str;
7383}
7384
7385/* True if it is OK to do sibling call optimization for the specified
7386   call expression EXP.  DECL will be the called function, or NULL if
7387   this is an indirect call.  */
7388bool
7389ia64_function_ok_for_sibcall (decl)
7390     tree decl;
7391{
7392  /* We must always return with our current GP.  This means we can
7393     only sibcall to functions defined in the current module.  */
7394  return decl && (*targetm.binds_local_p) (decl);
7395}
7396
7397/* Output assembly directives for prologue regions.  */
7398
7399/* The current basic block number.  */
7400
7401static bool last_block;
7402
7403/* True if we need a copy_state command at the start of the next block.  */
7404
7405static bool need_copy_state;
7406
7407/* The function emits unwind directives for the start of an epilogue.  */
7408
7409static void
7410process_epilogue ()
7411{
7412  /* If this isn't the last block of the function, then we need to label the
7413     current state, and copy it back in at the start of the next block.  */
7414
7415  if (!last_block)
7416    {
7417      fprintf (asm_out_file, "\t.label_state 1\n");
7418      need_copy_state = true;
7419    }
7420
7421  fprintf (asm_out_file, "\t.restore sp\n");
7422}
7423
7424/* This function processes a SET pattern looking for specific patterns
7425   which result in emitting an assembly directive required for unwinding.  */
7426
7427static int
7428process_set (asm_out_file, pat)
7429     FILE *asm_out_file;
7430     rtx pat;
7431{
7432  rtx src = SET_SRC (pat);
7433  rtx dest = SET_DEST (pat);
7434  int src_regno, dest_regno;
7435
7436  /* Look for the ALLOC insn.  */
7437  if (GET_CODE (src) == UNSPEC_VOLATILE
7438      && XINT (src, 1) == UNSPECV_ALLOC
7439      && GET_CODE (dest) == REG)
7440    {
7441      dest_regno = REGNO (dest);
7442
7443      /* If this isn't the final destination for ar.pfs, the alloc
7444	 shouldn't have been marked frame related.  */
7445      if (dest_regno != current_frame_info.reg_save_ar_pfs)
7446	abort ();
7447
7448      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7449	       ia64_dbx_register_number (dest_regno));
7450      return 1;
7451    }
7452
7453  /* Look for SP = ....  */
7454  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7455    {
7456      if (GET_CODE (src) == PLUS)
7457        {
7458	  rtx op0 = XEXP (src, 0);
7459	  rtx op1 = XEXP (src, 1);
7460	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
7461	    {
7462	      if (INTVAL (op1) < 0)
7463		{
7464		  fputs ("\t.fframe ", asm_out_file);
7465		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7466			   -INTVAL (op1));
7467		  fputc ('\n', asm_out_file);
7468		}
7469	      else
7470		process_epilogue ();
7471	    }
7472	  else
7473	    abort ();
7474	}
7475      else if (GET_CODE (src) == REG
7476	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7477	process_epilogue ();
7478      else
7479	abort ();
7480
7481      return 1;
7482    }
7483
7484  /* Register move we need to look at.  */
7485  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7486    {
7487      src_regno = REGNO (src);
7488      dest_regno = REGNO (dest);
7489
7490      switch (src_regno)
7491	{
7492	case BR_REG (0):
7493	  /* Saving return address pointer.  */
7494	  if (dest_regno != current_frame_info.reg_save_b0)
7495	    abort ();
7496	  fprintf (asm_out_file, "\t.save rp, r%d\n",
7497		   ia64_dbx_register_number (dest_regno));
7498	  return 1;
7499
7500	case PR_REG (0):
7501	  if (dest_regno != current_frame_info.reg_save_pr)
7502	    abort ();
7503	  fprintf (asm_out_file, "\t.save pr, r%d\n",
7504		   ia64_dbx_register_number (dest_regno));
7505	  return 1;
7506
7507	case AR_UNAT_REGNUM:
7508	  if (dest_regno != current_frame_info.reg_save_ar_unat)
7509	    abort ();
7510	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7511		   ia64_dbx_register_number (dest_regno));
7512	  return 1;
7513
7514	case AR_LC_REGNUM:
7515	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7516	    abort ();
7517	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7518		   ia64_dbx_register_number (dest_regno));
7519	  return 1;
7520
7521	case STACK_POINTER_REGNUM:
7522	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
7523	      || ! frame_pointer_needed)
7524	    abort ();
7525	  fprintf (asm_out_file, "\t.vframe r%d\n",
7526		   ia64_dbx_register_number (dest_regno));
7527	  return 1;
7528
7529	default:
7530	  /* Everything else should indicate being stored to memory.  */
7531	  abort ();
7532	}
7533    }
7534
7535  /* Memory store we need to look at.  */
7536  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7537    {
7538      long off;
7539      rtx base;
7540      const char *saveop;
7541
7542      if (GET_CODE (XEXP (dest, 0)) == REG)
7543	{
7544	  base = XEXP (dest, 0);
7545	  off = 0;
7546	}
7547      else if (GET_CODE (XEXP (dest, 0)) == PLUS
7548	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7549	{
7550	  base = XEXP (XEXP (dest, 0), 0);
7551	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
7552	}
7553      else
7554	abort ();
7555
7556      if (base == hard_frame_pointer_rtx)
7557	{
7558	  saveop = ".savepsp";
7559	  off = - off;
7560	}
7561      else if (base == stack_pointer_rtx)
7562	saveop = ".savesp";
7563      else
7564	abort ();
7565
7566      src_regno = REGNO (src);
7567      switch (src_regno)
7568	{
7569	case BR_REG (0):
7570	  if (current_frame_info.reg_save_b0 != 0)
7571	    abort ();
7572	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7573	  return 1;
7574
7575	case PR_REG (0):
7576	  if (current_frame_info.reg_save_pr != 0)
7577	    abort ();
7578	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7579	  return 1;
7580
7581	case AR_LC_REGNUM:
7582	  if (current_frame_info.reg_save_ar_lc != 0)
7583	    abort ();
7584	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7585	  return 1;
7586
7587	case AR_PFS_REGNUM:
7588	  if (current_frame_info.reg_save_ar_pfs != 0)
7589	    abort ();
7590	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7591	  return 1;
7592
7593	case AR_UNAT_REGNUM:
7594	  if (current_frame_info.reg_save_ar_unat != 0)
7595	    abort ();
7596	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7597	  return 1;
7598
7599	case GR_REG (4):
7600	case GR_REG (5):
7601	case GR_REG (6):
7602	case GR_REG (7):
7603	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
7604		   1 << (src_regno - GR_REG (4)));
7605	  return 1;
7606
7607	case BR_REG (1):
7608	case BR_REG (2):
7609	case BR_REG (3):
7610	case BR_REG (4):
7611	case BR_REG (5):
7612	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
7613		   1 << (src_regno - BR_REG (1)));
7614	  return 1;
7615
7616	case FR_REG (2):
7617	case FR_REG (3):
7618	case FR_REG (4):
7619	case FR_REG (5):
7620	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
7621		   1 << (src_regno - FR_REG (2)));
7622	  return 1;
7623
7624	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7625	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7626	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7627	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7628	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7629		   1 << (src_regno - FR_REG (12)));
7630	  return 1;
7631
7632	default:
7633	  return 0;
7634	}
7635    }
7636
7637  return 0;
7638}
7639
7640
7641/* This function looks at a single insn and emits any directives
7642   required to unwind this insn.  */
7643void
7644process_for_unwind_directive (asm_out_file, insn)
7645     FILE *asm_out_file;
7646     rtx insn;
7647{
7648  if (flag_unwind_tables
7649      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7650    {
7651      rtx pat;
7652
7653      if (GET_CODE (insn) == NOTE
7654	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7655	{
7656	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7657
7658	  /* Restore unwind state from immediately before the epilogue.  */
7659	  if (need_copy_state)
7660	    {
7661	      fprintf (asm_out_file, "\t.body\n");
7662	      fprintf (asm_out_file, "\t.copy_state 1\n");
7663	      need_copy_state = false;
7664	    }
7665	}
7666
7667      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7668	return;
7669
7670      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7671      if (pat)
7672	pat = XEXP (pat, 0);
7673      else
7674	pat = PATTERN (insn);
7675
7676      switch (GET_CODE (pat))
7677        {
7678	case SET:
7679	  process_set (asm_out_file, pat);
7680	  break;
7681
7682	case PARALLEL:
7683	  {
7684	    int par_index;
7685	    int limit = XVECLEN (pat, 0);
7686	    for (par_index = 0; par_index < limit; par_index++)
7687	      {
7688		rtx x = XVECEXP (pat, 0, par_index);
7689		if (GET_CODE (x) == SET)
7690		  process_set (asm_out_file, x);
7691	      }
7692	    break;
7693	  }
7694
7695	default:
7696	  abort ();
7697	}
7698    }
7699}
7700
7701
7702void
7703ia64_init_builtins ()
7704{
7705  tree psi_type_node = build_pointer_type (integer_type_node);
7706  tree pdi_type_node = build_pointer_type (long_integer_type_node);
7707
7708  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7709  tree si_ftype_psi_si_si
7710    = build_function_type_list (integer_type_node,
7711				psi_type_node, integer_type_node,
7712				integer_type_node, NULL_TREE);
7713
7714  /* __sync_val_compare_and_swap_di */
7715  tree di_ftype_pdi_di_di
7716    = build_function_type_list (long_integer_type_node,
7717				pdi_type_node, long_integer_type_node,
7718				long_integer_type_node, NULL_TREE);
7719  /* __sync_bool_compare_and_swap_di */
7720  tree si_ftype_pdi_di_di
7721    = build_function_type_list (integer_type_node,
7722				pdi_type_node, long_integer_type_node,
7723				long_integer_type_node, NULL_TREE);
7724  /* __sync_synchronize */
7725  tree void_ftype_void
7726    = build_function_type (void_type_node, void_list_node);
7727
7728  /* __sync_lock_test_and_set_si */
7729  tree si_ftype_psi_si
7730    = build_function_type_list (integer_type_node,
7731				psi_type_node, integer_type_node, NULL_TREE);
7732
7733  /* __sync_lock_test_and_set_di */
7734  tree di_ftype_pdi_di
7735    = build_function_type_list (long_integer_type_node,
7736				pdi_type_node, long_integer_type_node,
7737				NULL_TREE);
7738
7739  /* __sync_lock_release_si */
7740  tree void_ftype_psi
7741    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7742
7743  /* __sync_lock_release_di */
7744  tree void_ftype_pdi
7745    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7746
7747#define def_builtin(name, type, code) \
7748  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7749
7750  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7751	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7752  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7753	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7754  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7755	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7756  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7757	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7758
7759  def_builtin ("__sync_synchronize", void_ftype_void,
7760	       IA64_BUILTIN_SYNCHRONIZE);
7761
7762  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7763	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7764  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7765	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7766  def_builtin ("__sync_lock_release_si", void_ftype_psi,
7767	       IA64_BUILTIN_LOCK_RELEASE_SI);
7768  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7769	       IA64_BUILTIN_LOCK_RELEASE_DI);
7770
7771  def_builtin ("__builtin_ia64_bsp",
7772	       build_function_type (ptr_type_node, void_list_node),
7773	       IA64_BUILTIN_BSP);
7774
7775  def_builtin ("__builtin_ia64_flushrs",
7776	       build_function_type (void_type_node, void_list_node),
7777	       IA64_BUILTIN_FLUSHRS);
7778
7779  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7780	       IA64_BUILTIN_FETCH_AND_ADD_SI);
7781  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7782	       IA64_BUILTIN_FETCH_AND_SUB_SI);
7783  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7784	       IA64_BUILTIN_FETCH_AND_OR_SI);
7785  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7786	       IA64_BUILTIN_FETCH_AND_AND_SI);
7787  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7788	       IA64_BUILTIN_FETCH_AND_XOR_SI);
7789  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7790	       IA64_BUILTIN_FETCH_AND_NAND_SI);
7791
7792  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7793	       IA64_BUILTIN_ADD_AND_FETCH_SI);
7794  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7795	       IA64_BUILTIN_SUB_AND_FETCH_SI);
7796  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7797	       IA64_BUILTIN_OR_AND_FETCH_SI);
7798  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7799	       IA64_BUILTIN_AND_AND_FETCH_SI);
7800  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7801	       IA64_BUILTIN_XOR_AND_FETCH_SI);
7802  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7803	       IA64_BUILTIN_NAND_AND_FETCH_SI);
7804
7805  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7806	       IA64_BUILTIN_FETCH_AND_ADD_DI);
7807  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7808	       IA64_BUILTIN_FETCH_AND_SUB_DI);
7809  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7810	       IA64_BUILTIN_FETCH_AND_OR_DI);
7811  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7812	       IA64_BUILTIN_FETCH_AND_AND_DI);
7813  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7814	       IA64_BUILTIN_FETCH_AND_XOR_DI);
7815  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7816	       IA64_BUILTIN_FETCH_AND_NAND_DI);
7817
7818  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7819	       IA64_BUILTIN_ADD_AND_FETCH_DI);
7820  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7821	       IA64_BUILTIN_SUB_AND_FETCH_DI);
7822  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7823	       IA64_BUILTIN_OR_AND_FETCH_DI);
7824  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7825	       IA64_BUILTIN_AND_AND_FETCH_DI);
7826  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7827	       IA64_BUILTIN_XOR_AND_FETCH_DI);
7828  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7829	       IA64_BUILTIN_NAND_AND_FETCH_DI);
7830
7831#undef def_builtin
7832}
7833
7834/* Expand fetch_and_op intrinsics.  The basic code sequence is:
7835
7836     mf
7837     tmp = [ptr];
7838     do {
7839       ret = tmp;
7840       ar.ccv = tmp;
7841       tmp <op>= value;
7842       cmpxchgsz.acq tmp = [ptr], tmp
7843     } while (tmp != ret)
7844*/
7845
7846static rtx
7847ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7848     optab binoptab;
7849     enum machine_mode mode;
7850     tree arglist;
7851     rtx target;
7852{
7853  rtx ret, label, tmp, ccv, insn, mem, value;
7854  tree arg0, arg1;
7855
7856  arg0 = TREE_VALUE (arglist);
7857  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7858  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7859#ifdef POINTERS_EXTEND_UNSIGNED
7860  if (GET_MODE(mem) != Pmode)
7861    mem = convert_memory_address (Pmode, mem);
7862#endif
7863  value = expand_expr (arg1, NULL_RTX, mode, 0);
7864
7865  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7866  MEM_VOLATILE_P (mem) = 1;
7867
7868  if (target && register_operand (target, mode))
7869    ret = target;
7870  else
7871    ret = gen_reg_rtx (mode);
7872
7873  emit_insn (gen_mf ());
7874
7875  /* Special case for fetchadd instructions.  */
7876  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7877    {
7878      if (mode == SImode)
7879        insn = gen_fetchadd_acq_si (ret, mem, value);
7880      else
7881        insn = gen_fetchadd_acq_di (ret, mem, value);
7882      emit_insn (insn);
7883      return ret;
7884    }
7885
7886  tmp = gen_reg_rtx (mode);
7887  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7888  emit_move_insn (tmp, mem);
7889
7890  label = gen_label_rtx ();
7891  emit_label (label);
7892  emit_move_insn (ret, tmp);
7893  emit_move_insn (ccv, tmp);
7894
7895  /* Perform the specific operation.  Special case NAND by noticing
7896     one_cmpl_optab instead.  */
7897  if (binoptab == one_cmpl_optab)
7898    {
7899      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7900      binoptab = and_optab;
7901    }
7902  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7903
7904  if (mode == SImode)
7905    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7906  else
7907    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7908  emit_insn (insn);
7909
7910  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7911
7912  return ret;
7913}
7914
7915/* Expand op_and_fetch intrinsics.  The basic code sequence is:
7916
7917     mf
7918     tmp = [ptr];
7919     do {
7920       old = tmp;
7921       ar.ccv = tmp;
7922       ret = tmp <op> value;
7923       cmpxchgsz.acq tmp = [ptr], ret
7924     } while (tmp != old)
7925*/
7926
7927static rtx
7928ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7929     optab binoptab;
7930     enum machine_mode mode;
7931     tree arglist;
7932     rtx target;
7933{
7934  rtx old, label, tmp, ret, ccv, insn, mem, value;
7935  tree arg0, arg1;
7936
7937  arg0 = TREE_VALUE (arglist);
7938  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7939  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7940#ifdef POINTERS_EXTEND_UNSIGNED
7941  if (GET_MODE(mem) != Pmode)
7942    mem = convert_memory_address (Pmode, mem);
7943#endif
7944
7945  value = expand_expr (arg1, NULL_RTX, mode, 0);
7946
7947  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7948  MEM_VOLATILE_P (mem) = 1;
7949
7950  if (target && ! register_operand (target, mode))
7951    target = NULL_RTX;
7952
7953  emit_insn (gen_mf ());
7954  tmp = gen_reg_rtx (mode);
7955  old = gen_reg_rtx (mode);
7956  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7957
7958  emit_move_insn (tmp, mem);
7959
7960  label = gen_label_rtx ();
7961  emit_label (label);
7962  emit_move_insn (old, tmp);
7963  emit_move_insn (ccv, tmp);
7964
7965  /* Perform the specific operation.  Special case NAND by noticing
7966     one_cmpl_optab instead.  */
7967  if (binoptab == one_cmpl_optab)
7968    {
7969      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7970      binoptab = and_optab;
7971    }
7972  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7973
7974  if (mode == SImode)
7975    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7976  else
7977    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7978  emit_insn (insn);
7979
7980  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7981
7982  return ret;
7983}
7984
7985/* Expand val_ and bool_compare_and_swap.  For val_ we want:
7986
7987     ar.ccv = oldval
7988     mf
7989     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7990     return ret
7991
7992   For bool_ it's the same except return ret == oldval.
7993*/
7994
7995static rtx
7996ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
7997     enum machine_mode rmode;
7998     enum machine_mode mode;
7999     int boolp;
8000     tree arglist;
8001     rtx target;
8002{
8003  tree arg0, arg1, arg2;
8004  rtx mem, old, new, ccv, tmp, insn;
8005
8006  arg0 = TREE_VALUE (arglist);
8007  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8008  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8009  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8010  old = expand_expr (arg1, NULL_RTX, mode, 0);
8011  new = expand_expr (arg2, NULL_RTX, mode, 0);
8012
8013  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8014  MEM_VOLATILE_P (mem) = 1;
8015
8016  if (! register_operand (old, mode))
8017    old = copy_to_mode_reg (mode, old);
8018  if (! register_operand (new, mode))
8019    new = copy_to_mode_reg (mode, new);
8020
8021  if (! boolp && target && register_operand (target, mode))
8022    tmp = target;
8023  else
8024    tmp = gen_reg_rtx (mode);
8025
8026  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8027  if (mode == DImode)
8028    emit_move_insn (ccv, old);
8029  else
8030    {
8031      rtx ccvtmp = gen_reg_rtx (DImode);
8032      emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
8033      emit_move_insn (ccv, ccvtmp);
8034    }
8035  emit_insn (gen_mf ());
8036  if (mode == SImode)
8037    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8038  else
8039    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8040  emit_insn (insn);
8041
8042  if (boolp)
8043    {
8044      if (! target)
8045	target = gen_reg_rtx (rmode);
8046      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8047    }
8048  else
8049    return tmp;
8050}
8051
8052/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
8053
8054static rtx
8055ia64_expand_lock_test_and_set (mode, arglist, target)
8056     enum machine_mode mode;
8057     tree arglist;
8058     rtx target;
8059{
8060  tree arg0, arg1;
8061  rtx mem, new, ret, insn;
8062
8063  arg0 = TREE_VALUE (arglist);
8064  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8065  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8066  new = expand_expr (arg1, NULL_RTX, mode, 0);
8067
8068  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8069  MEM_VOLATILE_P (mem) = 1;
8070  if (! register_operand (new, mode))
8071    new = copy_to_mode_reg (mode, new);
8072
8073  if (target && register_operand (target, mode))
8074    ret = target;
8075  else
8076    ret = gen_reg_rtx (mode);
8077
8078  if (mode == SImode)
8079    insn = gen_xchgsi (ret, mem, new);
8080  else
8081    insn = gen_xchgdi (ret, mem, new);
8082  emit_insn (insn);
8083
8084  return ret;
8085}
8086
8087/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
8088
8089static rtx
8090ia64_expand_lock_release (mode, arglist, target)
8091     enum machine_mode mode;
8092     tree arglist;
8093     rtx target ATTRIBUTE_UNUSED;
8094{
8095  tree arg0;
8096  rtx mem;
8097
8098  arg0 = TREE_VALUE (arglist);
8099  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8100
8101  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8102  MEM_VOLATILE_P (mem) = 1;
8103
8104  emit_move_insn (mem, const0_rtx);
8105
8106  return const0_rtx;
8107}
8108
8109rtx
8110ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8111     tree exp;
8112     rtx target;
8113     rtx subtarget ATTRIBUTE_UNUSED;
8114     enum machine_mode mode ATTRIBUTE_UNUSED;
8115     int ignore ATTRIBUTE_UNUSED;
8116{
8117  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8118  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8119  tree arglist = TREE_OPERAND (exp, 1);
8120  enum machine_mode rmode = VOIDmode;
8121
8122  switch (fcode)
8123    {
8124    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8125    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8126      mode = SImode;
8127      rmode = SImode;
8128      break;
8129
8130    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8131    case IA64_BUILTIN_LOCK_RELEASE_SI:
8132    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8133    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8134    case IA64_BUILTIN_FETCH_AND_OR_SI:
8135    case IA64_BUILTIN_FETCH_AND_AND_SI:
8136    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8137    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8138    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8139    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8140    case IA64_BUILTIN_OR_AND_FETCH_SI:
8141    case IA64_BUILTIN_AND_AND_FETCH_SI:
8142    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8143    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8144      mode = SImode;
8145      break;
8146
8147    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8148      mode = DImode;
8149      rmode = SImode;
8150      break;
8151
8152    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8153      mode = DImode;
8154      rmode = DImode;
8155      break;
8156
8157    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8158    case IA64_BUILTIN_LOCK_RELEASE_DI:
8159    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8160    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8161    case IA64_BUILTIN_FETCH_AND_OR_DI:
8162    case IA64_BUILTIN_FETCH_AND_AND_DI:
8163    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8164    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8165    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8166    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8167    case IA64_BUILTIN_OR_AND_FETCH_DI:
8168    case IA64_BUILTIN_AND_AND_FETCH_DI:
8169    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8170    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8171      mode = DImode;
8172      break;
8173
8174    default:
8175      break;
8176    }
8177
8178  switch (fcode)
8179    {
8180    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8181    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8182      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8183					   target);
8184
8185    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8186    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8187      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8188					   target);
8189
8190    case IA64_BUILTIN_SYNCHRONIZE:
8191      emit_insn (gen_mf ());
8192      return const0_rtx;
8193
8194    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8195    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8196      return ia64_expand_lock_test_and_set (mode, arglist, target);
8197
8198    case IA64_BUILTIN_LOCK_RELEASE_SI:
8199    case IA64_BUILTIN_LOCK_RELEASE_DI:
8200      return ia64_expand_lock_release (mode, arglist, target);
8201
8202    case IA64_BUILTIN_BSP:
8203      if (! target || ! register_operand (target, DImode))
8204	target = gen_reg_rtx (DImode);
8205      emit_insn (gen_bsp_value (target));
8206      return target;
8207
8208    case IA64_BUILTIN_FLUSHRS:
8209      emit_insn (gen_flushrs ());
8210      return const0_rtx;
8211
8212    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8213    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8214      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8215
8216    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8217    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8218      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8219
8220    case IA64_BUILTIN_FETCH_AND_OR_SI:
8221    case IA64_BUILTIN_FETCH_AND_OR_DI:
8222      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8223
8224    case IA64_BUILTIN_FETCH_AND_AND_SI:
8225    case IA64_BUILTIN_FETCH_AND_AND_DI:
8226      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8227
8228    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8229    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8230      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8231
8232    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8233    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8234      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8235
8236    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8237    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8238      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8239
8240    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8241    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8242      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8243
8244    case IA64_BUILTIN_OR_AND_FETCH_SI:
8245    case IA64_BUILTIN_OR_AND_FETCH_DI:
8246      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8247
8248    case IA64_BUILTIN_AND_AND_FETCH_SI:
8249    case IA64_BUILTIN_AND_AND_FETCH_DI:
8250      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8251
8252    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8253    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8254      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8255
8256    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8257    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8258      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8259
8260    default:
8261      break;
8262    }
8263
8264  return NULL_RTX;
8265}
8266
8267/* For the HP-UX IA64 aggregate parameters are passed stored in the
8268   most significant bits of the stack slot.  */
8269
8270enum direction
8271ia64_hpux_function_arg_padding (mode, type)
8272     enum machine_mode mode;
8273     tree type;
8274{
8275   /* Exception to normal case for structures/unions/etc.  */
8276
8277   if (type && AGGREGATE_TYPE_P (type)
8278       && int_size_in_bytes (type) < UNITS_PER_WORD)
8279     return upward;
8280
8281   /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8282      hardwired to be true.  */
8283
8284   return((mode == BLKmode
8285       ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8286          && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8287       : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8288      ? downward : upward);
8289}
8290
8291/* Linked list of all external functions that are to be emitted by GCC.
8292   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8293   order to avoid putting out names that are never really used.  */
8294
8295struct extern_func_list
8296{
8297  struct extern_func_list *next; /* next external */
8298  char *name;                    /* name of the external */
8299} *extern_func_head = 0;
8300
8301static void
8302ia64_hpux_add_extern_decl (name)
8303        const char *name;
8304{
8305  struct extern_func_list *p;
8306
8307  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8308  p->name = xmalloc (strlen (name) + 1);
8309  strcpy(p->name, name);
8310  p->next = extern_func_head;
8311  extern_func_head = p;
8312}
8313
8314/* Print out the list of used global functions.  */
8315
8316void
8317ia64_hpux_asm_file_end (file)
8318	FILE *file;
8319{
8320  while (extern_func_head)
8321    {
8322      const char *real_name;
8323      tree decl;
8324
8325      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8326      decl = maybe_get_identifier (real_name);
8327
8328      if (!decl
8329	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8330        {
8331	  if (decl)
8332	    TREE_ASM_WRITTEN (decl) = 1;
8333	  (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
8334	  fprintf (file, "%s", TYPE_ASM_OP);
8335	  assemble_name (file, extern_func_head->name);
8336	  putc (',', file);
8337	  fprintf (file, TYPE_OPERAND_FMT, "function");
8338	  putc ('\n', file);
8339        }
8340      extern_func_head = extern_func_head->next;
8341    }
8342}
8343
8344
8345/* Switch to the section to which we should output X.  The only thing
8346   special we do here is to honor small data.  */
8347
8348static void
8349ia64_select_rtx_section (mode, x, align)
8350     enum machine_mode mode;
8351     rtx x;
8352     unsigned HOST_WIDE_INT align;
8353{
8354  if (GET_MODE_SIZE (mode) > 0
8355      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8356    sdata_section ();
8357  else
8358    default_elf_select_rtx_section (mode, x, align);
8359}
8360
8361/* It is illegal to have relocations in shared segments on AIX and HPUX.
8362   Pretend flag_pic is always set.  */
8363
8364static void
8365ia64_rwreloc_select_section (exp, reloc, align)
8366     tree exp;
8367     int reloc;
8368     unsigned HOST_WIDE_INT align;
8369{
8370  default_elf_select_section_1 (exp, reloc, align, true);
8371}
8372
8373static void
8374ia64_rwreloc_unique_section (decl, reloc)
8375     tree decl;
8376     int reloc;
8377{
8378  default_unique_section_1 (decl, reloc, true);
8379}
8380
8381static void
8382ia64_rwreloc_select_rtx_section (mode, x, align)
8383     enum machine_mode mode;
8384     rtx x;
8385     unsigned HOST_WIDE_INT align;
8386{
8387  int save_pic = flag_pic;
8388  flag_pic = 1;
8389  ia64_select_rtx_section (mode, x, align);
8390  flag_pic = save_pic;
8391}
8392
8393static unsigned int
8394ia64_rwreloc_section_type_flags (decl, name, reloc)
8395     tree decl;
8396     const char *name;
8397     int reloc;
8398{
8399  return default_section_type_flags_1 (decl, name, reloc, true);
8400}
8401
8402
8403/* Output the assembler code for a thunk function.  THUNK_DECL is the
8404   declaration for the thunk function itself, FUNCTION is the decl for
8405   the target function.  DELTA is an immediate constant offset to be
8406   added to THIS.  If VCALL_OFFSET is non-zero, the word at
8407   *(*this + vcall_offset) should be added to THIS.  */
8408
8409static void
8410ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8411     FILE *file;
8412     tree thunk ATTRIBUTE_UNUSED;
8413     HOST_WIDE_INT delta;
8414     HOST_WIDE_INT vcall_offset;
8415     tree function;
8416{
8417  rtx this, insn, funexp;
8418
8419  reload_completed = 1;
8420  no_new_pseudos = 1;
8421
8422  /* Set things up as ia64_expand_prologue might.  */
8423  last_scratch_gr_reg = 15;
8424
8425  memset (&current_frame_info, 0, sizeof (current_frame_info));
8426  current_frame_info.spill_cfa_off = -16;
8427  current_frame_info.n_input_regs = 1;
8428  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8429
8430  if (!TARGET_REG_NAMES)
8431    reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8432
8433  /* Mark the end of the (empty) prologue.  */
8434  emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8435
8436  this = gen_rtx_REG (Pmode, IN_REG (0));
8437  if (TARGET_ILP32)
8438    emit_insn (gen_ptr_extend (this,
8439			       gen_rtx_REG (ptr_mode, IN_REG (0))));
8440
8441  /* Apply the constant offset, if required.  */
8442  if (delta)
8443    {
8444      rtx delta_rtx = GEN_INT (delta);
8445
8446      if (!CONST_OK_FOR_I (delta))
8447	{
8448	  rtx tmp = gen_rtx_REG (Pmode, 2);
8449	  emit_move_insn (tmp, delta_rtx);
8450	  delta_rtx = tmp;
8451	}
8452      emit_insn (gen_adddi3 (this, this, delta_rtx));
8453    }
8454
8455  /* Apply the offset from the vtable, if required.  */
8456  if (vcall_offset)
8457    {
8458      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8459      rtx tmp = gen_rtx_REG (Pmode, 2);
8460
8461      if (TARGET_ILP32)
8462	{
8463	  rtx t = gen_rtx_REG (ptr_mode, 2);
8464	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8465	  emit_insn (gen_ptr_extend (tmp, t));
8466	}
8467      else
8468	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8469
8470      if (!CONST_OK_FOR_J (vcall_offset))
8471	{
8472	  rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8473	  emit_move_insn (tmp2, vcall_offset_rtx);
8474	  vcall_offset_rtx = tmp2;
8475	}
8476      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8477
8478      if (TARGET_ILP32)
8479	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8480			gen_rtx_MEM (ptr_mode, tmp));
8481      else
8482	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8483
8484      emit_insn (gen_adddi3 (this, this, tmp));
8485    }
8486
8487  /* Generate a tail call to the target function.  */
8488  if (! TREE_USED (function))
8489    {
8490      assemble_external (function);
8491      TREE_USED (function) = 1;
8492    }
8493  funexp = XEXP (DECL_RTL (function), 0);
8494  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8495  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8496  insn = get_last_insn ();
8497  SIBLING_CALL_P (insn) = 1;
8498
8499  /* Code generation for calls relies on splitting.  */
8500  reload_completed = 1;
8501  try_split (PATTERN (insn), insn, 0);
8502
8503  emit_barrier ();
8504
8505  /* Run just enough of rest_of_compilation to get the insns emitted.
8506     There's not really enough bulk here to make other passes such as
8507     instruction scheduling worth while.  Note that use_thunk calls
8508     assemble_start_function and assemble_end_function.  */
8509
8510  insn = get_insns ();
8511  emit_all_insn_group_barriers (NULL, insn);
8512  shorten_branches (insn);
8513  final_start_function (insn, file, 1);
8514  final (insn, file, 1, 0);
8515  final_end_function ();
8516
8517  reload_completed = 0;
8518  no_new_pseudos = 0;
8519}
8520
8521#include "gt-ia64.h"
8522